Exploring this possibility further. Something like this would do:
import Foundation

// UUID has no built-in .zero; define a sentinel to mark the empty state.
extension UUID {
    static let zero = UUID(uuid: (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
}

struct ArrayWrapper<T> {
    var uuid: UUID = .zero
    var array: [T] {
        didSet { changed() }
    }
    // Empty arrays share the .zero sentinel so any two empty wrappers match.
    mutating func changed() {
        uuid = array.isEmpty ? .zero : UUID()
    }
    mutating func append(_ element: T) {
        array.append(element)
    }
    ...
}
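A quick sanity check of the intended behaviour (using the compiler-generated memberwise initialiser):

var a = ArrayWrapper<Int>(array: [])
let before = a.uuid   // .zero while the array is empty
a.append(1)
a.uuid != before      // true: the mutation refreshed the uuid
a.array.removeAll()
a.uuid == .zero       // true: empty arrays share the .zero identifier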
If UUID() is not performant enough to call on every change, then perhaps allocate a few random numbers once and increment them on every mutation:
struct UniqueIdentifier: Equatable {
    var a: Int = .random(in: .min ... .max)
    var b: Int = .random(in: .min ... .max)
    var c: Int = .random(in: .min ... .max)

    mutating func change() {
        a &+= 1
        b &+= 1
        c &+= 1
    }
}
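A sketch of plugging this into the wrapper (ArrayWrapper2 is a hypothetical name; the empty-array sentinel handling from the UUID version is omitted for brevity):

struct ArrayWrapper2<T> {
    var uid = UniqueIdentifier()  // three random seeds, allocated once
    var array: [T] = [] {
        didSet { uid.change() }   // three cheap wrapping increments per mutation
    }
    mutating func append(_ element: T) {
        array.append(element)
    }
}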
The most interesting bit:
extension ArrayWrapper {
    // gives an approximate result in O(1) time
    func mostLikelyEqual(_ other: Self) -> Bool {
        var this = self
        var other = other
        // quick positive check (bitwise compare, effectively comparing the array's buffer reference)
        if memcmp(&this, &other, MemoryLayout<Self>.size) == 0 {
            return true
        }
        // quick positive check (comparing uuids)
        if uuid == other.uuid {
            return true
        }
        // can't get the result quickly
        return false
    }
}
However… this still won't pass the above test:

var users: ArrayWrapper<User> = ...
let oldUsers = users
users.append(user)
users.remove(at: users.count - 1)
let same = users.mostLikelyEqual(oldUsers) // false 🤔
Another, very different approach that would solve more cases (including the one above) is to maintain a quick and reasonably wide hash:
// imagine the built-in Array maintained such a field
struct Array<T> {
    var qhash: (Int, Int, Int, Int)
}
Compared to a normal hash (which takes O(N) to calculate), this quick hash would be maintained incrementally, updated on every change operation. For example, appending an element would take the element's own "quick hash", combine it with the index of the newly inserted element (count - 1 for an append), and merge it into the array's existing quick hash. Inserting an element in the middle of the array would have to recombine the qhash contributions of half of the elements, but in principle that would not increase the time complexity of insertion: it would remain O(N), dominated by the cost of memory I/O. The same idea applies to String, Dictionary, Data, etc.
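One cheap and reversible choice of merge operation is XOR (an assumption on my part; any reversible mix would do). A minimal sketch of the append and remove-last paths, using the hypothetical names QHashedArray and elementHash:

struct QHashedArray<T: Hashable> {
    private(set) var storage: [T] = []
    private(set) var qhash: (Int, Int, Int, Int) = (0, 0, 0, 0)

    // Combines the element's own hash with its index. A real implementation
    // would derive four independent lanes; multiplying one Hasher result by
    // different odd constants is just a placeholder.
    private func elementHash(_ element: T, at index: Int) -> (Int, Int, Int, Int) {
        var hasher = Hasher()
        hasher.combine(element)
        hasher.combine(index)
        let x = hasher.finalize()
        return (x, x &* 31, x &* 37, x &* 41)
    }

    // XOR is its own inverse, so the same call both adds and removes
    // an element's contribution to the quick hash.
    private mutating func merge(_ h: (Int, Int, Int, Int)) {
        qhash = (qhash.0 ^ h.0, qhash.1 ^ h.1, qhash.2 ^ h.2, qhash.3 ^ h.3)
    }

    mutating func append(_ element: T) {
        merge(elementHash(element, at: storage.count)) // new index is count - 1 after the append
        storage.append(element)
    }

    mutating func removeLast() {
        let element = storage.removeLast()
        merge(elementHash(element, at: storage.count)) // cancels the matching append's merge
    }
}

With such a hash in place, the "most likely equal" check would become: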
// O(1) complexity
func mostLikelyEqual(_ other: Self) -> Bool {
    // quick negative check (comparing counts)
    if count != other.count {
        return false
    }
    var this = self
    var other = other
    // quick positive check (comparing references)
    if memcmp(&this, &other, MemoryLayout<Self>.size) == 0 {
        return true
    }
    // quick positive check (comparing qhashes)
    // can result in a false positive due to a hash collision
    return qhash == other.qhash
}
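Unlike the uuid-based wrapper, the reversible merge passes the append-then-remove test from above (using the QHashedArray sketch):

var users = QHashedArray<Int>()
users.append(1)
let oldUsers = users
users.append(2)
users.removeLast()
users.qhash == oldUsers.qhash // true: the removal cancelled the append's contribution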
This would be a probabilistic check with a possibility of false positives due to hash collisions, but with no false negatives (a false result would mean the two values are definitely not equal). To reduce the probability of false positives down to an infinitesimal value, a wide hash could be used, e.g. 256 bits.
Edit: the mostLikelyEqual check could also include a lhs.count == rhs.count check, so that false positives can only happen on hash collisions between collections of the same size (updated above).
Edit 2: for small POD types (e.g. Int, or a struct / tuple of a few small POD values, etc.) there's no need to maintain a stored qhash variable, as it can be calculated on the fly in semi-constant time:
protocol SmallPOD {}

extension SmallPOD {
    func combine(hash: inout (Int, Int, Int, Int), byte: UInt8, index: Int) {
        // ...
    }

    var qhash: (Int, Int, Int, Int) {
        let size = MemoryLayout<Self>.size
        precondition(size < 100) // let's keep this 100*O(1), which is still O(1)
        var hash = (0, 0, 0, 0)
        var this = self
        withUnsafeBytes(of: &this) { buffer in
            for i in 0 ..< size {
                let byte = buffer.baseAddress!.load(fromByteOffset: i, as: UInt8.self)
                combine(hash: &hash, byte: byte, index: i)
            }
        }
        return hash
    }

    func mostLikelyEqual(_ other: Self) -> Bool {
        let size = MemoryLayout<Self>.size
        precondition(size < 100) // let's keep this 100*O(1), which is still O(1)
        var this = self
        var other = other
        return memcmp(&this, &other, size) == 0
    }
}
Floating point values would expose the infamous anomaly: Float.nan would be mostLikelyEqual to another Float.nan, which disagrees with the normal == behaviour.
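For example (a small illustration, assuming Float is conformed to SmallPOD; both Float.nan values here carry the same quiet-NaN bit pattern, so memcmp sees them as identical):

extension Float: SmallPOD {}

let a = Float.nan
let b = Float.nan
a == b               // false: IEEE 754 NaN never equals itself
a.mostLikelyEqual(b) // true: identical bit patterns, so memcmp matches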