I have some initial benchmarking for mutating vs non-mutating versions. Code is below:
// Returns a tuple containing the high and low components to UInt addition
@inline(__always)
func addingFullWidth(_ x: UInt,_ y: UInt) -> (high: UInt, low: UInt) {
let sum = x.addingReportingOverflow(y)
return (sum.overflow ? 1 : 0, sum.partialValue)
}
extension RandomAccessCollection
where Self: MutableCollection,
Self: RangeReplaceableCollection,
Element == UInt,
Index == Int {
// in-place add rhs to self
mutating func Add<R: RandomAccessCollection> (_ rhs: R,_ uselastcarry:Bool = true)
where R.Element == UInt, R.Index == Int {
if self.count == 0 || (self.count == 1 && self.startIndex == 0) {
self = Self(Array(rhs))
return
}
if rhs.count == 0 || (rhs.count == 1 && rhs.startIndex == 0) {
self = Self(Array(self))
return
}
let (sc, rc) = (self.count, rhs.count)
let (si, ri) = (self.startIndex, rhs.startIndex)
let minc = Swift.min(sc, rc)
var rIndex = 0
var carry:UInt = 0
if sc >= rc {
while rIndex < minc {
(carry, self[rIndex + si]) = addingFullWidth(self[rIndex + si], rhs[rIndex + ri], carry)
rIndex += 1
}
if sc == rc {
if carry > 0 && uselastcarry { self.append(1)}
return
}
while carry > 0 && rIndex < rc {
(carry, self[rIndex + si]) = addingFullWidth(self[rIndex + si], carry)
rIndex += 1
}
if carry > 0 && uselastcarry { self.append(1)}
self = Self(Array(self))
return
} else {
var result = Array(rhs)
while rIndex < minc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], self[rIndex + si], carry)
rIndex += 1
}
while carry > 0 && rIndex < rc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], carry)
rIndex += 1
}
if carry > 0 && uselastcarry { result.append(1)}
self = Self(result)
return
}
}
}
// add two [UInt] arrays
func array64Add<C1: RandomAccessCollection, C2: RandomAccessCollection>(_ x: C1,_ xPtr: UnsafeMutablePointer<UInt>,_ y: C2,_ yPtr: UnsafeMutablePointer<UInt>,_ uselastcarry:Bool = true) -> [UInt]
where C1.Element == UInt, C1.Index == Int, C2.Element == UInt, C2.Index == Int {
if x.count == 0 || (x.count == 1 && x.startIndex == 0) { return Array(y)}
if y.count == 0 || (y.count == 1 && y.startIndex == 0) { return Array(x)}
let (xc, yc) = (x.count, y.count)
let minc = Swift.min(xc, yc)
var rIndex = 0
var result:[UInt]
var carry:UInt = 0
if xc >= yc {
result = Array(x)
while rIndex < minc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], yPtr[rIndex], carry)
rIndex += 1
}
if xc == yc {
if carry > 0 && uselastcarry { result.append(1)}
return result
}
while carry > 0 && rIndex < xc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], carry)
rIndex += 1
}
if carry > 0 && uselastcarry { result.append(1)}
} else {
result = Array(y)
while rIndex < minc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], xPtr[rIndex], carry)
rIndex += 1
}
while carry > 0 && rIndex < yc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], carry)
rIndex += 1
}
if carry > 0 && uselastcarry { result.append(1)}
}
return result
}
public func benchmarkTime(_ iter: Int, f:()->()) -> Double {
f() // warmup
let startTime = CFAbsoluteTimeGetCurrent()
for _ in 0..<iter {f()}
let elapsedTime = CFAbsoluteTimeGetCurrent() - startTime
return elapsedTime
}
public func benchmark(title:String,_ iter: Int, f:()->()) {
let averageTime = benchmarkTime(iter, f:f) / Double(iter)
if averageTime >= 1.0 {
print("\(title): \(String(averageTime)) seconds")
} else if averageTime >= 0.001 {
print("\(title): \(String(averageTime * 1000.0)) milliseconds")
} else {
print("\(title): \(String(averageTime * 1000000.0)) microseconds")
}
}
for wordLength in [10, 1000, 100000, 1000000] {
print("wordLength =", wordLength)
var u1:[UInt] = Array(repeating: UInt.max, count: wordLength)
var u2 = u1[...]
benchmark(title: "nun-mutating add", 100) {
let u3 = array64Add(u1, &u1[u1.startIndex], u2, &u2[u2.startIndex])
}
benchmark(title: "nun-mutating add reversed", 100) {
let u3 = array64Add(u2, &u2[u2.startIndex], u1, &u1[u1.startIndex])
}
u1 = Array(repeating: UInt.max, count: wordLength)
u2 = u1[...]
benchmark(title: "mutating add", 100) {
u1.Add(u2)
}
u1 = Array(repeating: UInt.max, count: wordLength)
u2 = u1[...]
benchmark(title: "mutating add reversed", 100) {
u2.Add(u1)
}
print(" ")
}
Benchmarks performed on 2013 MacBook pro with code in release mode with safety checks disabled and optimized for speed. Results below:
wordLength = 10
nun-mutating add: 0.9202957153320312 microseconds
nun-mutating add reversed: 1.0395050048828125 microseconds
mutating add: 4.9495697021484375 microseconds
mutating add reversed: 4.409551620483398 microseconds
wordLength = 1000
nun-mutating add: 6.110668182373047 microseconds
nun-mutating add reversed: 5.890130996704102 microseconds
mutating add: 444.4706439971924 microseconds
mutating add reversed: 441.7300224304199 microseconds
wordLength = 100000
nun-mutating add: 557.7099323272705 microseconds
nun-mutating add reversed: 562.0193481445312 microseconds
mutating add: 40.66767930984497 milliseconds
mutating add reversed: 39.13396954536438 milliseconds
wordLength = 1000000
nun-mutating add: 7.4018096923828125 milliseconds
nun-mutating add reversed: 7.455489635467529 milliseconds
mutating add: 411.7432498931885 milliseconds
mutating add reversed: 409.24807071685
Quite a dramatic difference. I thought it would be much closer.