OK, one last shot at comparing mutating versus non-mutating functions to merge array and array slice arguments. I modified the mutating code again. The two types of functions should be on an equal footing. New benchmarks below the code. The impact of pointers to avoid using .StartIndex in the non-mutating code can be seen. Also it can be seen that the mutating code is still about 2x slower than the non-mutating code (apples to apples with no pointers). I have seen a website that claimed hat Swift functions that involved "self" would always be slower than functions that do not but I can't find it presently.
So this leads to a couple of questions:
- Is there a way to avoid the penalty of using .StartIndex as required by array slice. Another poster suggested use of index(offset by:). I could not see how to use it.
The use of pointers that avoids .StartIndex offsetting makes a big difference in performance.
- Do functions that use "self" really have this big of a performance impact?
// Returns a tuple containing the high and low components to UInt addition
@inline(__always)
func addingFullWidth(_ x: UInt,_ y: UInt) -> (high: UInt, low: UInt) {
let sum = x.addingReportingOverflow(y)
return (sum.overflow ? 1 : 0, sum.partialValue)
}
// Returns the high and low parts of two seqeuential potentially overflowing additions
@inline(__always)
func addingFullWidth(_ x: UInt, _ y: UInt, _ z: UInt) -> (high: UInt, low: UInt) {
let xy = x.addingReportingOverflow(y)
let xyz = xy.partialValue.addingReportingOverflow(z)
let high:UInt = (xy.overflow ? 1 : 0) + (xyz.overflow ? 1 : 0)
return (high, xyz.partialValue)
}
extension RandomAccessCollection
where Self: MutableCollection,
Self: RangeReplaceableCollection,
Element == UInt,
Index == Int {
// in-place add rhs to self
mutating func Add<R: RandomAccessCollection> (_ rhs: R, _ uselastcarry:Bool = true)
where R.Element == UInt, R.Index == Int {
if rhs.count == 0 || (rhs.count == 1 && rhs.startIndex == 0) {
return
}
let (sc, rc) = (self.count, rhs.count)
let (si, ri) = (self.startIndex, rhs.startIndex)
let minc = Swift.min(sc, rc)
var rIndex = 0
var carry:UInt = 0
if sc >= rc {
while rIndex < minc {
(carry, self[rIndex + si]) = addingFullWidth(self[rIndex + si], rhs[rIndex + ri], carry)
rIndex += 1
}
if sc == rc {
if carry > 0 && uselastcarry { self.append(1)}
return
}
while carry > 0 && rIndex < rc {
(carry, self[rIndex + si]) = addingFullWidth(self[rIndex + si], carry)
rIndex += 1
}
if carry > 0 && uselastcarry { self.append(1)}
return
} else {
while rIndex < minc {
(carry, self[rIndex + si]) = addingFullWidth(self[rIndex + si], rhs[rIndex + ri], carry)
rIndex += 1
}
self = self + rhs[minc...]
while carry > 0 && rIndex < rc {
(carry, self[rIndex + si]) = addingFullWidth(self[rIndex + si], carry)
rIndex += 1
}
if carry > 0 && uselastcarry { self.append(1)}
return
}
}
}
// add two [UInt] arrays with pointers
func array64AddP<C1: RandomAccessCollection, C2: RandomAccessCollection>(_ x: C1,_ xPtr: UnsafeMutablePointer<UInt>,_ y: C2,_ yPtr: UnsafeMutablePointer<UInt>,_ uselastcarry:Bool = true) -> [UInt]
where C1.Element == UInt, C1.Index == Int, C2.Element == UInt, C2.Index == Int {
if x.count == 0 || (x.count == 1 && x.startIndex == 0) { return Array(y)}
if y.count == 0 || (y.count == 1 && y.startIndex == 0) { return Array(x)}
let (xc, yc) = (x.count, y.count)
let minc = Swift.min(xc, yc)
var rIndex = 0
var result:[UInt]
var carry:UInt = 0
if xc >= yc {
result = Array(x)
while rIndex < minc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], yPtr[rIndex], carry)
rIndex += 1
}
if xc == yc {
if carry > 0 && uselastcarry { result.append(1)}
return result
}
while carry > 0 && rIndex < xc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], carry)
rIndex += 1
}
if carry > 0 && uselastcarry { result.append(1)}
} else {
result = Array(y)
while rIndex < minc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], xPtr[rIndex], carry)
rIndex += 1
}
while carry > 0 && rIndex < yc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], carry)
rIndex += 1
}
if carry > 0 && uselastcarry { result.append(1)}
}
return result
}
// add two [UInt] arrays without pointers
func array64Add<C1: RandomAccessCollection, C2: RandomAccessCollection>(_ x: C1,_ y: C2,_ uselastcarry:Bool = true) -> [UInt]
where C1.Element == UInt, C1.Index == Int, C2.Element == UInt, C2.Index == Int {
if x.count == 0 || (x.count == 1 && x.startIndex == 0) { return Array(y)}
if y.count == 0 || (y.count == 1 && y.startIndex == 0) { return Array(x)}
let (xc, yc) = (x.count, y.count)
let minc = Swift.min(xc, yc)
let (xi, yi) = (x.startIndex, y.startIndex)
var rIndex = 0
var result:[UInt]
var carry:UInt = 0
if xc >= yc {
result = Array(x)
while rIndex < minc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], y[rIndex + yi], carry)
rIndex += 1
}
if xc == yc {
if carry > 0 && uselastcarry { result.append(1)}
return result
}
while carry > 0 && rIndex < xc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], carry)
rIndex += 1
}
if carry > 0 && uselastcarry { result.append(1)}
} else {
result = Array(y)
while rIndex < minc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], x[rIndex + xi], carry)
rIndex += 1
}
while carry > 0 && rIndex < yc {
(carry, result[rIndex]) = addingFullWidth(result[rIndex], carry)
rIndex += 1
}
if carry > 0 && uselastcarry { result.append(1)}
}
return result
}
public func benchmarkTime(_ iter: Int, f:()->()) -> Double {
f() // warmup
let startTime = CFAbsoluteTimeGetCurrent()
for _ in 0..<iter {f()}
let elapsedTime = CFAbsoluteTimeGetCurrent() - startTime
return elapsedTime
}
public func benchmark(title:String,_ iter: Int, f:()->()) {
let averageTime = benchmarkTime(iter, f:f) / Double(iter)
if averageTime >= 1.0 {
print("\(title): \(String(averageTime)) seconds")
} else if averageTime >= 0.001 {
print("\(title): \(String(averageTime * 1000.0)) milliseconds")
} else {
print("\(title): \(String(averageTime * 1000000.0)) microseconds")
}
}
for wordLength in [4, 10, 1000, 100000, 1000000] {
print("wordLength =", wordLength)
var u1:[UInt] = Array(repeating: UInt.max, count: wordLength)
var u2 = u1[...]
benchmark(title: "non-mutating add using pointers", 100) {
let u3 = array64AddP(u1, &u1[u1.startIndex], u2, &u2[u2.startIndex])
}
benchmark(title: "non-mutating add reversed using pointers", 100) {
let u3 = array64AddP(u2, &u2[u2.startIndex], u1, &u1[u1.startIndex])
}
benchmark(title: "non-mutating add", 100) {
let u3 = array64Add(u1, u2)
}
benchmark(title: "non-mutating add reversed", 100) {
let u3 = array64Add(u2, u1)
}
u1 = Array(repeating: UInt.max, count: wordLength)
u2 = u1[...]
benchmark(title: "mutating add", 100) {
u1.Add(u2)
}
u1 = Array(repeating: UInt.max, count: wordLength)
u2 = u1[...]
benchmark(title: "mutating add reversed", 100) {
u2.Add(u1)
}
print(" ")
}
wordLength = 4
non-mutating add using pointers: 0.8702278137207031 microseconds
non-mutating add reversed using pointers: 1.150369644165039 microseconds
non-mutating add: 1.6701221466064453 microseconds
non-mutating add reversed: 1.64031982421875 microseconds
mutating add: 2.0599365234375 microseconds
mutating add reversed: 1.9800662994384766 microseconds
wordLength = 10
non-mutating add using pointers: 1.1491775512695312 microseconds
non-mutating add reversed using pointers: 1.3506412506103516 microseconds
non-mutating add: 2.930164337158203 microseconds
non-mutating add reversed: 2.830028533935547 microseconds
mutating add: 4.35948371887207 microseconds
mutating add reversed: 4.31060791015625 microseconds
wordLength = 1000
non-mutating add using pointers: 5.700588226318359 microseconds
non-mutating add reversed using pointers: 5.890130996704102 microseconds
non-mutating add: 192.7196979522705 microseconds
non-mutating add reversed: 231.98962211608887 microseconds
mutating add: 448.7001895904541 microseconds
mutating add reversed: 436.0198974609375 microseconds
wordLength = 100000
non-mutating add using pointers: 602.8008460998535 microseconds
non-mutating add reversed using pointers: 559.7198009490967 microseconds
non-mutating add: 19.931190013885498 milliseconds
non-mutating add reversed: 21.635509729385376 milliseconds
mutating add: 43.83170962333679 milliseconds
mutating add reversed: 42.75159001350403 milliseconds
wordLength = 1000000
non-mutating add using pointers: 7.689861059188843 milliseconds
non-mutating add reversed using pointers: 7.640690803527832 milliseconds
non-mutating add: 209.53832030296326 milliseconds
non-mutating add reversed: 217.8787100315094 milliseconds
mutating add: 443.29445004463196 milliseconds
mutating add reversed: 460.3936803340912 milliseconds