I'm building a game in Swift. I've discovered that moving the inner loop of a pair of nested loops over an array into its own method makes the code run nearly 10x faster. Why is this? Is the separated form easier for the optimizer to handle? Or am I misunderstanding the reference-counting overhead here?
I'm on Xcode 12.5, and have also tested on Xcode 12.4. I'm targeting macOS (Intel) with a release build.
I've reproduced the behavior in the sample code below. The Test1 and Test2 classes show the two implementations, with Test2 running nearly 10x faster.
Test1 outputs (elapsed time in seconds): 0.006973981857299805
Test2 outputs (elapsed time in seconds): 0.0007460117340087891
import Foundation
/// An immutable bundle of four integers used as sample data for the loop benchmark.
///
/// Kept as a reference type on purpose: the accompanying question is about
/// reference-counting overhead, so turning this into a `struct` would change
/// what is being measured.
final class Body {
    let value1: Int
    let value2: Int
    let value3: Int
    let value4: Int

    /// Creates a body holding the four given values unchanged.
    init(value1: Int, value2: Int, value3: Int, value4: Int) {
        self.value1 = value1
        self.value2 = value2
        self.value3 = value3
        self.value4 = value4
    }

    /// Builds the sample data set.
    ///
    /// The element at 1-based position `i` holds `i`, `i * 10`, `i * 100` and `i * 1000`.
    ///
    /// - Parameter count: Number of bodies to create. Defaults to 500, the size the
    ///   benchmark uses. A non-positive value yields an empty array.
    /// - Returns: The bodies in ascending order of `i`.
    static func createBodies(count: Int = 500) -> [Body] {
        // `1...count` traps when `count < 1`, so handle that case up front.
        guard count > 0 else { return [] }
        // Fill with some arbitrary values derived from the position.
        return (1...count).map { i in
            Body(value1: i, value2: i * 10, value3: i * 100, value4: i * 1000)
        }
    }
}
/// Benchmark variant in which both loops over `bodies` are written inline in `run()`.
///
/// According to the timings quoted with this sample, this variant ran roughly
/// 10x slower than `Test2`, which computes the same total.
// NOTE(review): the slowdown is presumably retain/release traffic that the optimizer
// does not remove when the stored property is iterated inline — unconfirmed; verify
// against the generated code before relying on this explanation.
final class Test1 {
/// Sample data, filled from `Body.createBodies()` in `init()`.
var bodies: [Body]
init() {
self.bodies = Body.createBodies()
}
/// Returns the sum, over every ordered pair `(body1, body2)` taken from `bodies`
/// (an element is also paired with itself), of the four field-wise products
/// `value1 * value1 + value2 * value2 + value3 * value3 + value4 * value4`.
func run() -> Int {
var total: Int = 0
// Outer and inner loop both iterate the stored property `bodies` directly.
for body1 in bodies {
var innerTotal = 0
for body2 in bodies {
// Arbitrary arithmetic that reads all four fields of both elements.
innerTotal += body1.value1*body2.value1+body1.value2*body2.value2+body1.value3*body2.value3+body1.value4*body2.value4
}
total += innerTotal
}
return total
}
}
/// Benchmark variant in which the inner loop lives in a separate method, `helper`,
/// that receives the array as a parameter.
///
/// According to the timings quoted with this sample, this variant ran nearly
/// 10x faster than `Test1`, which computes the same total with both loops inline.
final class Test2 {
/// Sample data, filled from `Body.createBodies()` in `init()`.
var bodies: [Body]
init() {
self.bodies = Body.createBodies()
}
/// Computes one outer-loop iteration's contribution to the total.
///
/// - Parameters:
///   - body1: The element whose fields are multiplied against every element of `bodies`.
///   - bodies: The array to iterate. Inside this method the name refers to this
///     parameter, not to the stored property of the same name.
/// - Returns: The sum over `body2` in `bodies` of the four field-wise products of
///   `body1` and `body2`.
func helper(body1:Body, bodies: [Body]) -> Int {
var innerTotal: Int = 0
for body2 in bodies {
// Arbitrary arithmetic that reads all four fields of both elements.
innerTotal += body1.value1*body2.value1+body1.value2*body2.value2+body1.value3*body2.value3+body1.value4*body2.value4
}
return innerTotal
}
/// Returns the sum of `helper(body1:bodies:)` over every `body1` in the stored
/// `bodies`, passing the stored array as the helper's `bodies` argument each time.
func run() -> Int {
var total: Int = 0
for body1 in bodies {
total += helper(body1: body1, bodies: bodies)
}
return total
}
}
/// Entry point of the benchmark script.
final class Main {
    /// Runs the selected variant once, then prints the elapsed time in seconds
    /// followed by the computed total.
    ///
    /// Only a single, un-warmed run is timed, so the figure is indicative rather
    /// than statistically robust.
    static func main() {
        let test = Test1() // Change this to Test2 for roughly 10x more performance.

        // Measure with the system uptime rather than the wall clock: the wall clock
        // can jump (time sync, manual change) and is not guaranteed to be monotonic,
        // which could distort or even negate a short interval like this one.
        let startTime = ProcessInfo.processInfo.systemUptime
        let total = test.run()
        let elapsedTime = ProcessInfo.processInfo.systemUptime - startTime

        print("elapsedTime: \(elapsedTime)")
        // Printing the result also keeps the computation from being discarded as unused.
        print("total: \(total)")
    }
}

// Top-level statement: start the benchmark when the script is executed.
Main.main()