Hello,
I've noticed a significant performance disparity between a function that produces a 4x4 rotation matrix in Swift, compared to its Obj-C equivalent.
To create 460800 matrices (which simulates a real use case where every frame needs to create 256 matrices and run for 30 seconds), the Swift version spends 75 ms of total CPU time. Its Obj-C equivalent spends 8 ms of total CPU time. (Measured in release mode with time profiler)
Of the 75 ms in Swift, 49 ms is spent in simd_float4x4.init(rows:), and 21 ms is spent in specialized ViewController.rotationMatrix(radians:axis:).
Is there anything in this implementation that seems like an obvious issue? If not, is this expected to improve?
Thanks for taking the time :)
Swift:
override func viewDidLoad() {
super.viewDidLoad()
for _ in 0..<460800 {
rotationMatrix(radians: 1, axis: .init(0,1,0))
}
}
func rotationMatrix(radians: Float, axis: SIMD3<Float>) -> simd_float4x4 {
let normalizedAxis = simd_normalize(axis)
let ct = cosf(radians)
let st = sinf(radians)
let ci = 1 - ct
let x = normalizedAxis.x
let y = normalizedAxis.y
let z = normalizedAxis.z
let row1 = SIMD4<Float>(ct + x * x * ci, x * y * ci - z * st, x * z * ci + y * st, 0)
let row2 = SIMD4<Float>(y * x * ci + z * st, ct + y * y * ci, y * z * ci - x * st, 0)
let row3 = SIMD4<Float>(z * x * ci - y * st, z * y * ci + x * st, ct + z * z * ci, 0)
let row4 = SIMD4<Float>(0, 0, 0, 1)
return .init(rows: [row1, row2, row3, row4])
}
Obj-C:
- (void)viewDidLoad {
[super viewDidLoad];
// Do any additional setup after loading the view.
for(int i = 0; i < 460800; i++) {
matrix4x4_rotation(1, vector_make(0, 1, 0));
}
}
matrix_float4x4 matrix4x4_rotation(float radians, vector_float3 axis) {
axis = vector_normalize(axis);
float ct = cosf(radians);
float st = sinf(radians);
float ci = 1 - ct;
float x = axis.x, y = axis.y, z = axis.z;
return matrix_make_rows(
ct + x * x * ci, x * y * ci - z * st, x * z * ci + y * st, 0,
y * x * ci + z * st, ct + y * y * ci, y * z * ci - x * st, 0,
z * x * ci - y * st, z * y * ci + x * st, ct + z * z * ci, 0,
0, 0, 0, 1);
}
matrix_float4x4 matrix_make_rows(
float m00, float m10, float m20, float m30,
float m01, float m11, float m21, float m31,
float m02, float m12, float m22, float m32,
float m03, float m13, float m23, float m33) {
return (matrix_float4x4){ {
{ m00, m01, m02, m03 }, // each line here provides column data
{ m10, m11, m12, m13 },
{ m20, m21, m22, m23 },
{ m30, m31, m32, m33 } } };
}
vector_float3 vector_make(float x, float y, float z) {
return (vector_float3){ x, y, z };
}