How Much Can the Compiler Optimise This Function?

How much can the compiler optimise the following function, which computes Int-division by subtraction?

// Compute division by subtraction
func div2 (_ u: Int, _ v: Int) -> Int {
    assert (v > 0)
    var q = 0
    var n = u
    while v <= n {
        n -= v
        q += 1
    }
    return q
}

I am asking because the above function seems to perform better, on average, than the following function, which performs Int-division by using the builtin / operator.

// Compute division by `/` operator
func div1 (_ u: Int, _ v: Int) -> Int {
    assert (v > 0)
    let q = u/v
    return q
}
1000000000 / D [i] for D = [1, 10, 100, 1000, 1000000, 1000000000]

M = 1000000 times {div1, div2}, {div2, div1}...
--> div1:  6.7671268916e-08 seconds
--> div2:  6.2785764e-08 seconds
Details
@main
enum Division {
    static func main ()  {
        // Repeat
        let M = 1_000_000
        
        // N / D [i]
        let N = 1_000_000_000
        let D = [1, 10, 100, 1_000, 1_000_000, 1_000_000_000]
        print ("\(N) / D [i]", "for D = \(D)")
        print ()
        
        var dv1: [ContinuousClock.Duration] = []
        var dv2: [ContinuousClock.Duration] = []

        let f_div1 = {(u: Int, v: Int) in
            let d = measure {
                let q = div1 (u, v)
                assert (q == u / v)
            }
            dv1.append(d)
        }
        
        let f_div2 = {(u: Int, v: Int) in
            let d = measure {
                let q = div2 (u, v)
                assert (q == u / v)
            }
            dv2.append(d)
        }
        
        print ("M =", M, "times {div1, div2}, {div2, div1}...")
        for _ in  0..<M {
            for d in D {
                f_div1 (N, d)
                f_div2 (N, d)
            }
            for d in D {
                f_div2 (N, d)
                f_div1 (N, d)
            }
        }
        
        let t1 = (dv1.reduce (.zero) { partialResult, d in
            partialResult + d
        }) / dv1.count
        
        let t2 = (dv2.reduce (.zero) { partialResult, d in
            partialResult + d
        }) / dv2.count
        
        print ("--> div1: ", t1)
        print ("--> div2: ", t2)
    }
}

func measure (f: () -> Void) -> ContinuousClock.Duration {
    let c = ContinuousClock ()
    let d = c.measure {
        f ()
    }
    return d
}

// Compute division by `/` operator
func div1 (_ u: Int, _ v: Int) -> Int {
    assert (v > 0)
    let q = u/v
    return q
}

// Compute division by subtraction
func div2 (_ u: Int, _ v: Int) -> Int {
    assert (v > 0)
    var q = 0
    var n = u
    while v <= n {
        n -= v
        q += 1
    }
    return q
}

The subtraction loop stays a loop, so it should be much slower (Godbolt: Compiler Explorer). I suspect you got those results because optimized builds don't invoke the @autoclosure in Swift.assert(_:), meaning the payload is unused and therefore doesn't have to be computed. Along the same lines, I wager that you'd get similar results if you replaced the payload with nothing and that you'd get results closer to reality if you replaced Swift.assert(_:) with Swift.precondition(_:).

1 Like
  • You’re comparing times at the 60ns range, which is within the realm of natural fluctuation on your computer. In particular, with an operation that short, you could easily be counting the overhead of ContinuousClock.measure as much as the actual operation.

  • Your implementation requires u to be positive as well, so you’re not quite computing the same function.

4 Likes

To be clear, the program would not finish in a day without elision of div2.

Same code but with `precondition` instead of `assert` to avoid elision.
@main
enum Division {
    static func main ()  {
        // Repeat
        let M = 1_000_000
        
        // N / D [i]
        let N = 1_000_000_000
        let D = [1, 10, 100, 1_000, 1_000_000, 1_000_000_000]
        print ("\(N) / D [i]", "for D = \(D)")
        print ()
        
        var dv1: [ContinuousClock.Duration] = []
        var dv2: [ContinuousClock.Duration] = []

        let f_div1 = {(u: Int, v: Int) in
            let d = measure {
                let q = div1 (u, v)
                precondition (q == u / v) // <-------- CHANGED
            }
            dv1.append(d)
        }
        
        let f_div2 = {(u: Int, v: Int) in
            let d = measure {
                let q = div2 (u, v)
                precondition (q == u / v) // <-------- CHANGED
            }
            dv2.append(d)
        }
        
        print ("M =", M, "times {div1, div2}, {div2, div1}...")
        for _ in  0..<M {
            for d in D {
                f_div1 (N, d)
                f_div2 (N, d)
            }
            for d in D {
                f_div2 (N, d)
                f_div1 (N, d)
            }
        }
        
        let t1 = (dv1.reduce (.zero) { partialResult, d in
            partialResult + d
        }) / dv1.count
        
        let t2 = (dv2.reduce (.zero) { partialResult, d in
            partialResult + d
        }) / dv2.count
        
        print ("--> div1: ", t1)
        print ("--> div2: ", t2)
    }
}

func measure (f: () -> Void) -> ContinuousClock.Duration {
    let c = ContinuousClock ()
    let d = c.measure {
        f ()
    }
    return d
}

// Compute division by `/` operator
func div1 (_ u: Int, _ v: Int) -> Int {
    assert (v > 0)
    let q = u/v
    return q
}

// Compute division by subtraction
func div2 (_ u: Int, _ v: Int) -> Int {
    assert (v > 0)
    var q = 0
    var n = u
    while v <= n {
        n -= v
        q += 1
    }
    return q
}
1 Like