Assigning a dictionary without re-hashing

Out of curiosity, I did some microbenchmarking of my own.

Click to see benchmark sources
extension Optional {
  mutating func setIfNil(_ newValue: () -> Wrapped) -> Wrapped {
    if let v = self { return v }
    let v = newValue()
    self = v
    return v
  }
}

inline(__always)
func modify<T>(_ value: inout T) -> T { value }

extension Dictionary {
  mutating func memoizedValue0(
    forKey key: Key,
    generator: (Key) -> Value
  ) -> Value {
    if let value = self[key] { return value }
    let value = generator(key)
    self[key] = value
    return value
  }

  mutating func memoizedValue1(
    forKey key: Key,
    generator: (Key) -> Value
  ) -> Value {
    self[key].setIfNil { generator(key) }
  }

  mutating func memoizedValue2(
    forKey key: Key,
    generator: (Key) -> Value
  ) -> Value {
    modify(&self[key, default: generator(key)])
  }
}

var benchmark = Benchmark(title: "Collection Benchmarks")
defer { benchmark.main() }

benchmark.add(
  title: "Dictionary<Int, Int> memoization - 2-phase",
  input: [Int].self
) { input in
  return { timer in
    var d: [Int: Int] = [:]
    timer.measure {
      for i in input {
        let v = d.memoizedValue0(forKey: i, generator: { 2 * $0 })
        precondition(v == 2 * i)
      }
    }
    precondition(d.count == input.count)
    blackHole(d)
  }
}

benchmark.add(
  title: "Dictionary<Int, Int> memoization - regular subscript",
  input: [Int].self
) { input in
  return { timer in
    var d: [Int: Int] = [:]
    timer.measure {
      for i in input {
        let v = d.memoizedValue1(forKey: i, generator: { 2 * $0 })
        precondition(v == 2 * i)
      }
    }
    precondition(d.count == input.count)
    blackHole(d)
  }
}

benchmark.add(
  title: "Dictionary<Int, Int> memoization - defaulted subscript",
  input: [Int].self
) { input in
  return { timer in
    var d: [Int: Int] = [:]
    timer.measure {
      for i in input {
        let v = d.memoizedValue2(forKey: i, generator: { 2 * $0 })
        precondition(v == 2 * i)
      }
    }
    precondition(d.count == input.count)
    blackHole(d)
  }
}

@lukasa's approach using the regular subscript's _modify proved measurably faster than the two-phase variant. Surprisingly, using the defaulted subscript turned out to be 2x slower than the regular subscript on this microbenchmark. The defaulted subscript is significantly simpler than the regular one, so I'd have expected the opposite! This is a performance anomaly we'll need to look into.

As a Swift user, I wouldn't draw too many conclusions from these results -- in these microbenchmarks, the cached function is a trivial 2 * $0 closure, so these are dominated by hash table operations. In actual use cases, the generator function will likely be much slower, and there will be no measurable difference between the three variants.

7 Likes