I tried some things with the code (since I've ran into similar issues that I never managed to isolate), but the only thing I could find out was that the regression disappears if you annotate the get and set of BitVector
s subscript with @_transparent
. I don't know why this is, but I guess it does demonstrate a bug (or at least an opportunity for improvement) in the optimizer. But it would be nice if someone more skilled could find a smaller reproducing example, cc @Slava_Pestov, @Joe_Groff, @Andrew_Trick.
I'm attaching the code (it's the files of your MWE-branch, joined into one) in the state where I left it after finding this workaround (and after doing some other things that I tested but which made no difference, eg removing all public
, sticking the test code in a func, fixing the warnings that then appeared about changing some vars to lets etc.).
import Dispatch
import QuartzCore
struct BitVector {
private var bits: Array<Int>
private let blockSize = Int.bitWidth
init(repeating: Bool, count: Int) {
let fillInt = repeating ? ~0 : 0
let nBlocks: Int = (count / blockSize) + 1
bits = [Int](repeating: fillInt, count: nBlocks)
}
private func getBlockAndOffset(of bitIndex: Int) -> (Int, Int) {
return (bitIndex / blockSize, bitIndex % blockSize)
}
var startIndex: Int { return 0 }
var endIndex: Int { return bits.count * blockSize }
// NOTE: Adding @_transparent to the get and set of this subscript
// removes the slowdown that is otherwise magically introduced by
// conforming BitVector to RandomAccessCollection (see further down).
// So if you comment out the two @_transparent and keep the conformance
// to RandomAccessCollection, the slowdown will be there again.
subscript(_ bitIndex: Int) -> Bool {
@_transparent
get {
let (block, offset) = getBlockAndOffset(of: bitIndex)
let mask = 1 << offset
return mask & bits[block] != 0
}
@_transparent
set {
let (block, offset) = getBlockAndOffset(of: bitIndex)
let mask = 1 << offset
if newValue {
bits[block] |= mask
} else {
bits[block] &= ~mask
}
}
}
}
struct Edge<T: BinaryInteger> {
let src: T
let dst: T
init(_ s: T, _ d: T) {
src = s
dst = d
}
var ordered: Edge {
if src > dst {
return Edge(dst, src)
} else { return self }
}
var reverse: Edge {
return Edge(dst, src)
}
}
struct Graph<T: BinaryInteger> {
let rowidx: Array<T>
let colptr: Array<Array<T>.Index>
static var eltype: Any.Type { return T.self }
var nv: T { return T((colptr.count - 1)) }
var ne: Int { return rowidx.count / 2 }
var vertices: StrideTo<T> {
return stride(from: T(0), to: nv, by: +1)
}
init(fromBinaryFile fileName: String) {
let file = URL(fileURLWithPath: fileName)
let fileHandle = try! FileHandle(forReadingFrom: file)
let magic = fileHandle.readData(ofLength: 4)
guard magic.elementsEqual("GRPH".utf8) else {
fatalError("\(file) was not a graph file")
}
let colSize = fileHandle.readData(ofLength: MemoryLayout<UInt32>.size).withUnsafeBytes { (ptr: UnsafePointer<UInt32>) -> Int in
return Int(ptr.pointee.bigEndian)
}
colptr = fileHandle.readData(ofLength: MemoryLayout<UInt32>.size * colSize).withUnsafeBytes({ (ptr: UnsafePointer<UInt32>) -> [Int] in
let bufferPointer = UnsafeBufferPointer(start: ptr, count: colSize)
return [Int](bufferPointer.lazy.map { Int($0.bigEndian) })
})
let rowSize = fileHandle.readData(ofLength: MemoryLayout<UInt32>.size).withUnsafeBytes { (ptr: UnsafePointer<UInt32>) -> Int in
return Int(ptr.pointee.bigEndian)
}
rowidx = fileHandle.readData(ofLength: MemoryLayout<UInt32>.size * rowSize).withUnsafeBytes({ (ptr: UnsafePointer<UInt32>) -> [T] in
let bufferPointer = UnsafeBufferPointer(start: ptr, count: rowSize)
return [T](bufferPointer.lazy.map { T($0.bigEndian) })
})
}
private func vecRange(_ s: Array<T>.Index) -> Range<Array<T>.Index> {
let rStart = colptr[s]
let rEnd = colptr[s + 1]
return rStart ..< rEnd
}
func neighbors(of vertex: T) -> ArraySlice<T> {
let range = vecRange(Array<T>.Index(vertex))
return rowidx[range]
}
func BFS(from sourceVertex: Int) -> [T] {
let numVertices = Int(nv)
let maxT = ~T()
var visited = BitVector(repeating: false, count: numVertices)
var vertLevel = Array<T>(repeating: maxT, count: numVertices)
var nLevel: T = 1
var curLevel = [T]()
curLevel.reserveCapacity(numVertices)
var nextLevel = [T]()
nextLevel.reserveCapacity(numVertices)
visited[sourceVertex] = true
vertLevel[sourceVertex] = 0
curLevel.append(T(sourceVertex))
while !curLevel.isEmpty {
for vertex in curLevel {
for neighbor in neighbors(of: vertex) {
if !visited[Int(neighbor)] {
visited[Int(neighbor)] = true
nextLevel.append(neighbor)
vertLevel[Int(neighbor)] = nLevel
}
}
}
nLevel += 1
curLevel.removeAll(keepingCapacity: true)
(curLevel, nextLevel) = (nextLevel, curLevel)
curLevel.sort()
}
return vertLevel
}
}
extension Graph: CustomStringConvertible {
var description: String {
return "{\(nv), \(ne)} Graph"
}
}
// NOTE: This conformance is not necessary for the test to succeed, but
// without the workaround of adding @_transparent to the get and set of
// BitVector's subscript (see above), this conformance will somehow cause
// a performance regression in the below test.
extension BitVector : RandomAccessCollection {}
func test() {
let start = DispatchTime.now()
let h = Graph<UInt32>(fromBinaryFile: "/Users/jens/Downloads/indptrvecs-4m-30m.0based.bin")
let end = DispatchTime.now()
print("graph read took \(Double(end.uptimeNanoseconds - start.uptimeNanoseconds) / 1_000_000.0) ms")
print(h)
var times = [Double]()
for i in 1 ... 40 {
let start = CACurrentMediaTime()
let bfs1 = h.BFS(from: 0)
let end = CACurrentMediaTime()
let timediff = (end - start) * 1000
print("Run \(i): BFS from vertex 0 to \(timediff) ms; sum = \(bfs1.reduce(0, +))")
times.append(timediff)
}
let timeAvg = times.reduce(0, +) / Double(times.count)
let sumOfSquaredAvgDiff = times.map { pow($0 - timeAvg, 2.0) }.reduce(0, +)
let timeStd = sqrt(sumOfSquaredAvgDiff / Double(times.count - 1))
print("Times: min: \(times.min()!), max: \(times.max()!), avg: \(timeAvg), std: \(timeStd)")
}
test()
For anyone wanting to look into this: In order to run the above demo program, you also need the graph data file linked to here.