I'm learning Swift using Advent of Code 2020. From day 23, an ideal data structure is a linked list in an array. But the following code runs really slow with Swift 5.3 in Xcode 12.3 (12C33).
class LLInArray {
var a = Array<Int>()
var ex1: Int = 0
var ex2: Int = 0
var ex3: Int = 0
var head = 0
var tail = 0
init(initial: [Int], size: Int) {
a = Array(repeating: -1, count: size)
var first = true
for i in initial {
if first {
head = i - 1
tail = i - 1
first = false
} else {
a[tail] = i - 1
tail = i - 1
}
}
a[tail] = initial.count
for i in initial.count..<size {
a[i] = i + 1
}
tail = size - 1
a[tail] = head
}
func extract() {
var p = head
var prev = tail
p = a[p]
prev = a[prev]
ex1 = p + 1
p = a[p]
prev = a[prev]
ex2 = p + 1
p = a[p]
prev = a[prev]
ex3 = p + 1
a[head] = a[p]
a[prev] = a[head]
}
func insert(after: Int) {
a[ex3 - 1] = a[after - 1]
a[ex2 - 1] = ex3 - 1
a[ex1 - 1] = ex2 - 1
a[after - 1] = ex1 - 1
}
func rotate() {
head = a[head]
tail = a[tail]
}
func twoAfter1() -> (Int, Int) {
return (a[0] + 1, a[a[0]] + 1)
}
func contains(_ i: Int) -> Bool {
return (i - 1 == ex1) || (i - 1 == ex2) || (i - 1 == ex3)
}
var description: String {
get {
var s = ""
var first = true
var h = self.head
repeat {
if !first {
s += ","
} else {
first = false
}
s += String(h+1)
h = a[h]
} while h != self.head
return s
}
}
var current: Int {
get {
return head + 1
}
}
}
public func day23problem2() {
let cups = LLInArray(initial: [6, 4, 3, 7, 1, 9, 2, 5, 8], size: 1000000)
// print(cups.description)
for _ in 1...10000 /*000*/ {
// print("round \(i)")
cups.extract()
// print("after pickup \(pickUp) is \(cups.description)")
// let current = cups.current
var target:Int = cups.current
repeat {
target = target - 1
if target == 0 {
target = 1000000
}
} while cups.contains(target)
cups.insert(after: target)
cups.rotate()
// print("after round \(i) current \(current) picked up \(pickUp) inserted after \(target) got \(cups.description)")
}
print(cups.twoAfter1())
}
I have been unable to meaningfully simplify the code and still reproduce the behavior.
Profiling in Xcode 12.3 (12C33) with Instruments, I get the following, which looks like a copy on write is being triggered.
Weight Self Weight Symbol Name
9.81 s 100.0% 0 s start
9.81 s 100.0% 0 s main
9.81 s 100.0% 0 s day23problem2()
6.57 s 66.9% 0 s LLInArray.extract()
3.27 s 33.3% 0 s specialized Array.subscript.modify
3.27 s 33.3% 0 s specialized Array._makeMutableAndUnique()
3.27 s 33.3% 1.00 ms specialized Array._createNewBuffer(bufferIsUnique:minimumCapacity:growForAppend:)
3.27 s 33.3% 0 s specialized _ArrayBuffer._copyContents(subRange:initializing:)
3.27 s 33.3% 0 s specialized _ContiguousArrayBuffer._copyContents(subRange:initializing:)
3.27 s 33.3% 0 s specialized UnsafeMutablePointer.initialize(from:count:)
3.27 s 33.3% 3.27 s _platform_memmove$VARIANT$Haswell
1.00 ms 0.0% 1.00 ms swift_retain
3.24 s 32.9% 0 s specialized Array.subscript.modify
3.24 s 32.9% 0 s specialized Array._makeMutableAndUnique()
3.24 s 32.9% 0 s specialized Array._createNewBuffer(bufferIsUnique:minimumCapacity:growForAppend:)
3.24 s 32.9% 0 s specialized _ArrayBuffer._copyContents(subRange:initializing:)
3.24 s 32.9% 0 s specialized _ContiguousArrayBuffer._copyContents(subRange:initializing:)
3.24 s 32.9% 0 s specialized UnsafeMutablePointer.initialize(from:count:)
3.24 s 32.9% 3.24 s _platform_memmove$VARIANT$Haswell
54.00 ms 0.5% 0 s _swift_release_dealloc
48.00 ms 0.4% 1.00 ms free_large
47.00 ms 0.4% 47.00 ms madvise
2.00 ms 0.0% 1.00 ms free
1.00 ms 0.0% 1.00 ms szone_size
1.00 ms 0.0% 1.00 ms madvise
1.00 ms 0.0% 1.00 ms large_entry_for_pointer_no_lock
1.00 ms 0.0% 1.00 ms _ContiguousArrayStorage.__deallocating_deinit
1.00 ms 0.0% 0 s swift_deallocClassInstance
1.00 ms 0.0% 0 s objc_destructInstance
1.00 ms 0.0% 1.00 ms _object_remove_assocations
1.00 ms 0.0% 1.00 ms DYLD-STUB$$pthread_getspecific
1.00 ms 0.0% 1.00 ms swift_beginAccess
1.00 ms 0.0% 1.00 ms pthread_getspecific
1.00 ms 0.0% 1.00 ms swift_endAccess
3.24 s 33.0% 0 s LLInArray.insert(after:)
3.21 s 32.6% 0 s specialized Array.subscript.modify
3.21 s 32.6% 0 s specialized Array._makeMutableAndUnique()
3.21 s 32.6% 0 s specialized Array._createNewBuffer(bufferIsUnique:minimumCapacity:growForAppend:)
3.21 s 32.6% 0 s specialized _ArrayBuffer._copyContents(subRange:initializing:)
3.21 s 32.6% 0 s specialized _ContiguousArrayBuffer._copyContents(subRange:initializing:)
3.21 s 32.6% 0 s specialized UnsafeMutablePointer.initialize(from:count:)
3.21 s 32.6% 3.21 s _platform_memmove$VARIANT$Haswell
1.00 ms 0.0% 0 s specialized _ContiguousArrayBuffer.init(_uninitializedCount:minimumCapacity:)
1.00 ms 0.0% 1.00 ms __swift_instantiateConcreteTypeFromMangledName
31.00 ms 0.3% 0 s _swift_release_dealloc
25.00 ms 0.2% 0 s free_large
25.00 ms 0.2% 25.00 ms madvise
2.00 ms 0.0% 0 s swift_deallocClassInstance
2.00 ms 0.0% 0 s objc_destructInstance
1.00 ms 0.0% 1.00 ms free
1.00 ms 0.0% 1.00 ms _object_remove_assocations
2.00 ms 0.0% 1.00 ms free
1.00 ms 0.0% 1.00 ms szone_size
1.00 ms 0.0% 0 s <Unknown Address>
1.00 ms 0.0% 1.00 ms szone_size_try_large
1.00 ms 0.0% 1.00 ms _ContiguousArrayStorage.__deallocating_deinit
3.00 ms 0.0% 2.00 ms swift_endAccess
1.00 ms 0.0% 1.00 ms pthread_getspecific
2.00 ms 0.0% 2.00 ms DYLD-STUB$$pthread_getspecific
1.00 ms 0.0% 0 s print(_:separator:terminator:)
1.00 ms 0.0% 0 s print(_:separator:terminator:)
1.00 ms 0.0% 0 s specialized _print<A>(_:separator:terminator:to:)
1.00 ms 0.0% 0 s _print_unlocked<A, B>(_:_:)
1.00 ms 0.0% 0 s Mirror.init(reflecting:)
1.00 ms 0.0% 0 s __swift_instantiateConcreteTypeFromMangledName
1.00 ms 0.0% 0 s swift_getTypeByMangledNameInContext
1.00 ms 0.0% 0 s swift::swift_getTypeByMangledName(swift::MetadataRequest, llvm::StringRef, void const* const*, std::__1::function<swift::TargetMetadata<swift::InProcess> const* (unsigned int, unsigned int)>, std::__1::function<swift::TargetWitnessTable<swift::InProcess> const* (swift::TargetMetadata<swift::InProcess> const*, unsigned int)>)
1.00 ms 0.0% 0 s swift_getTypeByMangledNameImpl(swift::MetadataRequest, llvm::StringRef, void const* const*, std::__1::function<swift::TargetMetadata<swift::InProcess> const* (unsigned int, unsigned int)>, std::__1::function<swift::TargetWitnessTable<swift::InProcess> const* (swift::TargetMetadata<swift::InProcess> const*, unsigned int)>)
1.00 ms 0.0% 0 s swift::swift_getTypeByMangledNode(swift::MetadataRequest, swift::Demangle::Demangler&, swift::Demangle::Node*, void const* const*, std::__1::function<swift::TargetMetadata<swift::InProcess> const* (unsigned int, unsigned int)>, std::__1::function<swift::TargetWitnessTable<swift::InProcess> const* (swift::TargetMetadata<swift::InProcess> const*, unsigned int)>)
1.00 ms 0.0% 0 s swift_getTypeByMangledNodeImpl(swift::MetadataRequest, swift::Demangle::Demangler&, swift::Demangle::Node*, void const* const*, std::__1::function<swift::TargetMetadata<swift::InProcess> const* (unsigned int, unsigned int)>, std::__1::function<swift::TargetWitnessTable<swift::InProcess> const* (swift::TargetMetadata<swift::InProcess> const*, unsigned int)>)
1.00 ms 0.0% 0 s swift::Demangle::TypeDecoder<(anonymous namespace)::DecodedMetadataBuilder>::decodeMangledType(swift::Demangle::Node*)
1.00 ms 0.0% 0 s (anonymous namespace)::DecodedMetadataBuilder::createBoundGenericType(swift::TargetContextDescriptor<swift::InProcess> const*, llvm::ArrayRef<swift::TargetMetadata<swift::InProcess> const*>, swift::TargetMetadata<swift::InProcess> const*) const
1.00 ms 0.0% 0 s _gatherGenericParameters(swift::TargetContextDescriptor<swift::InProcess> const*, llvm::ArrayRef<swift::TargetMetadata<swift::InProcess> const*>, swift::TargetMetadata<swift::InProcess> const*, llvm::SmallVectorImpl<unsigned int>&, llvm::SmallVectorImpl<void const*>&, swift::Demangle::Demangler&)
1.00 ms 0.0% 0 s swift::_checkGenericRequirements(llvm::ArrayRef<swift::TargetGenericRequirementDescriptor<swift::InProcess> >, llvm::SmallVectorImpl<void const*>&, std::__1::function<swift::TargetMetadata<swift::InProcess> const* (unsigned int, unsigned int)>, std::__1::function<swift::TargetWitnessTable<swift::InProcess> const* (swift::TargetMetadata<swift::InProcess> const*, unsigned int)>)
1.00 ms 0.0% 0 s swift::_conformsToProtocol(swift::OpaqueValue const*, swift::TargetMetadata<swift::InProcess> const*, swift::TargetProtocolDescriptorRef<swift::InProcess>, swift::TargetWitnessTable<swift::InProcess> const**)
1.00 ms 0.0% 0 s swift_conformsToProtocolImpl(swift::TargetMetadata<swift::InProcess> const*, swift::TargetProtocolDescriptor<swift::InProcess> const*)
1.00 ms 0.0% 0 s swift_conformsToSwiftProtocolImpl(swift::TargetMetadata<swift::InProcess> const*, swift::TargetProtocolDescriptor<swift::InProcess> const*, llvm::StringRef)
1.00 ms 0.0% 0 s ConformanceState::cacheSuccess(void const*, swift::TargetProtocolDescriptor<swift::InProcess> const*, swift::TargetProtocolConformanceDescriptor<swift::InProcess> const*)
1.00 ms 0.0% 0 s malloc
1.00 ms 0.0% 0 s malloc_zone_malloc
1.00 ms 0.0% 0 s szone_malloc_should_clear
1.00 ms 0.0% 1.00 ms tiny_malloc_should_clear
1.00 ms 0.0% 0 s <Unknown Address>
1.00 ms 0.0% 1.00 ms swift_bridgeObjectRetain
From the profile, it looks like there are two copy on writes in the extract() function.
A simpler program that allocates an array and has two functions that mutate it called in a loop does not trigger the problem.
class HasArray {
var a = [Int]()
init(size: Int) {
a = [Int](0..<size)
}
func mutate(_ i: Int) {
a[i] = i + 1
}
func mutate2(_ i: Int) {
a[i + 100] = i + 2
}
}
func testHasArray() {
let h = HasArray(size: 1000000)
for i in 1...10000 {
h.mutate(i)
h.mutate2(i)
}
}
Profiling this second program shows that it does not seem to copy the data, the only measurable time is spent in the initializer.
Weight Self Weight Symbol Name
7.00 ms 17.0% 0 s start
7.00 ms 17.0% 0 s main
7.00 ms 17.0% 0 s testHasArray()
7.00 ms 17.0% 0 s HasArray.__allocating_init(size:)
7.00 ms 17.0% 0 s HasArray.init(size:)
7.00 ms 17.0% 0 s specialized Array.init<A>(_:)
7.00 ms 17.0% 0 s specialized protocol witness for Sequence._copyToContiguousArray() in conformance <> Range<A>
7.00 ms 17.0% 0 s specialized Collection._copyToContiguousArray()
7.00 ms 17.0% 0 s specialized _copyCollectionToContiguousArray<A>(_:)
7.00 ms 17.0% 0 s specialized protocol witness for Sequence._copyContents(initializing:) in conformance <> Range<A>
7.00 ms 17.0% 2.00 ms specialized Sequence._copyContents(initializing:)
4.00 ms 9.7% 4.00 ms specialized UnsafeMutablePointer.initialize(to:)
1.00 ms 2.4% 1.00 ms specialized UnsafeMutablePointer.initialize(to:)
Is copy on write getting triggered in the first program? Or is it something else?