This is a continuation of making a lazy version of Collection.split.
I started with an Iterator that wraps another, and takes in the other parameters of split, then vends each non-separator element paired with an index for the sub-sequence it would go into. There are also marker values for empty sub-sequences (still with the index).
enum LazySplitElement<Element> {
case empty(rank: Int)
case element(rank: Int, value: Element)
var rank: Int {
switch self {
case .empty(let r):
return r
case .element(let r, _):
return r
}
}
var value: Element? {
switch self {
case .empty:
return nil
case .element(_, let v):
return v
}
}
}
enum LazySplitCoreProgress<Element> {
case untraversed
case running(last: Element)
case finished
}
struct LazySplitIndicatingIterator<Base: IteratorProtocol> {
var base: Base
var splitCounter: Int
let omitEmptySubsequences: Bool
let isSeparator: (Base.Element) -> Bool
var progress: LazySplitCoreProgress<Base.Element> = .untraversed
init(_ base: Base, maxSplits: Int = 9, omittingEmptySubsequences: Bool = true, whereSeparator isSeparator: @escaping (Base.Element) -> Bool) {
precondition(maxSplits >= 0)
self.base = base
splitCounter = maxSplits
omitEmptySubsequences = omittingEmptySubsequences
self.isSeparator = isSeparator
}
}
extension LazySplitIndicatingIterator: IteratorProtocol {
mutating func next() -> LazySplitElement<Base.Element>? {
while splitCounter > 0 {
guard let latest = base.next() else {
defer { progress = .finished }
switch (omitEmptySubsequences, progress) {
case (false, .running(let last)) where isSeparator(last):
fallthrough
case (false, .untraversed):
defer { splitCounter -= 1 }
return .empty(rank: splitCounter)
default:
return nil
}
}
defer { progress = .running(last: latest) }
switch progress {
case .untraversed:
if isSeparator(latest) {
if omitEmptySubsequences {
continue
} else {
defer { splitCounter -= 1 }
return .empty(rank: splitCounter)
}
} else {
return .element(rank: splitCounter, value: latest)
}
case .running(let last):
switch (omitEmptySubsequences, isSeparator(last), isSeparator(latest)) {
case (_, _, false):
return .element(rank: splitCounter, value: latest)
case (_, false, true):
splitCounter -= 1
fallthrough
case (true, true, true):
continue
case (false, true, true):
defer { splitCounter -= 1 }
return .empty(rank: splitCounter)
}
case .finished:
preconditionFailure("A wrapping iterator should not be finished before its base")
}
}
return base.next().map { .element(rank: splitCounter, value: $0) }
}
}
Next was an iterator, and wrapping sequence, that lazily visited each split sub-sequence, but generated each sub-sequence eagerly as an Array.
public struct SlightlyLazySplitIterator<Base: IteratorProtocol> {
var indicators: LazySplitIndicatingIterator<Base>
var previous: LazySplitElement<Base.Element>? = nil
init(_ base: Base, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true, whereSeparator isSeparator: @escaping (Base.Element) -> Bool) {
// For some reason this couldn't work without either "<Base>" or ".init" after the type name.
indicators = LazySplitIndicatingIterator<Base>(base, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences, whereSeparator: isSeparator)
}
}
extension SlightlyLazySplitIterator: IteratorProtocol {
mutating public func next() -> [Base.Element]? {
if previous == nil {
previous = indicators.next()
}
guard let last = previous else { return nil }
previous = nil
var elements = [last]
let elementRank = last.rank
while let latest = indicators.next() {
if elementRank == latest.rank {
elements.append(latest)
} else {
previous = latest
break
}
}
switch elements.first! {
case .empty:
precondition(elements.count == 1)
return []
case .element:
return elements.map { $0.value! }
}
}
}
public struct SlightlyLazySplitSequence<Base: Sequence> {
var base: Base
let maxSplitCount: Int
let omitEmptySubsequences: Bool
let isSeparator: (Base.Element) -> Bool
init(_ base: Base, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true, whereSeparator isSeparator: @escaping (Base.Element) -> Bool) {
precondition(maxSplits >= 0)
self.base = base
maxSplitCount = maxSplits
omitEmptySubsequences = omittingEmptySubsequences
self.isSeparator = isSeparator
}
}
extension SlightlyLazySplitSequence: Sequence {
public func makeIterator() -> SlightlyLazySplitIterator<Base.Iterator> {
return SlightlyLazySplitIterator(base.makeIterator(), maxSplits: maxSplitCount, omittingEmptySubsequences: omitEmptySubsequences, whereSeparator: isSeparator)
}
}
extension SlightlyLazySplitSequence: LazySequenceProtocol {}
extension LazySequenceProtocol {
public func ssplit(maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true, whereSeparator matches: @escaping (Element) -> Bool) -> SlightlyLazySplitSequence<Elements> {
return SlightlyLazySplitSequence(elements, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences, whereSeparator: matches)
}
}
extension LazySequenceProtocol where Element: Equatable {
public func ssplit(separator: Element, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true) -> SlightlyLazySplitSequence<Elements> {
return ssplit(maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences, whereSeparator: { $0 == separator })
}
}
Can anyone explain why the compiler can't implicitly figure out the correct version of LazySplitIndicatingIterator in the init of SlightlyLazySplitIterator?
This version of lazy-split works fine, although taking chunks that are gigabytes long is a risk with eager implementations. So this makes a fully-lazy version useful:
public struct ReallyLazySplitSubsequencesIterator<Base: IteratorProtocol> {
final public class Element {
let rank: Int
unowned let source: Core
init(rank: Int, core: Core) {
self.rank = rank
source = core
}
}
final class Core {
var indicators: LazySplitIndicatingIterator<Base>
var currentSubsequence: Element?
var onDeckElement: LazySplitCoreProgress<LazySplitElement<Base.Element>>
init(_ base: Base, maxSplits: Int, omittingEmptySubsequences: Bool, whereSeparator isSeparator: @escaping (Base.Element) -> Bool) {
indicators = LazySplitIndicatingIterator(base, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences, whereSeparator: isSeparator)
currentSubsequence = nil
onDeckElement = .untraversed
}
}
let core: Core
init(_ base: Base, maxSplits: Int, omittingEmptySubsequences: Bool, whereSeparator isSeparator: @escaping (Base.Element) -> Bool) {
core = Core(base, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences, whereSeparator: isSeparator)
}
}
extension ReallyLazySplitSubsequencesIterator: IteratorProtocol {
mutating public func next() -> Element? {
switch core.onDeckElement {
case .untraversed:
precondition(core.currentSubsequence == nil)
if let first = core.indicators.next() {
core.onDeckElement = .running(last: first)
core.currentSubsequence = Element(rank: first.rank, core: core)
} else {
core.onDeckElement = .finished
}
case .running(let current):
if core.currentSubsequence!.rank == current.rank {
while case .some = core.currentSubsequence!.next() {
// Nothing; however, the calls to core.currentSubsequence!.next() change core.onDeckElement.
}
}
switch core.onDeckElement {
case .untraversed:
preconditionFailure("This case should never be re-reached")
case .running(let upcoming):
precondition(core.currentSubsequence!.rank == upcoming.rank + 1)
core.currentSubsequence = Element(rank: upcoming.rank, core: core)
case .finished:
core.currentSubsequence = nil
}
case .finished:
core.currentSubsequence = nil
}
return core.currentSubsequence
}
}
extension ReallyLazySplitSubsequencesIterator.Element: Sequence, IteratorProtocol {
public func next() -> Base.Element? {
switch source.onDeckElement {
case .untraversed:
preconditionFailure("source should have passed this state before this subsequence was vended")
case .running(let upcoming) where upcoming.rank == rank:
precondition(source.currentSubsequence === self)
defer {
if let nextElement = source.indicators.next() {
source.onDeckElement = .running(last: nextElement)
} else {
source.onDeckElement = .finished
}
}
return upcoming.value
case .finished, .running:
// For .running, precondition(upcoming.rank < rank), but we're not bothering to check.
return nil
}
}
}
extension ReallyLazySplitSubsequencesIterator.Element: LazySequenceProtocol {}
public struct ReallyLazySplitSequence<Base: Sequence> {
var base: Base
let maxSplitCount: Int
let omitEmptySubsequences: Bool
let isSeparator: (Base.Element) -> Bool
init(_ base: Base, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true, whereSeparator isSeparator: @escaping (Base.Element) -> Bool) {
precondition(maxSplits >= 0)
self.base = base
maxSplitCount = maxSplits
omitEmptySubsequences = omittingEmptySubsequences
self.isSeparator = isSeparator
}
}
extension ReallyLazySplitSequence: Sequence {
public func makeIterator() -> ReallyLazySplitSubsequencesIterator<Base.Iterator> {
return ReallyLazySplitSubsequencesIterator(base.makeIterator(), maxSplits: maxSplitCount, omittingEmptySubsequences: omitEmptySubsequences, whereSeparator: isSeparator)
}
}
extension ReallyLazySplitSequence: LazySequenceProtocol {}
extension LazySequenceProtocol {
public func hsplit(maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true, whereSeparator matches: @escaping (Element) -> Bool) -> ReallyLazySplitSequence<Elements> {
return ReallyLazySplitSequence(elements, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences, whereSeparator: matches)
}
}
extension LazySequenceProtocol where Element: Equatable {
public func hsplit(separator: Element, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true) -> ReallyLazySplitSequence<Elements> {
return hsplit(maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences, whereSeparator: { $0 == separator })
}
}
This version may need some work. Whenever another sub-sequence needs to be accessed, the previous one needs to be invalidated. For instance:
Array(line.lazy.hsplit(separator: spaceChar, omittingEmptySubsequences: false))
creates an Array of the lazy-proxy sub-sequence type, i.e. an array of dead references! This is proven when
Array(line.lazy.hsplit(separator: spaceChar, omittingEmptySubsequences: false)).map { String($0) }
crashes. However, if I trigger a conversion while doing the lazy traversal:
Array(line.lazy.hsplit(separator: spaceChar, omittingEmptySubsequences: false).map { String($0) })
I get full objects instead of dangling references.
How are lazy sequences of lazy sequences supposed to work; do I have to rework the design to force eagerness in some uses, or should we tell users not to do that?