Tree Iterator

OK I know trees tend to need recursive solutions and I have certainly been recursing (or is that just cursing?), trying to implement IteratorProtocol in such a way that it is able to enumerate a tree structure (nodes which contain nodes, etc)

Can anyone please point me to a decent implementation?

I have not really checked the implementation myself but this might be a start: https://github.com/attaswift/BTree/blob/master/Sources/BTreeIterator.swift

Thanks for the link Adrian but 1. it's for a b-tree, not a "straightforward" tree and, 2. Wow! it looks complicated and difficult to follow if I have any chance of modifying it for my purposes.

Implementing Sequence and IteratorProtocol is relatively straightforward for "linear" types; it's just trying to work out how to cope with hierarchical structures.

Oh, how I long for something like the C# yield functionality :kissing_heart:

Can you share some of the code of your tree structure? I've done this before and I'd be happy to help, but it's easier if I know what we're working with.

Hi Tim

Here's the tree node:

public class Node<itemT : Equatable> : Equatable, Sequence
{
  internal var parent: Node?
  
  internal lazy var children: [Node] = .init()
  
  internal var data: itemT
  
  internal init(data: itemT, parent: Node?)
  {
    self.data = data
    
    self.parent = parent
    
    if let parent = parent
    {
      parent.children.append(self)
    }
  }
  
  internal var isLeaf: Bool
  {
    return children.count == 0
  }
  
  public static func == (lhs: Node, rhs: Node) -> Bool
  {
    return lhs.data == rhs.data
  }
  
  private lazy var _iterator: NodeIterator<itemT> = .init(rootNode: self)
  
  public func makeIterator() -> NodeIterator<itemT>
  {
    return _iterator
  }
}

And here's my attempt, so far, at an iterator:

public class NodeIterator<itemT: Equatable> : IteratorProtocol
{
  var rootNode: Node<itemT>
  
  var currentChildNode: Node<itemT>?
  
  var hasReturnedRoot: Bool = false
  
  var isReturningChildren: Bool = false
  
  var traversingIterator: IndexingIterator<[Node<itemT>]>?
  
  var childIterator: NodeIterator<itemT>?
  
  internal init(rootNode: Node<itemT>)
  {
    self.rootNode = rootNode
  }
  
  
  public func next() -> Node<itemT>?
  {
    print("in \(rootNode.data) : has returned root \(hasReturnedRoot)")
    
    if !hasReturnedRoot
    {
      hasReturnedRoot = true
      
      return rootNode
    }
    
    if rootNode.isLeaf
    {
      return nil
    }
    
    if traversingIterator == nil
    {
      traversingIterator = rootNode.children.makeIterator()
      
      currentChildNode = traversingIterator?.next()
      
      isReturningChildren = true
    }
    
    if !isReturningChildren
    {
      currentChildNode = traversingIterator?.next()
    }
    
    if currentChildNode == nil
    {
      isReturningChildren = false
      
      return nil
    }
    
    if let currentChildNode = currentChildNode,
       !currentChildNode.isLeaf,
       childIterator == nil
    {
      childIterator = NodeIterator<itemT>(rootNode: currentChildNode)
      
      isReturningChildren = true
    }
    
    if let childNode = childIterator?.next()
    {
      currentChildNode = childNode
      
      return childNode
    }
    
    currentChildNode = nil
    
    childIterator = nil
    
    return nil
  }
}

I'm getting to the stage where things seem to be getting complicated; boolean state flags are one of my pet peeves but, hey, you never know :wink:

Hey Tim

I would be really interested to compare notes on your ideas but, after a few more hours head-scratching, I finally got it all working :yum::sunglasses:

Here's the updated Iterator code and I've even managed a few comments :wink::

public class NodeIterator<itemT: Equatable> : IteratorProtocol
{
  var rootNode: Node<itemT>
  
  var currentChildNode: Node<itemT>?
  
  var hasReturnedRoot: Bool = false
  
  var traversingIterator: IndexingIterator<[Node<itemT>]>?
  
  var currentChildIterator: NodeIterator<itemT>?
  
  internal init(rootNode: Node<itemT>)
  {
    self.rootNode = rootNode
  }
  
  public func next() -> Node<itemT>?
  {
    // first call to next() returns root node
    if !hasReturnedRoot
    {
      hasReturnedRoot = true
      
      return rootNode
    }
    
    // bail out if there are no children
    if rootNode.isLeaf
    {
      return nil
    }
    
    // this iterator's root node has children
    // create an iterator for the children and return the first child
    if traversingIterator == nil
    {
      traversingIterator = rootNode.children.makeIterator()
      
      currentChildNode = traversingIterator?.next()
      
      return currentChildNode
    }
    
    // if child has children, create an iterator for them
    if let currentChildNode = currentChildNode,
       !currentChildNode.isLeaf,
       currentChildIterator == nil
    {
      currentChildIterator = currentChildNode.makeIterator()
      
      // first call to next() returns the rootNode which has
      // already been retrieved at the top of this method
      let _ = currentChildIterator?.next()
    }
    
    // if the iterator on the children returns a child return it
    if let node = currentChildIterator?.next()
    {
      return node
    }
    else
    {
      // reset child iterator for previous child
      // this will be recreated lazily for the new child
      currentChildIterator = nil
      
      // move to next child in children
      currentChildNode = traversingIterator?.next()
      
      return currentChildNode
    }
  }
}

And here is the test code:

    let rootNode = Node(data: "Root", parent: nil)

    for i in 1...2
    {
      let iNode = Node(data: String(i), parent: rootNode)

      for j in 1...2
      {
        let jNode = Node(data: "\(i).\(j)", parent: iNode)

        for k in 1...2
        {
          let _ = Node(data: "\(i).\(j).\(k)", parent: jNode)
        }
      }
    }
    
    for node in rootNode
    {
      print(node.data)
    }

And the results are:

Root
1
1.1
1.1.1
1.1.2
1.2
1.2.1
1.2.2
2
2.1
2.1.1
2.1.2
2.2
2.2.1
2.2.2

Hi Joanna —

I tried implementing the iterator using a stack of node/index pairs—my tree was a simplified version of yours:

class Node<T> {
    var value: T
    var children: [Node]
}

The iterator works like this (it was a bit more complex than I expected):

extension Node: Sequence {
  struct Iterator: IteratorProtocol {
    var nodes: [(Node, Int)]
    
    init(_ root: Node) {
      self.nodes = [(root, 0)]
    }
    
    mutating func next() -> T? {
      // Grab the value to return before mutating the list of nodes
      guard let currentValue = nodes.last?.0.value
        else { return nil }
      
      // Pop any leaf or exhausted nodes
      while let (node, index) = nodes.last, index >= node.children.count {
        nodes.popLast()
      }
      
      // Check to see if we've emptied the entire stack
      guard let (currentNode, i) = nodes.last
        else { return currentValue }
      
      // Increment the index of the current node, then descend one level
      nodes[nodes.count - 1].1 = i + 1
      nodes.append((currentNode.children[i], 0))
      
      return currentValue
    }
  }

  func makeIterator() -> Iterator {
    return Iterator(self)
  }
}
1 Like

I agree with Nate that it's definitely more complex than expected. Coroutines would clean this up a lot!

Here's my version, it's pretty similar:

final class Node<Element> {
    var value: Element
    var children: [Node]
    
    init(value: Element, children: [Node] = []) {
        self.value = value
        self.children = children
    }
}

extension Node: Sequence {
    func makeIterator() -> Iterator {
        return Iterator(root: self)
    }
    
    struct Iterator: IteratorProtocol {
        var nodes: [(index: Int, node: Node)]
        
        init(root: Node) {
            nodes = [(0, root)]
        }
        
        mutating func next() -> Element? {
            defer {
                while let (index, node) = nodes.last, index >= node.children.count {
                    nodes.removeLast()
                }
                
                if let (index, node) = nodes.popLast() {
                    nodes.append((index + 1, node))
                    nodes.append((0, node.children[index]))
                }
            }
            
            return nodes.last?.node.value
        }
    }
}
1 Like

Hi Tim and Nate

Yes, most of the examples I found were written using a stack. However, I set myself the challenge of being different :sunglasses:

After a night's sleep, here's the final (so far) version:

extension Node : Sequence
{
  public class Iterator : IteratorProtocol
  {
    weak var rootNode: Node<itemT>!
    
    var currentChildNode: Node<itemT>?
    
    var hasReturnedRoot: Bool = false
    
    var traversingIterator: IndexingIterator<[Node<itemT>]>?
    
    var currentChildIterator: Iterator?
    
    internal init(rootNode: Node<itemT>)
    {
      self.rootNode = rootNode
    }
    
    public func next() -> Node<itemT>?
    {
      // special case to include the root node
      if !hasReturnedRoot,
         rootNode.parent == nil
      {
        hasReturnedRoot = rootNode.parent == nil
        
        return rootNode
      }
      
      // bail out if there are no children
      if rootNode.isLeaf
      {
        return nil
      }
      
      // this iterator's root node has children
      // create an iterator for the children and return the first child
      if traversingIterator == nil
      {
        traversingIterator = rootNode.children.makeIterator()
        
        currentChildNode = traversingIterator?.next()
        
        return currentChildNode
      }
      
      // if child has children, create an iterator for them
      if let currentChildNode = currentChildNode,
         !currentChildNode.isLeaf,
         currentChildIterator == nil
      {
        currentChildIterator = currentChildNode.makeIterator()
      }
      
      // if the iterator on the children returns a child return it
      if let node = currentChildIterator?.next()
      {
        return node
      }
      else
      {
        // reset child iterator for previous child
        // this will be recreated lazily for the new child
        currentChildIterator = nil
        
        // move to next child in children
        currentChildNode = traversingIterator?.next()
        
        return currentChildNode
      }
    }
  }
  
  public func makeIterator() -> Iterator
  {
    return .init(rootNode: self)
  }
}

The key to avoiding the need for a stack seems to be the use of the iterator from the children array and keeping hold of it to maintain state on where I was in the traversal. I tried to recurse using just the NodeIterator but ended up in all sorts of knots.

What I hadn't realised about implementing Sequence on the tree was that it allowed me to use things like map, compactMap, filter, contains, etc. all for free :heart_eyes:

This means I don't have to take any more effort to implement the search for all nodes that contain a certain value :ok_hand:

    let filteredNodes: [Node<String>] = rootNode.filter
    {
      $0.data.contains("2.2")
    }

… which gives me:

1.2.2
2.2
2.2.1
2.2.2
1 Like

Here's a fun implementation:

extension Node {
    public func makeIterator() -> Iterator { return Iterator.parent(self) }

    public enum Iterator: IteratorProtocol {
        case parent(Node)
        indirect case remainingChildren(Iterator, ArraySlice<Node>)
        case done

        public mutating func next() -> Node? {
            switch self {
            case .parent(let parent):
                if let first = parent.children.first {
                    self = .remainingChildren(first.makeIterator(), parent.children.dropFirst())
                } else {
                    self = .done
                }
                return parent
            case .remainingChildren(var childIterator, let remainingChildren):
                if let nextValue = childIterator.next() {
                    self = .remainingChildren(childIterator, remainingChildren)
                    return nextValue
                } else if let nextChild = remainingChildren.first {
                    childIterator = nextChild.makeIterator()
                    let nextValue = childIterator.next()
                    // Note: nextValue cannot be nil because nextChild != nil && nextChild.makeIterator().next() === nextChild
                    self = .remainingChildren(childIterator, remainingChildren.dropFirst())
                    return nextValue
                } else {
                    self = .done
                    return nil
                }
            case .done: return nil
            }
        }
    }
}

Here's my attempt, in which I try to reduce the number of conditionals and tracking informations. This is the longest Swift code I wrote so far, so it likely far from perfect.

The code have an inconsistent use of Nodule and Node<T> as the first draft returned T instead of Node<T> and I wanted the two versions to have a minimal delta.

I had to create OptionalReference to allow struct NodeIterator to refer to itself.

class OptionalReference<Wrapped> : ExpressibleByNilLiteral
{
    var reference: Wrapped?
    required init(nilLiteral: ()) {
        reference = .none
    }
}

class Node<T>
{
    var children : [Node<T>] = []
    var data: T
    init(_ content : T) { data = content }
}

extension Node : Sequence
{
    func makeIterator() -> NodeIterator<Node<T>,T> {
        return NodeIterator(self)
    }
    func makeHeadlessIterator() -> NodeIterator<Node<T>,T> {
        return NodeIterator(headless:self)
    }
}
struct NodeIterator<Nodule : Node<T>,T> : IteratorProtocol
{
    typealias Element = Node<T>
    
    var root : Node<T>?
    var scrubChildren : IndexingIterator<[Node<T>]>?
    var scanGrandchildren : OptionalReference<NodeIterator<Node<T>,T>> = nil
    
    init(_ base : Nodule) { root = base }
    init(headless base : Node<T>) { scrubChildren = base.children.makeIterator() }
    
    mutating func next() -> Node<T>? {
        if let node = scanGrandchildren.reference?.next()
        {
            return node
        }
        if let node = scrubChildren?.next()
        {
            scanGrandchildren.reference = node.makeHeadlessIterator()
            return node
        }
        if let node = root // Only for top-level root
        {
            scrubChildren = node.children.makeIterator()
            root = nil
            return node
        }
        return nil
    }
}

I avoided the self-referencing problem with the iterator by simply making the iterator a class instead of a struct.