RFC: SwiftLexicalLookup

Description:

After working with @Douglas_Gregor on SwiftLexicalLookup since GSoC 2024, the development of the library has arrived at the point where the API is stable, and it’s possible to run the validation of the current implementation for a great amount of integration tests inside the compiler using the automatic validation tool recently added to the compiler. Another milestone was running successful validation for the entire Swift Standard Library compilation process recently.

The library now closely resembles ASTScope behavior with minor deviations rarely popping out, usually due to imperfections in the validation algorithm or intended differences between SwiftLexicalLookup and ASTScope. The library should be mature enough to start implementing functionality around it and, through bug reports, community feedback, and involvement, ensure its continuous improvement.

Link to PR:

Swift Interface:

Initially, I think it would be a good idea to not expose the underlying scope structure as it still might change substantially. That’s why the PR makes public the following: ImplicitDecl, LookupName, LookInMembersScopeSyntax , and LookupResult that form the query result data structure, an extension to SyntaxProtocol that forms an entry point for the API and LookupConfig enables clients to tweak the exact behavior of the query.

/// An entity that is implicitly declared based on the syntactic structure of the program.
public enum ImplicitDecl {
  /// `self` keyword representing object instance.
  /// Could be associated with type declaration, extension,
  /// or closure captures. Introduced at function edge.
  case `self`(DeclSyntaxProtocol)
  /// `Self` keyword representing object type.
  /// Could be associated with type declaration or extension.
  case `Self`(DeclSyntaxProtocol)
  /// `error` value caught by a `catch`
  /// block that does not specify a catch pattern.
  case error(CatchClauseSyntax)
  /// `newValue` available by default inside `set` and `willSet`.
  case newValue(AccessorDeclSyntax)
  /// `oldValue` available by default inside `didSet`.
  case oldValue(AccessorDeclSyntax)

  // ...
}
public enum LookupName {
  /// Identifier associated with the name.
  /// Could be an identifier of a variable, function or closure parameter and more.
  case identifier(IdentifiableSyntax, accessibleAfter: AbsolutePosition?)
  /// Declaration associated with the name.
  /// Could be class, struct, actor, protocol, function and more.
  case declaration(NamedDeclSyntax)
  /// Name introduced implicitly by certain syntax nodes.
  case implicit(ImplicitDecl)
  /// Dollar identifier introduced by a closure without parameters.
  case dollarIdentifier(ClosureExprSyntax, strRepresentation: String)
  /// Represents equivalent names grouped together.
  /// - Important: The array should be non-empty.
  ///
  /// ### Example:
  /// ```swift
  /// switch X {
  /// case .a(let x), .b(let x):
  ///   print(x) // <-- lookup here
  /// }
  /// ```
  /// For lookup at the given position, the result
  /// contains only one name, that represents both `let x` declarations.
  case equivalentNames([LookupName])

  // ...
}
/// Represents result from a specific scope.
public enum LookupResult {
  /// Scope and the names that matched lookup.
  case fromScope(SyntaxProtocol, withNames: [LookupName])
  /// Indicates where to perform member lookup.
  case lookInMembers(LookInMembersScopeSyntax)
  /// Indicates to lookup generic parameters of extended type.
  ///
  /// ### Example
  /// ```swift
  /// extension Foo {
  ///   func bar() {
  ///     let a = A() // <-- lookup here
  ///   }
  /// }
  /// ```
  /// For a lookup started at the marked position, `lookInGenericParametersOfExtendedType`
  /// will be included as one of the results prompting the client
  /// to lookup the generic parameters of of the extended `Foo` type.
  case lookInGenericParametersOfExtendedType(ExtensionDeclSyntax)
  /// Indicates this closure expression could introduce dollar identifiers.
  ///
  /// ### Example
  /// ```swift
  /// func foo() {
  ///   let a = {
  ///     $0 // <-- lookup here
  ///   }
  /// }
  /// ```
  /// When looking up for any identifier at the indicated position,
  /// the result will include `mightIntroduceDollarIdentifiers`
  /// result kind. If it's performed for a dollar identifier, `LookupName.dollarIdentifier`
  /// with the appropriate identifier will be used in the
  /// result associated with the closure expression inside `a`.
  case mightIntroduceDollarIdentifiers(ClosureExprSyntax)

  // ...
}
extension SyntaxProtocol {
  /// Returns all names that `for` refers to at this syntax node.
  /// Optional configuration can be passed as `config` to customize the lookup behavior.
  ///
  /// - Returns: An array of `LookupResult` for `identifier`  at this syntax node,
  /// ordered by visibility. If `identifier` is set to `nil`, returns all available names ordered by visibility.
  /// The order is from the innermost to the outermost scope,
  /// and within each result, names are ordered by their introduction
  /// in the source code.
  ///
  /// Example usage:
  /// ```swift
  /// class C {
  ///   var a = 42
  ///
  ///   func a(a: Int) {
  ///     a // <--- lookup here
  ///
  ///     let a = 0
  ///   }
  ///
  ///   func a() {
  ///     // ...
  ///   }
  /// }
  /// ```
  /// When calling this function on the declaration reference `a` within its name,
  /// the function returns the parameter first, then the identifier of the variable
  /// declaration, followed by the first function name, and then the second function name,
  /// in this exact order. The constant declaration within the function body is omitted
  /// due to the ordering rules that prioritize visibility within the function body.
  public func lookup(
    _ identifier: Identifier?,
    with config: LookupConfig = LookupConfig()
  ) -> [LookupResult] {
    scope?.lookup(identifier, at: self.position, with: config) ?? []
  }
}
public protocol LookInMembersScopeSyntax: SyntaxProtocol {
  /// Position used for member lookup.
  var lookupMembersPosition: AbsolutePosition { get }
}
public struct LookupConfig {
  /// Specifies whether lookup should finish in the closest sequential scope.
  ///
  /// ### Example
  /// ```swift
  /// class X {
  ///   let a = 42
  ///
  ///   func (a: Int) {
  ///     let a = 123
  ///
  ///     a // <-- lookup here
  ///   }
  /// }
  /// ```
  /// When looking up at the specified position with `finishInSequentialScope`
  /// set to `false`, lookup will return declaration from inside function body,
  /// function parameter and the `a` declaration from `class X` member block.
  /// If `finishInSequentialScope` would be set to `false`, the only name
  /// returned by lookup would be the `a` declaration from inside function body.
  public var finishInSequentialScope: Bool
  public var configuredRegions: ConfiguredRegions?

  /// Creates a new lookup configuration.
  ///
  /// - `finishInSequentialScope` - specifies whether lookup should finish
  ///   in the closest sequential scope. `false` by default.
  public init(
    finishInSequentialScope: Bool = false,
    configuredRegions: ConfiguredRegions? = nil
  ) {
    self.finishInSequentialScope = finishInSequentialScope
    self.configuredRegions = configuredRegions
  }
}

I’d also like to start here a discussion on IdentifiableSyntax that’s part of SwiftLexicalLookup and is also included as a public protocol in the PR. Swift-syntax already has NamedDeclSyntax which can be used to refer to nominal type declarations, but I’m not aware of a counterpart for variables, function, closure, generic parameters etc. IdentifiableSyntax is that missing common protocol for those syntax nodes:

public protocol IdentifiableSyntax: SyntaxProtocol {
  var identifier: TokenSyntax { get }
}

extension IdentifierPatternSyntax: IdentifiableSyntax {}

extension ClosureParameterSyntax: IdentifiableSyntax {
  public var identifier: TokenSyntax {
    secondName ?? firstName
  }
}

extension FunctionParameterSyntax: IdentifiableSyntax {
  public var identifier: TokenSyntax {
    secondName ?? firstName
  }
}

extension ClosureShorthandParameterSyntax: IdentifiableSyntax {
  public var identifier: TokenSyntax {
    name
  }
}

// ...

I think this protocol, after a bit of refining/refactoring, could be worth moving to SwiftSyntax. Having a central unified protocol representation for those nodes could be valuable not only for SwiftLexicalLookup, but other clients as well.

Commonality:

The library provides a long list of potential clients, starting with the compiler, through IDEs, and even potentially developers seeking to develop new, more sophisticated macros.

Discoverability:

Any already existing syntax node can be used as an entry point for the API. The method also contains extensive documentation, making it easy to start with.

Not trivially composable:

The only counterpart for SwiftLexicalLookup is ASTScope, which is part of the compiler and not accessible to outside clients. A detached implementation of unqualified lookup would be especially beneficial once the compiler adopts it. Making it the canonical implementation would mean the query is guaranteed to be correct, and so could be all the clients using it.

11 Likes

Hi Jakub,

Thanks for all the work you have done on SwiftLexicalLookup during your GSoC project and afterwards.

Could you include the public declarations that should be exposed in ImplicitDecl etc. in this RFC instead of doing // .... As these members will be part of the public interface, they should be part of the RFC.

Regarding the declarations that are currently in the RFC, I have a few comments:

  • ImplicitDecl.self and ImplicitDecl.Self have protocols as associated values. In the past, we have found that existentials like this can be a significant performance burden and that’s why DeclSyntax exists as a struct. To avoid locking us into a performance problem here, I would prefer if those associated types were DeclSyntax.
  • For the cases in ImplicitDecl, could you add comments what the associated values refer to, especially for self and Self.
  • LookupName.dollarIdentifier: We should not use the abbreviation str here. Also, would it make sense for the second associated value to be Identifier or SyntaxText? We generally don’t use String to refer to code from the syntax tree due to its ability to represent invalid UTF-8.
  • LookupResult: Similar to my comment on ImplicitDecl.self, I am worried about the performance implications of using protocols as associated types here.
  • LookupResult.mightIntroduceDollarIdentifiers: It is unclear to me why we need both this and LookupName.dollarIdentifier.
  • SyntaxProtocol.lookup: The doc comment refers to for as the parameter but it should probably be identifier.

I would like to keep the discussion whether IdentifiableSyntax should be moved to SwiftSyntax separate from this thread. It is extremely unlikely that a client would import SwiftLexcialLookup without importing SwiftSyntax and thus, moving the protocol to SwiftSyntax would have a very low source-breaking impact.

My biggest concern at the moment is that we might need to change the public API in non-trivial ways to optimize performance. Have you performed any performance measurements for SwiftLexcialLookup yet? If not, I am wondering whether we should keep the declarations as SPI until we are not only confident with the correctness of the library but also its performance characteristics.

Hi Alex,

Thank you for the remarks. I’ve addressed them in the PR accordingly. I also include the full public interface below with the new changes included.

  • (1, 4) Thank you for suggesting and providing a reason why we shouldn’t use existentials in this case. I changed it in all relevant places.
  • (2) I added additional comments for ImplicitDecl with exhaustive lists of scopes that could be associated with those cases.
  • (3, 5) I removed LookupName.dollarIdentifier. We initially wanted to match the behavior of ASTScope when performing lookup with a $x identifier and that’s why there were two cases, a more general which just said „there could be dollar identifiers introduced here” LookupResult.mightIntroduceDollarIdentifiers and the concrete one LookupName.dollarIdentifier. To avoid this redundancy, we’ve decided with Doug to remove it now.
  • (6) Thank you for noticing that. I updated the documentation

I would like to keep the discussion whether IdentifiableSyntax should be moved to SwiftSyntax separate from this thread.

Since we’re using more general structs now for the result data structure now, I made IdentifiableSyntax internal again.

Have you performed any performance measurements for SwiftLexcialLookup yet?

It’s hard to say for now what’s the performance difference between ASTScope and SwiftLexicalLookup. When trying to measure that, I’ve got a whole bunch of outliers due to the way ASTScope is intertwined with logic it triggers (as far as I understand) and sometimes triggers more work than in other cases. It’s also a bit tricky to measure time end-to-end because we have to parse source files twice if we want to use SwiftLexicalLookup. It could still give us an upper bound of the performance though. If I find some time, I'll try to make those measurements.

So far, I think the most valuable and reliable were the tests I run with Instruments to determine the relative performance between scopes. I think the most significant improvement we can make is by caching/optimizing the sequential scope. When usually most scopes were evaluated in no more than 100 us, sequential scopes consistently run in <300, 400> us range. I don’t think it would require any big changes to the API signatures though. I think I’d like the API to remain stateless with an optional parameter to provide a cache object. Something similar to SwiftIfConfig having ConfiguredRegions stored in ExportedSourceFile. What could be the kinds of non-trivial changes that we should consider before making the API public?

Here is the full, updated public interface:

extension SyntaxProtocol {
  /// Returns all names that `identifier` refers to at this syntax node.
  /// Optional configuration can be passed as `config` to customize the lookup behavior.
  ///
  /// - Returns: An array of `LookupResult` for `identifier`  at this syntax node,
  /// ordered by visibility. If `identifier` is set to `nil`, returns all available names ordered by visibility.
  /// The order is from the innermost to the outermost scope,
  /// and within each result, names are ordered by their introduction
  /// in the source code.
  ///
  /// Example usage:
  /// ```swift
  /// class C {
  ///   var a = 42
  ///
  ///   func a(a: Int) {
  ///     let a = a
  ///     a // <--- lookup here
  ///
  ///     let a = 0
  ///   }
  /// }
  /// ```
  /// When calling this function on the declaration reference `a` within its name,
  /// the function returns the variable declaration `let a = a` first, then the function parameter,
  /// and lastly `LookupResult.lookInMembers` in this exact order.
  /// The second declaration within the function body is omitted
  /// due to the ordering rules within the function body.
  public func lookup(
    _ identifier: Identifier?,
    with config: LookupConfig = LookupConfig()
  ) -> [LookupResult] {
    scope?.lookup(identifier, at: self.position, with: config) ?? []
  }
}
public struct LookupConfig {
  /// Specifies whether lookup should finish in the closest sequential scope.
  ///
  /// ### Example
  /// ```swift
  /// class X {
  ///   let a = 42
  ///
  ///   func (a: Int) {
  ///     let a = 123
  ///
  ///     a // <-- lookup here
  ///   }
  /// }
  /// ```
  /// When looking up at the specified position with `finishInSequentialScope`
  /// set to `false`, lookup will return declaration from inside function body,
  /// function parameter and the `a` declaration from `class X` member block.
  /// If `finishInSequentialScope` would be set to `false`, the only name
  /// returned by lookup would be the `a` declaration from inside function body.
  public var finishInSequentialScope: Bool
  public var configuredRegions: ConfiguredRegions?

  /// Creates a new lookup configuration.
  ///
  /// - `finishInSequentialScope` - specifies whether lookup should finish
  ///   in the closest sequential scope. `false` by default.
  public init(
    finishInSequentialScope: Bool = false,
    configuredRegions: ConfiguredRegions? = nil
  ) {
    self.finishInSequentialScope = finishInSequentialScope
    self.configuredRegions = configuredRegions
  }
}
/// An entity that is implicitly declared based on the syntactic structure of the program.
public enum ImplicitDecl {
  /// `self` keyword representing object instance.
  /// Introduced at member boundary.
  /// Associated syntax node could be: `FunctionDeclSyntax`,
  /// `AccessorDeclSyntax`, `SubscriptDeclSyntax`,
  /// `DeinitializerDeclSyntax`, or `InitializerDeclSyntax`.
  case `self`(DeclSyntax)
  /// `Self` keyword representing object type.
  /// Associated syntax node could be: `ExtensionDeclSyntax`,
  /// or `ProtocolDeclSyntax`.
  case `Self`(DeclSyntax)
  /// `error` available by default inside `catch`
  /// block that does not specify a catch pattern.
  case error(CatchClauseSyntax)
  /// `newValue` available by default inside `set` and `willSet`.
  case newValue(AccessorDeclSyntax)
  /// `oldValue` available by default inside `didSet`.
  case oldValue(AccessorDeclSyntax)

  /// Syntax associated with this name.
  public var syntax: SyntaxProtocol {
    switch self {
    case .self(let syntax):
      return syntax
    case .Self(let syntax):
      return syntax
    case .error(let syntax):
      return syntax
    case .newValue(let syntax):
      return syntax
    case .oldValue(let syntax):
      return syntax
    }
  }

  /// The name of the implicit declaration.
  public var name: StaticString {
    switch self {
    case .self:
      return "self"
    case .Self:
      return "Self"
    case .error:
      return "error"
    case .newValue:
      return "newValue"
    case .oldValue:
      return "oldValue"
    }
  }

  /// Identifier used for name comparison.
  ///
  /// Note that `self` and `Self` are treated as identifiers for name lookup purposes
  /// and that a variable named `self` can shadow the `self` keyword. For example.
  /// ```swift
  /// class Foo {
  ///     func test() {
  ///     let `Self` = "abc"
  ///     print(Self.self)
  ///
  ///     let `self` = "def"
  ///     print(self)
  ///   }
  /// }
  ///
  /// Foo().test()
  /// ```
  /// prints:
  /// ```
  /// abc
  /// def
  /// ```
  /// `self` and `Self` identifers override implicit `self` and `Self` introduced by
  /// the `Foo` class declaration.
  public var identifier: Identifier {
    Identifier(canonicalName: name)
  }

  /// Position of this implicit name.
  public var position: AbsolutePosition {
    switch self {
    case .self(let declSyntax):
      switch Syntax(declSyntax).as(SyntaxEnum.self) {
      case .functionDecl(let functionDecl):
        return functionDecl.name.positionAfterSkippingLeadingTrivia
      case .initializerDecl(let initializerDecl):
        return initializerDecl.initKeyword.positionAfterSkippingLeadingTrivia
      case .subscriptDecl(let subscriptDecl):
        return subscriptDecl.accessorBlock?.positionAfterSkippingLeadingTrivia
          ?? subscriptDecl.endPositionBeforeTrailingTrivia
      case .variableDecl(let variableDecl):
        return variableDecl.bindings.first?.accessorBlock?.positionAfterSkippingLeadingTrivia
          ?? variableDecl.bindings.positionAfterSkippingLeadingTrivia
      case .accessorDecl(let accessorDecl):
        return accessorDecl.accessorSpecifier.positionAfterSkippingLeadingTrivia
      case .deinitializerDecl(let deinitializerDecl):
        return deinitializerDecl.deinitKeyword.positionAfterSkippingLeadingTrivia
      default:
        return declSyntax.positionAfterSkippingLeadingTrivia
      }
    case .Self(let declSyntax):
      switch Syntax(declSyntax).as(SyntaxEnum.self) {
      case .protocolDecl(let protocolDecl):
        return protocolDecl.name.positionAfterSkippingLeadingTrivia
      case .extensionDecl(let extensionDecl):
        return extensionDecl.extensionKeyword.positionAfterSkippingLeadingTrivia
      default:
        return declSyntax.positionAfterSkippingLeadingTrivia
      }
    case .newValue(let accessorDecl), .oldValue(let accessorDecl):
      return accessorDecl.accessorSpecifier.positionAfterSkippingLeadingTrivia
    case .error(let catchClause):
      return catchClause.catchItems.positionAfterSkippingLeadingTrivia
    }
  }
}

public enum LookupName {
  /// Identifier associated with the name.
  /// Could be an identifier of a variable, function or closure parameter and more.
  case identifier(Syntax, accessibleAfter: AbsolutePosition?)
  /// Declaration associated with the name.
  /// Could be class, struct, actor, protocol, function and more.
  case declaration(Syntax)
  /// Name introduced implicitly by certain syntax nodes.
  case implicit(ImplicitDecl)
  /// Represents equivalent names grouped together.
  /// - Important: The array should be non-empty.
  ///
  /// ### Example:
  /// ```swift
  /// switch X {
  /// case .a(let x), .b(let x):
  ///   print(x) // <-- lookup here
  /// }
  /// ```
  /// For lookup at the given position, the result
  /// contains only one name, that represents both `let x` declarations.
  case equivalentNames([LookupName])

  /// Syntax associated with this name.
  public var syntax: SyntaxProtocol {
    switch self {
    case .identifier(let syntax, _):
      return syntax
    case .declaration(let syntax):
      return syntax
    case .implicit(let implicitName):
      return implicitName.syntax
    case .equivalentNames(let names):
      return names.first!.syntax
    }
  }

  /// Identifier used for name comparison.
  public var identifier: Identifier {
    switch self {
    case .identifier(let syntax, _):
      return Identifier((syntax.asProtocol(SyntaxProtocol.self) as! IdentifiableSyntax).identifier)!
    case .declaration(let syntax):
      return Identifier((syntax.asProtocol(SyntaxProtocol.self) as! NamedDeclSyntax).name)!
    case .implicit(let kind):
      return kind.identifier
    case .equivalentNames(let names):
      return names.first!.identifier
    }
  }

  /// Position of this name.
  ///
  /// For some syntax nodes, their position doesn't reflect
  /// the position at which a particular name was introduced at.
  /// Such cases are function parameters (as they can
  /// contain two identifiers) and function declarations (where name
  /// is precided by access modifiers and `func` keyword).
  public var position: AbsolutePosition {
    switch self {
    case .identifier(let syntax, _):
      return (syntax.asProtocol(SyntaxProtocol.self) as! IdentifiableSyntax).identifier
        .positionAfterSkippingLeadingTrivia
    case .declaration(let syntax):
      return (syntax.asProtocol(SyntaxProtocol.self) as! NamedDeclSyntax).name.positionAfterSkippingLeadingTrivia
    case .implicit(let implicitName):
      return implicitName.position
    case .equivalentNames(let names):
      return names.first!.position
    }
  }

  /// Debug description of this lookup name.
  public var debugDescription: String {
    let sourceLocationConverter = SourceLocationConverter(fileName: "", tree: syntax.root)
    let location = sourceLocationConverter.location(for: position)
    let strName = identifier.name + " at: \(location.line):\(location.column)"

    switch self {
    case .identifier:
      let str = "identifier: \(strName)"

      if let accessibleAfter {
        let location = sourceLocationConverter.location(for: accessibleAfter)
        return str + " after: \(location.line):\(location.column)"
      } else {
        return str
      }
    case .declaration:
      return "declaration: \(strName)"
    case .implicit:
      return "implicit: \(strName)"
    case .equivalentNames(let names):
      return "Composite name: [ \(names.map(\.debugDescription).joined(separator: ", ")) ]"
    }
  }
}
/// Represents result from a specific scope.
public enum LookupResult {
  /// Scope and the names that matched lookup.
  case fromScope(Syntax, withNames: [LookupName])
  /// Indicates where to perform member lookup.
  case lookInMembers(Syntax)
  /// Indicates to lookup generic parameters of extended type.
  ///
  /// ### Example
  /// ```swift
  /// extension Foo {
  ///   func bar() {
  ///     let a = A() // <-- lookup here
  ///   }
  /// }
  /// ```
  /// For a lookup started at the marked position, `lookInGenericParametersOfExtendedType`
  /// will be included as one of the results prompting the client
  /// to lookup the generic parameters of of the extended `Foo` type.
  case lookInGenericParametersOfExtendedType(ExtensionDeclSyntax)
  /// Indicates this closure expression could introduce dollar identifiers.
  ///
  /// ### Example
  /// ```swift
  /// func foo() {
  ///   let a = {
  ///     $0 // <-- lookup here
  ///   }
  /// }
  /// ```
  /// When looking up for any identifier at the indicated position,
  /// the result will include `mightIntroduceDollarIdentifiers`
  /// result kind. If it's performed for a dollar identifier, `LookupName.dollarIdentifier`
  /// with the appropriate identifier will be used in the
  /// result associated with the closure expression inside `a`.
  case mightIntroduceDollarIdentifiers(ClosureExprSyntax)

  /// Associated scope.
  public var scope: SyntaxProtocol {
    switch self {
    case .fromScope(let scopeSyntax, _):
      return scopeSyntax
    case .lookInMembers(let lookInMemb):
      return lookInMemb
    case .lookInGenericParametersOfExtendedType(let extensionDecl):
      return extensionDecl
    case .mightIntroduceDollarIdentifiers(let closureExpr):
      return closureExpr
    }
  }

  /// Names that matched lookup.
  public var names: [LookupName] {
    switch self {
    case .fromScope(_, let names):
      return names
    case .lookInMembers(_),
      .lookInGenericParametersOfExtendedType(_),
      .mightIntroduceDollarIdentifiers(_):
      return []
    }
  }

  /// Debug description of this lookup name.
  public var debugDescription: String {
    var description =
      resultKindDebugName + ": "
      + ((Syntax(scope).asProtocol(SyntaxProtocol.self) as? ScopeSyntax)?.scopeDebugDescription ?? "NOT-A-SCOPE")

    switch self {
    case .lookInMembers:
      break
    default:
      if !names.isEmpty {
        description += "\n"
      }
    }

    for (index, name) in names.enumerated() {
      if index + 1 == names.count {
        description += "`-" + name.debugDescription
      } else {
        description += "|-" + name.debugDescription + "\n"
      }
    }

    return description
  }
}

extension [LookupResult] {
  /// Debug description this array of lookup results.
  public var debugDescription: String {
    return self.map(\.debugDescription).joined(separator: "\n")
  }
}