SE-0382: Expression Macros

Max_Desiatov · January 4, 2023, 12:13pm

Big +1 to the proposal from me.

Here's one use case I'd like to highlight. With the example repository I've been able to implement a compile-time conversion from string literals to arrays of bytes. This seems like a reasonable solution for environments where the String type with Unicode handling is too heavyweight. Here's the example code for MacroExamplesPlugin.

import SwiftSyntax
import _SwiftSyntaxMacros

/// Implementation of the `#utf8` expression macro: expands a single-segment
/// string literal argument into the raw text of that segment's
/// `syntaxTextBytes`.
public struct UTF8ArrayMacro: ExpressionMacro {
  /// Expands one `#utf8(...)` use.
  ///
  /// - Parameters:
  ///   - macro: The macro expansion expression being expanded.
  ///   - context: The expansion context (not used by this macro).
  /// - Returns: An expression built by interpolating the literal segment's
  ///   `syntaxTextBytes` as raw source text.
  /// - Throws: `CustomError.message` when the first argument is missing, is
  ///   not a string literal, or has anything other than exactly one plain
  ///   string segment (for example, when it contains an interpolation).
  public static func expansion(
    of macro: MacroExpansionExprSyntax,
    in context: inout MacroExpansionContext
  ) throws -> ExprSyntax {
    guard
      let literal = macro.argumentList.first?.expression
        .as(StringLiteralExprSyntax.self),
      literal.segments.count == 1,
      case .stringSegment(let segment)? = literal.segments.first
    else {
      throw CustomError.message("#utf8 macro requires a string literal")
    }

    let bytes = segment.syntaxTextBytes
    return "\(raw: bytes)"
  }
}

Then in MacroExamplesLib:

/// Declares `#utf8`: takes a `String` argument and produces a `[UInt8]`.
/// The expansion is implemented by `MacroExamplesPlugin.UTF8ArrayMacro`.
public macro utf8(_ string: String) -> [UInt8] = MacroExamplesPlugin.UTF8ArrayMacro

And at the place of use:

_ = #utf8("Hello, world!").withUnsafeBytes {
    fputs($0.baseAddress, stdout)
}

When dead code elimination becomes advanced enough to strip out unused Unicode tables on platforms where everything is linked statically, this would allow producing binaries as small as those you can get with Rust or C. Of course, there are downsides to this approach, as you're losing interpolation and other niceties of String. But for applications where binary size is a top priority, this is still a big improvement.

Douglas_Gregor · January 4, 2023, 8:23pm

Your macro implementation looks good, thank you for working on it! I (very recently) fixed a compiler bug that caused the incorrect error messages, and with my locally-built compiler (and adding a bunch of @expression utterances), your result-builder macro works. My fix hasn't made it into a snapshot yet, but here's a one-off toolchain that contains it.

Yes, they do! I updated your example macro to this:

let stringClosure = #apply(resultBuilder: StringAppender.self) {
    "This"
    "is"
    "a"
    "sentence."
}

and taught the macro implementation to look for a trailing closure as well:

        guard
            let resultBuilderSelfExpr = node.argumentList.first?.expression.as(MemberAccessExprSyntax.self),
            let resultBuilderName = resultBuilderSelfExpr.base?.withoutTrivia().description,
            let originalClosure = node.argumentList.dropFirst().first?.expression.as(ClosureExprSyntax.self) ??
              node.trailingClosure
        else {
            throw SomeError()
        }

and it works fine.

Doug

allevato · January 4, 2023, 8:50pm

I finally had an opportunity to try writing some macros. Some thoughts:

JSON literal conversion

I was inspired by the JSON example above, so I wrote one that converts a JSON string literal to nested calls to JSON case initializers. I don't think this is precisely what the author of that post had in mind (it sounds like they want to construct specific types from the literal, more like Codable), but since we don't have the semantic info for that, I tried this simpler idea instead.

Expand for implementation

// In the macro module
/// A JSON value, with one case per kind of value this example models.
public enum JSON {
  /// The JSON `null` value.
  case null
  /// A JSON string.
  case string(String)
  /// A JSON number, stored as a `Double`.
  case number(Double)
  /// A JSON array of nested values.
  case array([JSON])
  /// A JSON object mapping string keys to nested values.
  case object([String: JSON])
}

/// Declares `#jsonLiteral`: takes a `String` argument and produces a `JSON`
/// value. The expansion is implemented by `MacroExamplesPlugin.JSONLiteralMacro`.
public macro jsonLiteral(_ string: String) -> JSON = MacroExamplesPlugin.JSONLiteralMacro

// Plug-in implementation
import Foundation
import SwiftSyntax
import SwiftSyntaxBuilder
import _SwiftSyntaxMacros

/// Builds the Swift expression that constructs the `JSON` value corresponding
/// to a deserialized JSON object.
///
/// - Parameter jsonValue: A deserialized JSON value: `NSNull`, `String`,
///   `Double`, `[Any]`, or `[String: Any]`. Arrays and dictionaries are
///   converted recursively.
/// - Returns: An expression calling the matching `JSON` case, e.g.
///   `JSON.string("...")` or `JSON.array([...])`.
/// - Throws: `CustomError.message` if `jsonValue` (or any nested value) is of
///   a type not listed above.
private func jsonExpr(for jsonValue: Any) throws -> ExprSyntax {
  switch jsonValue {
  case is NSNull:
    return "JSON.null"

  case let string as String:
    return "JSON.string(\(literal: string))"

  case let number as Double:
    // NOTE(review): JSON booleans presumably arrive as `NSNumber` and may
    // also match this cast, becoming `JSON.number` — confirm if booleans
    // need to be supported.
    return "JSON.number(\(literal: number))"

  case let array as [Any]:
    // Every element, including the last, gets a trailing comma; Swift
    // accepts a trailing comma in array literals.
    let elements: [ArrayElementSyntax] = try array.map { element in
      ArrayElementSyntax(
        expression: try jsonExpr(for: element),
        trailingComma: .commaToken())
    }
    let arrayLiteral = ArrayExprSyntax(
      elements: ArrayElementListSyntax(elements))
    return "JSON.array(\(arrayLiteral))"

  case let dictionary as [String: Any]:
    // An empty dictionary literal is spelled `[:]`, which uses a different
    // node representation than a list of elements, so handle it up front.
    guard !dictionary.isEmpty else {
      return "JSON.object([:])"
    }

    // Dictionary iteration order is unspecified, so the emitted key order
    // may differ from the order in the original JSON text.
    let elements: [DictionaryElementSyntax] = try dictionary.map { entry in
      DictionaryElementSyntax(
        keyExpression: StringLiteralExprSyntax(content: entry.key),
        valueExpression: try jsonExpr(for: entry.value),
        // Use the same comma-token factory as the array branch above.
        trailingComma: .commaToken())
    }
    let dictionaryLiteral = DictionaryExprSyntax(
      content: .elements(DictionaryElementListSyntax(elements)))
    return "JSON.object(\(dictionaryLiteral))"

  default:
    throw CustomError.message("Invalid type in deserialized JSON: \(type(of: jsonValue))")
  }
}

/// Implementation of the `#jsonLiteral` expression macro: parses the JSON
/// text of a string literal argument at expansion time and replaces the macro
/// use with nested `JSON` case initializer calls.
public struct JSONLiteralMacro: ExpressionMacro {
  /// Expands one `#jsonLiteral(...)` use.
  ///
  /// - Parameters:
  ///   - macro: The macro expansion expression being expanded.
  ///   - context: The expansion context (not used by this macro).
  /// - Returns: The expression produced by `jsonExpr(for:)` for the parsed
  ///   JSON, carrying over the macro node's leading trivia when present.
  /// - Throws: `CustomError.message` when the first argument is missing, is
  ///   not a string literal, or is not a single plain string segment; any
  ///   error thrown by `JSONSerialization` for malformed JSON; and any error
  ///   thrown by `jsonExpr(for:)`.
  public static func expansion(
    of macro: MacroExpansionExprSyntax,
    in context: inout MacroExpansionContext
  ) throws -> ExprSyntax {
    guard let firstElement = macro.argumentList.first,
      let stringLiteral = firstElement.expression
        .as(StringLiteralExprSyntax.self),
      stringLiteral.segments.count == 1,
      case let .stringSegment(jsonString)? = stringLiteral.segments.first
    else {
      throw CustomError.message("#jsonLiteral macro requires a string literal")
    }

    // Building `Data` from the UTF-8 view cannot fail, so no force unwrap is
    // needed (unlike `data(using:)`, which returns an optional).
    let jsonData = Data(jsonString.content.text.utf8)
    let json = try JSONSerialization.jsonObject(
      with: jsonData,
      options: [.fragmentsAllowed])
    let jsonCaseExpr = try jsonExpr(for: json)
    if let leadingTrivia = macro.leadingTrivia {
      return jsonCaseExpr.withLeadingTrivia(leadingTrivia)
    }
    return jsonCaseExpr
  }
}

This worked really nicely (after stumbling on some SwiftSyntax corruption issues). The following invocation produced the source code I expected:

let json: JSON = #jsonLiteral("""
  {
    "name": "Bojack Horseman",
    "species": "horse",
    "age": 59,
    "friends": [
      "Diane Nguyen",
      "Mr. Peanutbutter",
      "Todd Chavez"
    ],
    "selfControl": null
  }
  """)
// JSON.object(["selfControl":JSON.null,"name":JSON.string("Bojack Horseman"),
// "species":JSON.string("horse"),"friends":JSON.array([
// JSON.string("Diane Nguyen"),JSON.string("Mr. Peanutbutter"),
// JSON.string("Todd Chavez"),]),"age":JSON.number(59.0),])

(Naturally there are some problems like key ordering being different due to JSONSerialization and NSDictionary but that's not relevant here.)

My takeaways here are:

Using string interpolation to construct nodes from a combination of literal Swift code and substituted content is a joy to use.
When you have to drop down a level to raw initializers, SwiftSyntax/SwiftSyntaxBuilder still provides nice defaults in many places for fixed structural tokens. For example, when creating an ArrayExprSyntax, you don't have to provide the [ and ] tokens manually.
But, we should extend the builder functionality in SwiftSyntax and/or provide additional helpers to make common functionality much more approachable for users. The average macro author shouldn't have to deal with subtle things like including trailing commas in arrays/dictionaries/argument lists, nor have to be aware of the different syntax node representation used for the content of an empty dictionary vs. a dictionary with elements. If SwiftSyntaxBuilder already has some of this, I've missed it, in which case it's a documentation problem instead. My code is probably not the simplest form possible; since SwiftSyntax is such a large API surface, we should figure out how to strongly push users toward the simplest/cleanest APIs.

Trivia handling

I observed that in your FontLiteralMacro implementation, you write this:

if let leadingTrivia = macro.leadingTrivia {
    return initSyntax.withLeadingTrivia(leadingTrivia)
}
return initSyntax

What is the purpose of retaining the leading trivia—to preserve any comments that may precede it if the macro expansion is printed for debugging? I can see this being more important for declaration macros where you'd want to be able to scrape for documentation comments after expansion.

If trivia had to be manually preserved, I would have expected something like this to fail:

let x = #someMacro(...) + y

Where if the expansion didn't preserve the space in #someMacro(...)'s trailing trivia, you'd end up with an expansion let x = VALUE+ y, which would fail to parse because + is now a postfix operator instead of an infix operator. But I tested that and it appears to be still parsed as though it was let x = VALUE + y, so I'm unclear on what the actual trivia behavior is here.

EDIT: I may be able to answer my own question here. Since the macro is applied to the already type-checked AST, it knows that #macro(...) + y must already be an infix operator even if the trivia for the expansion changes?

Can the macro infrastructure manage trivia automatically so that <leading trivia>#macro(...)<trailing trivia> is always transformed to <leading trivia><expanded node><trailing trivia>, so that the macro can never remove or replace trivia? I could see us wanting to merge the original trivia with the trivia attached to the expanded node, but not allow it to be completely replaced.

In fact, I wonder if it's problematic for macro expansions to be able to see trivia at all. I wrote a really stupid macro:

// Returns `string` as an integer, but also add the number in the preceding comment if there is one.
public macro theSameNumber(_ string: String) -> Int = MacroExamplesPlugin.TheSameNumberMacro

let n1 = #theSameNumber("123")
print(n1)  // 123

let n2 =
  // 5
  #theSameNumber("123")
print(n2)  // 128

Allowing folks to have semantically significant comments feels like a bad idea, but I can also see value in having access to trivia for other kinds of macros. For example, a declaration macro could treat a preceding doc comment as a template to splat out new doc comments for the declarations that it generates. I'm not sure what's the best way to square these goals; should we only provide trivia to certain types of macros (e.g., declarations but not expressions)? Should we require any kind of macro to explicitly opt-in if it wants the trivia? Or do we just accept that people can do bizarre things with it?

George · January 4, 2023, 9:22pm

One curiosity I noticed here is that you can have macros which do something different if a closure is specified as an argument vs as a trailing closure. Not sure what conclusions to draw from this, just feels a bit off to me.

Alejandro_Martinez · January 4, 2023, 10:17pm

This made me wonder, how will we know which version of SwiftSyntax we are coding against?

Douglas_Gregor · January 4, 2023, 10:39pm

Yes, that’s a good point. We should strip trivia when providing the node to the macro, because one should not be able to affect the surrounding trivia. I'll fix this in the implementation.

Personally, I'm of the opinion that we should accept that people can do bizarre things with comments, and be okay with that.

Doug

Douglas_Gregor · January 4, 2023, 10:47pm

In the final implementation, the version of SwiftSyntax you're coding against is whatever your package depends on. In today's prototype (where the macro implementations are built against the shared libraries in the toolchain), there's no good answer.

Doug

allevato · January 4, 2023, 10:55pm

We could go so far as to strip the trivia from all the tokens recursively under the macro expansion node, but perhaps that extra processing overhead isn't worth it.

Although, since the final implementation (using a standalone executable instead of a dylib) will need to do some kind of processing anyway to convert the in-memory tree to something that can be sent over IPC, maybe that would lessen the impact.

I don't have a strong opinion here though, and I'm leaning weakly toward keeping the implementation efficient even if the cost is letting people have weird dependencies on comments and whitespace.

ktoso · January 4, 2023, 11:14pm

I'm not sure where things like custom runtime metadata attributes end up in syntax... is that as trivia or something else?

I can definitely foresee needing to handle these in macros, comments not so much though.

allevato · January 4, 2023, 11:22pm

"Trivia" encompasses whitespace, comments, and the occasional garbage/unexpected text: swift-syntax/Trivia.swift at main · apple/swift-syntax · GitHub.

Anything with syntactic/semantic significance will have its own syntax node; attributes for example are represented by AttributeSyntax (there's currently a distinction between those which are built-in attributes and CustomAttributeSyntax for property wrappers, but it looks like that distinction is being removed).

ktoso · January 5, 2023, 12:17am

Thank you for clarifying!

Douglas_Gregor · January 5, 2023, 12:21am

s-k:

Accessing the nearest definition would help. However, if #printArguments also wanted to print the type name of self in case of a method, this would not be enough.
[snip]
There are some solutions I can think of. I am not sure if any of them are feasible:

Macros could declare the need to access the parent nodes, disabling incremental builds.

Maybe the compiler could provide some sort of declaration hierarchy to the macro, such as name, signature and attributes of the enclosing function, then attributes, type (class, struct, enum, etc.), name and inheritance of the type containing the function, maybe even the type containing this type, etc. This would solve example 3 only in part.

Maybe the whole syntax tree could be cached and the relevant parts be updated during incremental compilation.

I don't think the third option is really feasible, because it means that macro implementations would become much more deeply entwined in incremental compilation. And I worry that the first option is an anti-feature: one errant macro could blow up your incremental build times.

I suspect the middle bullet might lead toward the right answer. From an incremental build standpoint, if any of your parents changes directly---say, you're in a function that adds a new parameter, or an extension that gets a new conditional requirement---you're going to have to get recompiled no matter what. So what if we kept the "spine" of the parent nodes intact, but removed all child nodes that could be independent? So if we started with this code:

struct A<T> { }
extension A where T: Addable {
  func f() { }
  func g(a: Int, b: Int) {
    let a = 1
    let b = #myMacro + a
  }
}

we would give the implementation of myMacro something like this:

extension A where T: Addable {
  func g(a: Int, b: Int) {
    #myMacro
  }
}

this would let you ask syntactic questions about the parents of your macro expansion, without exposing parts of the source file that should be able to evolve independently of the code seen by the macro implementation. Now, this isn't going to solve all of the problems you enumerated. For example, you won't know whether A is a class, struct, protocol, or whatever, and you won't know whether self is accessible at all, but it does provide quite a bit of context.

I went ahead and wrote up a pitch for declaration macros so we can explore further down the path and, hopefully, learn more. That said, the Language Working Group (and in the past, the Core Team) has been clear that for large features that are split across several different proposals (like this one), it's absolutely willing to go back and review amendments to accepted proposals if we learn more while considering later proposals. We did this with concurrency two years back, and regular expressions last year, and will do the same for macros.

Yes, I think we can automate away a lot of the boilerplate checking of macro arguments. However, it won't be done by the compiler, because the compiler isn't going to be able to form calls to arbitrary functions like this when invoking the macro. Instead, I would view this as some improvements we can make to the SwiftSyntaxMacros library itself. For example:

/// An expression macro that takes a single argument of a known syntax type,
/// so conforming types do not have to extract and check that argument
/// themselves.
protocol SingleArgumentExpressionMacro: ExpressionMacro {
  /// The syntax node type the macro's single argument must have.
  associatedtype ArgumentSyntax: ExprSyntaxProtocol
  /// Expands the macro given its already-extracted, already-typed argument.
  ///
  /// The `argument` parameter uses the `ArgumentSyntax` associated type
  /// declared above.
  static func expansion(of node: MacroExpansionExprSyntax, in context: inout MacroExpansionContext,
                                     argument: ArgumentSyntax) throws -> ExprSyntax
}

extension ExpressionMacro where Self: SingleArgumentExpressionMacro {
  /// Default `ExpressionMacro` entry point for single-argument macros:
  /// extracts the first argument, checks that it is an `ArgumentSyntax`, and
  /// forwards to `expansion(of:in:argument:)`.
  ///
  /// - Throws: `BadArgumentError` when there is no first argument or it is
  ///   not an `ArgumentSyntax`; otherwise whatever the forwarded call throws.
  static func expansion(of node: MacroExpansionExprSyntax, in context: inout MacroExpansionContext)
      throws -> ExprSyntax {
    // The argument's expression is reached through `.expression`, as in the
    // other macro implementations in this thread.
    guard let arg = node.argumentList.first?.expression.as(ArgumentSyntax.self) else {
      throw BadArgumentError()
    }
    // `context` is an `inout` parameter, so it must be passed with `&`.
    return try expansion(of: node, in: &context, argument: arg)
  }
}

If we had associated type packs, we could make this handle an arbitrary number of arguments, which would be... amazing. Anyway, I think this is a good direction, and it's something we should pursue within SwiftSyntaxMacros. However, I don't think these improvements should be part of this proposal, or even part of the evolution process... we should treat it like any other API improvements and extensions to swift-syntax, through pull requests and discussion there.

soumyamahunt:

But what about scenario where I might want to construct type based on the input provided, i.e. building a type from json string:
macro jsonObj<T>(_: String) -> T

let obj = #jsonObj("""
{
"one": 1,
"two": 2
}
""")

I suppose it depends on what you want to do with the type. You could declare some local types, form a result from that, and have jsonObj return an any <some protocol> to hide the returned type. If you want to actually create types that are usable elsewhere and aren't erased, you'll need something more like my follow-up pitch for declaration macros.

soumyamahunt:

Or registering routes in a server framework:

app.get("hello/:name") { req in
    // req could be inferred from the provided string as
    // SomeGenericStruct<(name: String)>
    let name = req.parameters.name
    return "Hello, \(name)!"
}

This, again, might be something that would need declaration macros, if the goal is to have the macro parse the string and produce new type information for it. Alternatively, and I suppose I could cover this as a future direction, I could imagine an annotation for a macro parameter that says "arguments to this parameter are not type checked at all". Arguments to such parameters would lose all of the benefits discussed in the section on type-checking arguments, but it would let you form more-specific type information for use in checking those arguments later. I'm not sure how I feel about such a feature, but it is an extension that could fit into the model.

Neat! Thank you.

I'm going to backtrack a bit on my earlier enthusiasm for this change. Macros are so expression-like that allowing one to omit the () for macros, when doing so has a different behavior for functions, introduces an inconsistency that feels worse than having macro authors decide between : and ->.

Doug

Douglas_Gregor · January 5, 2023, 1:02am

A couple of things have come up in the review that warrant small proposal revisions. I've collected them in this pull request. They are:

Make MacroExpansionContext a class, because the state involving diagnostics and unique names needs to be shared.
Allow macro parameters to have default arguments, with restrictions on what can occur within a default argument.
Clarify that macro expansion cannot be recursive.
Rename createUniqueLocalName to createUniqueName; the names might not always be local in scope.

Doug

tgoyne · January 5, 2023, 7:58pm

Stopping people from doing weird ill-advised things with macros is pretty much impossible. I don't see a good use-case for accessing comments, but if not exposing comments requires any amount of effort at all then I think that effort is much better invested in making even the smallest amount of progress towards providing a proper way to do whatever they'd be doing with comments.

tera · January 5, 2023, 8:26pm

Cool stuff. Assuming a function that returns a value, can this somehow print:

    enter: doSomething(42, b: 256)
    exit:  doSomething(42, b: 256) -> 53

where 53 is the function result?

Hope we end up with something much nicer than the often hated C macros.

blangmuir · January 5, 2023, 8:49pm

I worry that unless we invent a parallel type hierarchy for "incomplete" syntax nodes that do not provide the same interface, this will be an attractive nuisance.

It looks like you can access all children of a parent node, but you cannot.
It looks like you could determine relative source locations within the parent chain, but you cannot (the lengths may be shortened due to children).
You can sometimes get more information than others; e.g. in your example you don't know it's a struct/class, but you would sometimes know that if you use a macro inside the original body. This is a somewhat general issue with syntax trees of course.

Jon_Shier · January 5, 2023, 9:14pm

This is late, and it seems like this proposal should have an official revision, but for posterity:

What is your evaluation of the proposal?

nil, as this proposal has far reaching impact that is impossible to assess without full development flow integration.

-1 as proposed due to no guarantee of said flow integration. As is, this proposal seems to introduce APIs that are rather hard to build and impossible to debug.

Is the problem being addressed significant enough to warrant a change to Swift?

Given the motivation for the proposal seems to be "we should have expression macros", it, of course, addresses that problem. However, the larger, implicit issue, that some expressions can't be built in Swift, may or may not be a problem. It doesn't seem to be a problem, per se. At most it's a missing feature with a variety of solutions. I would say some types of expressions are probably desirable to be expressed directly in code, but not all, so all I can say here is "maybe".

There are probably better solutions for the vast majority of examples provided in this thread, including more limited versions of compile time evaluation.

Does this proposal fit well with the feel and direction of Swift?

In that result builders are precedent for enabling special language modes in Swift, perhaps, but overall I don't think macros fit well in Swift's previously espoused goals, at least at a high level. Given a macro's rather impenetrable nature, especially now, they're basically magic that can be introduced from any package. Is progressive disclosure really just "accept the magic until you want to learn about it"?

If you have used other languages or libraries with a similar feature, how do you feel that this proposal compares to those?

I haven't used modern macros from Rust. I still have nightmares about Obj-C codebases that used C macros extensively. These are different and much better in many ways, but worse in others (textual macros were easily inspectable and could be reasoned about, if only because macro authors needed to reason about them in the same way).

How much effort did you put into your review? A glance, a quick reading, or an in-depth study?

Can't say I have expertise here but I did try to read the pitch and proposal and tried the example repo. It was largely inscrutable given there's no documentation and no tooling help, as well as no debugger, so it doesn't seem super useful for anyone who isn't already intimately familiar with SwiftSyntax.

s-k · January 6, 2023, 7:35am

Thank you! I will try to make time to work on it some more and give feedback on what I learned.

I think this would need a declaration macro that must be applied to the function itself.

esummers · January 6, 2023, 6:00pm

I'm +1 on the vision for macros so far.

I personally think this still fits in with progressive disclosure. Particularly since I don't see macro writing as something one will generally need to learn. They are in the domain of DSL writers where it is natural to work with syntax nodes. I'd rather see this than some sort of macro sub-language which I think is the only other alternative.

Swift needs magic for things like Codable and I'd rather see that magic in libraries than in the compiler. By making it difficult to write macros it makes them less likely to be abused or used in ways that don't result in straightforward transformation.

I think debugging for a macro creator will be reasonable, but I think it would be nice if IDEs like Xcode could somehow preview the syntax node transformation to make it easier to peek behind the curtain.

allevato · January 6, 2023, 6:26pm

I was able to get surprisingly far with just an expression macro that would "wrap" the function body in a trailing closure, which at least makes it easy to capture the return value and do something with it before returning. But yeah, printing the arguments isn't possible unless you pass them to the macro explicitly. By abusing the heck out of closures and implicit single-expression returns, I was able to write this:

// Example use of `#trace`: the function body is passed as the macro's
// trailing closure, and the arguments to print are passed explicitly.
func adder(_ a: Int, _ b: Int) -> Int {
  #trace(a, b) {
    let result = a + b
    return result
  }
}

which gets transformed to this:

// Expanded form of `adder`: the `#trace` use becomes an immediately-invoked
// closure that prints an "enter" line with the function name and each
// argument, runs the original body in its own closure, prints an "exit" line
// with the result, and returns that result.
func adder(_ a: Int, _ b: Int) -> Int {
  {
    print("enter: \(#function) <- ", terminator: "");
    {print("a = \(a), ", terminator: "");print("b = \(b), ", terminator: "");}()
    print()
    let __macro_local_0 = {
      let result = a + b
      return result
    }()
    print("exit:  \(#function) -> \(__macro_local_0)")
    return __macro_local_0
  }()
}

with this macro:

/// Declares `#trace`: takes any number of values to print plus a `body`
/// closure, and produces the closure's `Result`. The expansion is implemented
/// by the `TraceMacro` type in the `MacroExamplesPlugin` module.
@expression public macro trace<Result>(
  _ args: Any..., body: () -> Result
) -> Result = #externalMacro(module: "MacroExamplesPlugin", type: "TraceMacro")

Then, if I invoke let x = adder(100, 200), I get this output:

enter: adder(_:_:) <- a = 100, b = 200, 
exit:  adder(_:_:) -> 300

Expand for plug-in implementation

import SwiftSyntax
import SwiftSyntaxBuilder
import _SwiftSyntaxMacros

/// Implementation of the `#trace` expression macro: wraps the trailing
/// closure in an immediately-invoked closure that prints an "enter" line
/// (function name plus each macro argument), runs the original closure,
/// prints an "exit" line with the result, and returns the result.
public struct TraceMacro: ExpressionMacro {
  /// Expands one `#trace(...) { ... }` use.
  ///
  /// - Parameters:
  ///   - macro: The macro expansion expression being expanded.
  ///   - context: The expansion context, used to create a unique name for
  ///     the local that holds the closure's result.
  /// - Returns: The immediately-invoked closure expression described above.
  /// - Throws: `CustomError.message` when the macro use has no trailing
  ///   closure.
  public static func expansion(
    of macro: MacroExpansionExprSyntax,
    in context: inout MacroExpansionContext
  ) throws -> ExprSyntax {
    guard let closure = macro.trailingClosure else {
      throw CustomError.message("#trace must have a trailing closure")
    }

    // Build a closure containing one `print` statement per macro argument.
    let argPrinterClosure = ClosureExprSyntax(
      statements: CodeBlockItemListSyntax {
        for arg in macro.argumentList {
          // Source text of the argument with its trailing comma removed,
          // used as the label in the printed output.
          let argString = String(describing: arg.withTrailingComma(false))
          let printExpr: ExprSyntax =
            """
            print("\(raw: argString) = \\(\(arg.expression)), ", terminator: "")
            """
          // Each statement gets an explicit semicolon terminator.
          CodeBlockItemSyntax(item: .expr(printExpr), semicolon: .semicolonToken())
        }
      }
    )

    // Unique name for the local that stores the original closure's result.
    let resultIdentifier = context.createUniqueLocalName()
    return """
      {
        print("enter: \\(#function) <- ", terminator: "");
        \(argPrinterClosure)()
        print()
        let \(resultIdentifier) = \(closure)()
        print("exit:  \\(#function) -> \\(\(resultIdentifier))")
        return \(resultIdentifier)
      }()
      """ as ExprSyntax
  }
}

Note: I had to use the toolchain from this comment above from Doug to get around expanded variable declaration issues.

I'm not exactly proud of the implementation of that macro, but it was a really fun exercise to see how far I could get with just the expression features.