Still no alternatives to writing ASCII literals in hexadecimal?

i really hate to beat the ASCII literals horse again. but i recently did some investigation into generated assembly when using higher-level constructs like Unicode.Scalar, and the results are still quite discouraging.

here’s a pretty simple “UTF-8” buffer type:

extension HTML
{
    /// A byte buffer intended to hold UTF-8 encoded text.
    ///
    /// This is a thin wrapper around a `[UInt8]` array. The code shown here
    /// performs no validation that the stored bytes are well-formed UTF-8.
    @frozen public
    struct UTF8
    {
        /// The underlying byte storage.
        @usableFromInline internal
        var bytes:[UInt8]

        /// Creates a buffer holding `bytes`.
        ///
        /// - Parameter bytes: The initial contents; defaults to an empty array.
        @inlinable internal
        init(bytes:[UInt8] = [])
        {
            self.bytes = bytes
        }
    }
}
extension HTML.UTF8
{
    /// Appends a single byte to the buffer, unchanged.
    ///
    /// - Parameter codeunit: The byte to append.
    @inlinable public mutating
    func append(_ codeunit:UInt8)
    {
        self.bytes.append(codeunit)
    }
    /// Appends the UTF-8 code units of a Unicode scalar to the buffer.
    ///
    /// The bytes are taken from the scalar's `utf8` view and appended with
    /// `append(contentsOf:)`, regardless of how many code units the scalar has.
    ///
    /// - Parameter codepoint: The scalar whose UTF-8 encoding is appended.
    @inlinable public mutating
    func append(_ codepoint:Unicode.Scalar)
    {
        self.bytes.append(contentsOf: codepoint.utf8)
    }
    /// Appends the UTF-8 code units of a string to the buffer.
    ///
    /// - Parameter string: The string whose `utf8` view is appended.
    @inlinable public mutating
    func append(_ string:some StringProtocol)
    {
        self.bytes.append(contentsOf: string.utf8)
    }
}

we can use this type either by appending a Unicode.Scalar literal, or a raw UInt8 UTF-8 code unit:

/// Builds a buffer containing the letter "x", appended as a `Unicode.Scalar`
/// literal.
///
/// - Returns: The buffer after the single append.
public static
func test1() -> UTF8
{
    var utf8:UTF8 = .init()
    // The `as Unicode.Scalar` coercion picks the scalar-based `append` overload.
    utf8.append("x" as Unicode.Scalar)
    return utf8
}
/// Builds a buffer containing the single byte `0x78`, appended as an integer
/// literal.
///
/// - Returns: The buffer after the single append.
public static
func test2() -> UTF8
{
    var utf8:UTF8 = .init()
    // 0x78 (decimal 120) is the code unit for "x"; the integer literal
    // picks the `UInt8` overload of `append`.
    utf8.append(0x78)
    return utf8
}

i would have really hoped the first spelling, with the Unicode.Scalar literal would be just as good as the version that appends a hexadecimal integer literal. but alas, this is not the case.

here’s the version with the hexadecimal literal:

godbolt

static output.HTML.test2() -> output.HTML.UTF8:
        push    r14
        push    rbx
        push    rax
        mov     rcx, qword ptr [rip + _swiftEmptyArrayStorage@GOTPCREL]
        mov     esi, 1
        xor     edi, edi
        mov     edx, 1
        call    (generic specialization <serialized, Swift.UInt8> of Swift._ContiguousArrayBuffer._consumeAndCreateNew(bufferIsUnique: Swift.Bool, minimumCapacity: Swift.Int, growForAppend: Swift.Bool) -> Swift._ContiguousArrayBuffer<A>)
        mov     r14, qword ptr [rax + 16]
        mov     rcx, qword ptr [rax + 24]
        mov     rdx, rcx
        shr     rdx
        lea     rbx, [r14 + 1]
        cmp     rdx, r14
        jbe     .LBB4_1
.LBB4_2:
        mov     qword ptr [rax + 16], rbx
        mov     byte ptr [rax + r14 + 32], 120
        add     rsp, 8
        pop     rbx
        pop     r14
        ret
.LBB4_1:
        xor     edi, edi
        cmp     rcx, 2
        setae   dil
        mov     rsi, rbx
        mov     edx, 1
        mov     rcx, rax
        call    (generic specialization <serialized, Swift.UInt8> of Swift._ContiguousArrayBuffer._consumeAndCreateNew(bufferIsUnique: Swift.Bool, minimumCapacity: Swift.Int, growForAppend: Swift.Bool) -> Swift._ContiguousArrayBuffer<A>)
        jmp     .LBB4_2

generic specialization <serialized, Swift.UInt8> of Swift._ContiguousArrayBuffer._consumeAndCreateNew(bufferIsUnique: Swift.Bool, minimumCapacity: Swift.Int, growForAppend: Swift.Bool) -> Swift._ContiguousArrayBuffer<A>:
        push    rbp
        push    r15
        push    r14
        push    rbx
        push    rax
        mov     rbx, rcx
        mov     r15, rsi
        mov     ebp, edi
        test    dl, 1
        je      .LBB13_5
        mov     rax, qword ptr [rbx + 24]
        mov     rcx, rax
        shr     rcx
        cmp     rcx, r15
        jge     .LBB13_2
        movabs  rdx, 4611686018427387904
        add     rcx, rdx
        js      .LBB13_15
        and     rax, -2
        cmp     rax, r15
        cmovg   r15, rax
        jmp     .LBB13_5
.LBB13_2:
        mov     r15, rcx
.LBB13_5:
        mov     r14, qword ptr [rbx + 16]
        cmp     r15, r14
        cmovle  r15, r14
        test    r15, r15
        je      .LBB13_6
        lea     rdi, [rip + (demangling cache variable for type metadata for Swift._ContiguousArrayStorage<Swift.UInt8>)]
        call    __swift_instantiateConcreteTypeFromMangledName
        add     r15, 32
        mov     edx, 7
        mov     rdi, rax
        mov     rsi, r15
        call    swift_allocObject@PLT
        mov     r15, rax
        mov     rdi, rax
        call    malloc_usable_size@PLT
        add     rax, rax
        add     rax, -64
        mov     qword ptr [r15 + 16], r14
        mov     qword ptr [r15 + 24], rax
        jmp     .LBB13_8
.LBB13_6:
        mov     r15, qword ptr [rip + _swiftEmptyArrayStorage@GOTPCREL]
.LBB13_8:
        lea     rdi, [r15 + 32]
        lea     rsi, [rbx + 32]
        test    bpl, 1
        je      .LBB13_9
        cmp     r15, rbx
        jne     .LBB13_12
        lea     rax, [rsi + r14]
        cmp     rax, rdi
        ja      .LBB13_13
.LBB13_12:
        mov     rdx, r14
        call    memmove@PLT
.LBB13_13:
        mov     qword ptr [rbx + 16], 0
        jmp     .LBB13_14
.LBB13_9:
        mov     rdx, r14
        call    memcpy@PLT
.LBB13_14:
        mov     rdi, rbx
        call    swift_release@PLT
        mov     rax, r15
        add     rsp, 8
        pop     rbx
        pop     r14
        pop     r15
        pop     rbp
        ret
.LBB13_15:
        ud2

and here is the version with "x":

static output.HTML.test1() -> output.HTML.UTF8:
        push    r13
        sub     rsp, 16
        mov     rax, qword ptr [rip + _swiftEmptyArrayStorage@GOTPCREL]
        mov     qword ptr [rsp + 8], rax
        lea     r13, [rsp + 8]
        mov     edi, 120
        call    (generic specialization <serialized, Swift.UInt8, Swift.Unicode.Scalar.UTF8View> of Swift.Array.append<A where A == A1.Element, A1: Swift.Sequence>(contentsOf: __owned A1) -> ())
        mov     rax, qword ptr [rsp + 8]
        add     rsp, 16
        pop     r13
        ret

generic specialization <serialized, Swift.UInt8, Swift.Unicode.Scalar.UTF8View> of Swift.Array.append<A where A == A1.Element, A1: Swift.Sequence>(contentsOf: __owned A1) -> ():
        push    rbp
        push    r15
        push    r14
        push    r12
        push    rbx
        sub     rsp, 64
        mov     ebp, edi
        mov     r12d, 1
        cmp     edi, 128
        jb      .LBB10_3
        mov     r12d, 2
        cmp     ebp, 2047
        jbe     .LBB10_3
        cmp     ebp, 65536
        mov     r12d, 4
        sbb     r12, 0
.LBB10_3:
        mov     rbx, qword ptr [r13]
        mov     r15, qword ptr [rbx + 16]
        add     r15, r12
        jo      .LBB10_11
        mov     rdi, rbx
        call    swift_isUniquelyReferenced_nonNull_native@PLT
        test    al, al
        je      .LBB10_6
        mov     r14, qword ptr [rbx + 24]
        shr     r14
        cmp     r14, r15
        jge     .LBB10_7
.LBB10_6:
        mov     rcx, qword ptr [rbx + 16]
        cmp     rcx, r15
        cmovg   r15, rcx
        movzx   edi, al
        mov     rsi, r15
        mov     edx, 1
        mov     rcx, rbx
        call    (generic specialization <serialized, Swift.UInt8> of Swift._ContiguousArrayBuffer._consumeAndCreateNew(bufferIsUnique: Swift.Bool, minimumCapacity: Swift.Int, growForAppend: Swift.Bool) -> Swift._ContiguousArrayBuffer<A>)
        mov     rbx, rax
        mov     r14, qword ptr [rax + 24]
        shr     r14
.LBB10_7:
        mov     rax, qword ptr [rbx + 16]
        sub     r14, rax
        lea     rsi, [rbx + rax]
        add     rsi, 32
        lea     rdi, [rsp + 48]
        mov     rdx, r14
        mov     ecx, ebp
        call    (generic specialization <serialized, Swift.Unicode.Scalar.UTF8View> of (extension in Swift):Swift.Sequence._copySequenceContents(initializing: Swift.UnsafeMutableBufferPointer<A.Element>) -> (A.Iterator, Swift.Int))
        cmp     rax, r12
        jl      .LBB10_12
        mov     rbp, qword ptr [rbx + 16]
        add     rbp, rax
        jo      .LBB10_13
        mov     qword ptr [rbx + 16], rbp
        cmp     rax, r14
        je      .LBB10_14
.LBB10_10:
        mov     qword ptr [r13], rbx
        add     rsp, 64
        pop     rbx
        pop     r12
        pop     r14
        pop     r15
        pop     rbp
        ret
.LBB10_11:
        ud2
.LBB10_12:
        ud2
.LBB10_13:
        ud2
.LBB10_14:
        lea     rdi, [rip + (demangling cache variable for type metadata for [Swift.UInt8])]
        call    __swift_instantiateConcreteTypeFromMangledName
        cmp     rax, qword ptr [rip + ($ss7UnicodeO6ScalarV8UTF8ViewVN)@GOTPCREL]
        je      .LBB10_10
        mov     r15, qword ptr [rsp + 56]
        mov     esi, dword ptr [rsp + 48]
        cmp     esi, 127
        ja      .LBB10_17
        mov     r14d, 1
        cmp     r15, 1
        je      .LBB10_10
        jmp     .LBB10_22
.LBB10_17:
        cmp     esi, 2047
        ja      .LBB10_19
        mov     r14d, 2
        cmp     r15, 2
        je      .LBB10_10
        jmp     .LBB10_22
.LBB10_19:
        cmp     esi, 65535
        ja      .LBB10_21
        mov     r14d, 3
        cmp     r15, 3
        je      .LBB10_10
        jmp     .LBB10_22
.LBB10_21:
        mov     r14d, 4
        cmp     r15, 4
        je      .LBB10_10
.LBB10_22:
        mov     rdi, r15
        mov     dword ptr [rsp + 4], esi
        call    ($ss7UnicodeO6ScalarV8UTF8ViewVys5UInt8VSicig)@PLT
        cmp     r15, r14
        jae     .LBB10_46
        mov     rdx, r15
        inc     rdx
        jo      .LBB10_47
        mov     edi, eax
        mov     ecx, dword ptr [rsp + 4]
        cmp     ecx, 65536
        mov     eax, 4
        sbb     rax, 0
        mov     qword ptr [rsp + 8], rax
        jmp     .LBB10_26
.LBB10_25:
        mov     rax, rbp
        mov     qword ptr [rbx + 16], rbp
.LBB10_26:
        mov     rax, qword ptr [rbx + 24]
        mov     r8, rax
        shr     r8
        lea     rsi, [rbp + 1]
        cmp     r8, rsi
        jl      .LBB10_42
.LBB10_27:
        mov     rax, rbp
        sub     rax, r8
        mov     qword ptr [rsp + 40], rax
        jge     .LBB10_25
        mov     qword ptr [rsp + 16], r8
        lea     rax, [rbx + rbp]
        add     rax, 32
        mov     qword ptr [rsp + 32], rax
        xor     r14d, r14d
        mov     qword ptr [rsp + 24], rdx
.LBB10_29:
        mov     rax, qword ptr [rsp + 32]
        mov     byte ptr [rax + r14], dil
        cmp     ecx, 127
        ja      .LBB10_31
        lea     rax, [rdx + r14]
        dec     rax
        mov     r15d, 1
        jmp     .LBB10_37
.LBB10_31:
        cmp     ecx, 2047
        ja      .LBB10_33
        lea     rax, [rdx + r14]
        add     rax, -2
        mov     r15d, 2
        jmp     .LBB10_37
.LBB10_33:
        cmp     ecx, 65535
        ja      .LBB10_35
        lea     rax, [rdx + r14]
        add     rax, -3
        jmp     .LBB10_36
.LBB10_35:
        lea     rax, [rdx + r14]
        add     rax, -4
.LBB10_36:
        mov     r15, qword ptr [rsp + 8]
.LBB10_37:
        test    rax, rax
        je      .LBB10_43
        lea     r12, [rdx + r14]
        mov     rdi, r12
        mov     esi, ecx
        call    ($ss7UnicodeO6ScalarV8UTF8ViewVys5UInt8VSicig)@PLT
        cmp     r12, r15
        jae     .LBB10_44
        inc     r12
        jo      .LBB10_45
        mov     edi, eax
        inc     r14
        mov     rax, qword ptr [rsp + 40]
        add     rax, r14
        mov     ecx, dword ptr [rsp + 4]
        mov     rdx, qword ptr [rsp + 24]
        jne     .LBB10_29
        mov     rdx, r12
        mov     rax, qword ptr [rsp + 16]
        mov     rbp, rax
        mov     qword ptr [rbx + 16], rax
        jmp     .LBB10_26
.LBB10_42:
        mov     r14, rdx
        mov     r15d, edi
        xor     edi, edi
        cmp     rax, 2
        setae   dil
        mov     edx, 1
        mov     rcx, rbx
        call    (generic specialization <serialized, Swift.UInt8> of Swift._ContiguousArrayBuffer._consumeAndCreateNew(bufferIsUnique: Swift.Bool, minimumCapacity: Swift.Int, growForAppend: Swift.Bool) -> Swift._ContiguousArrayBuffer<A>)
        mov     edi, r15d
        mov     rdx, r14
        mov     ecx, dword ptr [rsp + 4]
        mov     rbx, rax
        mov     r8, qword ptr [rax + 24]
        shr     r8
        jmp     .LBB10_27
.LBB10_43:
        lea     rax, [r14 + rbp]
        inc     rax
        mov     qword ptr [rbx + 16], rax
        jmp     .LBB10_10
.LBB10_44:
        ud2
.LBB10_45:
        ud2
.LBB10_46:
        ud2
.LBB10_47:
        ud2

obviously most of this assembly never executes on the commonly-taken path. but i am especially alarmed by this section of the Unicode.Scalar-based invocation:

        sub     rsp, 64
        mov     ebp, edi
        mov     r12d, 1
        cmp     edi, 128 // <- we are still checking ASCII-ness ???
        jb      .LBB10_3

it seems that the compiler refuses to inline the append(_ codepoint:Unicode.Scalar) function, probably because the logic needed to handle multi-byte encodings would result in a ridiculous amount of binary bloat, and this inability to inline has disastrous effects for a caller that just needs to write a '<' or a '>' to the output buffer.

i think it is inexcusable that in swift 5.8, we are still forced to spell ASCII characters in hexadecimal, which makes for one of the few situations where C++ code is more readable than its swift equivalent. how can we do better?

1 Like

My general recommendation is just to use UInt8(ascii: "x") if you don't need an actual scalar literal.

2 Likes

well, that’s not a particularly fluent API. and we probably don’t want to make a habit of force-unwrapping optionals at the call site.

i could push it into the append(_ codepoint:Unicode.Scalar) function. but then i would have a public API that claims to support any unicode scalar input, but crashes on non-ASCII input.

You can now write an expression macro to spell this #ascii("x")! Woooooo

2 Likes

…well,, we can’t now, since expression macros are still an experimental feature.

but in the longer term, why can’t the expression macro be subsumed into an ExpressibleByUnicodeScalarLiteral conformance? i would much rather have a struct _CompileTimeKnownASCII shim that has an ExpressibleByUnicodeScalarLiteral conformance hooked up with the macro, than write #ascii("<") at every call site.

    /// Appends a compile-time-known ASCII character to the buffer.
    ///
    /// `_CompileTimeKnownASCII` is the hypothetical shim type proposed in the
    /// surrounding discussion; it is not defined in this snippet.
    ///
    /// - Parameter ascii: The ASCII value to append. Presumably `ascii.value`
    ///   is a `UInt8`, since it is appended to the byte storage — TODO confirm
    ///   once the type exists.
    @_documentation(visibility: public)
    @inlinable public mutating
    func append(_ ascii:_CompileTimeKnownASCII)
    {
        self.bytes.append(ascii.value)
    }
1 Like

A character might consist of several Unicode scalars, and a Unicode scalar might consist of several UInt8 in its UTF-8 representation. When you have a Unicode scalar, this obviously cannot be appended to a UInt8 buffer. It seems that you would like to have an efficient “ASCII” type that is actually a (limited) UInt8 value and where the compiler is smart about "x" as ASCII.

But I do not completely understand your use case. When you are operating with ASCII characters in your application that you would like to efficiently add to your UTF-8 buffer, shouldn’t those ASCII characters already be UInt8 (and checked to be less than 128)? If you only add a small number of specific ASCII characters, e.g. < for your HTML tags, you might as well work with some accordingly named UInt8 constants (I suppose not every character to be appended to your HTML has to be ASCII).

1 Like

…Also note that “ASCII” is colloquial for “ISO/IEC 646:1991 US ASCII”, i.e. the “US” version of ISO/IEC 646 (in the current version), which is equivalent to the Unicode block “C0 Controls and Basic Latin” in UTF-8 encoding without BOM. There would surely be a better name for it instead of the organization which participated in that part of ISO/IEC 646 :wink:

Update: The name “ASCII” confuses an encoding (which at least should be called “US ASCII”) with a subset of Unicode scalars. It could be named “US ASCII encodable”. Sorry for the finickiness.

Firstly - is there actually a performance gain to be had?

The only difference I can see is whether the array appends a single byte, or potentially multiple bytes from Unicode.Scalar.UTF8View. I would expect that view to efficiently produce a single byte for ASCII.

But when you're talking about appending things to an array, that generally becomes irrelevant next to the overhead of checking the array's capacity and accounting for having to reallocate the storage.

Can you actually measure a significant difference between appending one byte vs. appending the contents of a view which has a fast-path for when it contains only one byte?

Second - assuming there is a performance gain to be had, you can hoist this check from the bowels of Array's append function into your own code like so:

/// Appends the UTF-8 code units of a Unicode scalar to the buffer, with a
/// single-byte path for ASCII scalars.
///
/// - Parameter codepoint: The scalar whose UTF-8 encoding is appended.
@inlinable public mutating
func append(_ codepoint:Unicode.Scalar)
{
    // ASCII check done here, in inlinable code, rather than inside
    // `Array.append(contentsOf:)`.
    if codepoint.isASCII {
        // Forwards the scalar's value, converted to a `UInt8`, to the
        // single-byte `append` overload.
        return append(UInt8(truncatingIfNeeded: codepoint.value))
    }
    // Non-ASCII scalars go through the scalar's `utf8` view.
    self.bytes.append(contentsOf: codepoint.utf8)
}

When written this way, the isASCII check can be constant-folded, so both test1 and test2 functions compile to exactly the same instructions. The compiler even merges the implementations:

static output.HTML.test1() -> output.HTML.UTF8:
        jmp     (merged static output.HTML.test1() -> output.HTML.UTF8)

<...>

static output.HTML.test2() -> output.HTML.UTF8:
        jmp     (merged static output.HTML.test1() -> output.HTML.UTF8)
1 Like

Taking a look at the implementation of UTF8View, ASCII is the one thing it should actually do well.

It's actually somewhat suboptimal for non-ASCII text - every subscript operation encodes the scalar. IMO, a better implementation would eagerly encode the scalar into a fixed-size buffer, and then just access bytes from that. We have that utility in the standard library already (UTF8.EncodedScalar a.k.a _ValidUTF8Buffer), and the UTF8View actually uses it to implement the subscript.

Unfortunately, this type is frozen, so I don't think there's any opportunity to change it. Still, if this is proving to be a performance bottleneck for you, I would recommend trying UTF8.EncodedScalar instead.

In any case, ASCII shouldn't be too bad.

I just need to clarify this: UInt8(ascii:) does not return an optional. It crashes if you lied. Its job is to be used with compile-time constants, and it optimizes perfectly in that case.

While fewer characters is nice, @xwu's suggestion is the long-term best outcome for getting UInt8.

1 Like

If you want fewer characters, consider defining an operator like this:

// Declares `^` as a prefix operator so it can be used as shorthand below.
prefix operator ^

/// Converts a Unicode scalar to its ASCII code unit.
///
/// This is shorthand for `UInt8(ascii:)` and forwards directly to it.
///
/// - Parameter s: The scalar to convert.
/// - Returns: The result of `UInt8(ascii: s)`.
prefix func ^(_ s: Unicode.Scalar) -> UInt8 {
    UInt8(ascii: s)
}

^"A" // same as UInt8(ascii: "A") 

or some other valid prefix / postfix symbol.
Almost as good as 'A'