i really hate to beat the ASCII literals horse again, but i recently did some investigation into the generated assembly when using higher-level constructs like `Unicode.Scalar`, and the results are still quite discouraging.
here’s a pretty simple “UTF-8” buffer type:
extension HTML
{
/// A buffer of UTF-8 code units, stored as a `[UInt8]`.
@frozen public
struct UTF8
{
/// The code units appended so far.
@usableFromInline internal
var bytes:[UInt8]
/// Creates a buffer holding `bytes`. The default is an empty buffer.
@inlinable internal
init(bytes:[UInt8] = [])
{
self.bytes = bytes
}
}
}
extension HTML.UTF8
{
/// Appends a single code unit to the end of the buffer.
@inlinable public mutating
func append(_ codeunit:UInt8)
{
self.bytes.append(codeunit)
}
/// Appends the contents of `codepoint.utf8` (the scalar's UTF-8 view)
/// to the end of the buffer.
@inlinable public mutating
func append(_ codepoint:Unicode.Scalar)
{
self.bytes.append(contentsOf: codepoint.utf8)
}
/// Appends the contents of `string.utf8` to the end of the buffer.
@inlinable public mutating
func append(_ string:some StringProtocol)
{
self.bytes.append(contentsOf: string.utf8)
}
}
we can use this type either by appending a `Unicode.Scalar` literal, or a raw `UInt8` UTF-8 code unit:
/// Builds an empty buffer and appends the `Unicode.Scalar` literal `"x"`,
/// which selects the `append(_ codepoint:Unicode.Scalar)` overload.
public static
func test1() -> UTF8
{
var utf8:UTF8 = .init()
utf8.append("x" as Unicode.Scalar)
return utf8
}
/// Builds an empty buffer and appends the integer literal `0x78`
/// (the ASCII code of "x") as a raw code unit.
public static
func test2() -> UTF8
{
var utf8:UTF8 = .init()
utf8.append(0x78)
return utf8
}
i would have really hoped the first spelling, with the `Unicode.Scalar` literal, would be just as good as the version that appends a hexadecimal integer literal. but alas, this is not the case.
here’s the version with the hexadecimal literal:
// Compiled output for `test2()`, which appends the integer literal 0x78.
// The byte is written inline with a single store (see .LBB4_2); the only
// calls are to the `_consumeAndCreateNew` specialization.
static output.HTML.test2() -> output.HTML.UTF8:
push r14
push rbx
push rax // moves rsp down by 8; undone by `add rsp, 8` below (the pushed value is never read)
mov rcx, qword ptr [rip + _swiftEmptyArrayStorage@GOTPCREL] // rcx = address of _swiftEmptyArrayStorage, loaded via the GOT
mov esi, 1 // presumably minimumCapacity = 1 (second parameter in the callee's signature)
xor edi, edi // presumably bufferIsUnique = false
mov edx, 1 // presumably growForAppend = true
call (generic specialization <serialized, Swift.UInt8> of Swift._ContiguousArrayBuffer._consumeAndCreateNew(bufferIsUnique: Swift.Bool, minimumCapacity: Swift.Int, growForAppend: Swift.Bool) -> Swift._ContiguousArrayBuffer<A>)
mov r14, qword ptr [rax + 16] // r14 = word at offset 16 of the returned buffer (used below as the element count)
mov rcx, qword ptr [rax + 24] // rcx = word at offset 24 (its value >> 1 is compared against the count)
mov rdx, rcx
shr rdx
lea rbx, [r14 + 1] // rbx = count + 1
cmp rdx, r14
jbe .LBB4_1 // (offset-24 word >> 1) <= count: take the out-of-line grow path
.LBB4_2:
mov qword ptr [rax + 16], rbx // store the incremented count
mov byte ptr [rax + r14 + 32], 120 // write 120 (0x78) at index r14; element storage starts at offset 32
add rsp, 8
pop rbx
pop r14
ret
.LBB4_1:
xor edi, edi
cmp rcx, 2
setae dil // edi = 1 if the offset-24 word is >= 2 (unsigned), else 0
mov rsi, rbx // rsi = count + 1
mov edx, 1
mov rcx, rax // rcx = the current buffer
call (generic specialization <serialized, Swift.UInt8> of Swift._ContiguousArrayBuffer._consumeAndCreateNew(bufferIsUnique: Swift.Bool, minimumCapacity: Swift.Int, growForAppend: Swift.Bool) -> Swift._ContiguousArrayBuffer<A>)
jmp .LBB4_2 // resume the store using the buffer returned in rax
// Out-of-line helper that produces a new array buffer and copies the old
// elements into it. Inputs as consumed below: rcx = old buffer, rsi = a
// requested size, edi = a flag tested at .LBB13_8, dl = a flag tested on
// entry. By the order of the parameters in the signature these are presumably
// minimumCapacity, bufferIsUnique and growForAppend respectively.
// Returns the new buffer in rax.
generic specialization <serialized, Swift.UInt8> of Swift._ContiguousArrayBuffer._consumeAndCreateNew(bufferIsUnique: Swift.Bool, minimumCapacity: Swift.Int, growForAppend: Swift.Bool) -> Swift._ContiguousArrayBuffer<A>:
push rbp
push r15
push r14
push rbx
push rax
mov rbx, rcx // rbx = old buffer
mov r15, rsi // r15 = requested size
mov ebp, edi // ebp = flag passed in edi
test dl, 1
je .LBB13_5 // bit 0 of dl clear: skip the growth computation
mov rax, qword ptr [rbx + 24]
mov rcx, rax
shr rcx // rcx = (offset-24 word of old buffer) >> 1
cmp rcx, r15
jge .LBB13_2 // already >= the requested size: use rcx as the size
movabs rdx, 4611686018427387904 // 2^62
add rcx, rdx
js .LBB13_15 // trap if rcx + 2^62 has the sign bit set
and rax, -2 // clear bit 0 of the offset-24 word, i.e. 2 * rcx
cmp rax, r15
cmovg r15, rax // r15 = max(r15, rax), signed
jmp .LBB13_5
.LBB13_2:
mov r15, rcx
.LBB13_5:
mov r14, qword ptr [rbx + 16] // r14 = offset-16 word of old buffer (later used as the byte count to copy)
cmp r15, r14
cmovle r15, r14 // r15 = max(r15, r14), signed
test r15, r15
je .LBB13_6 // size is zero: use _swiftEmptyArrayStorage instead of allocating
lea rdi, [rip + (demangling cache variable for type metadata for Swift._ContiguousArrayStorage<Swift.UInt8>)]
call __swift_instantiateConcreteTypeFromMangledName
add r15, 32 // allocation size = element bytes + 32 (elements begin at offset 32)
mov edx, 7 // third argument to swift_allocObject (presumably an alignment mask; TODO confirm)
mov rdi, rax
mov rsi, r15
call swift_allocObject@PLT
mov r15, rax // r15 = new buffer
mov rdi, rax
call malloc_usable_size@PLT
add rax, rax
add rax, -64 // rax = 2 * usable_size - 64 = 2 * (usable_size - 32)
mov qword ptr [r15 + 16], r14 // new buffer's offset-16 word = old buffer's
mov qword ptr [r15 + 24], rax // new buffer's offset-24 word
jmp .LBB13_8
.LBB13_6:
mov r15, qword ptr [rip + _swiftEmptyArrayStorage@GOTPCREL]
.LBB13_8:
lea rdi, [r15 + 32] // destination = new buffer + 32
lea rsi, [rbx + 32] // source = old buffer + 32
test bpl, 1
je .LBB13_9 // flag from edi clear: copy with memcpy and leave the old buffer's count alone
cmp r15, rbx
jne .LBB13_12 // different buffers: memmove
lea rax, [rsi + r14]
cmp rax, rdi
ja .LBB13_13 // same buffer and source end is above the destination: skip the move
.LBB13_12:
mov rdx, r14
call memmove@PLT
.LBB13_13:
mov qword ptr [rbx + 16], 0 // zero the old buffer's offset-16 word
jmp .LBB13_14
.LBB13_9:
mov rdx, r14
call memcpy@PLT
.LBB13_14:
mov rdi, rbx
call swift_release@PLT // release the old buffer
mov rax, r15 // return the new buffer
add rsp, 8
pop rbx
pop r14
pop r15
pop rbp
ret
.LBB13_15:
ud2 // trap
and here is the version with `"x"`:
// Compiled output for `test1()`, which appends the `"x"` Unicode.Scalar
// literal. No byte is stored inline here: the append is delegated to an
// out-of-line call of the `Array.append(contentsOf:)` specialization.
static output.HTML.test1() -> output.HTML.UTF8:
push r13
sub rsp, 16
mov rax, qword ptr [rip + _swiftEmptyArrayStorage@GOTPCREL]
mov qword ptr [rsp + 8], rax // stack slot [rsp + 8] = address of _swiftEmptyArrayStorage
lea r13, [rsp + 8] // r13 = address of that slot; the callee loads and stores the buffer pointer through r13
mov edi, 120 // edi = 120 (0x78), the value of the "x" literal
call (generic specialization <serialized, Swift.UInt8, Swift.Unicode.Scalar.UTF8View> of Swift.Array.append<A where A == A1.Element, A1: Swift.Sequence>(contentsOf: __owned A1) -> ())
mov rax, qword ptr [rsp + 8] // return the buffer pointer now held in the slot
add rsp, 16
pop r13
ret
// Out-of-line `Array.append(contentsOf:)` specialized for a
// Unicode.Scalar.UTF8View argument. Inputs as consumed below: edi = a 32-bit
// value (the caller in this listing passes 120), r13 = address of the slot
// holding the array's buffer pointer. The updated buffer pointer is written
// back through r13 at .LBB10_10.
generic specialization <serialized, Swift.UInt8, Swift.Unicode.Scalar.UTF8View> of Swift.Array.append<A where A == A1.Element, A1: Swift.Sequence>(contentsOf: __owned A1) -> ():
push rbp
push r15
push r14
push r12
push rbx
sub rsp, 64
mov ebp, edi // ebp = the scalar value
// r12 = a length in 1...4 chosen by the magnitude of the value:
// 1 if < 128, 2 if <= 2047, otherwise 4 minus 1 when the value is < 65536.
mov r12d, 1
cmp edi, 128
jb .LBB10_3
mov r12d, 2
cmp ebp, 2047
jbe .LBB10_3
cmp ebp, 65536 // sets the carry flag when ebp < 65536
mov r12d, 4 // mov leaves the flags untouched
sbb r12, 0 // r12 = 4 - carry, i.e. 3 or 4
.LBB10_3:
mov rbx, qword ptr [r13] // rbx = current buffer
mov r15, qword ptr [rbx + 16]
add r15, r12 // r15 = offset-16 word (element count) + r12
jo .LBB10_11 // trap on signed overflow
mov rdi, rbx
call swift_isUniquelyReferenced_nonNull_native@PLT
test al, al
je .LBB10_6 // call returned 0: go create a new buffer
mov r14, qword ptr [rbx + 24]
shr r14 // r14 = (offset-24 word) >> 1
cmp r14, r15
jge .LBB10_7 // already large enough for r15 elements: no new buffer needed
.LBB10_6:
mov rcx, qword ptr [rbx + 16]
cmp rcx, r15
cmovg r15, rcx // r15 = max(r15, current count), signed
movzx edi, al // edi = result of the uniqueness call
mov rsi, r15
mov edx, 1
mov rcx, rbx
call (generic specialization <serialized, Swift.UInt8> of Swift._ContiguousArrayBuffer._consumeAndCreateNew(bufferIsUnique: Swift.Bool, minimumCapacity: Swift.Int, growForAppend: Swift.Bool) -> Swift._ContiguousArrayBuffer<A>)
mov rbx, rax // rbx = new buffer
mov r14, qword ptr [rax + 24]
shr r14
.LBB10_7:
mov rax, qword ptr [rbx + 16]
sub r14, rax // r14 = (offset-24 word >> 1) - count
lea rsi, [rbx + rax]
add rsi, 32 // rsi = buffer + 32 + count, the address just past the last element
lea rdi, [rsp + 48] // address of a stack area at rsp + 48 (read back at .LBB10_14)
mov rdx, r14
mov ecx, ebp // the scalar value
call (generic specialization <serialized, Swift.Unicode.Scalar.UTF8View> of (extension in Swift):Swift.Sequence._copySequenceContents(initializing: Swift.UnsafeMutableBufferPointer<A.Element>) -> (A.Iterator, Swift.Int))
cmp rax, r12
jl .LBB10_12 // trap if the returned count is below r12
mov rbp, qword ptr [rbx + 16]
add rbp, rax // rbp = count + returned count
jo .LBB10_13 // trap on signed overflow
mov qword ptr [rbx + 16], rbp // store the new count
cmp rax, r14
je .LBB10_14 // returned count equals r14 (the space computed above): run the extra checks below
.LBB10_10:
mov qword ptr [r13], rbx // write the (possibly new) buffer pointer back through r13
add rsp, 64
pop rbx
pop r12
pop r14
pop r15
pop rbp
ret
.LBB10_11:
ud2 // trap
.LBB10_12:
ud2 // trap
.LBB10_13:
ud2 // trap
.LBB10_14:
lea rdi, [rip + (demangling cache variable for type metadata for [Swift.UInt8])]
call __swift_instantiateConcreteTypeFromMangledName
// Compare the returned metadata pointer with the symbol below. By its mangled
// name it is presumably the type metadata for Unicode.Scalar.UTF8View (TODO
// confirm the demangling).
cmp rax, qword ptr [rip + ($ss7UnicodeO6ScalarV8UTF8ViewVN)@GOTPCREL]
je .LBB10_10
// Reload two values from the stack area whose address was passed in rdi to
// _copySequenceContents. Presumably these are the returned iterator's scalar
// (dword at rsp + 48) and its position (qword at rsp + 56); TODO confirm.
mov r15, qword ptr [rsp + 56]
mov esi, dword ptr [rsp + 48]
// r14 = 1, 2, 3 or 4 by the magnitude of esi; finish if r15 == r14.
cmp esi, 127
ja .LBB10_17
mov r14d, 1
cmp r15, 1
je .LBB10_10
jmp .LBB10_22
.LBB10_17:
cmp esi, 2047
ja .LBB10_19
mov r14d, 2
cmp r15, 2
je .LBB10_10
jmp .LBB10_22
.LBB10_19:
cmp esi, 65535
ja .LBB10_21
mov r14d, 3
cmp r15, 3
je .LBB10_10
jmp .LBB10_22
.LBB10_21:
mov r14d, 4
cmp r15, 4
je .LBB10_10
// NOTE(review): everything from here on appears to be an element-by-element
// append loop for elements still left after the bulk copy. It is reached only
// when the copy returned exactly r14 elements, the metadata comparison above
// failed, and r15 != r14.
.LBB10_22:
mov rdi, r15
mov dword ptr [rsp + 4], esi // spill the scalar value
// Callee is referenced by its mangled name. It appears to be the
// Unicode.Scalar.UTF8View `subscript(Int) -> UInt8` getter (TODO confirm),
// called with rdi = r15 and esi = the scalar value.
call ($ss7UnicodeO6ScalarV8UTF8ViewVys5UInt8VSicig)@PLT
cmp r15, r14
jae .LBB10_46 // trap if r15 >= r14 (unsigned)
mov rdx, r15
inc rdx // rdx = r15 + 1
jo .LBB10_47 // trap on signed overflow
mov edi, eax // edi = value returned by the call (its low byte is stored at .LBB10_29)
mov ecx, dword ptr [rsp + 4]
cmp ecx, 65536
mov eax, 4
sbb rax, 0 // rax = 4 minus 1 when ecx < 65536
mov qword ptr [rsp + 8], rax // saved for reuse at .LBB10_36
jmp .LBB10_26
.LBB10_25:
mov rax, rbp
mov qword ptr [rbx + 16], rbp
.LBB10_26:
mov rax, qword ptr [rbx + 24]
mov r8, rax
shr r8 // r8 = (offset-24 word) >> 1
lea rsi, [rbp + 1]
cmp r8, rsi
jl .LBB10_42 // r8 < rbp + 1: create a larger buffer first
.LBB10_27:
mov rax, rbp
sub rax, r8
mov qword ptr [rsp + 40], rax // [rsp + 40] = rbp - r8
jge .LBB10_25
mov qword ptr [rsp + 16], r8
lea rax, [rbx + rbp]
add rax, 32
mov qword ptr [rsp + 32], rax // [rsp + 32] = buffer + 32 + rbp, base address for the stores below
xor r14d, r14d // r14 = 0, offset of the next store from that base
mov qword ptr [rsp + 24], rdx
.LBB10_29:
mov rax, qword ptr [rsp + 32]
mov byte ptr [rax + r14], dil // store one byte
// Compute rax = (rdx + r14) - length and r15 = length, where length is
// 1, 2, 3 or 4 by the magnitude of ecx.
cmp ecx, 127
ja .LBB10_31
lea rax, [rdx + r14]
dec rax
mov r15d, 1
jmp .LBB10_37
.LBB10_31:
cmp ecx, 2047
ja .LBB10_33
lea rax, [rdx + r14]
add rax, -2
mov r15d, 2
jmp .LBB10_37
.LBB10_33:
cmp ecx, 65535
ja .LBB10_35
lea rax, [rdx + r14]
add rax, -3
jmp .LBB10_36
.LBB10_35:
lea rax, [rdx + r14]
add rax, -4
.LBB10_36:
mov r15, qword ptr [rsp + 8] // reuse the 3-or-4 value saved after .LBB10_22
.LBB10_37:
test rax, rax
je .LBB10_43 // rdx + r14 equals the length: finish
lea r12, [rdx + r14]
mov rdi, r12
mov esi, ecx
call ($ss7UnicodeO6ScalarV8UTF8ViewVys5UInt8VSicig)@PLT
cmp r12, r15
jae .LBB10_44 // trap if r12 >= r15 (unsigned)
inc r12
jo .LBB10_45 // trap on signed overflow
mov edi, eax // edi = next byte to store
inc r14
mov rax, qword ptr [rsp + 40]
add rax, r14 // zero when r14 == r8 - rbp
mov ecx, dword ptr [rsp + 4]
mov rdx, qword ptr [rsp + 24]
jne .LBB10_29 // not zero yet: store the next byte
mov rdx, r12
mov rax, qword ptr [rsp + 16]
mov rbp, rax
mov qword ptr [rbx + 16], rax // count = the r8 value saved at [rsp + 16]
jmp .LBB10_26
.LBB10_42:
mov r14, rdx // keep rdx and edi in r14 / r15d across the call
mov r15d, edi
xor edi, edi
cmp rax, 2
setae dil // edi = 1 if the offset-24 word is >= 2 (unsigned), else 0
mov edx, 1
mov rcx, rbx
call (generic specialization <serialized, Swift.UInt8> of Swift._ContiguousArrayBuffer._consumeAndCreateNew(bufferIsUnique: Swift.Bool, minimumCapacity: Swift.Int, growForAppend: Swift.Bool) -> Swift._ContiguousArrayBuffer<A>)
mov edi, r15d
mov rdx, r14
mov ecx, dword ptr [rsp + 4]
mov rbx, rax // rbx = new buffer
mov r8, qword ptr [rax + 24]
shr r8
jmp .LBB10_27
.LBB10_43:
lea rax, [r14 + rbp]
inc rax
mov qword ptr [rbx + 16], rax // count = rbp + r14 + 1
jmp .LBB10_10
.LBB10_44:
ud2 // trap
.LBB10_45:
ud2 // trap
.LBB10_46:
ud2 // trap
.LBB10_47:
ud2 // trap
obviously most of this assembly never executes on the commonly-taken path. but i am especially alarmed by this section of the `Unicode.Scalar`-based invocation:
sub rsp, 64
mov ebp, edi
mov r12d, 1
cmp edi, 128 // <- we are still checking ASCII-ness ???
jb .LBB10_3
it seems that the compiler refuses to fully inline the `append(_ codepoint:Unicode.Scalar)` path (in the listing above, `test1` makes an out-of-line call to the `Array.append(contentsOf:)` specialization for `Unicode.Scalar.UTF8View`), probably because the logic needed to handle multi-byte encodings would result in a ridiculous amount of binary bloat, and this inability to inline has disastrous effects for a caller that just needs to write a `'<'` or a `'>'` to the output buffer.
i think it is inexcusable that in swift 5.8, we are still forced to spell ASCII characters in hexadecimal, which makes for one of the few situations where C++ code is more readable than its swift equivalent. how can we do better?