So, I rewrote it like this:
/// Reads the two bytes at `data[idx]` and `data[idx + 1]`, combines them
/// into a single `UInt16`, and then advances `idx` by 2.
///
/// When `bigEndian` is true, the byte at `idx` becomes the high-order byte
/// of the result; otherwise it becomes the low-order byte.
///
/// - Returns: The 16-bit value assembled from the two bytes.
mutating func get() -> UInt16 {
    // Widen each byte to 16 bits before shifting so no bits are dropped.
    let first = UInt16(self.data[self.idx])
    let second = UInt16(self.data[self.idx + 1])

    // Only the placement of the two bytes depends on the byte order.
    let value = self.bigEndian
        ? (first << 8) | second
        : first | (second << 8)

    self.idx += 2
    return value
}
and I still get an insane number of calls:
$ xcrun -sdk macosx swiftc -O -emit-assembly BinaryReader.swift
<snip>
.private_extern _$s12BinaryReaderAAV3gets6UInt16VyF
.globl _$s12BinaryReaderAAV3gets6UInt16VyF
.p2align 4, 0x90
_$s12BinaryReaderAAV3gets6UInt16VyF:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
.cfi_offset %rbx, -48
.cfi_offset %r12, -40
.cfi_offset %r14, -32
.cfi_offset %r15, -24
movb 24(%r13), %r12b
movq (%r13), %rbx
movq 8(%r13), %r14
movq 16(%r13), %r15
movq %rbx, %rdi
movq %r14, %rsi
callq _$s10Foundation4DataV15_RepresentationOWOy
movq %r15, %rdi
movq %rbx, %rsi
movq %r14, %rdx
callq _$s10Foundation4DataVys5UInt8VSicig
movl %eax, %r15d
movq %rbx, %rdi
movq %r14, %rsi
callq _$s10Foundation4DataV15_RepresentationOWOe
movzbl %r15b, %ecx
movq (%r13), %r15
movq 8(%r13), %r14
movq 16(%r13), %rbx
incq %rbx
seto %al
cmpb $1, %r12b
jne LBB7_3
testb %al, %al
jne LBB7_9
movzwl %cx, %r12d
shll $8, %r12d
movq %r15, %rdi
movq %r14, %rsi
callq _$s10Foundation4DataV15_RepresentationOWOy
movq %rbx, %rdi
movq %r15, %rsi
movq %r14, %rdx
callq _$s10Foundation4DataVys5UInt8VSicig
movl %eax, %ebx
movq %r15, %rdi
movq %r14, %rsi
callq _$s10Foundation4DataV15_RepresentationOWOe
movzbl %bl, %eax
orl %r12d, %eax
jmp LBB7_5
LBB7_3:
testb %al, %al
jne LBB7_7
movl %ecx, %r12d
movq %r15, %rdi
movq %r14, %rsi
callq _$s10Foundation4DataV15_RepresentationOWOy
movq %rbx, %rdi
movq %r15, %rsi
movq %r14, %rdx
callq _$s10Foundation4DataVys5UInt8VSicig
movl %eax, %ebx
movq %r15, %rdi
movq %r14, %rsi
callq _$s10Foundation4DataV15_RepresentationOWOe
movzbl %bl, %ecx
shll $8, %ecx
movzwl %r12w, %eax
orl %ecx, %eax
LBB7_5:
movq 16(%r13), %rcx
addq $2, %rcx
jo LBB7_8
movq %rcx, 16(%r13)
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
retq
LBB7_8:
## InlineAsm Start
## InlineAsm End
ud2
LBB7_9:
## InlineAsm Start
## InlineAsm End
ud2
LBB7_7:
## InlineAsm Start
## InlineAsm End
ud2
.cfi_endproc
<snip>
Digging into this a little bit, I see:
$ xcrun swift-demangle s10Foundation4DataV15_RepresentationOWOy
$s10Foundation4DataV15_RepresentationOWOy ---> outlined copy of Foundation.Data._Representation
That it says “outlined copy” gives me pause. Am I not calling swiftc with the right optimization options? In any case, I feel like this should boil down to a few lines of assembly (some moves, shifts, and ors, with a branch).