Does String(bytesNoCopy:) copy bytes?

i did an "audit" of the available noCopy variants. the most interesting are CFStringCreateWithBytesNoCopy and CFStringCreateWithCharactersNoCopy, NSString(bytesNoCopy:) and NSString(charactersNoCopy:), String(bytesNoCopy:) and String(utf16CodeUnitsNoCopy:), CFDataCreateWithBytesNoCopy, NSData(bytesNoCopy:) and Data(bytesNoCopy:). i ran a bunch of tests on all of those to see the actual behaviour in different cases.

whether a copy actually happens depends on several factors, some of which are listed below. interestingly, it is indeed currently possible to create a swift value type whose value can be changed indirectly -- which is very bad, although i haven't managed to fry my computer by doing this so far.

factors that matter:

  • encoding (for strings created with "bytes" variant)
  • encoding endianness (for strings created with "bytes" variant)
  • length (short vs long)
  • CF vs NS vs Swift. swift types bridged from CF tend to reproduce the "indirectly changeable" behaviour of the underlying CF object; swift types bridged from NS tend not to.

in the table below "CF" corresponds to CF strings/data, "NS" to NSString / NSData, and "S" to Swift string/data. "NS(CF)" corresponds to an NS type bridged from a CF type; similarly "S(CF)" and "S(NS)" are Swift types bridged from CF or NS types. "Y" corresponds to the "indirectly changeable" behaviour (particularly troublesome for swift value types), "N" to its absence.

the most "dangerous" Y entries are marked with *: these are the cases where swift value types can be changed indirectly.

the Data(bytesNoCopy:) case is the most dangerous of them all, as it doesn't even involve CF/NS bridging.

full results
CFString ascii      len:9  CF:N  NS(CF):N  S(CF):N  S(NS):N
CFString ascii      len:10 CF:Y  NS(CF):Y  S(CF):Y* S(NS):Y*
CFString utf8       len:9  CF:N  NS(CF):N  S(CF):N  S(NS):N
CFString utf8       len:10 CF:Y  NS(CF):Y  S(CF):Y* S(NS):Y*
CFString utf16LE    len:1  CF:Y  NS(CF):Y  S(CF):Y* S(NS):Y*
CFString utf16BE    len:1  CF:N  NS(CF):N  S(CF):N  S(NS):N
CFString_chars      len:1  CF:Y  NS(CF):Y  S(CF):Y* S(NS):Y*
NSString utf8       len:9        NS:N      S(NS):N
NSString utf8       len:10       NS:Y      S(NS):N
NSString utf16LE    len:1        NS:Y      S(NS):N
NSString utf16BE    len:20       NS:N      S(NS):N
NSString_chars      len:1        NS:Y      S(NS):N
String utf8         len:20                 S:N
String utf16LE      len:20                 S:N
String_chars        len:20                 S:N
CFData              len:14 CF:Y  NS(CF):Y  S(CF):N  S(NS):N
CFData              len:15 CF:Y  NS(CF):Y  S(CF):Y* S(NS):Y*
NSData              len:1        NS:Y      S(NS):N
NSData              len:20       NS:Y      S(NS):N
Data                len:14                 S:N
Data                len:15                 S:Y*

perhaps the CF/NS reference types per se don't matter much. with regard to swift (value) types, i suggest fixing the broken bridging cases (always copying the bytes in these cases) and deprecating/removing the "noCopy" string/data initializers. until anything is fixed here, beware of the dangers of using noCopy initialisers with swift value types, as per Ben Cohen's comment above.

full test code
import Foundation

/// Checks whether a `CFString` created with `CFStringCreateWithBytesNoCopy`, and the
/// `NSString` / `String` values bridged from it, change when the backing buffer is mutated.
///
/// The buffer is filled with `count` copies of `value1`, every string is rendered to text,
/// the first element is overwritten with `value2`, and every string is rendered again.
/// One result row is printed: "Y" when a rendering changed, "N" when it did not, and
/// "Y*" when the value that changed is a Swift `String`.
///
/// - Parameters:
///   - testName: Label printed at the start of the result row.
///   - count: Number of `T` elements placed in the buffer.
///   - value1: Element the buffer is initially filled with.
///   - value2: Element written over the first element once the strings exist.
///   - encoding: Encoding CoreFoundation uses to interpret the bytes.
///   - printDetails: When `true`, also prints every rendering before and after the write.
func test_CFString_bytes<T>(_ testName: String, count: Int, value1: T, value2: T, encoding: CFStringBuiltInEncodings, printDetails: Bool = false) {
    let size = count * MemoryLayout<T>.size
    let bytes = malloc(size)!
    // The string is created with `kCFAllocatorNull` below, so this function keeps
    // ownership of the buffer and has to release it itself on exit.
    defer { free(bytes) }
    let chars = bytes.assumingMemoryBound(to: T.self)

    for i in 0 ..< count {
        chars[i] = value1
    }

    let p = bytes.assumingMemoryBound(to: UInt8.self)
    // `kCFAllocatorNull` as the contents deallocator: with `nil` CoreFoundation uses the
    // default allocator and is allowed to free the buffer (right away, if it decides to
    // copy the contents), which would turn the write below into a write to freed memory.
    let cf = CFStringCreateWithBytesNoCopy(nil, p, size, encoding.rawValue, false, kCFAllocatorNull)!
    let ns = cf as NSString
    let sc = cf as String
    let sn = ns as String

    // Renderings taken before the buffer is touched.
    let cf1 = "\(cf)"
    let ns1 = "\(ns)"
    let sc1 = "\(sc)"
    let sn1 = "\(sn)"
    // Mutate the buffer behind the strings' backs.
    chars.pointee = value2
    // Renderings taken after the write; any difference means the buffer is shared.
    let cf2 = "\(cf)"
    let ns2 = "\(ns)"
    let sc2 = "\(sc)"
    let sn2 = "\(sn)"

    if printDetails {
        print("cf1: \(cf1)")
        print("ns1: \(ns1)")
        print("sc1: \(sc1)")
        print("sn1: \(sn1)")
        print("-- modification --")
        print("cf2: \(cf2)")
        print("ns2: \(ns2)")
        print("sc2: \(sc2)")
        print("sn2: \(sn2)")
    }

    print("\(testName)\tlen:\(count)\tCF:\(neq(cf1,cf2))\tNS(CF):\(neq(ns1,ns2))\tS(CF):\(neq(sc1,sc2,true))\tS(NS):\(neq(sn1,sn2,true))")
}

/// Checks whether a `CFString` created with `CFStringCreateWithCharactersNoCopy`, and the
/// `NSString` / `String` values bridged from it, change when the backing buffer is mutated.
///
/// The buffer is filled with `count` copies of `value1`, every string is rendered to text,
/// the first code unit is overwritten with `value2`, and every string is rendered again.
/// One result row is printed: "Y" when a rendering changed, "N" when it did not, and
/// "Y*" when the value that changed is a Swift `String`.
///
/// - Parameters:
///   - testName: Label printed at the start of the result row.
///   - count: Number of `UniChar` code units placed in the buffer.
///   - value1: Code unit the buffer is initially filled with.
///   - value2: Code unit written over the first one once the strings exist.
///   - printDetails: When `true`, also prints every rendering before and after the write.
func test_CFString_chars(_ testName: String, count: Int, value1: UniChar, value2: UniChar, printDetails: Bool = false) {
    let size = count * MemoryLayout<UniChar>.size
    let bytes = malloc(size)!
    // The string is created with `kCFAllocatorNull` below, so this function keeps
    // ownership of the buffer and has to release it itself on exit.
    defer { free(bytes) }
    let chars = bytes.assumingMemoryBound(to: UniChar.self)

    for i in 0 ..< count {
        chars[i] = value1
    }

    // `kCFAllocatorNull` as the contents deallocator: with `nil` CoreFoundation uses the
    // default allocator and is allowed to free the buffer (right away, if it decides to
    // copy the contents), which would turn the write below into a write to freed memory.
    let cf = CFStringCreateWithCharactersNoCopy(nil, chars, count, kCFAllocatorNull)!
    let ns = cf as NSString
    let sc = cf as String
    let sn = ns as String

    // Renderings taken before the buffer is touched.
    let cf1 = "\(cf)"
    let ns1 = "\(ns)"
    let sc1 = "\(sc)"
    let sn1 = "\(sn)"
    // Mutate the buffer behind the strings' backs.
    chars.pointee = value2
    // Renderings taken after the write; any difference means the buffer is shared.
    let cf2 = "\(cf)"
    let ns2 = "\(ns)"
    let sc2 = "\(sc)"
    let sn2 = "\(sn)"

    if printDetails {
        print("cf1: \(cf1)")
        print("ns1: \(ns1)")
        print("sc1: \(sc1)")
        print("sn1: \(sn1)")
        print("-- modification --")
        print("cf2: \(cf2)")
        print("ns2: \(ns2)")
        print("sc2: \(sc2)")
        print("sn2: \(sn2)")
    }

    print("\(testName)\tlen:\(count)\tCF:\(neq(cf1,cf2))\tNS(CF):\(neq(ns1,ns2))\tS(CF):\(neq(sc1,sc2,true))\tS(NS):\(neq(sn1,sn2,true))")
}

/// Checks whether an `NSString` created with `NSString(bytesNoCopy:...)`, and the `String`
/// bridged from it, change when the backing buffer is mutated.
///
/// The buffer is filled with `count` copies of `value1`, both strings are rendered to text,
/// the first element is overwritten with `value2`, and both strings are rendered again.
/// One result row is printed: "Y" when a rendering changed, "N" when it did not, and
/// "Y*" when the value that changed is the Swift `String`.
///
/// - Parameters:
///   - testName: Label printed at the start of the result row.
///   - count: Number of `T` elements placed in the buffer.
///   - value1: Element the buffer is initially filled with.
///   - value2: Element written over the first element once the strings exist.
///   - encoding: Encoding Foundation uses to interpret the bytes.
///   - printDetails: When `true`, also prints every rendering before and after the write.
func test_NSString_bytes<T>(_ testName: String, count: Int, value1: T, value2: T, encoding: String.Encoding, printDetails: Bool = false) {
    let size = count * MemoryLayout<T>.size
    let bytes = malloc(size)!
    // `freeWhenDone: false` below leaves the buffer owned by this function; without this
    // `defer` every call would leak it.
    defer { free(bytes) }
    let chars = bytes.assumingMemoryBound(to: T.self)

    for i in 0 ..< count {
        chars[i] = value1
    }

    let ns = NSString(bytesNoCopy: bytes, length: size, encoding: encoding.rawValue, freeWhenDone: false)!
    let sn = ns as String

    // Renderings taken before the buffer is touched.
    let ns1 = "\(ns)"
    let sn1 = "\(sn)"
    // Mutate the buffer behind the strings' backs.
    chars.pointee = value2
    // Renderings taken after the write; any difference means the buffer is shared.
    let ns2 = "\(ns)"
    let sn2 = "\(sn)"

    if printDetails {
        print("ns1: \(ns1)")
        print("sn1: \(sn1)")
        print("-- modification --")
        print("ns2: \(ns2)")
        print("sn2: \(sn2)")
    }

    print("\(testName)\tlen:\(count)\t      \tNS:\(neq(ns1,ns2))    \tS(NS):\(neq(sn1,sn2,true))")
}

/// Checks whether an `NSString` created with `NSString(charactersNoCopy:...)`, and the
/// `String` bridged from it, change when the backing buffer is mutated.
///
/// The buffer is filled with `count` copies of `value1`, both strings are rendered to text,
/// the first code unit is overwritten with `value2`, and both strings are rendered again.
/// One result row is printed: "Y" when a rendering changed, "N" when it did not, and
/// "Y*" when the value that changed is the Swift `String`.
///
/// - Parameters:
///   - testName: Label printed at the start of the result row.
///   - count: Number of `UniChar` code units placed in the buffer.
///   - value1: Code unit the buffer is initially filled with.
///   - value2: Code unit written over the first one once the strings exist.
///   - printDetails: When `true`, also prints every rendering before and after the write.
func test_NSString_chars(_ testName: String, count: Int, value1: UniChar, value2: UniChar, printDetails: Bool = false) {
    let size = count * MemoryLayout<UniChar>.size
    let bytes = malloc(size)!
    // `freeWhenDone: false` below leaves the buffer owned by this function; without this
    // `defer` every call would leak it.
    defer { free(bytes) }
    let chars = bytes.assumingMemoryBound(to: UniChar.self)

    for i in 0 ..< count {
        chars[i] = value1
    }

    let ns = NSString(charactersNoCopy: chars, length: count, freeWhenDone: false)
    let sn = ns as String

    // Renderings taken before the buffer is touched.
    let ns1 = "\(ns)"
    let sn1 = "\(sn)"
    // Mutate the buffer behind the strings' backs.
    chars.pointee = value2
    // Renderings taken after the write; any difference means the buffer is shared.
    let ns2 = "\(ns)"
    let sn2 = "\(sn)"

    if printDetails {
        print("ns1: \(ns1)")
        print("sn1: \(sn1)")
        print("-- modification --")
        print("ns2: \(ns2)")
        print("sn2: \(sn2)")
    }

    print("\(testName)\tlen:\(count)\t      \tNS:\(neq(ns1,ns2))    \tS(NS):\(neq(sn1,sn2,true))")
}

/// Checks whether a Swift `String` created with `String(bytesNoCopy:...)` changes when the
/// buffer it was created from is mutated.
///
/// The buffer is filled with `count` copies of `value1`, the string is rendered to text,
/// the first element is overwritten with `value2`, and the string is rendered again.
/// One result row is printed: "Y*" when the rendering changed, "N" when it did not.
///
/// - Parameters:
///   - testName: Label printed at the start of the result row.
///   - count: Number of `T` elements placed in the buffer.
///   - value1: Element the buffer is initially filled with.
///   - value2: Element written over the first element once the string exists.
///   - encoding: Encoding used to interpret the bytes.
///   - printDetails: When `true`, also prints the rendering before and after the write.
func test_String_bytes<T>(_ testName: String, count: Int, value1: T, value2: T, encoding: String.Encoding, printDetails: Bool = false) {
    let size = count * MemoryLayout<T>.size
    let bytes = malloc(size)!
    // `freeWhenDone: false` below leaves the buffer owned by this function; without this
    // `defer` every call would leak it.
    defer { free(bytes) }
    let chars = bytes.assumingMemoryBound(to: T.self)

    for i in 0 ..< count {
        chars[i] = value1
    }

    let s = String(bytesNoCopy: bytes, length: size, encoding: encoding, freeWhenDone: false)!

    // Rendering before the write, the write itself, and the rendering after it.
    let s1 = "\(s)"
    chars.pointee = value2
    let s2 = "\(s)"

    if printDetails {
        print("s1: \(s1)")
        print("-- modification --")
        print("s2: \(s2)")
    }

    print("\(testName)\tlen:\(count)\t       \t        \tS:\(neq(s1,s2,true))")
}

/// Checks whether a Swift `String` created with `String(utf16CodeUnitsNoCopy:...)` changes
/// when the buffer it was created from is mutated.
///
/// The buffer is filled with `count` copies of `value1`, the string is rendered to text,
/// the first code unit is overwritten with `value2`, and the string is rendered again.
/// One result row is printed: "Y*" when the rendering changed, "N" when it did not.
///
/// - Parameters:
///   - testName: Label printed at the start of the result row.
///   - count: Number of `unichar` code units placed in the buffer.
///   - value1: Code unit the buffer is initially filled with.
///   - value2: Code unit written over the first one once the string exists.
///   - printDetails: When `true`, also prints the rendering before and after the write.
func test_String_chars(_ testName: String, count: Int, value1: unichar, value2: unichar, printDetails: Bool = false) {
    let size = count * MemoryLayout<unichar>.size
    let bytes = malloc(size)!
    // `freeWhenDone: false` below leaves the buffer owned by this function; without this
    // `defer` every call would leak it.
    defer { free(bytes) }
    let chars = bytes.assumingMemoryBound(to: unichar.self)

    for i in 0 ..< count {
        chars[i] = value1
    }

    let s = String(utf16CodeUnitsNoCopy: chars, count: count, freeWhenDone: false)

    // Rendering before the write, the write itself, and the rendering after it.
    let s1 = "\(s)"
    chars.pointee = value2
    let s2 = "\(s)"

    if printDetails {
        print("s1: \(s1)")
        print("-- modification --")
        print("s2: \(s2)")
    }

    print("\(testName)\tlen:\(count)\t        \t       \tS:\(neq(s1,s2,true))")
}

/// Probes `CFDataCreateWithBytesNoCopy`: does the `CFData`, or the `NSData` / `Data` values
/// bridged from it, observe a later write to the buffer it was created from?
///
/// Fills a buffer with `count` copies of `value1`, records a textual snapshot of every
/// object, overwrites the first byte with `value2`, records a second snapshot and prints
/// one result row ("Y" = snapshot changed, "N" = unchanged, "Y*" = a Swift `Data` changed).
///
/// - Parameters:
///   - testName: Label printed at the start of the result row.
///   - count: Number of bytes placed in the buffer.
///   - value1: Byte the buffer is initially filled with.
///   - value2: Byte written over the first byte once the data objects exist.
///   - printDetails: When `true`, also prints every snapshot before and after the write.
func test_CFData_bytes(_ testName: String, count: Int, value1: UInt8, value2: UInt8, printDetails: Bool = false) {
    let storage = malloc(count)!
    let octets = storage.assumingMemoryBound(to: UInt8.self)
    for offset in 0 ..< count {
        octets[offset] = value1
    }

    let cfData = CFDataCreateWithBytesNoCopy(nil, octets, count, nil)!
    let nsData = cfData as NSData
    let swiftFromCF = cfData as Data
    let swiftFromNS = nsData as Data

    // Snapshot of all four objects before the buffer is modified.
    let before = (
        cf: "\(cfData)",
        ns: "\(nsData)",
        sc: swiftFromCF.hexString,
        sn: swiftFromNS.hexString
    )

    // Overwrite the first byte directly in the buffer.
    octets[0] = value2

    // Snapshot of the same objects after the modification.
    let after = (
        cf: "\(cfData)",
        ns: "\(nsData)",
        sc: swiftFromCF.hexString,
        sn: swiftFromNS.hexString
    )

    if printDetails {
        print("cf1: \(before.cf)")
        print("ns1: \(before.ns)")
        print("sc1: \(before.sc)")
        print("sn1: \(before.sn)")
        print("-- modification --")
        print("cf2: \(after.cf)")
        print("ns2: \(after.ns)")
        print("sc2: \(after.sc)")
        print("sn2: \(after.sn)")
    }

    let cfFlag = neq(before.cf, after.cf)
    let nsFlag = neq(before.ns, after.ns)
    let scFlag = neq(before.sc, after.sc, true)
    let snFlag = neq(before.sn, after.sn, true)
    print("\(testName)\tlen:\(count)\tCF:\(cfFlag)\tNS(CF):\(nsFlag)\tS(CF):\(scFlag)\tS(NS):\(snFlag)")
}

/// Checks whether an `NSData` created with `NSData(bytesNoCopy:...)`, and the `Data`
/// bridged from it, change when the backing buffer is mutated.
///
/// The buffer is filled with `count` copies of `value1`, both objects are rendered to text,
/// the first byte is overwritten with `value2`, and both objects are rendered again.
/// One result row is printed: "Y" when a rendering changed, "N" when it did not, and
/// "Y*" when the value that changed is the Swift `Data`.
///
/// - Parameters:
///   - testName: Label printed at the start of the result row.
///   - count: Number of bytes placed in the buffer.
///   - value1: Byte the buffer is initially filled with.
///   - value2: Byte written over the first byte once the data objects exist.
///   - printDetails: When `true`, also prints every rendering before and after the write.
func test_NSData_bytes(_ testName: String, count: Int, value1: UInt8, value2: UInt8, printDetails: Bool = false) {
    let bytes = malloc(count)!
    // `freeWhenDone: false` below leaves the buffer owned by this function; without this
    // `defer` every call would leak it.
    defer { free(bytes) }
    let chars = bytes.assumingMemoryBound(to: UInt8.self)

    for i in 0 ..< count {
        chars[i] = value1
    }

    let ns = NSData(bytesNoCopy: bytes, length: count, freeWhenDone: false)
    let sn = ns as Data

    // Renderings taken before the buffer is touched.
    let ns1 = "\(ns)"
    let sn1 = "\(sn.hexString)"
    // Mutate the buffer behind the data objects' backs.
    chars.pointee = value2
    // Renderings taken after the write; any difference means the buffer is shared.
    let ns2 = "\(ns)"
    let sn2 = "\(sn.hexString)"

    if printDetails {
        print("ns1: \(ns1)")
        print("sn1: \(sn1)")
        print("-- modification --")
        print("ns2: \(ns2)")
        print("sn2: \(sn2)")
    }

    print("\(testName)\tlen:\(count)\t       \tNS:\(neq(ns1,ns2))   \tS(NS):\(neq(sn1,sn2,true))")
}

/// Checks whether a Swift `Data` created with `Data(bytesNoCopy:count:deallocator:)`
/// changes when the buffer it was created from is mutated.
///
/// The buffer is filled with `count` copies of `value1`, the data is rendered as hex,
/// the first byte is overwritten with `value2`, and the data is rendered again.
/// One result row is printed: "Y*" when the rendering changed, "N" when it did not.
///
/// - Parameters:
///   - testName: Label printed at the start of the result row.
///   - count: Number of bytes placed in the buffer.
///   - value1: Byte the buffer is initially filled with.
///   - value2: Byte written over the first byte once the data exists.
///   - printDetails: When `true`, also prints the rendering before and after the write.
func test_Data_bytes(_ testName: String, count: Int, value1: UInt8, value2: UInt8, printDetails: Bool = false) {
    let bytes = malloc(count)!
    // `deallocator: .none` below leaves the buffer owned by this function; without this
    // `defer` every call would leak it.
    defer { free(bytes) }
    let chars = bytes.assumingMemoryBound(to: UInt8.self)

    for i in 0 ..< count {
        chars[i] = value1
    }

    let s = Data(bytesNoCopy: bytes, count: count, deallocator: .none)

    // Rendering before the write, the write itself, and the rendering after it.
    let s1 = "\(s.hexString)"
    chars.pointee = value2
    let s2 = "\(s.hexString)"

    if printDetails {
        print("s1: \(s1)")
        print("-- modification --")
        print("s2: \(s2)")
    }

    print("\(testName)\tlen:\(count)\t        \t       \tS:\(neq(s1,s2,true))")
}

/// Runs every no-copy case in a fixed order; each call prints one row of the results table,
/// and an empty line is printed at the end.
func test() {
    // Fill value (0x41, ASCII "A") and overwrite value (0x42, ASCII "B") as single bytes.
    let fill8 = UInt8(0x41)
    let poke8 = UInt8(0x42)
    // The same two values as 16-bit units.
    let fill16 = UInt16(0x41)
    let poke16 = UInt16(0x42)
    // The same 16-bit units with their two bytes swapped, used by the UTF-16BE cases.
    let fill16Swapped = UInt16(0x4100)
    let poke16Swapped = UInt16(0x4200)

    // CFString created from bytes / characters.
    test_CFString_bytes("CFString ascii   ", count: 9, value1: fill8, value2: poke8, encoding: .ASCII)
    test_CFString_bytes("CFString ascii   ", count: 10, value1: fill8, value2: poke8, encoding: .ASCII)
    test_CFString_bytes("CFString utf8    ", count: 9, value1: fill8, value2: poke8, encoding: .UTF8)
    test_CFString_bytes("CFString utf8    ", count: 10, value1: fill8, value2: poke8, encoding: .UTF8)
    test_CFString_bytes("CFString utf16LE ", count: 1, value1: fill16, value2: poke16, encoding: .UTF16LE)
    test_CFString_bytes("CFString utf16BE ", count: 1, value1: fill16Swapped, value2: poke16Swapped, encoding: .UTF16BE)
    test_CFString_chars("CFString_chars   ", count: 1, value1: fill16, value2: poke16)

    // NSString created from bytes / characters.
    test_NSString_bytes("NSString utf8    ", count: 9, value1: fill8, value2: poke8, encoding: .utf8)
    test_NSString_bytes("NSString utf8    ", count: 10, value1: fill8, value2: poke8, encoding: .utf8)
    test_NSString_bytes("NSString utf16LE ", count: 1, value1: fill16, value2: poke16, encoding: .utf16LittleEndian)
    test_NSString_bytes("NSString utf16BE ", count: 20, value1: fill16Swapped, value2: poke16Swapped, encoding: .utf16BigEndian)
    test_NSString_chars("NSString_chars   ", count: 1, value1: fill16, value2: poke16)

    // Swift String created from bytes / UTF-16 code units.
    test_String_bytes("String utf8      ", count: 20, value1: fill8, value2: poke8, encoding: .utf8)
    test_String_bytes("String utf16LE   ", count: 20, value1: fill16, value2: poke16, encoding: .utf16LittleEndian)
    test_String_chars("String_chars     ", count: 20, value1: fill16, value2: poke16)

    // CFData, NSData and Swift Data.
    test_CFData_bytes("CFData           ", count: 14, value1: fill8, value2: poke8)
    test_CFData_bytes("CFData           ", count: 15, value1: fill8, value2: poke8)

    test_NSData_bytes("NSData           ", count: 1, value1: fill8, value2: poke8)
    test_NSData_bytes("NSData           ", count: 20, value1: fill8, value2: poke8)

    test_Data_bytes("Data             ", count: 14, value1: fill8, value2: poke8)
    test_Data_bytes("Data             ", count: 15, value1: fill8, value2: poke8)
    print()
}

/// Formats the outcome of a before/after comparison as a two-character table cell.
///
/// - Parameters:
///   - a: Value observed before the modification.
///   - b: Value observed after the modification.
///   - dangerous: Whether a difference should be flagged with `*`.
/// - Returns: `"N "` when the values are equal; otherwise `"Y*"` if `dangerous` is `true`
///   and `"Y "` if it is not.
func neq<T: Equatable>(_ a: T, _ b: T, _ dangerous: Bool = false) -> String {
    guard a != b else {
        // Equal values are never flagged, whatever `dangerous` says.
        return "N "
    }
    return dangerous ? "Y*" : "Y "
}

extension Data {
    /// The bytes of this data rendered as lowercase hexadecimal, two digits per byte,
    /// with no separators. Empty data yields an empty string.
    var hexString: String {
        map { byte in String(format: "%02x", byte) }.joined()
    }
}

// Script entry point: run every case; each one prints a row of the results table.
test()
3 Likes