I did an "audit" of the available noCopy variants. The most interesting are CFStringCreateWithBytesNoCopy and CFStringCreateWithCharactersNoCopy, NSString(bytesNoCopy:) and NSString(charactersNoCopy:), String(bytesNoCopy:) and String(utf16CodeUnitsNoCopy:), CFDataCreateWithBytesNoCopy, NSData(bytesNoCopy:) and Data(bytesNoCopy:). I ran a bunch of tests on all of them to see the actual behaviour in different cases.
Whether a copy actually happens depends on several factors, some of which are listed below. Interestingly, it is indeed currently possible to create a Swift value type whose value can be changed indirectly -- which is very bad, although I haven't managed to fry my computer by doing this so far.
factors that matter:
- encoding (for strings created with "bytes" variant)
- encoding endianness (for strings created with "bytes" variant)
- length (short vs long)
- CF vs NS vs Swift. Swift types bridged from CF tend to reproduce the no-copy behaviour; Swift types bridged from NS tend not to.
In the table below, "CF" corresponds to CF strings/data, "NS" to NSString / NSData, and "S" to Swift String / Data. "NS(CF)" corresponds to an NS type bridged from a CF type; similarly, "S(CF)" and "S(NS)" are Swift types bridged from CF or NS types. "Y" corresponds to "indirectly changeable" behaviour (particularly troublesome for Swift value types); "N" means the value did not change.
The most "dangerous" Y entries are marked with *: these are the cases where Swift value types can be changed indirectly.
Data(bytesNoCopy:) case is the most dangerous of them all as it doesn't even involve CF/NS bridging.
full results
CFString ascii len:9 CF:N NS(CF):N S(CF):N S(NS):N
CFString ascii len:10 CF:Y NS(CF):Y S(CF):Y* S(NS):Y*
CFString utf8 len:9 CF:N NS(CF):N S(CF):N S(NS):N
CFString utf8 len:10 CF:Y NS(CF):Y S(CF):Y* S(NS):Y*
CFString utf16LE len:1 CF:Y NS(CF):Y S(CF):Y* S(NS):Y*
CFString utf16BE len:1 CF:N NS(CF):N S(CF):N S(NS):N
CFString_chars len:1 CF:Y NS(CF):Y S(CF):Y* S(NS):Y*
NSString utf8 len:9 NS:N S(NS):N
NSString utf8 len:10 NS:Y S(NS):N
NSString utf16LE len:1 NS:Y S(NS):N
NSString utf16BE len:20 NS:N S(NS):N
NSString_chars len:1 NS:Y S(NS):N
String utf8 len:20 S:N
String utf16LE len:20 S:N
String_chars len:20 S:N
CFData len:14 CF:Y NS(CF):Y S(CF):N S(NS):N
CFData len:15 CF:Y NS(CF):Y S(CF):Y* S(NS):Y*
NSData len:1 NS:Y S(NS):N
NSData len:20 NS:Y S(NS):N
Data len:14 S:N
Data len:15 S:Y*
Perhaps the CF/NS reference types per se don't matter much here. With regard to Swift (value) types, I suggest fixing the broken bridging cases (always copy the bytes in these cases) and deprecating/removing the "noCopy" string/data initializers. Until something is fixed here, beware of the dangers of using noCopy initialisers with Swift value types, as per Ben Cohen's comment above.
full test code
import Foundation
/// Probes whether a string made with `CFStringCreateWithBytesNoCopy` (and its bridged forms) still
/// reflects later writes to the buffer it was created from.
///
/// The buffer is filled with `count` copies of `value1`, the string and its NSString / String bridges
/// are rendered to text, the first element is overwritten with `value2`, and everything is rendered
/// again. One tab-separated result row is printed; each column is produced by `neq`.
///
/// - Parameters:
///   - testName: Label printed in the first column of the result row.
///   - count: Number of `T` elements placed in the buffer.
///   - value1: Element value the buffer is filled with.
///   - value2: Element value written over the first element after the strings exist.
///   - encoding: Encoding handed to CoreFoundation for interpreting the bytes.
///   - printDetails: When `true`, the before/after renderings are printed as well.
func test_CFString_bytes<T>(_ testName: String, count: Int, value1: T, value2: T, encoding: CFStringBuiltInEncodings, printDetails: Bool = false) {
    let byteCount = MemoryLayout<T>.size * count
    let rawBuffer = malloc(byteCount)!
    let units = rawBuffer.assumingMemoryBound(to: T.self)
    (0 ..< count).forEach { units[$0] = value1 }

    // NOTE(review): the trailing nil is the contentsDeallocator. Per the CFString documentation nil
    // selects the default allocator, i.e. CF takes over the buffer -- presumably why nothing here
    // calls free(). Confirm before changing either side of that arrangement.
    let cfString = CFStringCreateWithBytesNoCopy(nil, rawBuffer.assumingMemoryBound(to: UInt8.self), byteCount, encoding.rawValue, false, nil)!
    let nsString = cfString as NSString
    let swiftFromCF = cfString as String
    let swiftFromNS = nsString as String

    // Render everything before and after touching the underlying memory.
    let before = (cf: "\(cfString)", ns: "\(nsString)", sc: "\(swiftFromCF)", sn: "\(swiftFromNS)")
    units[0] = value2
    let after = (cf: "\(cfString)", ns: "\(nsString)", sc: "\(swiftFromCF)", sn: "\(swiftFromNS)")

    if printDetails {
        let report = [
            "cf1: \(before.cf)",
            "ns1: \(before.ns)",
            "sc1: \(before.sc)",
            "sn1: \(before.sn)",
            "-- modification --",
            "cf2: \(after.cf)",
            "ns2: \(after.ns)",
            "sc2: \(after.sc)",
            "sn2: \(after.sn)",
        ]
        report.forEach { print($0) }
    }

    let cfFlag = neq(before.cf, after.cf)
    let nsFlag = neq(before.ns, after.ns)
    let scFlag = neq(before.sc, after.sc, true)
    let snFlag = neq(before.sn, after.sn, true)
    print("\(testName)\tlen:\(count)\tCF:\(cfFlag)\tNS(CF):\(nsFlag)\tS(CF):\(scFlag)\tS(NS):\(snFlag)")
}
/// Probes whether a string made with `CFStringCreateWithCharactersNoCopy` (and its bridged forms)
/// still reflects later writes to the UTF-16 buffer it was created from.
///
/// The buffer is filled with `count` copies of `value1`, the string and its NSString / String bridges
/// are rendered to text, the first code unit is overwritten with `value2`, and everything is rendered
/// again. One tab-separated result row is printed; each column is produced by `neq`.
///
/// - Parameters:
///   - testName: Label printed in the first column of the result row.
///   - count: Number of UTF-16 code units placed in the buffer.
///   - value1: Code unit the buffer is filled with.
///   - value2: Code unit written over the first element after the strings exist.
///   - printDetails: When `true`, the before/after renderings are printed as well.
func test_CFString_chars(_ testName: String, count: Int, value1: UniChar, value2: UniChar, printDetails: Bool = false) {
    let byteCount = MemoryLayout<UniChar>.size * count
    let rawBuffer = malloc(byteCount)!
    let units = rawBuffer.assumingMemoryBound(to: UniChar.self)
    (0 ..< count).forEach { units[$0] = value1 }

    // NOTE(review): the trailing nil is the contentsDeallocator. Per the CFString documentation nil
    // selects the default allocator, i.e. CF takes over the buffer -- presumably why nothing here
    // calls free(). Confirm before changing either side of that arrangement.
    let cfString = CFStringCreateWithCharactersNoCopy(nil, units, count, nil)!
    let nsString = cfString as NSString
    let swiftFromCF = cfString as String
    let swiftFromNS = nsString as String

    // Render everything before and after touching the underlying memory.
    let before = (cf: "\(cfString)", ns: "\(nsString)", sc: "\(swiftFromCF)", sn: "\(swiftFromNS)")
    units[0] = value2
    let after = (cf: "\(cfString)", ns: "\(nsString)", sc: "\(swiftFromCF)", sn: "\(swiftFromNS)")

    if printDetails {
        let report = [
            "cf1: \(before.cf)",
            "ns1: \(before.ns)",
            "sc1: \(before.sc)",
            "sn1: \(before.sn)",
            "-- modification --",
            "cf2: \(after.cf)",
            "ns2: \(after.ns)",
            "sc2: \(after.sc)",
            "sn2: \(after.sn)",
        ]
        report.forEach { print($0) }
    }

    let cfFlag = neq(before.cf, after.cf)
    let nsFlag = neq(before.ns, after.ns)
    let scFlag = neq(before.sc, after.sc, true)
    let snFlag = neq(before.sn, after.sn, true)
    print("\(testName)\tlen:\(count)\tCF:\(cfFlag)\tNS(CF):\(nsFlag)\tS(CF):\(scFlag)\tS(NS):\(snFlag)")
}
/// Probes whether `NSString(bytesNoCopy:length:encoding:freeWhenDone:)` and the Swift `String`
/// bridged from it still reflect later writes to the buffer they were created from.
///
/// The buffer is filled with `count` copies of `value1`, both strings are rendered to text, the first
/// element is overwritten with `value2`, and both are rendered again. One tab-separated result row is
/// printed; each column is produced by `neq`.
///
/// - Parameters:
///   - testName: Label printed in the first column of the result row.
///   - count: Number of `T` elements placed in the buffer.
///   - value1: Element value the buffer is filled with.
///   - value2: Element value written over the first element after the strings exist.
///   - encoding: Encoding used to interpret the bytes.
///   - printDetails: When `true`, the before/after renderings are printed as well.
func test_NSString_bytes<T>(_ testName: String, count: Int, value1: T, value2: T, encoding: String.Encoding, printDetails: Bool = false) {
    let size = count * MemoryLayout<T>.size
    let bytes = malloc(size)!
    // `freeWhenDone: false` below leaves ownership of the buffer with this function, so it has to be
    // released here; previously it leaked on every call.
    defer { free(bytes) }

    let chars = bytes.assumingMemoryBound(to: T.self)
    chars.initialize(repeating: value1, count: count)

    let ns = NSString(bytesNoCopy: bytes, length: size, encoding: encoding.rawValue, freeWhenDone: false)!
    let sn = ns as String

    // Render both strings before and after touching the underlying memory.
    let ns1 = "\(ns)"
    let sn1 = "\(sn)"
    chars.pointee = value2
    let ns2 = "\(ns)"
    let sn2 = "\(sn)"

    if printDetails {
        print("ns1: \(ns1)")
        print("sn1: \(sn1)")
        print("-- modification --")
        print("ns2: \(ns2)")
        print("sn2: \(sn2)")
    }
    print("\(testName)\tlen:\(count)\t \tNS:\(neq(ns1,ns2)) \tS(NS):\(neq(sn1,sn2,true))")
}
/// Probes whether `NSString(charactersNoCopy:length:freeWhenDone:)` and the Swift `String` bridged
/// from it still reflect later writes to the UTF-16 buffer they were created from.
///
/// The buffer is filled with `count` copies of `value1`, both strings are rendered to text, the first
/// code unit is overwritten with `value2`, and both are rendered again. One tab-separated result row
/// is printed; each column is produced by `neq`.
///
/// - Parameters:
///   - testName: Label printed in the first column of the result row.
///   - count: Number of UTF-16 code units placed in the buffer.
///   - value1: Code unit the buffer is filled with.
///   - value2: Code unit written over the first element after the strings exist.
///   - printDetails: When `true`, the before/after renderings are printed as well.
func test_NSString_chars(_ testName: String, count: Int, value1: UniChar, value2: UniChar, printDetails: Bool = false) {
    let size = count * MemoryLayout<UniChar>.size
    let bytes = malloc(size)!
    // `freeWhenDone: false` below leaves ownership of the buffer with this function, so it has to be
    // released here; previously it leaked on every call.
    defer { free(bytes) }

    let chars = bytes.assumingMemoryBound(to: UniChar.self)
    chars.initialize(repeating: value1, count: count)

    let ns = NSString(charactersNoCopy: chars, length: count, freeWhenDone: false)
    let sn = ns as String

    // Render both strings before and after touching the underlying memory.
    let ns1 = "\(ns)"
    let sn1 = "\(sn)"
    chars.pointee = value2
    let ns2 = "\(ns)"
    let sn2 = "\(sn)"

    if printDetails {
        print("ns1: \(ns1)")
        print("sn1: \(sn1)")
        print("-- modification --")
        print("ns2: \(ns2)")
        print("sn2: \(sn2)")
    }
    print("\(testName)\tlen:\(count)\t \tNS:\(neq(ns1,ns2)) \tS(NS):\(neq(sn1,sn2,true))")
}
/// Probes whether a Swift `String` made with `String(bytesNoCopy:length:encoding:freeWhenDone:)`
/// still reflects later writes to the buffer it was created from.
///
/// The buffer is filled with `count` copies of `value1`, the string is rendered to text, the first
/// element is overwritten with `value2`, and the string is rendered again. One tab-separated result
/// row is printed; its last column is produced by `neq`.
///
/// - Parameters:
///   - testName: Label printed in the first column of the result row.
///   - count: Number of `T` elements placed in the buffer.
///   - value1: Element value the buffer is filled with.
///   - value2: Element value written over the first element after the string exists.
///   - encoding: Encoding used to interpret the bytes.
///   - printDetails: When `true`, the before/after renderings are printed as well.
func test_String_bytes<T>(_ testName: String, count: Int, value1: T, value2: T, encoding: String.Encoding, printDetails: Bool = false) {
    let size = count * MemoryLayout<T>.size
    let bytes = malloc(size)!
    // `freeWhenDone: false` below leaves ownership of the buffer with this function, so it has to be
    // released here; previously it leaked on every call.
    defer { free(bytes) }

    let chars = bytes.assumingMemoryBound(to: T.self)
    chars.initialize(repeating: value1, count: count)

    let s = String(bytesNoCopy: bytes, length: size, encoding: encoding, freeWhenDone: false)!

    // Render the string before and after touching the underlying memory.
    let s1 = "\(s)"
    chars.pointee = value2
    let s2 = "\(s)"

    if printDetails {
        print("s1: \(s1)")
        print("-- modification --")
        print("s2: \(s2)")
    }
    print("\(testName)\tlen:\(count)\t \t \tS:\(neq(s1,s2,true))")
}
/// Probes whether a Swift `String` made with `String(utf16CodeUnitsNoCopy:count:freeWhenDone:)`
/// still reflects later writes to the UTF-16 buffer it was created from.
///
/// The buffer is filled with `count` copies of `value1`, the string is rendered to text, the first
/// code unit is overwritten with `value2`, and the string is rendered again. One tab-separated result
/// row is printed; its last column is produced by `neq`.
///
/// - Parameters:
///   - testName: Label printed in the first column of the result row.
///   - count: Number of UTF-16 code units placed in the buffer.
///   - value1: Code unit the buffer is filled with.
///   - value2: Code unit written over the first element after the string exists.
///   - printDetails: When `true`, the before/after renderings are printed as well.
func test_String_chars(_ testName: String, count: Int, value1: unichar, value2: unichar, printDetails: Bool = false) {
    let size = count * MemoryLayout<unichar>.size
    let bytes = malloc(size)!
    // `freeWhenDone: false` below leaves ownership of the buffer with this function, so it has to be
    // released here; previously it leaked on every call.
    defer { free(bytes) }

    let chars = bytes.assumingMemoryBound(to: unichar.self)
    chars.initialize(repeating: value1, count: count)

    let s = String(utf16CodeUnitsNoCopy: chars, count: count, freeWhenDone: false)

    // Render the string before and after touching the underlying memory.
    let s1 = "\(s)"
    chars.pointee = value2
    let s2 = "\(s)"

    if printDetails {
        print("s1: \(s1)")
        print("-- modification --")
        print("s2: \(s2)")
    }
    print("\(testName)\tlen:\(count)\t \t \tS:\(neq(s1,s2,true))")
}
/// Probes whether data made with `CFDataCreateWithBytesNoCopy` (and its bridged forms) still
/// reflects later writes to the buffer it was created from.
///
/// The buffer is filled with `count` copies of `value1`, the CFData and its NSData / Data bridges are
/// rendered to text, the first byte is overwritten with `value2`, and everything is rendered again.
/// One tab-separated result row is printed; each column is produced by `neq`.
///
/// - Parameters:
///   - testName: Label printed in the first column of the result row.
///   - count: Number of bytes placed in the buffer.
///   - value1: Byte the buffer is filled with.
///   - value2: Byte written over the first element after the data objects exist.
///   - printDetails: When `true`, the before/after renderings are printed as well.
func test_CFData_bytes(_ testName: String, count: Int, value1: UInt8, value2: UInt8, printDetails: Bool = false) {
    let rawBuffer = malloc(count)!
    let octets = rawBuffer.assumingMemoryBound(to: UInt8.self)
    (0 ..< count).forEach { octets[$0] = value1 }

    // NOTE(review): the trailing nil is the bytesDeallocator. Per the CFData documentation nil
    // selects the default allocator, i.e. CF takes over the buffer -- presumably why nothing here
    // calls free(). Confirm before changing either side of that arrangement.
    let cfData = CFDataCreateWithBytesNoCopy(nil, octets, count, nil)!
    let nsData = cfData as NSData
    let swiftFromCF = cfData as Data
    let swiftFromNS = nsData as Data

    // Render everything before and after touching the underlying memory.
    let before = (cf: "\(cfData)", ns: "\(nsData)", sc: swiftFromCF.hexString, sn: swiftFromNS.hexString)
    octets[0] = value2
    let after = (cf: "\(cfData)", ns: "\(nsData)", sc: swiftFromCF.hexString, sn: swiftFromNS.hexString)

    if printDetails {
        let report = [
            "cf1: \(before.cf)",
            "ns1: \(before.ns)",
            "sc1: \(before.sc)",
            "sn1: \(before.sn)",
            "-- modification --",
            "cf2: \(after.cf)",
            "ns2: \(after.ns)",
            "sc2: \(after.sc)",
            "sn2: \(after.sn)",
        ]
        report.forEach { print($0) }
    }

    let cfFlag = neq(before.cf, after.cf)
    let nsFlag = neq(before.ns, after.ns)
    let scFlag = neq(before.sc, after.sc, true)
    let snFlag = neq(before.sn, after.sn, true)
    print("\(testName)\tlen:\(count)\tCF:\(cfFlag)\tNS(CF):\(nsFlag)\tS(CF):\(scFlag)\tS(NS):\(snFlag)")
}
/// Probes whether `NSData(bytesNoCopy:length:freeWhenDone:)` and the Swift `Data` bridged from it
/// still reflect later writes to the buffer they were created from.
///
/// The buffer is filled with `count` copies of `value1`, both objects are rendered to text, the first
/// byte is overwritten with `value2`, and both are rendered again. One tab-separated result row is
/// printed; each column is produced by `neq`.
///
/// - Parameters:
///   - testName: Label printed in the first column of the result row.
///   - count: Number of bytes placed in the buffer.
///   - value1: Byte the buffer is filled with.
///   - value2: Byte written over the first element after the data objects exist.
///   - printDetails: When `true`, the before/after renderings are printed as well.
func test_NSData_bytes(_ testName: String, count: Int, value1: UInt8, value2: UInt8, printDetails: Bool = false) {
    let bytes = malloc(count)!
    // `freeWhenDone: false` below leaves ownership of the buffer with this function, so it has to be
    // released here; previously it leaked on every call.
    defer { free(bytes) }

    let chars = bytes.assumingMemoryBound(to: UInt8.self)
    chars.initialize(repeating: value1, count: count)

    let ns = NSData(bytesNoCopy: bytes, length: count, freeWhenDone: false)
    let sn = ns as Data

    // Render both objects before and after touching the underlying memory.
    let ns1 = "\(ns)"
    let sn1 = sn.hexString
    chars.pointee = value2
    let ns2 = "\(ns)"
    let sn2 = sn.hexString

    if printDetails {
        print("ns1: \(ns1)")
        print("sn1: \(sn1)")
        print("-- modification --")
        print("ns2: \(ns2)")
        print("sn2: \(sn2)")
    }
    print("\(testName)\tlen:\(count)\t \tNS:\(neq(ns1,ns2)) \tS(NS):\(neq(sn1,sn2,true))")
}
/// Probes whether a Swift `Data` made with `Data(bytesNoCopy:count:deallocator:)` still reflects
/// later writes to the buffer it was created from.
///
/// The buffer is filled with `count` copies of `value1`, the data is rendered as hex, the first byte
/// is overwritten with `value2`, and the data is rendered again. One tab-separated result row is
/// printed; its last column is produced by `neq`.
///
/// - Parameters:
///   - testName: Label printed in the first column of the result row.
///   - count: Number of bytes placed in the buffer.
///   - value1: Byte the buffer is filled with.
///   - value2: Byte written over the first element after the data exists.
///   - printDetails: When `true`, the before/after renderings are printed as well.
func test_Data_bytes(_ testName: String, count: Int, value1: UInt8, value2: UInt8, printDetails: Bool = false) {
    let bytes = malloc(count)!
    // `deallocator: .none` below leaves ownership of the buffer with this function, so it has to be
    // released here; previously it leaked on every call.
    defer { free(bytes) }

    let chars = bytes.assumingMemoryBound(to: UInt8.self)
    chars.initialize(repeating: value1, count: count)

    let s = Data(bytesNoCopy: bytes, count: count, deallocator: .none)

    // Render the data before and after touching the underlying memory.
    let s1 = s.hexString
    chars.pointee = value2
    let s2 = s.hexString

    if printDetails {
        print("s1: \(s1)")
        print("-- modification --")
        print("s2: \(s2)")
    }
    print("\(testName)\tlen:\(count)\t \t \tS:\(neq(s1,s2,true))")
}
/// Runs every probe in a fixed order, printing one result row per call and a blank line at the end.
func test() {
    // 0x41 ("A") is the fill value, 0x42 ("B") the value written afterwards.
    let a8: UInt8 = 0x41
    let b8: UInt8 = 0x42
    let a16: UInt16 = 0x41
    let b16: UInt16 = 0x42
    // The same two values with their bytes swapped, used with the UTF-16BE encodings.
    let a16Swapped: UInt16 = 0x4100
    let b16Swapped: UInt16 = 0x4200

    // CFString
    test_CFString_bytes("CFString ascii ", count: 9, value1: a8, value2: b8, encoding: .ASCII)
    test_CFString_bytes("CFString ascii ", count: 10, value1: a8, value2: b8, encoding: .ASCII)
    test_CFString_bytes("CFString utf8 ", count: 9, value1: a8, value2: b8, encoding: .UTF8)
    test_CFString_bytes("CFString utf8 ", count: 10, value1: a8, value2: b8, encoding: .UTF8)
    test_CFString_bytes("CFString utf16LE ", count: 1, value1: a16, value2: b16, encoding: .UTF16LE)
    test_CFString_bytes("CFString utf16BE ", count: 1, value1: a16Swapped, value2: b16Swapped, encoding: .UTF16BE)
    test_CFString_chars("CFString_chars ", count: 1, value1: a16, value2: b16)

    // NSString
    test_NSString_bytes("NSString utf8 ", count: 9, value1: a8, value2: b8, encoding: .utf8)
    test_NSString_bytes("NSString utf8 ", count: 10, value1: a8, value2: b8, encoding: .utf8)
    test_NSString_bytes("NSString utf16LE ", count: 1, value1: a16, value2: b16, encoding: .utf16LittleEndian)
    test_NSString_bytes("NSString utf16BE ", count: 20, value1: a16Swapped, value2: b16Swapped, encoding: .utf16BigEndian)
    test_NSString_chars("NSString_chars ", count: 1, value1: a16, value2: b16)

    // String
    test_String_bytes("String utf8 ", count: 20, value1: a8, value2: b8, encoding: .utf8)
    test_String_bytes("String utf16LE ", count: 20, value1: a16, value2: b16, encoding: .utf16LittleEndian)
    test_String_chars("String_chars ", count: 20, value1: a16, value2: b16)

    // CFData / NSData / Data
    test_CFData_bytes("CFData ", count: 14, value1: a8, value2: b8)
    test_CFData_bytes("CFData ", count: 15, value1: a8, value2: b8)
    test_NSData_bytes("NSData ", count: 1, value1: a8, value2: b8)
    test_NSData_bytes("NSData ", count: 20, value1: a8, value2: b8)
    test_Data_bytes("Data ", count: 14, value1: a8, value2: b8)
    test_Data_bytes("Data ", count: 15, value1: a8, value2: b8)

    print()
}
/// Formats the outcome of a before/after comparison as a fixed-width, two-character table cell.
///
/// - Parameters:
///   - a: Value captured before the modification.
///   - b: Value captured after the modification.
///   - dangerous: Whether a difference should be flagged with `*` instead of a space.
/// - Returns: `"N "` when the values are equal; otherwise `"Y*"` if `dangerous` is set, else `"Y "`.
func neq<T: Equatable>(_ a: T, _ b: T, _ dangerous: Bool = false) -> String {
    guard a != b else { return "N " }
    return dangerous ? "Y*" : "Y "
}
extension Data {
    /// Lowercase hexadecimal rendering of the bytes: two digits per byte, no separators.
    /// An empty `Data` yields an empty string.
    var hexString: String {
        // Format each byte once and join at the end; growing the result with `+` inside `reduce`
        // re-copied the whole accumulated string for every byte.
        map { String(format: "%02x", $0) }.joined()
    }
}
// Script entry point: run every probe and print the result rows.
test()