Improve FileManager.contentsEqual(atPath:andPath:) speed

I recently opened a topic on the Apple Developer Forums titled FileManager.contentsEqual(atPath:andPath:) very slow, and an Apple Engineer suggested that I open a discussion "there" (hopefully this forum is the right place). So I'd like to open a discussion about possibly improving the default implementation of this method, and maybe even adding a new parameter that allows a custom buffer size to further improve its speed.

Looking at the source code, I see that it's using a buffer size of 8 * 1024, which is tiny.

Now consider the following implementation, which allows a custom buffer size:

extension FileManager {
    
    /// Compares the contents of the files at the two given paths, reading
    /// both in chunks of `bufferSize` bytes.
    ///
    /// - Parameters:
    ///   - path1: Path of the first file.
    ///   - path2: Path of the second file.
    ///   - bufferSize: Number of bytes read per chunk. Must be positive:
    ///     a non-positive size would make both reads return empty data on the
    ///     first pass and incorrectly report arbitrary files as equal.
    ///   - progress: Invoked after each chunk with the number of bytes read
    ///     from the first file; return `false` to cancel. On cancellation the
    ///     method returns the equality of the portion compared so far.
    /// - Returns: `true` if both files have identical contents. If either
    ///   path cannot be opened (e.g. it is a symbolic link, which
    ///   `O_NOFOLLOW` rejects), falls back to the slower
    ///   `contentsEqual(atPath:andPath:)`.
    func fastContentsEqual(atPath path1: String, andPath path2: String, bufferSize: Int, progress: (_ delta: Int) -> Bool) -> Bool {
        precondition(bufferSize > 0, "bufferSize must be positive")
        do {
            let sourceDescriptor = open(path1, O_RDONLY | O_NOFOLLOW, 0)
            if sourceDescriptor < 0 {
                throw NSError(domain: NSPOSIXErrorDomain, code: Int(errno))
            }
            defer {
                close(sourceDescriptor)
            }
            let sourceFile = FileHandle(fileDescriptor: sourceDescriptor)
            let destinationDescriptor = open(path2, O_RDONLY | O_NOFOLLOW, 0)
            if destinationDescriptor < 0 {
                throw NSError(domain: NSPOSIXErrorDomain, code: Int(errno))
            }
            defer {
                close(destinationDescriptor)
            }
            let destinationFile = FileHandle(fileDescriptor: destinationDescriptor)
            var equal = true
            // The autoreleasepool bounds the lifetime of the Data chunks so
            // peak memory stays near 2 * bufferSize. The loop ends at EOF
            // (empty source read), on cancellation, or at the first mismatch;
            // a length mismatch (one file is a prefix of the other) also
            // yields unequal chunks and terminates the loop with `false`.
            while autoreleasepool(invoking: {
                let sourceData = sourceFile.readData(ofLength: bufferSize)
                let destinationData = destinationFile.readData(ofLength: bufferSize)
                equal = sourceData == destinationData
                return sourceData.count > 0 && progress(sourceData.count) && equal
            }) { }
            return equal
        } catch {
            return contentsEqual(atPath: path1, andPath: path2) // use this as a fallback for unsupported files (like symbolic links)
        }
    }
    
}

This is how the two implementations compare with files of different sizes (with the same buffer size of 8_192):

File size FileManager (s) Custom (s) Speedup
1'000 0.00016 0.00009 1.7x
1'000'000 0.009 0.001 9x
10'000'000 0.21 0.11 1.9x
100'000'000 2.0 0.9 2.2x

And here are the results of the custom implementation for different file and buffer sizes, each one repeated 5 times.

This is the code I used for testing. I then opened the generated .tsv files in Numbers on Mac to generate the graphs.

// Benchmark harness: measures fastContentsEqual over a grid of file sizes and
// buffer sizes, writing one TSV of timings per file size.
let url = FileManager.default.temporaryDirectory.appending(path: UUID().uuidString, directoryHint: .isDirectory)
try! FileManager.default.createDirectory(at: url, withIntermediateDirectories: true)
print(url.path())
let source = url.appendingPathComponent("file source")
let destination = url.appendingPathComponent("file destination")

print("page size", getpagesize())
let fileSizes = [1_000, 1_000_000, 10_000_000, 100_000_000, 1_000_000_000, 10_000_000_000, Int(getpagesize()) * 10_000]
// Buffer sizes from 1 KiB (2^10) up to 1 GiB (2^30).
let bufferSizes: [Int] = (10..<31).map({ 1 << $0 })
let repetitions = 5

// times[bufferSizeIndex][repetitionIndex]; every slot is overwritten for each
// file size before the TSV is written, so no reset between iterations is needed.
var times = [[TimeInterval]](repeating: [TimeInterval](repeating: 0, count: repetitions), count: bufferSizes.count)
for fileSize in fileSizes {
    print("fileSize", fileSize)
    for (i, bufferSize) in bufferSizes.enumerated() {
        print("bufferSize", bufferSize)
        for j in 0..<repetitions {
            // Recreate both files for every measurement; createFile writes
            // with F_NOCACHE so each run starts from a cold cache.
            try? FileManager.default.removeItem(at: destination)
            createFile(source: source, size: fileSize)
            createFile(source: destination, size: fileSize)
            let date = Date()
            let result = FileManager.default.fastContentsEqual(atPath: source.path, andPath: destination.path, bufferSize: bufferSize) { _ in true }
            let time = -date.timeIntervalSinceNow
            // The two files are identical by construction; a false result
            // would mean the comparison is broken and the timings meaningless.
            assert(result)
            times[i][j] = time
        }
    }
    
    // One TSV per file size: header row of buffer sizes, then one row per repetition.
    let header = ([""] + bufferSizes.map({ NumberFormatter.localizedString(from: $0 as NSNumber, number: .decimal) })).joined(separator: "\t")
    try! Data(([header] + (0..<repetitions).map { j in
        (["\(j)"] + (0..<bufferSizes.count).map { i in
            return timeToString(times[i][j])
        }).joined(separator: "\t")
    }).joined(separator: "\n").utf8).write(to: url.appendingPathComponent("\(fileSize).tsv"))
}

/// Formats a time interval with six fractional digits (microsecond
/// resolution) for the TSV output.
func timeToString(_ interval: TimeInterval) -> String {
    let sixDecimalPlaces = "%.6f"
    return String(format: sixDecimalPlaces, interval)
}

/// Creates (or truncates) a zero-filled file of `size` bytes at `source`,
/// asking the kernel (via `F_NOCACHE`) to keep the written pages out of the
/// unified buffer cache so subsequent reads hit the disk.
/// - Parameters:
///   - source: URL of the file to write.
///   - size: Number of bytes to write.
func createFile(source: URL, size: Int) {
    let buffer = UnsafeMutableRawBufferPointer.allocate(byteCount: size, alignment: Int(getpagesize()))
    // Original code leaked this allocation on every call (up to 10 GB each).
    defer { buffer.deallocate() }
    buffer.initializeMemory(as: Int.self, repeating: 0)
    guard let fp = fopen(source.path, "w") else {
        preconditionFailure("cannot open \(source.path) for writing")
    }
    defer { fclose(fp) }
    let status = fcntl(fileno(fp), F_NOCACHE, 1)
    assert(status == 0)
    let bytes = fwrite(buffer.baseAddress!, 1, size, fp)
    assert(bytes == size)
}
4 Likes

Some miscellaneous thoughts, having looked at it briefly but not measured yet myself:

  • 8kB is clearly too small; a relic from the pre-16kB pages days most likely. One page is probably the smallest reasonable buffer
  • The memory footprint vs time tradeoff is significant. Allocating megabytes here will likely make this unusable for some clients. So we do have conflicting pressures.
  • Would using st_blksize to size the read chunks make sense? Is the fstatfs to find that information expensive enough to not be worth it?
  • Currently we set F_NOCACHE on both file descriptors. Is that the right set of tradeoffs? It might be, I'm not sure
  • Currently we don't use fadvise or fcntl+F_RDADVISE to inform the kernel of our intent to access the full contents of the file once sequentially. Perhaps that could mitigate some of the costs of smaller buffer sizes? Does F_NOCACHE disable that benefit, since the advisory readahead would be done into the cache?
  • It would be interesting to profile and see where the time is actually going. It may not be the cost of the SSD read itself, it could be overhead from context switching, sandbox checks, or filesystem data structures
  • Is the explicit deinitialize of the temporary buffers actually buying us anything? Does it cost anything?
  • Could we amortize the allocation and deallocation costs by asking for one buffer of twice the size, and then using the two halves of it separately?
2 Likes

Great stuff.. But shouldn't things like these be async?

Perhaps, but we'll still need to have the old method for compatibility at the very least, so no harm in making it more efficient.

It would be similar to rearranging deck chairs on the Titanic but yeah, we could do that.

I think this is a great discussion. I'd encourage you to open a PR on the repo so we can iterate there with the help of testing and benchmarking tools.

Hey @nickasd, thanks for this really detailed analysis.

and an Apple Engineer suggested that I open a discussion "there" (hopefully this forum is the right place)

That would be me, and yes you found the right place.

I think improving this implementation is a great idea. We ship as a dynamic library on macOS, iOS, etc. So if we can improve the implementation of a method that apps are already using, everybody benefits for free when they upgrade their OS. We can also work incrementally, with some of the ideas that @David_Smith suggested, which can reduce risk. We have a growing benchmark suite in the project too, which we can add yours to.

Thanks again.

I wanted to note that I checked with some filesystem folks and fcntl+F_RDADVISE is indeed mutually exclusive with F_NOCACHE, so that's an interesting set of tradeoffs

BTW, how well do file manager operations like duplicate and compare work when both files are on the same network share?

Ideal behaviour from user POV (considering APFS on the network disk):

  • duplicate (copy) is O(1)
  • comparing two "copies" is O(1)
  • comparing two files which are not "copies" takes linear time, comparing is done on the remote machine (files' contents are not copied across the network)

I would literally try to copy-paste my code into the repository without being able to do much more than that (I usually avoid dealing with low-level code). Is that what you meant?

Sounds good to me, but the concepts mentioned by @David_Smith are definitely too low-level for me. I use some of them in my testing code, but if it wasn't for some help I got on the Apple Developer Forums I would never have come up with them. I was just made aware by a user of my app that file comparison is very slow, and the first alternate implementation that I came up with happened to be faster, but at this time I wouldn't be able to explain why it's faster or analyze the performance of the different components.

(This is backwards, you can identify if two files are different as soon as you encounter a difference, but identifying that they’re the same requires reading the whole file unless they’re literally the same inode. A network filesystem cannot promise that a duplicate operation will share inodes.)

Backwards?

To clarify what I wrote above on the "desired behaviour" from the end user POV:

  • if remote FS could duplicate the file quickly (sharing I-node, or similar like APFS) it's doing so "instantly" (1). Otherwise it's reading/writing file on the "server" (file content / bytes are not copied across the network) (2).
  • for a file that's not a copy, but happened to have the same content the slow path (2) is taken. (file content / bytes are not copied across the network).
  • when comparing files their i-nodes are checked first, and if they are equal the files are equal (1); when they are different, both files are read and the bytes are compared. This is done on the server. As above, no file content / bytes are copied across the network.

I don't know how far this ideal is from what we have now!

1 Like

Ah, I misinterpreted "copy" as "a file with the same contents". I don't think most remote filesystems come with a remote-compare-contents operation, unfortunately.

Good to know.

Interestingly FileManager.copyItem to duplicate a file on a remote APFS volume also took linear time while duplicating that file with Finder was instantaneous.

To my disappointment FileManager.contentsEqual takes O(n) time for the two identical local files on APFS volume as well :frowning: (The second file was made via FileManager's copyItem in O(1) time).

I managed to make isDuplicate check that works in O(1) time † on a local APFS drive:

// TODO: this probably does NOT work on non APFS volumes, test and implement some check!
// TODO: this does NOT seem to work for remote files (at least those mounted via SMB)! Test and implement some check.
/// Returns `true` when the two paths refer to the same inode, or when every
/// file block of both files maps to the same physical device offset (i.e. the
/// files are clones sharing storage).
///
/// Walks the files one `st_blksize` block at a time, so this is O(block
/// count), not O(1) — see the correction later in the thread.
///
/// A `false` result means "could not prove the files are duplicates" (it is
/// also returned on any open/fstat/fcntl failure), NOT "the files differ";
/// callers should fall back to a byte-by-byte comparison in that case.
func isDuplicate(_ path1: String, _ path2: String) -> Bool {
    let file1 = open(path1, O_RDONLY)
    if file1 < 0 { return false }
    defer { close(file1) }
    
    let file2 = open(path2, O_RDONLY)
    if file2 < 0 { return false }
    defer { close(file2) }
    
    var stat1 = stat()
    var err = fstat(file1, &stat1)
    if err < 0 { return false }
    
    var stat2 = stat()
    err = fstat(file2, &stat2)
    if err < 0 { return false }
    
    // Files on different devices cannot share physical blocks; differing
    // block counts or block sizes also rule out a clone relationship.
    if stat1.st_dev != stat2.st_dev { return false }
    if stat1.st_blocks != stat2.st_blocks { return false }
    if stat1.st_blksize != stat2.st_blksize { return false }
    // Same inode: trivially the same content.
    if stat1.st_ino == stat2.st_ino { return true }
    
    let fileSize = stat1.st_size
    let blockSize = Int64(stat1.st_blksize)
    
    var offset: Int64 = 0
    while offset < fileSize {
        // F_LOG2PHYS_EXT translates the logical offset in l2p_devoffset to
        // the corresponding physical device offset for this descriptor.
        var rec1 = log2phys()
        rec1.l2p_devoffset = offset
        err = fcntl(file1, F_LOG2PHYS_EXT, &rec1);
        if err < 0 { return false }
        
        var rec2 = log2phys()
        rec2.l2p_devoffset = offset
        err = fcntl(file2, F_LOG2PHYS_EXT, &rec2);
        if err < 0 { return false }
        
        // Any block stored at a different physical location disproves (or at
        // least fails to prove) that the files share storage.
        if rec1.l2p_devoffset != rec2.l2p_devoffset {
            return false
        }
        offset += blockSize
    }
    return true
}

Note, it doesn't work correctly on a remote APFS volume, at least when that's mounted via SMB.

Subject to fixing this check on non-APFS and network volumes, when using it to optimise contentsEqual, treat "isDuplicate == true" as a quick "yes, the files are equal" path and when "isDuplicate == false" treat it as "can't tell the result quickly, let's do a thorough comparison check" path. More optimal would be to use the best of both approaches: only compare the blocks that are known to be at different physical offsets.


† Important correction: I was wrong, the time complexity here is still O(n)... The factor upfront is lower compared to read, but not significantly lower than what I thought it would be, with the calls to read being traded for calls to fcntl (which turn out to be just about twice as fast). There might be a quicker way that drills deeper into APFS structures (e.g. to take advantage of the fact that files are normally laid out in huge contiguous spans) but to my knowledge that stuff is undocumented and/or might not be available to user-land code.


What would help to make this approach quicker is a fcntl style call that fills out the struct:

/// Sketch of the record a hypothetical fcntl-style call would fill in:
/// one physical *extent* (offset + contiguous length) per query, instead of
/// one block, letting the comparison loop jump in large strides.
struct SomeNewStruct {
    var physicalOffset: Int // physical device offset of the extent
    var size: Int // this could be much larger than a single block!
}

then the code that iterates physical block offsets would jump in huge chunks, typically much larger than 4K or whatever the block size is. That could still be O(n) in the worst case, but in a typical case of not massively fragmented file that would be effectively O(1) time complexity.


Update: turns out I just wasn't using the F_LOG2PHYS_EXT API to its full potential; it does have what I was looking for just above.

Corrected code that runs about 1000x faster
// TODO: this probably does NOT work on non APFS volumes, test and implement some check!
// TODO: this does NOT seem to work for remote files (at least those mounted via SMB)! Test and implement some check.
/// Returns `true` when the two paths refer to the same inode, or when the
/// files' extents occupy identical physical device ranges (i.e. they are
/// clones sharing storage).
///
/// Runs in O(fragment count) rather than O(block count): `F_LOG2PHYS_EXT`
/// reports in `l2p_contigbytes` how many bytes are physically contiguous from
/// the queried offset, so an unfragmented file is covered in one iteration.
///
/// A `false` result means "could not prove the files are duplicates" (also
/// returned on any open/fstat/fcntl failure), NOT "the files differ";
/// callers should fall back to a byte-by-byte comparison in that case.
func isDuplicate(_ path1: String, _ path2: String) -> Bool {

    let file1 = open(path1, O_RDONLY)
    if file1 < 0 { return false }
    defer { close(file1) }
    
    let file2 = open(path2, O_RDONLY)
    if file2 < 0 { return false }
    defer { close(file2) }
    
    var stat1 = stat()
    var err = fstat(file1, &stat1)
    if err < 0 { return false }
    
    var stat2 = stat()
    err = fstat(file2, &stat2)
    if err < 0 { return false }
    
    // Files on different devices cannot share physical blocks; differing
    // block counts or block sizes also rule out a clone relationship.
    if stat1.st_dev != stat2.st_dev { return false }
    if stat1.st_blocks != stat2.st_blocks { return false }
    if stat1.st_blksize != stat2.st_blksize { return false }
    // Same inode: trivially the same content.
    if stat1.st_ino == stat2.st_ino { return true }
    
    let fileSize = stat1.st_size
    
    var offset: Int64 = 0
    while offset < fileSize {
        // Pass .max as l2p_contigbytes to ask for the longest contiguous
        // extent starting at `offset`; the kernel overwrites it with the
        // actual contiguous length.
        var rec1 = log2phys(l2p_flags: 0, l2p_contigbytes: .max, l2p_devoffset: offset)
        err = fcntl(file1, F_LOG2PHYS_EXT, &rec1)
        if err < 0 { return false }
        
        var rec2 = log2phys(l2p_flags: 0, l2p_contigbytes: .max, l2p_devoffset: offset)
        err = fcntl(file2, F_LOG2PHYS_EXT, &rec2)
        if err < 0 { return false }
        
        // Both files must map this logical offset to the same physical
        // location AND the same extent length.
        if rec1.l2p_devoffset != rec2.l2p_devoffset || rec1.l2p_contigbytes != rec2.l2p_contigbytes {
            return false
        }
        // Defend against a zero-length extent, which would otherwise make
        // `offset` stop advancing and loop forever.
        guard rec1.l2p_contigbytes > 0 else { return false }
        offset += rec1.l2p_contigbytes
    }
    return true
}

Time complexity: O(file fragment count)