[kudos] Regex Builder regex actually faster than using a raw regex string

I have two regex functions that I put head to head using XCTest measure, one using a raw regex string and the other using the builder.

The builder's speed was on par with using built-in String functions. The raw one was quite a bit slower: over 2x slower when using named captures. (Swift Package Manager 5.10 with raw strings enabled.)

I haven't looked under the hood yet to figure out why (it could just be that the builder is better at writing regexes than I am!), but if any old regex hands are wondering whether it's worth switching... try it!

Nice work!

(Yes, Scanner and a custom scanner were even faster, but when you have the regex...)

//input string
"commonName:spider plant/whereAcquired:Trader Joe's/dateAcquired:2024-03-12 16:12:07"
//0.000454, 0.000343 without names
/// Parses `key:value` pairs separated by `/` using a regex literal with named captures.
///
/// Each match consists of optional whitespace, a reluctant `key`, a colon, a reluctant
/// `value`, optional whitespace, and then either a `/` or the end of the line.
/// Whitespace around the key and the value is left out of the captures.
///
/// - Parameter inputString: Text such as `"commonName:spider plant/whereAcquired:Trader Joe's"`.
/// - Returns: A dictionary built from every match; when a key repeats, the later match
///   overwrites the earlier one.
/// - Note: Declared `throws`, but nothing in the body throws.
static func parse_regex(_ inputString:String) throws -> Dictionary<String,String>{
    // NOTE(review): every alternative in the leading group is an anchor — confirm that
    // `\G` lets the matches after the first `/` succeed on the toolchain in use.
    let pairPattern = /(?:^|\A|\G)(?:\s*(?<key>.+?)\s*:\s*(?<value>.+?)\s*)(?:\/|$)/
    return inputString
        .matches(of: pairPattern)
        .reduce(into: [String: String]()) { pairs, match in
            pairs[String(match.output.key)] = String(match.output.value)
        }
}

Builder version

//0.00019 average
/// Parses `key:value` pairs separated by `/` using a RegexBuilder pattern.
///
/// The pattern mirrors the literal form `(?:^|\A|\G)(?:\s*(key)\s*:\s*(value)\s*)(?:\/|$)`:
/// an anchor, optional whitespace, a reluctant key capture, a colon, a reluctant value
/// capture, optional whitespace, and finally either `/` or the end of the line.
///
/// - Parameter inputString: Text such as `"commonName:spider plant/whereAcquired:Trader Joe's"`.
/// - Returns: A dictionary built from every match; when a key repeats, the later match
///   overwrites the earlier one.
static func parse_regexLong(_ inputString:String) -> Dictionary<String,String>{
    let keyRef = Reference(Substring.self)
    let valueRef = Reference(Substring.self)
    // The same "any amount of whitespace" component is reused at four positions.
    let optionalSpace = ZeroOrMore(CharacterClass.whitespace)

    let pairPattern = Regex {
        // NOTE(review): all three alternatives are anchors — confirm that `\G` lets the
        // matches after the first `/` succeed on the toolchain in use.
        ChoiceOf {
            Anchor.startOfLine                     // ^
            Anchor.startOfSubject                  // \A
            Anchor.firstMatchingPositionInSubject  // \G
        }
        Regex {
            optionalSpace
            Capture(as: keyRef) {
                OneOrMore(CharacterClass.any, .reluctant)
            }
            optionalSpace
            ":"
            optionalSpace
            Capture(as: valueRef) {
                OneOrMore(CharacterClass.any, .reluctant)
            }
            optionalSpace
        }
        ChoiceOf {
            "/"
            Anchor.endOfLine
        }
    }

    return inputString
        .matches(of: pairPattern)
        .reduce(into: [String: String]()) { pairs, match in
            pairs[String(match[keyRef])] = String(match[valueRef])
        }
}

Similar to RegEx builder in speed:

   //0.000195
    /// Parses `key:value` pairs separated by `/` using `split` at both levels.
    ///
    /// Each `/`-separated segment is split at its first `:`; the key and the value are
    /// trimmed of surrounding whitespace and newlines. An empty value (`"key:"`) is kept
    /// as an empty string.
    ///
    /// - Parameter inputString: Text such as `"commonName:spider plant/whereAcquired:Trader Joe's"`.
    /// - Returns: A dictionary of the parsed pairs; when a key repeats, the later segment
    ///   overwrites the earlier one.
    /// - Throws: `HousePlantError.notAKeyValuePair` when a segment contains no `:`.
    static func parse(_ inputString:String) throws -> Dictionary<String,String> {
        let segments = inputString.split(separator: "/")
        return try segments.reduce(into: [String: String]()) { pairs, segment in
            // Keep empty pieces: with the default, "key:" would yield a single element and
            // indexing the value would trap.
            let parts = segment.split(separator: ":", maxSplits: 1, omittingEmptySubsequences: false)
            guard parts.count == 2 else {
                throw HousePlantError.notAKeyValuePair
            }
            let key = parts[0].trimmingCharacters(in: .whitespacesAndNewlines)
            let value = parts[1].trimmingCharacters(in: .whitespacesAndNewlines)
            pairs[key] = value
        }
    }
    
    //0.000113
    /// Parses `key:value` pairs separated by `/` by locating the first `:` in each segment.
    ///
    /// Everything before the first colon is the key and everything after it is the value;
    /// both are trimmed of surrounding whitespace and newlines.
    ///
    /// - Parameter inputString: Text such as `"commonName:spider plant/whereAcquired:Trader Joe's"`.
    /// - Returns: A dictionary of the parsed pairs; when a key repeats, the later segment
    ///   overwrites the earlier one.
    /// - Throws: `HousePlantError.notAKeyValuePair` when a segment contains no `:`.
    static func parse_stringIndex(_ inputString:String) throws -> Dictionary<String,String> {
        var pairs: [String: String] = [:]
        for segment in inputString.split(separator: "/") {
            guard let colon = segment.firstIndex(of: ":") else {
                throw HousePlantError.notAKeyValuePair
            }
            let key = segment[..<colon]
                .trimmingCharacters(in: .whitespacesAndNewlines)
            let value = segment[segment.index(after: colon)...]
                .trimmingCharacters(in: .whitespacesAndNewlines)
            pairs[key] = value
        }
        return pairs
    }
2 Likes