Split line on a separator with optional whitespace

I have a string like this:

key: value

How would I best split the line into key and value variables on the first occurrence of :, possibly followed by a single space? I am looking for something like this in Perl:

my ($key, $val) = split(/: ?/, "key: value")

So far I have only come up with this slightly nightmarish version:

/// Splits `str` into a key and a value at the first `":"`.
///
/// A single space directly after the colon, if present, is treated as part of
/// the separator and is not included in the value.
///
/// - Parameter str: The line to split, e.g. `"key: value"`.
/// - Returns: The text before the colon and the text after the separator, or
///   `nil` when `str` contains no colon.
func keyValueSplit<T: StringProtocol>(_ str: T) -> (String, String)? {
    guard let sep = str.firstIndex(of: ":") else { return nil }
    let key = str[..<sep]
    var value = str[str.index(after: sep)...]
    // Checking `first` instead of subscripting keeps a trailing colon
    // ("key:") from reading past the end of the string.
    if value.first == " " {
        value = value.dropFirst()
    }
    return (String(key), String(value))
}
// Characters stripped from the front of each piece.
let whitespace: Set<Character> = [" "]
// `maxSplits: 1` cuts only at the first colon, so a value that itself contains
// ":" stays intact; binding the result avoids an unused-expression warning.
let keyValueParts = "key: value"
    .split(separator: ":", maxSplits: 1)
    .map { $0.drop(while: { whitespace.contains($0) }) }
    .map(String.init)

If Foundation is okay for you, how about this?

extension String {
  /// The key and value obtained by splitting the string at its first `":"`.
  ///
  /// Spaces surrounding the value are trimmed with Foundation's
  /// `trimmingCharacters(in:)`; the key is returned untouched. The result is
  /// `nil` when the string contains no colon.
  var keyValuePair: (String, String)? {
    // `maxSplits: 1` keeps colons inside the value; retaining empty pieces lets
    // "key:" produce an empty value instead of being rejected.
    let parts = split(separator: ":", maxSplits: 1, omittingEmptySubsequences: false)
    guard parts.count == 2 else { return nil }
    let spaces = CharacterSet(charactersIn: " ")
    return (String(parts[0]), parts[1].trimmingCharacters(in: spaces))
  }
}
extension String {
  /// The string with any leading spaces removed; trailing spaces are kept.
  var trim: String {
    return String(drop(while: { $0 == " " }))
  }

  /// The key and value obtained by splitting the string at its first `":"`,
  /// with leading spaces stripped from the value.
  ///
  /// The result is `nil` when the string contains no colon.
  var keyValue: (key: String, value: String)? {
    // Keeping empty pieces means "key:" and ":value" still yield two parts
    // rather than collapsing to one and being rejected.
    let parts = split(separator: ":", maxSplits: 1, omittingEmptySubsequences: false)
    guard parts.count == 2 else { return nil }
    return (key: String(parts[0]), value: String(parts[1]).trim)
  }
}

// `keyValue` is optional, so bind it before destructuring.
if let (key, value) = "key: value".keyValue {
  print(key, value)
}

Edit: fixed re @Avi

Thank you very much for the ideas! So far I have ended up with this:

// XCTAssertNil(decodeKeyValuePair(""))
// XCTAssertNil(decodeKeyValuePair("foo"))
// XCTAssertNil(decodeKeyValuePair("foo "))
// XCTAssertEqual(decodeKeyValuePair("foo:"), ("foo", ""))
// XCTAssertEqual(decodeKeyValuePair("foo: "), ("foo", ""))
// XCTAssertEqual(decodeKeyValuePair("foo:bar"), ("foo", "bar"))
// XCTAssertEqual(decodeKeyValuePair("foo: bar"), ("foo", "bar"))
// XCTAssertEqual(decodeKeyValuePair("foo:  bar"), ("foo", "bar"))
// XCTAssertEqual(decodeKeyValuePair("foo: bar: baz"), ("foo", "bar: baz"))
/// Splits `str` at its first `":"` into a key and a value.
///
/// Every space immediately following the colon is skipped, so the value
/// starts at the first non-space character after the separator.
///
/// - Returns: `nil` when `str` contains no colon.
func decodeKeyValuePair(_ str: String) -> (String, String)? {
    guard let colon = str.firstIndex(of: ":") else { return nil }

    // Everything after the colon, minus the run of spaces that leads it.
    let afterColon = str[str.index(after: colon)...]
    let value = afterColon.drop(while: { $0 == " " })

    return (String(str[..<colon]), String(value))
}

Makes me wish for something like the parsing API mentioned before.

This solution does not allow a : to appear in the value. That may be unreasonable in many cases. One such example is SSE, where the values are expected to be JSON objects.

1 Like

You can do a regular expression search with the same pattern:

import Foundation

/// Splits `str` around the first match of the regular expression `": ?"`
/// (a colon optionally followed by one space).
///
/// - Returns: The text before and after the match, or `nil` when nothing matches.
func decodeKeyValuePair(_ str: String) -> (String, String)? {
    if let separator = str.range(of: ": ?", options: .regularExpression) {
        let key = str.prefix(upTo: separator.lowerBound)
        let value = str.suffix(from: separator.upperBound)
        return (String(key), String(value))
    }
    return nil
}

Or, slightly obfuscated:

/// Splits `str` around the first match of the regular expression `": ?"`.
///
/// - Returns: The text before and after the match, or `nil` when nothing matches.
func decodeKeyValuePair(_ str: String) -> (String, String)? {
    let separator = str.range(of: ": ?", options: .regularExpression)
    // `map` leaves a missing match as `nil` and otherwise builds the pair.
    return separator.map { match -> (String, String) in
        let key = str[str.startIndex..<match.lowerBound]
        let value = str[match.upperBound..<str.endIndex]
        return (String(key), String(value))
    }
}
1 Like

Wow, these are really good, thank you. And the performance difference is negligible.

Regular expressions were always my go-to working tool in Perl for string processing, I wish we could make them as good/accessible in Swift. Maybe introduce a special quoting operator for them, something like:

let sep = str.range(of: /: ?/)

I guess they could even be compiled down during the compilation phase. It’s a shame such a useful tool isn’t more at hand. At least something like this throughout the string APIs:

/// A thin wrapper that marks a string as a regular-expression pattern.
struct Regex {
    let pattern: String
}

prefix operator ~=

/// Wraps the string operand in a `Regex`.
prefix func ~=(_ source: String) -> Regex {
    return Regex(pattern: source)
}

extension String {
    /// Forwards the pattern held by `regex` to `range(of:options:)` with the
    /// `.regularExpression` option and returns its result.
    func range(of regex: Regex) -> Range<Index>? {
        let pattern = regex.pattern
        return range(of: pattern, options: .regularExpression)
    }
}

let str = "foo: bar"
let sep = str.range(of: ~=":\\s*")

(This is pure brain damage, but I’m sure we could come up with a layer of reasonable regexp usability and accessibility improvements across the string APIs.)

1 Like

If you search around you will find posts about how regular expressions and more general parsing could work in Swift in the future. I think @Chris_Lattner3 has posted some interesting ideas in this area, particularly.

I actually have a trimming() method for StringProtocol (and really, BidirectionalCollection) that I've been working on, which would allow for this:

extension StringProtocol {
    /// The key and value obtained by splitting at the first `":"`, each passed
    /// through `trimming()`.
    ///
    /// The result is `nil` when the split does not produce exactly two parts.
    var asKeyValuePair: (SubSequence, SubSequence)? {
        let parts = split(separator: ":", maxSplits: 1)
        guard parts.count == 2 else { return nil }
        // `Array` has no `lastIndex` property; with exactly two parts the
        // pieces are simply the first and second elements.
        // NOTE(review): `trimming()` is not declared in this snippet; presumably
        // it strips surrounding whitespace — confirm against its definition.
        return (parts[0].trimming(), parts[1].trimming())
    }
}

Usage example:

let str = " aKey : :aValue: "
// Destructure the optional pair, or report the input as invalid.
switch str.asKeyValuePair {
case .some(let (k, v)):
    print("key = '\(k)'")
    print("val = '\(v)'")
case .none:
    print("Invalid key/value string: '\(str)'")
}

I'm going to post it in another thread soon (this weekend?).

I see, you’re right, these are two interesting threads that I have found:

TLDR: After Swift 5.

1 Like