MathExpression.swift
//
// MathExpression.swift
// Universal Pattern Recognizer
//
// Created by Maximilian Servais on 13.03.26.
//
// A complete math expression parser built with Rule.
//
// This example demonstrates how Rule scales from simple patterns to a full
// recursive-descent parser with operator precedence, functions, error recovery,
// and capture tree evaluation — all built from the same composable primitives.
//
// Grammar:
//
// root = gap? (expression | gap | unknown)+
// expression = term (('+' | '-') term)*
// term = factor (('*' | '/') factor)*
// factor = '-'? (number | function | parenthesized)
// function = functionKey '(' expression ')'
// parenthesized = '(' expression ')'
// number = hexadecimal | decimal | integer
// hexadecimal = '#' hexDigits
// decimal = digits '.' digits | '.' digits | digits '.'
// integer = digits
// unknown = any single character (error recovery)
//
import Foundation
private enum Token {
case root, expression, term, factor, function, parenthesized, number, hexadecimal, decimal, integer, unknown
case negate, plus, minus, multiply, divide
case sin, cos, sqrt, abs
}
// MARK: - Diagnostics
/// A diagnostic marker at a specific position in the input.
private struct Diagnostic {
let name: Token
let startOffset: Int
let length: Int
}
/// Collects all "Unknown Expression" diagnostics from a capture tree.
private func collectDiagnostics(_ capture: RuleCapture<String,Token>, in source: String) -> [Diagnostic] {
var results: [Diagnostic] = []
if capture.name == .unknown {
let startOffset = source.distance(from: source.startIndex, to: capture.range.lowerBound)
let length = source.distance(from: capture.range.lowerBound, to: capture.range.upperBound)
results.append(Diagnostic(name: capture.name, startOffset: startOffset, length: length))
}
for inner in capture.innerCaptures {
results.append(contentsOf: collectDiagnostics(inner, in: source))
}
return results
}
/// Prints the input with a visual marker line below any unknown tokens.
///
/// ┌──────────────────────────────────────┐
/// │ Diagnostic: Unknown expression │
/// │ │
/// │ 123+456+yxc+(2*5*zu) │
/// │ ^^^ ^^ │
/// └──────────────────────────────────────┘
private func printDiagnostics(for input: String, capture: RuleCapture<String,Token>) {
let diagnostics = collectDiagnostics(capture, in: input)
let label = diagnostics.isEmpty
? "Diagnostic: No issues"
: "Diagnostic: Unknown expression"
var markerChars = Array(repeating: Character(" "), count: input.count)
for diag in diagnostics {
for offset in diag.startOffset..<(diag.startOffset + diag.length) {
if offset < markerChars.count { markerChars[offset] = "^" }
}
}
let markerLine = String(markerChars)
let contentWidth = max(input.count, label.count)
let border = String(repeating: "─", count: contentWidth + 2)
let pad = { (s: String) in s + String(repeating: " ", count: contentWidth - s.count) }
print("┌" + border + "┐")
print("│ " + pad(label) + " │")
print("│ " + pad("") + " │")
print("│ " + pad(input) + " │")
if !diagnostics.isEmpty { print("│ " + pad(markerLine) + " │") }
print("└" + border + "┘")
}
// MARK: - Evaluation
/// Walks the capture tree and computes a numeric result.
///
/// Every decision is based on capture names — the source text is only
/// read for number literals. Operators and functions are identified
/// purely by their capture names.
private func evaluateMath(_ capture: RuleCapture<String,Token>, in source: String) -> Double {
switch capture.name {
case .root:
if capture.innerCaptures.count == 1,
capture.innerCaptures[0].name == .expression {
return evaluateMath(capture.innerCaptures[0], in: source)
}
return .nan
case .expression:
var result = evaluateMath(capture.innerCaptures[0], in: source)
var i = 1
while i + 1 < capture.innerCaptures.count {
let op = capture.innerCaptures[i].name
let rhs = evaluateMath(capture.innerCaptures[i + 1], in: source)
if op == .plus { result += rhs }
else if op == .minus { result -= rhs }
i += 2
}
return result
case .term:
var result = evaluateMath(capture.innerCaptures[0], in: source)
var i = 1
while i + 1 < capture.innerCaptures.count {
let op = capture.innerCaptures[i].name
let rhs = evaluateMath(capture.innerCaptures[i + 1], in: source)
if op == .multiply { result *= rhs }
else if op == .divide { result /= rhs }
i += 2
}
return result
case .factor:
var sign = 1.0
var value = 0.0
for inner in capture.innerCaptures {
if inner.name == .negate { sign = -1.0 }
else { value = evaluateMath(inner, in: source) }
}
return sign * value
case .integer:
return Double(String(source[capture.range]))!
case .decimal:
return Double(String(source[capture.range]))!
case .hexadecimal:
let hex = String(source[capture.range]).dropFirst() // skip '#'
return Double(UInt64(hex, radix: 16) ?? 0)
case .parenthesized:
for inner in capture.innerCaptures {
if inner.name == .expression { return evaluateMath(inner, in: source) }
}
preconditionFailure("Parenthesized expression without inner Expression")
case .function:
var funcName = Token.unknown
var argument = 0.0
for inner in capture.innerCaptures {
switch inner.name {
case .sin, .cos, .sqrt, .abs: funcName = inner.name
case .expression: argument = evaluateMath(inner, in: source)
default: break
}
}
switch funcName {
case .sin: return sin(argument)
case .cos: return cos(argument)
case .sqrt: return sqrt(argument)
case .abs: return abs(argument)
default: preconditionFailure("Unknown function")
}
case .unknown:
return .nan
default:
preconditionFailure("Unknown capture name: \(capture.name)")
}
}
// MARK: - Demo
/// Runs the math expression parser demo.
///
/// Parses, evaluates, and prints a series of math expressions covering
/// basic arithmetic, operator precedence, parentheses, negation, decimals,
/// hexadecimals, functions, error recovery, and capture tree output.
func mathExpressionDemo() {
typealias R = Rule<String,Token>
// --- Character classes ---
let unicode16Digit = R.unicode("0"..."9")
let unicode16Hex = R.characters("0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f","A","B","C","D","E","F")
// let unicode16Letter = R.unicode(.letters)
let unicode16Whitespace = R.characters(" ")
// --- Character sequences ---
let digits = R.oneOrMore(unicode16Digit)
let hexadecimals = R.oneOrMore(unicode16Hex)
let gap = R.oneOrMore(unicode16Whitespace)
// --- Punctuation ---
let decimalSeparator = R.characters(".")
let parenthesisOpen = R.characters("(")
let parenthesisClose = R.characters(")")
// --- Operators ---
// Each operator has its own capture name so the evaluation function can
// identify it without reading the source text.
// "Negate" is separate from "Minus": Minus is binary (a - b), Negate is unary (-a).
let plus = R.characters("+").capture(as: .plus)
let minus = R.characters("-").capture(as: .minus)
let negate = R.characters("-").capture(as: .negate)
let multiply = R.characters("*").capture(as: .multiply)
let divide = R.characters("/").capture(as: .divide)
let addOrSubtract = R.firstMatch(plus, minus)
let multiplyOrDivide = R.firstMatch(multiply, divide)
// --- Keywords ---
// Case insensitive: sin, SIN, and Sin all match.
// longestMatch ensures "sinh" would win over "sin" if added later.
let functionKey = R.longestMatch(
R.word("sin", caseInsensitive: true).capture(as: .sin),
R.word("cos", caseInsensitive: true).capture(as: .cos),
R.word("sqrt", caseInsensitive: true).capture(as: .sqrt),
R.word("abs", caseInsensitive: true).capture(as: .abs)
)
// --- Numbers ---
// longestMatch ensures "3.14" matches as Decimal, not Integer "3".
let integer = digits.capture(as: .integer)
let decimal = R.longestMatch(
R.sequence(digits, decimalSeparator, digits),
R.sequence(decimalSeparator, digits),
R.sequence(digits, decimalSeparator)
).capture(as: .decimal)
let hexadecimal = R.sequence(R.characters("#"), hexadecimals).capture(as: .hexadecimal)
let number = R.longestMatch(hexadecimal, decimal, integer)
// --- Error recovery ---
let unknownExpression = R.any().capture(as: .unknown)
// --- Grammar ---
// The RuleLibrary enables recursion: an expression can contain a
// parenthesized expression, which again contains an expression.
enum GrammarKey { case expression, term, factor }
let grammar = RuleLibrary<GrammarKey, String, Token>()
let expression = R.rule(.expression, in: grammar)
let term = R.rule(.term, in: grammar)
let factor = R.rule(.factor, in: grammar)
let parenthesizedExpression = R.sequence(
parenthesisOpen, gap.optional, expression, gap.optional, parenthesisClose
).capture(as: .parenthesized)
let function = R.sequence(
functionKey, parenthesisOpen, gap.optional, expression, gap.optional, parenthesisClose
).capture(as: .function)
// Factor: optional negation + number, function, or parenthesized expression.
grammar[.factor] = R.sequence(
negate.optional, .firstMatch(number, function, parenthesizedExpression)
).capture(as: .factor)
// Term: factors joined by * or /. Left-to-right: a * b / c = (a * b) / c.
grammar[.term] = R.sequence(
factor, .zeroOrMore(.sequence(gap.optional, multiplyOrDivide, gap.optional, factor))
).capture(as: .term)
// Expression: terms joined by + or -. Left-to-right: a + b - c = (a + b) - c.
grammar[.expression] = R.sequence(
term, .zeroOrMore(R.sequence(gap.optional, addOrSubtract, gap.optional, term))
).capture(as: .expression)
// --- Parse ---
// The outer loop tries expression first, then skips whitespace, then
// captures one unknown character as fallback. This ensures the parser
// never fails — unknown input is visible in the capture tree.
let fullMatch = R.sequence(
gap.optional,
.oneOrMore(.firstMatch(expression, gap, unknownExpression)),
.endOfSequence()
).capture(as: .root)
func parse(_ input: String) -> RuleCapture<String,Token>? {
fullMatch.evaluate(input)?.captures.first
}
// --- Test helper ---
func test(_ input: String, expected: Double) {
guard let root = parse(input) else {
print("[FAIL] '\(input)' — parse failed")
return
}
let result = evaluateMath(root, in: input)
let passed = expected.isNaN ? result.isNaN : Swift.abs(result - expected) < 0.0001
if passed { print("[PASS] '\(input)' = \(result)") }
else { print("[FAIL] '\(input)' = \(result) (expected \(expected))") }
if !collectDiagnostics(root, in: input).isEmpty {
printDiagnostics(for: input, capture: root)
}
}
// --- Tests ---
print("--- Basic arithmetic ---\n")
test("1 + 2", expected: 3)
test("10 - 3", expected: 7)
test("2 * 3", expected: 6)
test("10 / 4", expected: 2.5)
print("\n--- Operator precedence ---\n")
test("2 + 3 * 4", expected: 14)
test("10 - 2 * 3", expected: 4)
test("2 * 3 + 4 * 5", expected: 26)
print("\n--- Parentheses ---\n")
test("(2 + 3) * 4", expected: 20)
test("((1 + 2))", expected: 3)
test("(2 + 3) * (4 - 1)", expected: 15)
test("(3 + ((2 + 6) / 4)) * (6 - 1)", expected: 25)
print("\n--- Negation ---\n")
test("-5", expected: -5)
test("-5 + 10", expected: 5)
test("-(2 + 3)", expected: -5)
test("-2 * -3", expected: 6)
print("\n--- Integers ---\n")
test("42", expected: 42)
test("100 + 200", expected: 300)
print("\n--- Decimals ---\n")
test("3.14", expected: 3.14)
test("0.5 + 0.5", expected: 1.0)
test("2.5 * 4", expected: 10.0)
test(".5 + .5", expected: 1.0)
test("3. + .14", expected: 3.14)
print("\n--- Hexadecimal ---\n")
test("#FF", expected: 255)
test("#10 + #10", expected: 32)
test("#A * 2", expected: 20)
print("\n--- Functions ---\n")
test("sqrt(9)", expected: 3.0)
test("abs(0 - 5)", expected: 5.0)
test("sqrt(3 * 3 + 4 * 4)", expected: 5.0)
test("cos(0)", expected: 1.0)
test("SIN(0)", expected: 0.0)
test("Sqrt(16)", expected: 4.0)
print("\n--- Unknown tokens ---\n")
test("123+456+yxc+(2*5*zu)", expected: .nan)
print("\n--- Capture tree ---\n")
let example = "2 + 3 * ( 4 + 1 )"
if let root = parse(example) {
print("'\(example)':\n")
print(root.debugDescription)
// Root [0..<18]
// ╰─ Expression [0..<18]
// ├─ Term [0..<1]
// │ ╰─ Factor [0..<1]
// │ ╰─ Integer [0..<1]
// ├─ Plus [2..<3]
// ╰─ Term [4..<18]
// ├─ Factor [4..<5]
// │ ╰─ Integer [4..<5]
// ├─ Multiply [6..<7]
// ╰─ Factor [8..<18]
// ╰─ ParenthesizedExpression [8..<18]
// ╰─ Expression [10..<15]
// ├─ Term [10..<11]
// │ ╰─ Factor [10..<11]
// │ ╰─ Integer [10..<11]
// ├─ Plus [12..<13]
// ╰─ Term [14..<15]
// ╰─ Factor [14..<15]
// ╰─ Integer [14..<15]
}
}
BenchmarkInput.swift
//
// BenchmarkInput.swift
// Universal Pattern Recognizer
//
// Created by Maximilian Servais on 22.03.26.
//
// Deterministic test data and timing infrastructure for performance benchmarks.
//
// The text is built from two sections:
//
// Flat section — 6 000 log-like lines (~510 000 characters)
// ────────────────────────────────────────────────────────────
// These patterns can be matched by Swift Regex and Rule alike.
// They are designed to stress pure throughput on a large, realistic input.
//
// Dates "DD.MM.YYYY" 6 000 instances
// Quoted strings 6 000 instances
// Numbers (int + decimal) ~18 000 instances
// Words ~60 000 instances
//
// Nested section — ~1 400 expressions (~75 000 characters)
// ────────────────────────────────────────────────────────────
// These patterns require counting nesting depth. Classical regular expressions
// cannot handle them; Rule can, using the same composable primitives.
//
// Balanced parentheses 1 400 expressions (depths 2–8, 200 per depth)
// Block comments 700 expressions (depths 1–4, 175 per depth)
//
// Total input size: ~585 000 characters.
//
import Foundation
// MARK: - Test Input
/// The shared input for all benchmarks.
///
/// Computed once on first access and reused across every benchmark function.
/// The content is fully deterministic — identical across all runs.
let benchmarkText: String = {
var lines: [String] = []
lines.reserveCapacity(8_500)
// ── Flat section ────────────────────────────────────────────────────────
let levels = ["INFO", "WARN", "ERROR"]
let methods = ["GET", "POST", "PUT", "DELETE", "PATCH"]
let paths = (0..<20).map { "/api/v1/resource/\($0)" }
let statuses = [200, 201, 400, 404, 500]
// Cycle through five valid dates so every line contains DD.MM.YYYY.
let dates = ["01.01.2025", "15.06.2025", "22.03.2026", "07.11.2024", "30.09.2026"]
for i in 0..<6_000 {
let date = dates[i % dates.count]
let hh = String(format: "%02d", (i / 3_600) % 24)
let mm = String(format: "%02d", (i / 60) % 60)
let ss = String(format: "%02d", i % 60)
let level = levels[i % levels.count]
let method = methods[i % methods.count]
let path = paths[i % paths.count]
// Two numbers per line: a decimal response time and an integer status code.
let ms = Double(i % 500) + 0.1 * Double((i * 7) % 10)
let status = statuses[i % statuses.count]
lines.append(
"\(date) \(hh):\(mm):\(ss) [\(level)] \(method) \"\(path)\" completed in \(ms)ms with status \(status)"
)
}
lines.append("") // section separator
// ── Nested section: balanced parentheses ────────────────────────────────
//
// Builds arithmetic expressions with parentheses nested `depth` levels deep.
//
// depth 2: (1+2)
// depth 3: ((1+2)*3)
// depth 4: (((1+2)*3)+4)
// …
//
// A rule can find all matching groups recursively in a single pass.
// A classical regex cannot count nesting depth at all.
for depth in 2...8 {
for base in 0..<200 {
var expr = "\(base % 9 + 1)"
for d in 1..<depth {
let op: Character = d % 2 == 0 ? "+" : "*"
expr = "(\(expr)\(op)\((base + d) % 9 + 1))"
}
lines.append(expr)
}
}
lines.append("") // section separator
// ── Nested section: block comments ──────────────────────────────────────
//
// Builds C-style block comments nested `depth` levels deep.
//
// depth 1: /* comment */
// depth 2: /* outer /* comment */ outer */
// depth 3: /* a /* b /* comment */ b */ a */
//
// A rule with a recursive grammar matches the outermost comment in one pass
// and exposes all inner comments as captures — no second pass required.
// Classical regex fails even at depth 2.
for depth in 1...4 {
for i in 0..<175 {
var comment = "body \(i)"
for d in 0..<depth {
comment = "/* L\(d) \(comment) */"
}
lines.append(comment)
}
}
return lines.joined(separator: "\n")
}()
// MARK: - Benchmark Print
/// Prints a single benchmark result line with aligned columns.
private func printResult(_ label: String, matches: Int, averageMs: Double) {
let namePart = label.padding(toLength: nameColumnWidth, withPad: " ", startingAt: 0)
let matchPart = "Matches " + String(matches).padding(toLength: matchColumnWidth, withPad: " ", startingAt: 0)
let timePart = String(format: "%.1f ms", averageMs)
print(" \(namePart) \(matchPart) \(timePart)")
}
// MARK: - Benchmark Infrastructure
/// Measures the average wall-clock time of `block` over `iterations` runs.
///
/// - Parameters:
/// - label: A short description printed in the results table.
/// - iterations: Number of timed runs after one warmup run (default: 5).
/// - block: The work to measure. Returns an `Int` so the compiler
/// cannot optimise the call away.
///
/// The function prints one formatted result line and returns the average in milliseconds.
@discardableResult
func measure(_ label: String, iterations: Int = 10, block: () -> Int) -> Double {
// Warmup: one untimed run to prime caches and JIT.
var sink = block()
let startTime = Date()
for _ in 0..<iterations {
sink = block()
}
let totalTime = -startTime.timeIntervalSinceNow * 1_000 // convert to ms
let avg = totalTime / Double(iterations)
// Touch sink so the compiler cannot discard the block's side-effects
// in case print(" Matches found: \(sink)") is removed.
_ = sink
// Print result
printResult( label, matches: sink, averageMs: avg)
return avg
}
/// Column widths for aligned benchmark output.
private let nameColumnWidth = 45
private let matchColumnWidth = 10
private let timeColumnWidth = 12