Skip to content

Commit 9f0c459

Browse files
authored
[perf] Switch to direct generation of the DSL List (#841)
Implements the direct AST -> DSLList conversion
1 parent f27b1b7 commit 9f0c459

16 files changed

+721
-96
lines changed

Sources/_StringProcessing/ByteCodeGen+DSLList.swift

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,59 @@ fileprivate extension Compiler.ByteCodeGen {
351351
}
352352
}
353353

354+
/// Visits one node of a flattened DSL list, together with its whole subtree,
/// and reports whether matching that node is guaranteed to consume input.
///
/// The list is read in pre-order: a parent node is immediately followed by
/// the nodes of its children. `position` is the cursor into `list`. On return
/// it has been advanced past the visited node *and all of its descendants*,
/// so a caller that iterates over siblings reads the next sibling rather than
/// a leftover grandchild.
///
/// - Parameters:
///   - list: The flattened nodes to inspect.
///   - position: Index of the node to visit; updated to the index just past
///     the visited subtree.
/// - Returns: `true` only when the node cannot produce a zero-length match;
///   `false` when it can, when the node kind is not analyzed, or when
///   `position` is already at or past `list.endIndex`.
func _guaranteesForwardProgressImpl(_ list: ArraySlice<DSLTree.Node>, position: inout Int) -> Bool {
  guard position < list.endIndex else { return false }
  let node = list[position]
  position += 1
  switch node {
  case .orderedChoice(let children):
    // Every alternative has to make progress. All alternatives are visited,
    // even after one has failed, so that `position` always ends up past the
    // complete subtree instead of stopping in the middle of it.
    var everyAlternativeProgresses = true
    for _ in 0..<children.count {
      if !_guaranteesForwardProgressImpl(list, position: &position) {
        everyAlternativeProgresses = false
      }
    }
    return everyAlternativeProgresses
  case .concatenation(let children):
    // One progressing component is enough, but the remaining components are
    // still visited to keep the cursor in sync with the tree structure.
    var anyComponentProgresses = false
    for _ in 0..<children.count {
      if _guaranteesForwardProgressImpl(list, position: &position) {
        anyComponentProgresses = true
      }
    }
    return anyComponentProgresses
  case .capture(_, _, _, _):
    return _guaranteesForwardProgressImpl(list, position: &position)
  case .nonCapturingGroup(let kind, _):
    // Visit the child first so the cursor skips it even for lookarounds.
    let childProgresses = _guaranteesForwardProgressImpl(list, position: &position)
    switch kind.ast {
    case .lookahead, .negativeLookahead, .lookbehind, .negativeLookbehind:
      // Lookarounds never consume input, whatever their child matches.
      return false
    default:
      return childProgresses
    }
  case .atom(let atom):
    switch atom {
    case .changeMatchingOptions, .assertion: return false
    // Captures may be nil so backreferences may be zero length matches
    case .backreference: return false
    default: return true
    }
  case .trivia, .empty:
    return false
  case .quotedLiteral(let string):
    return !string.isEmpty
  case .consumer, .matcher:
    // Allow zero width consumers and matchers
    return false
  case .customCharacterClass(let ccc):
    return ccc.guaranteesForwardProgress
  case .quantification(let amount, _, _):
    // Visit the child before looking at the bounds so the cursor skips it
    // even when the quantification may match zero times.
    let childProgresses = _guaranteesForwardProgressImpl(list, position: &position)
    let (atLeast, _) = amount.ast.bounds
    guard let atLeast, atLeast > 0 else { return false }
    return childProgresses
  case .limitCaptureNesting, .ignoreCapturesInTypedOutput:
    return _guaranteesForwardProgressImpl(list, position: &position)
  default:
    // NOTE(review): node kinds that land here are treated as leaves. If any
    // of them stores children in the list (for example conditionals), those
    // children are not skipped here — confirm against the DSLList layout.
    return false
  }
}
401+
402+
/// Reports whether the node at the start of `list` is guaranteed to consume
/// input, as determined by `_guaranteesForwardProgressImpl`.
///
/// - Parameter list: Slice of flattened DSL nodes; analysis begins at
///   `list.startIndex`.
/// - Returns: The result of `_guaranteesForwardProgressImpl` for that node.
func guaranteesForwardProgress(_ list: ArraySlice<DSLTree.Node>) -> Bool {
  // The implementation needs a mutable cursor; its final value is not used.
  var cursor = list.startIndex
  let progresses = _guaranteesForwardProgressImpl(list, position: &cursor)
  return progresses
}
406+
354407
mutating func emitQuantification(
355408
_ amount: AST.Quantification.Amount,
356409
_ kind: DSLTree.QuantificationKind,
@@ -526,8 +579,8 @@ fileprivate extension Compiler.ByteCodeGen {
526579
let startPosition: PositionRegister?
527580
// FIXME: forward progress check?!
528581
let emitPositionChecking =
529-
(!optimizationsEnabled || (list.first?.guaranteesForwardProgress != true)) &&
530-
maxExtraTrips == nil
582+
(!optimizationsEnabled || !guaranteesForwardProgress(list))
583+
&& maxExtraTrips == nil
531584

532585
if emitPositionChecking {
533586
startPosition = builder.makePositionRegister()

Sources/_StringProcessing/Compiler.swift

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,30 @@
1212
internal import _RegexParser
1313

1414
class Compiler {
15-
let tree: DSLTree
15+
let tree: DSLList
1616

1717
// TODO: Or are these stored on the tree?
1818
var options = MatchingOptions()
1919
private var compileOptions: _CompileOptions = .default
2020

2121
init(ast: AST) {
22-
self.tree = ast.dslTree
22+
self.tree = DSLList(tree: ast.dslTree)
2323
}
2424

2525
init(tree: DSLTree) {
26-
self.tree = tree
26+
self.tree = DSLList(tree: tree)
27+
}
28+
29+
init(list: DSLList) {
30+
self.tree = list
2731
}
2832

2933
init(tree: DSLTree, compileOptions: _CompileOptions) {
34+
self.tree = DSLList(tree: tree)
35+
self.compileOptions = compileOptions
36+
}
37+
38+
init(tree: DSLList, compileOptions: _CompileOptions) {
3039
self.tree = tree
3140
self.compileOptions = compileOptions
3241
}
@@ -42,18 +51,20 @@ class Compiler {
4251
compileOptions:
4352
compileOptions,
4453
captureList: tree.captureList)
45-
return try codegen.emitRoot(tree.root)
54+
fatalError()
55+
// return try codegen.emitRoot(tree.root)
4656
}
4757

4858
__consuming func emitViaList() throws -> MEProgram {
4959
// TODO: Handle global options
50-
var dslList = DSLList(tree: tree)
60+
// var dslList = DSLList(tree: tree)
5161
var codegen = ByteCodeGen(
5262
options: options,
5363
compileOptions:
5464
compileOptions,
5565
captureList: tree.captureList)
56-
return try codegen.emitRoot(&dslList)
66+
var tree = tree
67+
return try codegen.emitRoot(&tree)
5768
}
5869
}
5970

@@ -105,20 +116,22 @@ func _compileRegex(
105116
_ syntax: SyntaxOptions = .traditional,
106117
_ semanticLevel: RegexSemanticLevel? = nil
107118
) throws -> MEProgram {
108-
let ast = try parse(regex, syntax)
109-
let dsl: DSLTree
119+
var ast = try parse(regex, syntax)
120+
let dsl: DSLList
110121

111122
switch semanticLevel?.base {
112123
case .graphemeCluster:
113124
let sequence = AST.MatchingOptionSequence(adding: [.init(.graphemeClusterSemantics, location: .fake)])
114-
dsl = DSLTree(.nonCapturingGroup(.init(ast: .changeMatchingOptions(sequence)), ast.dslTree.root))
125+
ast.root = AST.Node.group(AST.Group(.init(faking: .changeMatchingOptions(sequence)), ast.root, .fake))
126+
dsl = DSLList(ast: ast)
115127
case .unicodeScalar:
116128
let sequence = AST.MatchingOptionSequence(adding: [.init(.unicodeScalarSemantics, location: .fake)])
117-
dsl = DSLTree(.nonCapturingGroup(.init(ast: .changeMatchingOptions(sequence)), ast.dslTree.root))
129+
ast.root = AST.Node.group(AST.Group(.init(faking: .changeMatchingOptions(sequence)), ast.root, .fake))
130+
dsl = DSLList(ast: ast)
118131
case .none:
119-
dsl = ast.dslTree
132+
dsl = DSLList(ast: ast)
120133
}
121-
let program = try Compiler(tree: dsl).emit()
134+
let program = try Compiler(list: dsl).emit()
122135
return program
123136
}
124137

Sources/_StringProcessing/LiteralPrinter.swift

Lines changed: 164 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ extension Regex {
3636
@available(SwiftStdlib 6.0, *)
3737
public var _literalPattern: String? {
3838
var gen = LiteralPrinter(options: MatchingOptions())
39-
gen.outputNode(self.program.tree.root)
39+
var list = self.program.list.nodes[...]
40+
try? gen.outputList(&list)
4041
return gen.canonicalLiteralString
4142
}
4243
}
@@ -83,6 +84,159 @@ fileprivate struct LiteralPrinter {
8384
/// Appends an `.inconvertible` segment wrapping `node` to `segments`.
/// Unlike `inconvertible(_:)`, this does not throw.
mutating func saveInconvertible(_ node: DSLTree.Node) {
8384
segments.append(.inconvertible(node))
8485
}
87+
88+
/// Records `node` as an `.inconvertible` segment and then aborts the current
/// traversal by throwing.
///
/// - Parameter node: The node that cannot be rendered.
/// - Throws: `Incovertible.error`, always.
mutating func inconvertible(_ node: DSLTree.Node) throws {
  // Reuse the non-throwing recorder so both paths append the same segment.
  saveInconvertible(node)
  throw Incovertible.error
}
92+
}
93+
94+
extension LiteralPrinter {
  /// Error thrown to abandon printing when a node cannot be rendered.
  /// The spelling of the type name is kept as is because `inconvertible(_:)`
  /// refers to it by this name.
  enum Incovertible: Error {
    case error
  }

  /// Consumes the first node of `list`, plus whatever child nodes its printer
  /// consumes, and emits the corresponding pattern text.
  ///
  /// Does nothing when `list` is empty.
  ///
  /// - Parameter list: Remaining flattened nodes; consumed nodes are removed
  ///   from the front.
  /// - Throws: Whatever `inconvertible(_:)` or the nested output calls throw.
  mutating func outputList(_ list: inout ArraySlice<DSLTree.Node>) throws {
    guard let head = list.popFirst() else { return }

    switch head {
    case .orderedChoice(let alternatives):
      try outputAlternation(&list, count: alternatives.count)

    case .concatenation(let components):
      try outputConcatenation(&list, count: components.count)

    case .capture(let name, nil, _, nil):
      // Option changes made inside the capture are scoped to it.
      options.beginScope()
      defer { options.endScope() }
      try outputCapture(&list, name: name)

    case .capture:
      // Captures that use a reference or a transform are unsupported
      try inconvertible(head)

    case .nonCapturingGroup(let kind, _):
      guard let opener = kind._patternString else {
        try inconvertible(head)
        return
      }
      // Option changes made inside the group are scoped to it.
      options.beginScope()
      defer { options.endScope() }

      output(opener)
      if case .changeMatchingOptions(let sequence) = kind.ast {
        options.apply(sequence)
      }
      try outputList(&list)
      output(")")

    case .ignoreCapturesInTypedOutput, .limitCaptureNesting:
      // These wrappers emit nothing themselves; print the wrapped child.
      try outputList(&list)

    case .quantification(let amount, let kind, _):
      try outputQuantification(&list, amount: amount, kind: kind)

    case .customCharacterClass(let characterClass):
      outputCustomCharacterClass(characterClass)

    case .atom(let atom):
      outputAtom(atom)

    case .quotedLiteral(let text):
      output(prepareQuotedLiteral(text))

    case .trivia, .empty:
      // TODO: Include trivia?
      return

    case .conditional, .absentFunction, .consumer, .matcher, .characterPredicate:
      saveInconvertible(head)
    }
  }

  /// Prints `count` consecutive children from `list`, separated by `|`.
  mutating func outputAlternation(_ list: inout ArraySlice<DSLTree.Node>, count: Int) throws {
    var needsSeparator = false
    for _ in 0..<count {
      if needsSeparator {
        output("|")
      }
      needsSeparator = true
      try outputList(&list)
    }
  }

  /// Prints `count` consecutive children from `list` back to back.
  mutating func outputConcatenation(_ list: inout ArraySlice<DSLTree.Node>, count: Int) throws {
    try (0..<count).forEach { _ in
      try outputList(&list)
    }
  }

  /// Prints a capture group, named when `name` is non-nil, around the next
  /// child in `list`.
  mutating func outputCapture(_ list: inout ArraySlice<DSLTree.Node>, name: String?) throws {
    let opener = name.map { "(?<\($0)>" } ?? "("
    output(opener)
    try outputList(&list)
    output(")")
  }

  /// Reports whether the node at the front of `list` must be wrapped in a
  /// group before a quantifier is attached to it.
  ///
  /// This is the case for a concatenation with more than one child and for a
  /// quoted literal whose prepared form is longer than one character.
  /// A single-child concatenation defers to the node that follows it.
  func requiresGrouping(_ list: ArraySlice<DSLTree.Node>) -> Bool {
    var remaining = list
    while let head = remaining.first {
      switch head {
      case .concatenation(let children):
        if children.isEmpty { return false }
        if children.count > 1 { return true }
        // Exactly one child: decide based on the node after this one.
        remaining = remaining.dropFirst()

      case .quotedLiteral(let text):
        return prepareQuotedLiteral(text).count > 1

      default:
        return false
      }
    }
    return false // malformed?
  }

  /// Prints the quantified child from `list`, followed by the quantifier
  /// amount and the quantification kind.
  ///
  /// - Parameters:
  ///   - list: Remaining flattened nodes; the quantified child is consumed.
  ///   - amount: The quantifier amount to render (`*`, `+`, `?`, `{…}`).
  ///   - kind: Passed to `outputQuantificationKind` after the amount.
  mutating func outputQuantification(
    _ list: inout ArraySlice<DSLTree.Node>,
    amount: DSLTree._AST.QuantificationAmount,
    kind: DSLTree.QuantificationKind
  ) throws {
    // RegexBuilder regexes can have children that need to be wrapped in a
    // non-capturing group before a quantifier is applied to them.
    let needsGroup = requiresGrouping(list)
    if needsGroup {
      output("(?:")
    }
    try outputList(&list)
    if needsGroup {
      output(")")
    }

    let suffix: String
    switch amount.ast {
    case .zeroOrMore:
      suffix = "*"
    case .oneOrMore:
      suffix = "+"
    case .zeroOrOne:
      suffix = "?"
    case .exactly(let n):
      suffix = "{\(n.value!)}"
    case .nOrMore(let n):
      suffix = "{\(n.value!),}"
    case .upToN(let n):
      suffix = "{,\(n.value!)}"
    case .range(let low, let high):
      suffix = "{\(low.value!),\(high.value!)}"
    #if RESILIENT_LIBRARIES
    @unknown default:
      fatalError()
    #endif
    }
    output(suffix)

    outputQuantificationKind(kind)
  }
}
87241

88242
extension LiteralPrinter {
@@ -455,7 +609,15 @@ extension String {
455609
}
456610

457611
func escapingConfusableCharacters() -> String {
458-
lazy.map(\.escapingConfusable).joined()
612+
reduce(into: "") { result, ch in
613+
for scalar in ch.unicodeScalars {
614+
if scalar.isPrintableASCII {
615+
result.append(Character(scalar))
616+
} else {
617+
result.append(scalar.escapedString)
618+
}
619+
}
620+
}
459621
}
460622
}
461623

0 commit comments

Comments
 (0)