Skip to content

Commit f838794

Browse files
committed
Implement direct AST -> DSLTree conversion
1 parent e539ac1 commit f838794

File tree

13 files changed

+550
-84
lines changed

13 files changed

+550
-84
lines changed

Sources/_StringProcessing/Compiler.swift

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,30 @@
1212
internal import _RegexParser
1313

1414
class Compiler {
15-
let tree: DSLTree
15+
let tree: DSLList
1616

1717
// TODO: Or are these stored on the tree?
1818
var options = MatchingOptions()
1919
private var compileOptions: _CompileOptions = .default
2020

2121
init(ast: AST) {
22-
self.tree = ast.dslTree
22+
self.tree = DSLList(tree: ast.dslTree)
2323
}
2424

2525
init(tree: DSLTree) {
26-
self.tree = tree
26+
self.tree = DSLList(tree: tree)
27+
}
28+
29+
init(list: DSLList) {
30+
self.tree = list
2731
}
2832

2933
init(tree: DSLTree, compileOptions: _CompileOptions) {
34+
self.tree = DSLList(tree: tree)
35+
self.compileOptions = compileOptions
36+
}
37+
38+
init(tree: DSLList, compileOptions: _CompileOptions) {
3039
self.tree = tree
3140
self.compileOptions = compileOptions
3241
}
@@ -42,18 +51,20 @@ class Compiler {
4251
compileOptions:
4352
compileOptions,
4453
captureList: tree.captureList)
45-
return try codegen.emitRoot(tree.root)
54+
fatalError()
55+
// return try codegen.emitRoot(tree.root)
4656
}
4757

4858
__consuming func emitViaList() throws -> MEProgram {
4959
// TODO: Handle global options
50-
var dslList = DSLList(tree: tree)
60+
// var dslList = DSLList(tree: tree)
5161
var codegen = ByteCodeGen(
5262
options: options,
5363
compileOptions:
5464
compileOptions,
5565
captureList: tree.captureList)
56-
return try codegen.emitRoot(&dslList)
66+
var tree = tree
67+
return try codegen.emitRoot(&tree)
5768
}
5869
}
5970

@@ -105,20 +116,22 @@ func _compileRegex(
105116
_ syntax: SyntaxOptions = .traditional,
106117
_ semanticLevel: RegexSemanticLevel? = nil
107118
) throws -> MEProgram {
108-
let ast = try parse(regex, syntax)
109-
let dsl: DSLTree
119+
var ast = try parse(regex, syntax)
120+
let dsl: DSLList
110121

111122
switch semanticLevel?.base {
112123
case .graphemeCluster:
113124
let sequence = AST.MatchingOptionSequence(adding: [.init(.graphemeClusterSemantics, location: .fake)])
114-
dsl = DSLTree(.nonCapturingGroup(.init(ast: .changeMatchingOptions(sequence)), ast.dslTree.root))
125+
ast.root = AST.Node.group(AST.Group(.init(faking: .changeMatchingOptions(sequence)), ast.root, .fake))
126+
dsl = DSLList(ast: ast)
115127
case .unicodeScalar:
116128
let sequence = AST.MatchingOptionSequence(adding: [.init(.unicodeScalarSemantics, location: .fake)])
117-
dsl = DSLTree(.nonCapturingGroup(.init(ast: .changeMatchingOptions(sequence)), ast.dslTree.root))
129+
ast.root = AST.Node.group(AST.Group(.init(faking: .changeMatchingOptions(sequence)), ast.root, .fake))
130+
dsl = DSLList(ast: ast)
118131
case .none:
119-
dsl = ast.dslTree
132+
dsl = DSLList(ast: ast)
120133
}
121-
let program = try Compiler(tree: dsl).emit()
134+
let program = try Compiler(list: dsl).emit()
122135
return program
123136
}
124137

Sources/_StringProcessing/LiteralPrinter.swift

Lines changed: 155 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ extension Regex {
3636
@available(SwiftStdlib 6.0, *)
3737
public var _literalPattern: String? {
3838
var gen = LiteralPrinter(options: MatchingOptions())
39-
gen.outputNode(self.program.tree.root)
39+
var list = self.program.tree.nodes[...]
40+
try? gen.outputList(&list)
4041
return gen.canonicalLiteralString
4142
}
4243
}
@@ -83,6 +84,159 @@ fileprivate struct LiteralPrinter {
8384
mutating func saveInconvertible(_ node: DSLTree.Node) {
8485
segments.append(.inconvertible(node))
8586
}
87+
88+
/// Records `node` as an inconvertible segment and then throws, so that the
/// error propagates out of whatever printing call is in progress.
///
/// - Parameter node: The DSL node that cannot be printed as a literal.
/// - Throws: `Incovertible.error`, unconditionally.
mutating func inconvertible(_ node: DSLTree.Node) throws {
  saveInconvertible(node)
  throw Incovertible.error
}
92+
}
93+
94+
extension LiteralPrinter {
95+
enum Incovertible: Error {
96+
case error
97+
}
98+
99+
/// Pops the node at the front of `list` and prints it, recursively consuming
/// the nodes that follow it as its children.
///
/// Returns without printing anything when `list` is empty.
///
/// - Parameter list: The nodes still to be printed. On return it has been
///   advanced past every node this call consumed.
/// - Throws: `Incovertible.error` as soon as a node without a literal form is
///   reached. The offending node is recorded as an inconvertible segment
///   before the error is thrown.
mutating func outputList(_ list: inout ArraySlice<DSLTree.Node>) throws {
  guard let node = list.popFirst() else {
    return
  }

  switch node {
  case let .orderedChoice(children):
    // Only the child count is used here; the children themselves are read
    // from `list`.
    try outputAlternation(&list, count: children.count)
  case let .concatenation(children):
    try outputConcatenation(&list, count: children.count)

  case let .capture(name, nil, _, nil):
    options.beginScope()
    defer { options.endScope() }
    try outputCapture(&list, name: name)
  case .capture:
    // Captures that use a reference or a transform are unsupported.
    try inconvertible(node)

  case let .nonCapturingGroup(kind, _):
    guard let kindPattern = kind._patternString else {
      try inconvertible(node)
      // Not reached (`inconvertible` always throws); required because a
      // `guard` body must leave the enclosing scope.
      return
    }
    options.beginScope()
    defer { options.endScope() }

    output(kindPattern)
    if case .changeMatchingOptions(let optionSequence) = kind.ast {
      options.apply(optionSequence)
    }
    try outputList(&list)
    output(")")

  case .ignoreCapturesInTypedOutput(_),
       .limitCaptureNesting(_):
    // These wrappers print nothing themselves; print the node that follows.
    try outputList(&list)
  case let .quantification(amount, kind, _):
    try outputQuantification(&list, amount: amount, kind: kind)
  case let .customCharacterClass(charClass):
    outputCustomCharacterClass(charClass)
  case let .atom(atom):
    outputAtom(atom)
  case let .quotedLiteral(literal):
    output(prepareQuotedLiteral(literal))

  case .trivia(_):
    // TODO: Include trivia?
    return
  case .empty:
    return

  case .conditional, .absentFunction, .consumer, .matcher, .characterPredicate:
    // Stop here, like the other unsupported cases: continuing would hand any
    // nodes that belong to this one (e.g. a conditional's branches) to the
    // enclosing alternation/concatenation loop as if they were its own
    // children.
    try inconvertible(node)
  }
}
156+
157+
/// Prints `count` alternation branches read from the front of `list`,
/// separating consecutive branches with `|`.
///
/// - Parameters:
///   - list: The remaining nodes; each branch is consumed via `outputList`.
///   - count: The number of branches to print.
/// - Throws: Whatever `outputList` throws for a branch.
mutating func outputAlternation(_ list: inout ArraySlice<DSLTree.Node>, count: Int) throws {
  var isFirstBranch = true
  for _ in 0..<count {
    if isFirstBranch {
      isFirstBranch = false
    } else {
      output("|")
    }
    try outputList(&list)
  }
}
165+
166+
/// Prints `count` consecutive components read from the front of `list`, one
/// after another with nothing between them.
///
/// - Parameters:
///   - list: The remaining nodes; each component is consumed via `outputList`.
///   - count: The number of components to print.
/// - Throws: Whatever `outputList` throws for a component.
mutating func outputConcatenation(_ list: inout ArraySlice<DSLTree.Node>, count: Int) throws {
  try (0..<count).forEach { _ in
    try outputList(&list)
  }
}
171+
172+
/// Prints a capture group around the next node in `list`.
///
/// - Parameters:
///   - list: The remaining nodes; the captured child is consumed via
///     `outputList`.
///   - name: The capture's name. When non-nil the group opens with
///     `(?<name>`; otherwise it opens with a plain `(`.
/// - Throws: Whatever `outputList` throws for the child; in that case the
///   closing `)` is not printed.
mutating func outputCapture(_ list: inout ArraySlice<DSLTree.Node>, name: String?) throws {
  let opener = name.map { "(?<\($0)>" } ?? "("
  output(opener)
  try outputList(&list)
  output(")")
}
181+
182+
/// Reports whether the node at the front of `list` has to be wrapped in a
/// group before a quantifier can be attached to it.
///
/// - A concatenation with two or more children needs grouping.
/// - A concatenation with exactly one child defers to the node after it.
/// - A quoted literal needs grouping when its prepared form is longer than
///   one character.
/// - Everything else, including an empty list, does not.
///
/// - Parameter list: The nodes to inspect; the caller's slice is not modified.
/// - Returns: `true` when the leading node needs grouping.
func requiresGrouping(_ list: ArraySlice<DSLTree.Node>) -> Bool {
  var remaining = list
  while let head = remaining.popFirst() {
    switch head {
    case .concatenation(let children):
      if children.isEmpty { return false }
      if children.count > 1 { return true }
      // Exactly one child: loop again to examine the node that follows.

    case .quotedLiteral(let literal):
      return prepareQuotedLiteral(literal).count > 1

    default:
      return false
    }
  }
  // Ran out of nodes (malformed?).
  return false
}
202+
203+
/// Prints the quantified child at the front of `list`, followed by its
/// quantifier amount and kind.
///
/// - Parameters:
///   - list: The remaining nodes; the child is consumed via `outputList`.
///   - amount: How many repetitions the quantifier allows.
///   - kind: The quantification kind, printed by `outputQuantificationKind`.
/// - Throws: Whatever `outputList` throws for the child; nothing after the
///   child is printed in that case.
mutating func outputQuantification(
  _ list: inout ArraySlice<DSLTree.Node>,
  amount: DSLTree._AST.QuantificationAmount,
  kind: DSLTree.QuantificationKind
) throws {
  // RegexBuilder regexes can have children that need to be wrapped in a
  // non-capturing group before a quantifier is attached to them.
  let wrapsChild = requiresGrouping(list)
  if wrapsChild { output("(?:") }
  try outputList(&list)
  if wrapsChild { output(")") }

  switch amount.ast {
  case .zeroOrMore: output("*")
  case .oneOrMore: output("+")
  case .zeroOrOne: output("?")
  case let .exactly(n): output("{\(n.value!)}")
  case let .nOrMore(n): output("{\(n.value!),}")
  case let .upToN(n): output("{,\(n.value!)}")
  case let .range(low, high): output("{\(low.value!),\(high.value!)}")
  #if RESILIENT_LIBRARIES
  @unknown default:
    fatalError()
  #endif
  }

  outputQuantificationKind(kind)
}
86240
}
87241

88242
extension LiteralPrinter {

Sources/_StringProcessing/Regex/ASTConversion.swift

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,120 @@ extension AST {
1717
}
1818
}
1919

20+
extension AST.Node {
21+
/// Appends this node, followed by its converted descendants, to `list`.
///
/// A parent is always appended before its children. Wherever the DSL node's
/// payload has a slot for a child node, `TEMP_FAKE_NODE` is stored in that
/// slot and the real child is appended to `list` afterwards.
///
/// - Parameter list: The list to append to.
/// - Throws: `Unsupported` for balanced captures and interpolations.
func convert(into list: inout [DSLTree.Node]) throws {
  switch self {
  case .alternation(let alternation):
    let branches = alternation.children
    list.append(.orderedChoice(Array(repeating: TEMP_FAKE_NODE, count: branches.count)))
    for branch in branches {
      try branch.convert(into: &list)
    }

  case .concatenation:
    // Use the flattened/coalesced children rather than the raw ones.
    let coalesced = coalescedChildren
    list.append(.concatenation(Array(repeating: TEMP_FAKE_NODE, count: coalesced.count)))
    for child in coalesced {
      try child.convert(into: &list)
    }

  case .group(let group):
    // Append the group's own node first; every supported kind is then
    // followed by its single converted child.
    switch group.kind.value {
    case .capture:
      list.append(.capture(TEMP_FAKE_NODE))
    case .namedCapture(let name):
      list.append(.capture(name: name.value, TEMP_FAKE_NODE))
    case .balancedCapture:
      throw Unsupported("TODO: balanced captures")
    default:
      list.append(.nonCapturingGroup(.init(ast: group.kind.value), TEMP_FAKE_NODE))
    }
    try group.child.convert(into: &list)

  case .conditional(let conditional):
    list.append(.conditional(.init(ast: conditional.condition.kind), TEMP_FAKE_NODE, TEMP_FAKE_NODE))
    try conditional.trueBranch.convert(into: &list)
    try conditional.falseBranch.convert(into: &list)

  case .quantification(let quant):
    list.append(
      .quantification(.init(ast: quant.amount.value), .syntax(.init(ast: quant.kind.value)), TEMP_FAKE_NODE))
    try quant.child.convert(into: &list)

  case .quote(let node):
    list.append(.quotedLiteral(node.literal))

  case .trivia(let node):
    list.append(.trivia(node.contents))

  case .interpolation(_):
    throw Unsupported("TODO: interpolation")

  case .atom(let atom):
    switch atom.kind {
    case .scalarSequence(let seq):
      // The DSL doesn't have an equivalent node for scalar sequences, so
      // emit a single quoted literal built from the scalar values.
      list.append(.quotedLiteral(String(seq.scalarValues)))
    default:
      list.append(.atom(atom.dslTreeAtom))
    }

  case .customCharacterClass(let ccc):
    list.append(.customCharacterClass(ccc.dslTreeClass))

  case .absentFunction(let abs):
    // TODO: What should this map to?
    list.append(.absentFunction(.init(ast: abs)))

  case .empty(_):
    list.append(.empty)
  }
}
82+
83+
/// The children of this node in the form used for list conversion.
///
/// For a concatenation, nested concatenations are flattened and adjacent
/// characters, scalars and quotes are coalesced into quoted literals, over
/// which grapheme breaking can then be performed. Other node kinds return
/// their direct children unchanged, or an empty array when they have none.
var coalescedChildren: [AST.Node] {
  // Expands nested concatenations into a flat run of non-concatenation nodes.
  func flattened(_ node: AST.Node) -> [AST.Node] {
    guard case .concatenation(let nested) = node else {
      return [node]
    }
    return nested.children.flatMap(flattened)
  }

  switch self {
  case .quote, .trivia, .interpolation, .atom, .customCharacterClass, .absentFunction, .empty:
    return []

  case .alternation(let alternation):
    return alternation.children

  case .group(let group):
    return [group.child]

  case .conditional(let conditional):
    return [conditional.trueBranch, conditional.falseBranch]

  case .quantification(let quant):
    return [quant.child]

  case .concatenation(let concatenation):
    let flat: [AST.Node] = concatenation.children.flatMap(flattened)
    return flat.coalescing(with: "", into: { AST.Node.quote(.init($0, .fake)) }) { literal, node in
      switch node {
      case .atom(let atom):
        guard let character = atom.literalCharacterValue else { return false }
        literal.append(character)
        return true
      case .quote(let quote):
        literal += quote.literal
        return true
      case .trivia:
        // Trivia can be completely ignored if we've already coalesced
        // something.
        return !literal.isEmpty
      default:
        return false
      }
    }
  }
}
132+
}
133+
20134
extension AST.Node {
21135
/// Converts an AST node to a `convertedRegexLiteral` node.
22136
var dslTreeNode: DSLTree.Node {

Sources/_StringProcessing/Regex/AnyRegexOutput.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ extension Regex where Output == AnyRegexOutput {
284284
/// - Parameter regex: A regular expression to convert to use a dynamic
285285
/// capture list.
286286
public init<OtherOutput>(_ regex: Regex<OtherOutput>) {
287-
self.init(node: regex.root)
287+
self.init(list: regex.list)
288288
}
289289
}
290290

@@ -331,7 +331,7 @@ extension Regex {
331331
_ regex: Regex<AnyRegexOutput>,
332332
as outputType: Output.Type = Output.self
333333
) {
334-
self.init(node: regex.root)
334+
self.init(list: regex.list)
335335

336336
guard _verifyType().0 else {
337337
return nil

0 commit comments

Comments
 (0)