Skip to content

Commit 467e885

Browse files
committed
Handle canOnlyMatchAtStart for DSLList
1 parent 4bd8a5d commit 467e885

File tree

2 files changed

+145
-48
lines changed

2 files changed

+145
-48
lines changed

Sources/_StringProcessing/ByteCodeGen+DSLList.swift

Lines changed: 145 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,139 @@ extension Compiler.ByteCodeGen {
2626
var list = root.nodes[...]
2727
try emitNode(&list)
2828

29-
// FIXME: Restore this canOnlyMatchAtStart
30-
// builder.canOnlyMatchAtStart = root.canOnlyMatchAtStart()
29+
builder.canOnlyMatchAtStart = canOnlyMatchAtStart(in: root)
3130
builder.buildAccept()
3231
return try builder.assemble()
3332
}
3433
}
3534

3635
fileprivate extension Compiler.ByteCodeGen {
36+
/// Implementation for `canOnlyMatchAtStart`, which maintains the option
37+
/// state.
38+
///
39+
/// For a given specific node, this method can return one of three values:
40+
///
41+
/// - `true`: This node is guaranteed to match only at the start of a subject.
42+
/// - `false`: This node can match anywhere in the subject.
43+
/// - `nil`: This node is inconclusive about where it can match.
44+
///
45+
/// In particular, non-required groups and option-setting groups are
46+
/// inconclusive about where they can match.
47+
private mutating func _canOnlyMatchAtStartImpl(
48+
_ list: inout ArraySlice<DSLTree.Node>
49+
) -> Bool? {
// Pops the front node off `list` and classifies it. Parent nodes recurse
// with the same `list`, so each recursive call consumes the nodes that
// follow it. An empty `list` is reported as `false`.
50+
guard let node = list.popFirst() else { return false }
51+
switch node {
52+
// Defining cases
53+
case .atom(.assertion(.startOfSubject)):
54+
return true
// A caret only pins the match to the start of the subject while the
// current options do not make anchors match at newlines.
55+
case .atom(.assertion(.caretAnchor)):
56+
return !options.anchorsMatchNewlines
57+
58+
// Changing options doesn't determine `true`/`false`.
59+
case .atom(.changeMatchingOptions(let sequence)):
60+
options.apply(sequence.ast)
61+
return nil
62+
63+
// Any other atom or consuming node returns `false`.
64+
case .atom, .customCharacterClass, .quotedLiteral:
65+
return false
66+
67+
// Trivia/empty have no effect.
68+
case .trivia, .empty:
69+
return nil
70+
71+
// In an alternation, all of its children must match only at start.
// A child that yields `false` or `nil` makes the whole alternation `false`.
// On that early exit the remaining children are NOT popped from `list`.
// NOTE(review): every visible caller appears to propagate `false` straight
// up, so the stale position looks harmless — confirm.
// NOTE(review): with zero children the loop body never runs and the result
// is `true` — confirm an empty alternation cannot reach this point.
72+
case .orderedChoice(let children):
73+
for _ in 0..<children.count {
74+
guard _canOnlyMatchAtStartImpl(&list) == true else {
75+
return false
76+
}
77+
}
78+
return true
79+
80+
case .concatenation(let children):
81+
// In a concatenation, the first definitive child provides the answer.
// `i` counts the children consumed so far, so the second loop below skips
// exactly the ones that remain. If every child is inconclusive (`nil`),
// `found` keeps its initial `false` and the concatenation reports `false`
// rather than `nil`.
82+
var i = 0
83+
var found = false
84+
while i < children.count {
85+
i += 1
86+
if let result = _canOnlyMatchAtStartImpl(&list) {
87+
found = result
88+
break
89+
}
90+
}
91+
// Once a definitive answer has been found, skip the rest of the nodes
92+
// in the concatenation.
// `try?` discards any error thrown by `skipNode`.
93+
while i < children.count {
94+
i += 1
95+
try? skipNode(&list, preservingCaptures: false)
96+
}
97+
return found
98+
99+
// Groups (and other parent nodes) defer to the child.
100+
case .nonCapturingGroup(let kind, _):
101+
// Don't let a negative lookahead affect this - need to continue to next sibling
102+
if kind.isNegativeLookahead {
103+
try? skipNode(&list, preservingCaptures: false)
104+
return nil
105+
}
// Option changes applied inside the group are scoped: `endScope()` runs on
// every exit path through the `defer`.
106+
options.beginScope()
107+
defer { options.endScope() }
108+
if case .changeMatchingOptions(let sequence) = kind.ast {
109+
options.apply(sequence)
110+
}
111+
return _canOnlyMatchAtStartImpl(&list)
112+
case .capture:
113+
options.beginScope()
114+
defer { options.endScope() }
115+
return _canOnlyMatchAtStartImpl(&list)
116+
case .ignoreCapturesInTypedOutput, .limitCaptureNesting:
117+
return _canOnlyMatchAtStartImpl(&list)
118+
119+
// A quantification that doesn't require its child to exist can still
120+
// allow a start-only match. (e.g. `/(foo)?^bar/`)
// When the child is optional it is skipped unexamined and the result is
// inconclusive, so the next sibling gets to decide.
121+
case .quantification(let amount, _, _):
122+
if amount.requiresAtLeastOne {
123+
return _canOnlyMatchAtStartImpl(&list)
124+
} else {
125+
try? skipNode(&list, preservingCaptures: false)
126+
return nil
127+
}
128+
129+
// For conditional nodes, both sides must require matching at start.
// `&&` short-circuits: when the first call is not `true`, the second call
// is never made and its nodes stay in `list`.
// NOTE(review): this assumes the conditional is followed by exactly two
// child subtrees in `list` — confirm against how the list is built.
130+
case .conditional:
131+
return _canOnlyMatchAtStartImpl(&list) == true
132+
&& _canOnlyMatchAtStartImpl(&list) == true
133+
134+
// Extended behavior isn't known, so we return `false` for safety.
135+
case .consumer, .matcher, .characterPredicate, .absentFunction:
136+
return false
137+
}
138+
}
139+
140+
/// Returns a Boolean value indicating whether the regex with this node as
141+
/// the root can _only_ match at the start of a subject.
142+
///
143+
/// For example, these regexes can only match at the start of a subject:
144+
///
145+
/// - `/^foo/`
146+
/// - `/(^foo|^bar)/` (both sides of the alternation start with `^`)
147+
///
148+
/// These can match other places in a subject:
149+
///
150+
/// - `/(^foo)?bar/` (`^` is in an optional group)
151+
/// - `/(^foo|bar)/` (only one side of the alternation starts with `^`)
152+
/// - `/(?m)^foo/` (`^` means "the start of a line" due to `(?m)`)
153+
mutating func canOnlyMatchAtStart(in list: DSLList) -> Bool {
// The analysis mutates `options` as it walks the nodes, so run it against a
// fresh `MatchingOptions()` and restore the saved options on exit.
154+
let currentOptions = options
155+
options = MatchingOptions()
156+
defer { options = currentOptions }
157+
158+
// Work on a slice of all nodes; the implementation consumes it from the front.
var list = list.nodes[...]
// An inconclusive (`nil`) result for the whole list is treated as `false`.
159+
return _canOnlyMatchAtStartImpl(&list) ?? false
160+
}
161+
37162
mutating func emitAlternationGen<T>(
38163
_ elements: inout ArraySlice<T>,
39164
alternationCount: Int,
@@ -667,41 +792,48 @@ fileprivate extension Compiler.ByteCodeGen {
667792
// MARK: Skip node
668793

669794
extension Compiler.ByteCodeGen {
670-
mutating func skipNode(_ list: inout ArraySlice<DSLTree.Node>) throws {
795+
mutating func skipNode(
796+
_ list: inout ArraySlice<DSLTree.Node>,
797+
preservingCaptures: Bool = true
798+
) throws {
671799
guard let node = list.popFirst() else { return }
672800
switch node {
673801
case let .orderedChoice(children):
674802
let n = children.count
675803
for _ in 0..<n {
676-
try skipNode(&list)
804+
try skipNode(&list, preservingCaptures: preservingCaptures)
677805
}
678806

679807
case let .concatenation(children):
680808
let n = children.count
681809
for _ in 0..<n {
682-
try skipNode(&list)
810+
try skipNode(&list, preservingCaptures: preservingCaptures)
683811
}
684812

685813
case let .capture(name, refId, _, transform):
686814
options.beginScope()
687815
defer { options.endScope() }
688816

689-
let cap = builder.makeCapture(id: refId, name: name)
690-
builder.buildBeginCapture(cap)
691-
try skipNode(&list)
692-
builder.buildEndCapture(cap)
817+
if preservingCaptures {
818+
let cap = builder.makeCapture(id: refId, name: name)
819+
builder.buildBeginCapture(cap)
820+
try skipNode(&list, preservingCaptures: preservingCaptures)
821+
builder.buildEndCapture(cap)
822+
} else {
823+
try skipNode(&list, preservingCaptures: preservingCaptures)
824+
}
693825

694826
case let .nonCapturingGroup(kind, _):
695-
try skipNode(&list)
827+
try skipNode(&list, preservingCaptures: preservingCaptures)
696828

697829
case .ignoreCapturesInTypedOutput:
698-
try skipNode(&list)
830+
try skipNode(&list, preservingCaptures: preservingCaptures)
699831

700832
case .limitCaptureNesting:
701-
try skipNode(&list)
833+
try skipNode(&list, preservingCaptures: preservingCaptures)
702834

703835
case let .quantification(amt, kind, _):
704-
try skipNode(&list)
836+
try skipNode(&list, preservingCaptures: preservingCaptures)
705837

706838
case .customCharacterClass, .atom, .quotedLiteral, .matcher:
707839
break

Sources/_StringProcessing/Regex/DSLList.swift

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -25,41 +25,6 @@ struct DSLList {
2525
}
2626
}
2727

28-
extension DSLList {
29-
struct Children: Sequence {
30-
var nodes: [DSLTree.Node]
31-
var firstChildIndex: Int
32-
33-
struct Iterator: IteratorProtocol {
34-
var nodes: [DSLTree.Node]
35-
var currentIndex: Int
36-
var remainingCount: Int
37-
38-
mutating func next() -> DSLTree.Node? {
39-
guard remainingCount > 0 else { return nil }
40-
guard currentIndex < nodes.count else {
41-
// FIXME: assert?
42-
print("ERROR: index out of bounds")
43-
return nil
44-
}
45-
remainingCount -= 1
46-
var nextIndex = currentIndex
47-
var inc = nodes[currentIndex].directChildren + 1
48-
while inc > 0 {
49-
nextIndex += 1
50-
inc += nodes[nextIndex].directChildren - 1
51-
}
52-
53-
return nodes[currentIndex]
54-
}
55-
}
56-
57-
func makeIterator() -> Iterator {
58-
Iterator(nodes: nodes, currentIndex: firstChildIndex, remainingCount: nodes[firstChildIndex].directChildren)
59-
}
60-
}
61-
}
62-
6328
extension DSLTree.Node {
6429
var directChildren: Int {
6530
switch self {

0 commit comments

Comments
 (0)