@@ -26,14 +26,139 @@ extension Compiler.ByteCodeGen {
2626 var list = root. nodes [ ... ]
2727 try emitNode ( & list)
2828
29- // FIXME: Restore this canOnlyMatchAtStart
30- // builder.canOnlyMatchAtStart = root.canOnlyMatchAtStart()
29+ builder. canOnlyMatchAtStart = canOnlyMatchAtStart ( in: root)
3130 builder. buildAccept ( )
3231 return try builder. assemble ( )
3332 }
3433}
3534
3635fileprivate extension Compiler . ByteCodeGen {
/// Implementation for `canOnlyMatchAtStart`, which maintains the option
/// state while walking the flattened node list.
///
/// Pops the node at the front of `list` and classifies it, recursing into
/// (or skipping over) the nodes that follow it as its children. The result
/// is one of three values:
///
/// - `true`: This node is guaranteed to match only at the start of a subject.
/// - `false`: This node can match anywhere in the subject.
/// - `nil`: This node is inconclusive about where it can match.
///
/// In particular, non-required groups and option-setting groups are
/// inconclusive about where they can match.
///
/// `false` is also the conservative answer: it is returned for an empty
/// list, for nodes whose behavior isn't known, and when a subtree that has
/// to be skipped cannot be skipped.
///
/// - Note: A `false` result is final for every caller in this method (and
///   for `canOnlyMatchAtStart(in:)`), so on that path the remaining nodes of
///   the current subtree may be left unconsumed in `list`. On `true` and
///   `nil` paths the subtree handled here is consumed or skipped.
///
/// - Parameter list: The remaining nodes; consumed from the front.
/// - Returns: `true`, `false`, or `nil` as described above.
private mutating func _canOnlyMatchAtStartImpl(
  _ list: inout ArraySlice<DSLTree.Node>
) -> Bool? {
  guard let node = list.popFirst() else { return false }
  switch node {
  // Defining cases
  case .atom(.assertion(.startOfSubject)):
    return true
  case .atom(.assertion(.caretAnchor)):
    // `^` pins the match to the subject start only while anchors do not
    // also match at newlines.
    return !options.anchorsMatchNewlines

  // Changing options doesn't determine `true`/`false`, but the new options
  // stay in effect for the siblings that follow.
  case .atom(.changeMatchingOptions(let sequence)):
    options.apply(sequence.ast)
    return nil

  // Any other atom or consuming node returns `false`.
  case .atom, .customCharacterClass, .quotedLiteral:
    return false

  // Trivia/empty have no effect.
  case .trivia, .empty:
    return nil

  // In an alternation, all of its children must match only at start.
  // An inconclusive (`nil`) alternative counts as a failure here.
  case .orderedChoice(let children):
    for _ in 0..<children.count {
      guard _canOnlyMatchAtStartImpl(&list) == true else {
        return false
      }
    }
    return true

  case .concatenation(let children):
    // In a concatenation, the first definitive child provides the answer.
    // If no child is definitive, the answer stays `false`.
    var i = 0
    var found = false
    while i < children.count {
      i += 1
      if let result = _canOnlyMatchAtStartImpl(&list) {
        found = result
        break
      }
    }
    // Once a definitive answer has been found, skip the rest of the nodes
    // in the concatenation. If a skip fails, the cursor can no longer be
    // trusted, so give the conservative answer instead of `found`.
    while i < children.count {
      i += 1
      guard _skipNodeForStartAnalysis(&list) else { return false }
    }
    return found

  // Groups (and other parent nodes) defer to the child.
  case .nonCapturingGroup(let kind, _):
    // Don't let a negative lookahead affect this - need to continue to the
    // next sibling. A failed skip would leave that sibling misread, so it
    // is reported as `false` rather than `nil`.
    if kind.isNegativeLookahead {
      guard _skipNodeForStartAnalysis(&list) else { return false }
      return nil
    }
    // Option changes made by or inside the group end with the group.
    options.beginScope()
    defer { options.endScope() }
    if case .changeMatchingOptions(let sequence) = kind.ast {
      options.apply(sequence)
    }
    return _canOnlyMatchAtStartImpl(&list)
  case .capture:
    options.beginScope()
    defer { options.endScope() }
    return _canOnlyMatchAtStartImpl(&list)
  case .ignoreCapturesInTypedOutput, .limitCaptureNesting:
    return _canOnlyMatchAtStartImpl(&list)

  // A quantification that doesn't require its child to exist can still
  // allow a start-only match. (e.g. `/(foo)?^bar/`)
  case .quantification(let amount, _, _):
    guard amount.requiresAtLeastOne else {
      // The optional child says nothing by itself; step over it so the
      // next sibling can decide, unless the skip itself fails.
      guard _skipNodeForStartAnalysis(&list) else { return false }
      return nil
    }
    return _canOnlyMatchAtStartImpl(&list)

  // For conditional nodes, both sides must require matching at start.
  case .conditional:
    return _canOnlyMatchAtStartImpl(&list) == true
      && _canOnlyMatchAtStartImpl(&list) == true

  // Extended behavior isn't known, so we return `false` for safety.
  case .consumer, .matcher, .characterPredicate, .absentFunction:
    return false
  }
}

/// Skips the node at the front of `list` via `skipNode`, without preserving
/// captures, and reports whether the skip completed.
///
/// - Parameter list: The remaining nodes; consumed from the front.
/// - Returns: `true` if `skipNode` returned normally, `false` if it threw.
private mutating func _skipNodeForStartAnalysis(
  _ list: inout ArraySlice<DSLTree.Node>
) -> Bool {
  do {
    try skipNode(&list, preservingCaptures: false)
    return true
  } catch {
    return false
  }
}
139+
/// Reports whether the regex described by `list` can match _only_ at the
/// start of a subject.
///
/// The analysis runs against a fresh `MatchingOptions()` value; whatever
/// options were current on entry are put back before returning.
///
/// Regexes that can only match at the start of a subject:
///
/// - `/^foo/`
/// - `/(^foo|^bar)/` (both sides of the alternation start with `^`)
///
/// Regexes that can match other places in a subject:
///
/// - `/(^foo)?bar/` (`^` is in an optional group)
/// - `/(^foo|bar)/` (only one side of the alternation starts with `^`)
/// - `/(?m)^foo/` (`^` means "the start of a line" due to `(?m)`)
///
/// - Parameter list: The node list whose front node is the regex root.
/// - Returns: `true` only when the analysis is conclusively start-only; an
///   inconclusive analysis is reported as `false`.
mutating func canOnlyMatchAtStart(in list: DSLList) -> Bool {
  // Swap in pristine options for the walk and restore the caller's on exit.
  let savedOptions = options
  defer { options = savedOptions }
  options = MatchingOptions()

  var remaining = list.nodes[...]
  guard let verdict = _canOnlyMatchAtStartImpl(&remaining) else {
    return false
  }
  return verdict
}
161+
37162 mutating func emitAlternationGen< T> (
38163 _ elements: inout ArraySlice < T > ,
39164 alternationCount: Int ,
@@ -667,41 +792,48 @@ fileprivate extension Compiler.ByteCodeGen {
667792// MARK: Skip node
668793
669794extension Compiler . ByteCodeGen {
670- mutating func skipNode( _ list: inout ArraySlice < DSLTree . Node > ) throws {
795+ mutating func skipNode(
796+ _ list: inout ArraySlice < DSLTree . Node > ,
797+ preservingCaptures: Bool = true
798+ ) throws {
671799 guard let node = list. popFirst ( ) else { return }
672800 switch node {
673801 case let . orderedChoice( children) :
674802 let n = children. count
675803 for _ in 0 ..< n {
676- try skipNode ( & list)
804+ try skipNode ( & list, preservingCaptures : preservingCaptures )
677805 }
678806
679807 case let . concatenation( children) :
680808 let n = children. count
681809 for _ in 0 ..< n {
682- try skipNode ( & list)
810+ try skipNode ( & list, preservingCaptures : preservingCaptures )
683811 }
684812
685813 case let . capture( name, refId, _, transform) :
686814 options. beginScope ( )
687815 defer { options. endScope ( ) }
688816
689- let cap = builder. makeCapture ( id: refId, name: name)
690- builder. buildBeginCapture ( cap)
691- try skipNode ( & list)
692- builder. buildEndCapture ( cap)
817+ if preservingCaptures {
818+ let cap = builder. makeCapture ( id: refId, name: name)
819+ builder. buildBeginCapture ( cap)
820+ try skipNode ( & list, preservingCaptures: preservingCaptures)
821+ builder. buildEndCapture ( cap)
822+ } else {
823+ try skipNode ( & list, preservingCaptures: preservingCaptures)
824+ }
693825
694826 case let . nonCapturingGroup( kind, _) :
695- try skipNode ( & list)
827+ try skipNode ( & list, preservingCaptures : preservingCaptures )
696828
697829 case . ignoreCapturesInTypedOutput:
698- try skipNode ( & list)
830+ try skipNode ( & list, preservingCaptures : preservingCaptures )
699831
700832 case . limitCaptureNesting:
701- try skipNode ( & list)
833+ try skipNode ( & list, preservingCaptures : preservingCaptures )
702834
703835 case let . quantification( amt, kind, _) :
704- try skipNode ( & list)
836+ try skipNode ( & list, preservingCaptures : preservingCaptures )
705837
706838 case . customCharacterClass, . atom, . quotedLiteral, . matcher:
707839 break
0 commit comments