@@ -9,114 +9,26 @@ extension Character {
99}
1010
1111extension Processor {
/// Tries to consume one built-in character class at `currentPosition`.
///
/// The matching itself is delegated to `input._matchBuiltinCC`, which is
/// handed the current position together with the three option flags.
///
/// - Parameters:
///   - cc: The built-in character class to test for.
///   - isInverted: Forwarded unchanged to `_matchBuiltinCC`.
///   - isStrictASCII: Forwarded unchanged to `_matchBuiltinCC`.
///   - isScalarSemantics: Forwarded unchanged to `_matchBuiltinCC`.
/// - Returns: `true` after moving `currentPosition` to the index produced by
///   the match; `false` after calling `signalFailure()` when no index was
///   produced.
mutating func matchBuiltinCC(
  _ cc: _CharacterClassModel.Representation,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> Bool {
  let advanced = input._matchBuiltinCC(
    cc,
    at: currentPosition,
    isInverted: isInverted,
    isStrictASCII: isStrictASCII,
    isScalarSemantics: isScalarSemantics)

  switch advanced {
  case let position?:
    // Success: step past whatever the class consumed.
    currentPosition = position
    return true
  case nil:
    signalFailure()
    return false
  }
}
30-
31- func _doMatchBuiltin(
32- _ cc: _CharacterClassModel . Representation ,
33- _ isInverted: Bool ,
34- _ isStrictASCII: Bool ,
35- _ isScalarSemantics: Bool
36- ) -> Input . Index ? {
37- guard let char = load ( ) , let scalar = loadScalar ( ) else {
38- return nil
39- }
40-
41- let asciiCheck = !isStrictASCII
42- || ( scalar. isASCII && isScalarSemantics)
43- || char. isASCII
44-
45- var matched : Bool
46- var next : Input . Index
47- switch ( isScalarSemantics, cc) {
48- case ( _, . anyGrapheme) :
49- next = input. index ( after: currentPosition)
50- case ( _, . anyScalar) :
51- next = input. unicodeScalars. index ( after: currentPosition)
52- case ( true , _) :
53- next = input. unicodeScalars. index ( after: currentPosition)
54- case ( false , _) :
55- next = input. index ( after: currentPosition)
56- }
57-
58- switch cc {
59- case . any, . anyGrapheme:
60- matched = true
61- case . anyScalar:
62- if isScalarSemantics {
63- matched = true
64- } else {
65- matched = input. isOnGraphemeClusterBoundary ( next)
66- }
67- case . digit:
68- if isScalarSemantics {
69- matched = scalar. properties. numericType != nil && asciiCheck
70- } else {
71- matched = char. isNumber && asciiCheck
72- }
73- case . horizontalWhitespace:
74- if isScalarSemantics {
75- matched = scalar. isHorizontalWhitespace && asciiCheck
76- } else {
77- matched = char. _isHorizontalWhitespace && asciiCheck
78- }
79- case . verticalWhitespace:
80- if isScalarSemantics {
81- matched = scalar. isNewline && asciiCheck
82- } else {
83- matched = char. _isNewline && asciiCheck
84- }
85- case . newlineSequence:
86- if isScalarSemantics {
87- matched = scalar. isNewline && asciiCheck
88- if matched && scalar == " \r "
89- && next != input. endIndex && input. unicodeScalars [ next] == " \n " {
90- // Match a full CR-LF sequence even in scalar semantics
91- input. unicodeScalars. formIndex ( after: & next)
92- }
93- } else {
94- matched = char. _isNewline && asciiCheck
95- }
96- case . whitespace:
97- if isScalarSemantics {
98- matched = scalar. properties. isWhitespace && asciiCheck
99- } else {
100- matched = char. isWhitespace && asciiCheck
101- }
102- case . word:
103- if isScalarSemantics {
104- matched = scalar. properties. isAlphabetic && asciiCheck
105- } else {
106- matched = char. isWordCharacter && asciiCheck
107- }
108- }
109-
110- if isInverted {
111- matched. toggle ( )
112- }
11331
114- guard matched else {
115- return nil
116- }
117- return next
118- }
119-
12032 func isAtStartOfLine( _ payload: AssertionPayload ) -> Bool {
12133 if currentPosition == subjectBounds. lowerBound { return true }
12234 switch payload. semanticLevel {
@@ -126,7 +38,7 @@ extension Processor {
12638 return input. unicodeScalars [ input. unicodeScalars. index ( before: currentPosition) ] . isNewline
12739 }
12840 }
129-
41+
13042 func isAtEndOfLine( _ payload: AssertionPayload ) -> Bool {
13143 if currentPosition == subjectBounds. upperBound { return true }
13244 switch payload. semanticLevel {
@@ -169,7 +81,7 @@ extension Processor {
16981 return isAtStartOfLine ( payload)
17082 case . endOfLine:
17183 return isAtEndOfLine ( payload)
172-
84+
17385 case . caretAnchor:
17486 if payload. anchorsMatchNewlines {
17587 return isAtStartOfLine ( payload)
@@ -202,3 +114,152 @@ extension Processor {
202114 }
203115 }
204116}
117+
118+ // MARK: Built-in character class matching
119+
extension String {

  // Mentioned in ProgrammersManual.md, update docs if redesigned
  /// Matches the built-in character class `cc` at `currentPosition`.
  ///
  /// The quick path (`_quickMatchBuiltinCC`) is consulted first. When it
  /// reports a definite answer, that answer is returned (and, in builds where
  /// assertions are active, cross-checked against the thorough path).
  /// Otherwise the thorough path (`_thoroughMatchBuiltinCC`) decides.
  ///
  /// - Returns: The index following the matched content, or `nil` when
  ///   `currentPosition` is not before `endIndex` or the class does not
  ///   match there.
  func _matchBuiltinCC(
    _ cc: _CharacterClassModel.Representation,
    at currentPosition: String.Index,
    isInverted: Bool,
    isStrictASCII: Bool,
    isScalarSemantics: Bool
  ) -> String.Index? {
    // Nothing can be matched at or past the end of the string.
    if currentPosition >= endIndex {
      return nil
    }

    let quickAnswer = _quickMatchBuiltinCC(
      cc,
      at: currentPosition,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: isScalarSemantics)

    guard case .definite(let answer) = quickAnswer else {
      // The quick path could not decide; fall back to the full logic.
      return _thoroughMatchBuiltinCC(
        cc,
        at: currentPosition,
        isInverted: isInverted,
        isStrictASCII: isStrictASCII,
        isScalarSemantics: isScalarSemantics)
    }

    // A definite quick answer must agree with the thorough implementation.
    assert(answer == _thoroughMatchBuiltinCC(
      cc,
      at: currentPosition,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: isScalarSemantics))
    return answer
  }

  // Mentioned in ProgrammersManual.md, update docs if redesigned
  /// Quick path for built-in character class matching.
  ///
  /// Wraps `_quickMatch` and folds `isInverted` into its outcome.
  /// `isStrictASCII` is accepted for signature parity with the thorough path
  /// but is not consulted in this method.
  ///
  /// - Precondition (debug-asserted): `currentPosition < endIndex`.
  /// - Returns: `.unknown` when `_quickMatch` yields nothing; otherwise
  ///   `.definite` carrying the next index on a match, or `nil` on a
  ///   non-match, after accounting for inversion.
  @inline(__always)
  func _quickMatchBuiltinCC(
    _ cc: _CharacterClassModel.Representation,
    at currentPosition: String.Index,
    isInverted: Bool,
    isStrictASCII: Bool,
    isScalarSemantics: Bool
  ) -> QuickResult<String.Index?> {
    assert(currentPosition < endIndex)

    guard let quick = _quickMatch(
      cc, at: currentPosition, isScalarSemantics: isScalarSemantics
    ) else {
      return .unknown
    }

    let (position, didMatch) = quick
    // A raw result equal to the inversion flag means "no match" overall.
    if didMatch == isInverted {
      return .definite(nil)
    }
    return .definite(position)
  }

  // Mentioned in ProgrammersManual.md, update docs if redesigned
  /// Thorough path for built-in character class matching.
  ///
  /// Reads both the `Character` and the `Unicode.Scalar` at
  /// `currentPosition`, picks the candidate next index (scalar-wise or
  /// character-wise), tests the class, and finally applies `isInverted`.
  ///
  /// - Precondition (debug-asserted): `currentPosition < endIndex`.
  /// - Returns: The index following the matched content, or `nil` when the
  ///   (possibly inverted) class does not match.
  @inline(never)
  func _thoroughMatchBuiltinCC(
    _ cc: _CharacterClassModel.Representation,
    at currentPosition: String.Index,
    isInverted: Bool,
    isStrictASCII: Bool,
    isScalarSemantics: Bool
  ) -> String.Index? {
    assert(currentPosition < endIndex)
    let character = self[currentPosition]
    let scalar = unicodeScalars[currentPosition]

    // Gate applied to every property-based class below: it holds whenever
    // strict-ASCII mode is off, and otherwise requires an ASCII scalar (in
    // scalar semantics) or an ASCII character.
    let passesASCII = !isStrictASCII
      || (scalar.isASCII && isScalarSemantics)
      || character.isASCII

    // `.anyGrapheme` always steps by character and `.anyScalar` always steps
    // by scalar; every other class follows the semantic level.
    let stepsByScalar: Bool
    switch cc {
    case .anyGrapheme:
      stepsByScalar = false
    case .anyScalar:
      stepsByScalar = true
    default:
      stepsByScalar = isScalarSemantics
    }
    var next = stepsByScalar
      ? unicodeScalars.index(after: currentPosition)
      : index(after: currentPosition)

    let classMatches: Bool
    switch cc {
    case .any, .anyGrapheme:
      classMatches = true

    case .anyScalar:
      // Outside scalar semantics the single-scalar step must land on a
      // grapheme cluster boundary.
      classMatches = isScalarSemantics || isOnGraphemeClusterBoundary(next)

    case .digit:
      let isDigit = isScalarSemantics
        ? (scalar.properties.numericType != nil)
        : character.isNumber
      classMatches = isDigit && passesASCII

    case .horizontalWhitespace:
      let isHorizontal = isScalarSemantics
        ? scalar.isHorizontalWhitespace
        : character._isHorizontalWhitespace
      classMatches = isHorizontal && passesASCII

    case .verticalWhitespace:
      let isVertical = isScalarSemantics
        ? scalar.isNewline
        : character._isNewline
      classMatches = isVertical && passesASCII

    case .newlineSequence:
      let isNewline = isScalarSemantics
        ? scalar.isNewline
        : character._isNewline
      classMatches = isNewline && passesASCII
      // Match a full CR-LF sequence even in scalar semantics
      if isScalarSemantics && classMatches && scalar == "\r"
        && next != endIndex && unicodeScalars[next] == "\n" {
        unicodeScalars.formIndex(after: &next)
      }

    case .whitespace:
      let isSpace = isScalarSemantics
        ? scalar.properties.isWhitespace
        : character.isWhitespace
      classMatches = isSpace && passesASCII

    case .word:
      let isWord = isScalarSemantics
        ? scalar.properties.isAlphabetic
        : character.isWordCharacter
      classMatches = isWord && passesASCII
    }

    // Inversion flips the outcome; only an overall match yields an index.
    return classMatches != isInverted ? next : nil
  }
}
0 commit comments