@@ -15,8 +15,10 @@ extension Processor {
1515 isStrictASCII: Bool ,
1616 isScalarSemantics: Bool
1717 ) -> Bool {
18- guard let next = _doMatchBuiltinCC (
18+ guard let next = _matchBuiltinCC (
1919 cc,
20+ in: input,
21+ at: currentPosition,
2022 isInverted: isInverted,
2123 isStrictASCII: isStrictASCII,
2224 isScalarSemantics: isScalarSemantics
@@ -28,140 +30,6 @@ extension Processor {
2830 return true
2931 }
3032
// Mentioned in ProgrammersManual.md, update docs if redesigned
/// Matches the built-in character class `cc` at the processor's current
/// position.
///
/// The quick matcher is asked first. If it produces a definite answer, that
/// answer is returned (debug builds assert that it agrees with the thorough
/// matcher). Otherwise the thorough matcher decides.
///
/// - Parameters:
///   - cc: The built-in character class to test.
///   - isInverted: Whether the sense of the match is flipped.
///   - isStrictASCII: Forwarded unchanged to both matchers.
///   - isScalarSemantics: Forwarded unchanged to both matchers.
/// - Returns: The index reported by whichever matcher decided, or `nil` when
///   there is no match.
func _doMatchBuiltinCC(
  _ cc: _CharacterClassModel.Representation,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> Input.Index? {
  let quickOutcome = _quickMatchBuiltinCC(
    cc,
    isInverted: isInverted,
    isStrictASCII: isStrictASCII,
    isScalarSemantics: isScalarSemantics)

  guard case .definite(let quickAnswer) = quickOutcome else {
    // No definite quick answer: the thorough matcher has the final word.
    return _thoroughMatchBuiltinCC(
      cc,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: isScalarSemantics)
  }

  // Debug-only cross-check of the quick answer against the thorough matcher.
  assert(quickAnswer == _thoroughMatchBuiltinCC(
    cc,
    isInverted: isInverted,
    isStrictASCII: isStrictASCII,
    isScalarSemantics: isScalarSemantics))
  return quickAnswer
}
57-
// Mentioned in ProgrammersManual.md, update docs if redesigned
/// Attempts to decide the match via `input._quickMatch` at the processor's
/// current position.
///
/// - Parameters:
///   - cc: The built-in character class to test.
///   - isInverted: Whether the sense of the match is flipped.
///   - isStrictASCII: Accepted for signature parity; not consulted here.
///   - isScalarSemantics: Forwarded to `_quickMatch`.
/// - Returns: `.unknown` when `_quickMatch` yields nothing; otherwise
///   `.definite` wrapping the next index on success or `nil` on failure.
@inline(__always)
func _quickMatchBuiltinCC(
  _ cc: _CharacterClassModel.Representation,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> QuickResult<Input.Index?> {
  if let (nextIndex, classMatched) = input._quickMatch(
    cc, at: currentPosition, isScalarSemantics: isScalarSemantics
  ) {
    // Success means the class result differs from the inversion flag:
    // a hit when not inverted, or a miss when inverted.
    let succeeded = classMatched != isInverted
    return .definite(succeeded ? nextIndex : nil)
  }
  return .unknown
}
73-
// Mentioned in ProgrammersManual.md, update docs if redesigned
/// Decides whether the built-in character class `cc` matches at the
/// processor's current position by inspecting both the character and the
/// scalar obtained from `load()` and `loadScalar()`.
///
/// - Parameters:
///   - cc: The built-in character class to test.
///   - isInverted: Whether the sense of the match is flipped.
///   - isStrictASCII: When `true`, property-based classes additionally
///     require the ASCII check below to pass.
///   - isScalarSemantics: Selects scalar-based properties and scalar-sized
///     advancement instead of character-based ones.
/// - Returns: The index following the matched element, or `nil` when nothing
///   could be loaded or the (possibly inverted) class does not match.
@inline(never)
func _thoroughMatchBuiltinCC(
  _ cc: _CharacterClassModel.Representation,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> Input.Index? {
  guard let char = load(), let scalar = loadScalar() else {
    return nil
  }

  // Outside strict-ASCII mode the check always passes. In strict mode it
  // passes for an ASCII scalar under scalar semantics, or an ASCII character.
  let passesASCIICheck: Bool
  if isStrictASCII {
    passesASCIICheck = (scalar.isASCII && isScalarSemantics) || char.isASCII
  } else {
    passesASCIICheck = true
  }

  // `.anyGrapheme` always advances by a character and `.anyScalar` always by
  // a scalar; every other class follows the semantic level.
  let stepsByScalar: Bool
  switch cc {
  case .anyGrapheme:
    stepsByScalar = false
  case .anyScalar:
    stepsByScalar = true
  case .any, .digit, .horizontalWhitespace, .verticalWhitespace,
       .newlineSequence, .whitespace, .word:
    stepsByScalar = isScalarSemantics
  }
  var next: Input.Index = stepsByScalar
    ? input.unicodeScalars.index(after: currentPosition)
    : input.index(after: currentPosition)

  let classMatched: Bool
  switch cc {
  case .any, .anyGrapheme:
    classMatched = true
  case .anyScalar:
    // Under grapheme semantics a single scalar only counts when stepping
    // over it lands on a grapheme-cluster boundary.
    classMatched = isScalarSemantics || input.isOnGraphemeClusterBoundary(next)
  case .digit:
    classMatched = (isScalarSemantics
      ? scalar.properties.numericType != nil
      : char.isNumber) && passesASCIICheck
  case .horizontalWhitespace:
    classMatched = (isScalarSemantics
      ? scalar.isHorizontalWhitespace
      : char._isHorizontalWhitespace) && passesASCIICheck
  case .verticalWhitespace:
    classMatched = (isScalarSemantics
      ? scalar.isNewline
      : char._isNewline) && passesASCIICheck
  case .newlineSequence:
    if isScalarSemantics {
      let newlineHit = scalar.isNewline && passesASCIICheck
      if newlineHit, scalar == "\r",
         next != input.endIndex, input.unicodeScalars[next] == "\n" {
        // Match a full CR-LF sequence even in scalar semantics
        input.unicodeScalars.formIndex(after: &next)
      }
      classMatched = newlineHit
    } else {
      classMatched = char._isNewline && passesASCIICheck
    }
  case .whitespace:
    classMatched = (isScalarSemantics
      ? scalar.properties.isWhitespace
      : char.isWhitespace) && passesASCIICheck
  case .word:
    classMatched = (isScalarSemantics
      ? scalar.properties.isAlphabetic
      : char.isWordCharacter) && passesASCIICheck
  }

  // Inversion flips the outcome: succeed exactly when the class result and
  // the inversion flag disagree.
  return classMatched != isInverted ? next : nil
}
164-
16533 func isAtStartOfLine( _ payload: AssertionPayload ) -> Bool {
16634 if currentPosition == subjectBounds. lowerBound { return true }
16735 switch payload. semanticLevel {
@@ -171,7 +39,7 @@ extension Processor {
17139 return input. unicodeScalars [ input. unicodeScalars. index ( before: currentPosition) ] . isNewline
17240 }
17341 }
174-
42+
17543 func isAtEndOfLine( _ payload: AssertionPayload ) -> Bool {
17644 if currentPosition == subjectBounds. upperBound { return true }
17745 switch payload. semanticLevel {
@@ -214,7 +82,7 @@ extension Processor {
21482 return isAtStartOfLine ( payload)
21583 case . endOfLine:
21684 return isAtEndOfLine ( payload)
217-
85+
21886 case . caretAnchor:
21987 if payload. anchorsMatchNewlines {
22088 return isAtStartOfLine ( payload)
@@ -247,3 +115,159 @@ extension Processor {
247115 }
248116 }
249117}
118+
119+ // MARK: Built-in character class matching
120+
// Mentioned in ProgrammersManual.md, update docs if redesigned
/// Matches the built-in character class `cc` against `input` at
/// `currentPosition`.
///
/// The quick matcher is asked first. If it produces a definite answer, that
/// answer is returned (debug builds assert that it agrees with the thorough
/// matcher). Otherwise the thorough matcher decides.
///
/// - Parameters:
///   - cc: The built-in character class to test.
///   - input: The string being matched.
///   - currentPosition: The index in `input` at which to attempt the match.
///   - isInverted: Whether the sense of the match is flipped.
///   - isStrictASCII: Forwarded unchanged to both matchers.
///   - isScalarSemantics: Forwarded unchanged to both matchers.
/// - Returns: The index reported by whichever matcher decided, or `nil` when
///   `currentPosition` is not before `input.endIndex` or there is no match.
@_effects(releasenone)
func _matchBuiltinCC(
  _ cc: _CharacterClassModel.Representation,
  in input: String,
  at currentPosition: String.Index,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> String.Index? {
  // Nothing can match at or past the end of the input.
  if currentPosition >= input.endIndex {
    return nil
  }

  let quickOutcome = _quickMatchBuiltinCC(
    cc,
    in: input,
    at: currentPosition,
    isInverted: isInverted,
    isStrictASCII: isStrictASCII,
    isScalarSemantics: isScalarSemantics)

  guard case .definite(let quickAnswer) = quickOutcome else {
    // No definite quick answer: the thorough matcher has the final word.
    return _thoroughMatchBuiltinCC(
      cc,
      in: input,
      at: currentPosition,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: isScalarSemantics)
  }

  // Debug-only cross-check of the quick answer against the thorough matcher.
  assert(quickAnswer == _thoroughMatchBuiltinCC(
    cc,
    in: input,
    at: currentPosition,
    isInverted: isInverted,
    isStrictASCII: isStrictASCII,
    isScalarSemantics: isScalarSemantics))
  return quickAnswer
}
159+
// Mentioned in ProgrammersManual.md, update docs if redesigned
/// Attempts to decide the match via `input._quickMatch` at
/// `currentPosition`.
///
/// - Precondition: `currentPosition < input.endIndex` (checked with `assert`
///   in debug builds only).
/// - Parameters:
///   - cc: The built-in character class to test.
///   - input: The string being matched.
///   - currentPosition: The index in `input` at which to attempt the match.
///   - isInverted: Whether the sense of the match is flipped.
///   - isStrictASCII: Accepted for signature parity; not consulted here.
///   - isScalarSemantics: Forwarded to `_quickMatch`.
/// - Returns: `.unknown` when `_quickMatch` yields nothing; otherwise
///   `.definite` wrapping the next index on success or `nil` on failure.
@_effects(releasenone)
@inline(__always)
func _quickMatchBuiltinCC(
  _ cc: _CharacterClassModel.Representation,
  in input: String,
  at currentPosition: String.Index,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> QuickResult<String.Index?> {
  assert(currentPosition < input.endIndex)

  if let (nextIndex, classMatched) = input._quickMatch(
    cc, at: currentPosition, isScalarSemantics: isScalarSemantics
  ) {
    // Success means the class result differs from the inversion flag:
    // a hit when not inverted, or a miss when inverted.
    let succeeded = classMatched != isInverted
    return .definite(succeeded ? nextIndex : nil)
  }
  return .unknown
}
179+
// Mentioned in ProgrammersManual.md, update docs if redesigned
/// Decides whether the built-in character class `cc` matches `input` at
/// `currentPosition` by inspecting both the character and the scalar found
/// there.
///
/// - Precondition: `currentPosition < input.endIndex` (checked with `assert`
///   in debug builds only).
/// - Parameters:
///   - cc: The built-in character class to test.
///   - input: The string being matched.
///   - currentPosition: The index in `input` at which to attempt the match.
///   - isInverted: Whether the sense of the match is flipped.
///   - isStrictASCII: When `true`, property-based classes additionally
///     require the ASCII check below to pass.
///   - isScalarSemantics: Selects scalar-based properties and scalar-sized
///     advancement instead of character-based ones.
/// - Returns: The index following the matched element, or `nil` when the
///   (possibly inverted) class does not match.
@_effects(releasenone)
@inline(never)
func _thoroughMatchBuiltinCC(
  _ cc: _CharacterClassModel.Representation,
  in input: String,
  at currentPosition: String.Index,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> String.Index? {
  assert(currentPosition < input.endIndex)
  let char = input[currentPosition]
  let scalar = input.unicodeScalars[currentPosition]

  // Outside strict-ASCII mode the check always passes. In strict mode it
  // passes for an ASCII scalar under scalar semantics, or an ASCII character.
  let passesASCIICheck: Bool
  if isStrictASCII {
    passesASCIICheck = (scalar.isASCII && isScalarSemantics) || char.isASCII
  } else {
    passesASCIICheck = true
  }

  // `.anyGrapheme` always advances by a character and `.anyScalar` always by
  // a scalar; every other class follows the semantic level.
  let stepsByScalar: Bool
  switch cc {
  case .anyGrapheme:
    stepsByScalar = false
  case .anyScalar:
    stepsByScalar = true
  case .any, .digit, .horizontalWhitespace, .verticalWhitespace,
       .newlineSequence, .whitespace, .word:
    stepsByScalar = isScalarSemantics
  }
  var next: String.Index = stepsByScalar
    ? input.unicodeScalars.index(after: currentPosition)
    : input.index(after: currentPosition)

  let classMatched: Bool
  switch cc {
  case .any, .anyGrapheme:
    classMatched = true
  case .anyScalar:
    // Under grapheme semantics a single scalar only counts when stepping
    // over it lands on a grapheme-cluster boundary.
    classMatched = isScalarSemantics || input.isOnGraphemeClusterBoundary(next)
  case .digit:
    classMatched = (isScalarSemantics
      ? scalar.properties.numericType != nil
      : char.isNumber) && passesASCIICheck
  case .horizontalWhitespace:
    classMatched = (isScalarSemantics
      ? scalar.isHorizontalWhitespace
      : char._isHorizontalWhitespace) && passesASCIICheck
  case .verticalWhitespace:
    classMatched = (isScalarSemantics
      ? scalar.isNewline
      : char._isNewline) && passesASCIICheck
  case .newlineSequence:
    if isScalarSemantics {
      let newlineHit = scalar.isNewline && passesASCIICheck
      if newlineHit, scalar == "\r",
         next != input.endIndex, input.unicodeScalars[next] == "\n" {
        // Match a full CR-LF sequence even in scalar semantics
        input.unicodeScalars.formIndex(after: &next)
      }
      classMatched = newlineHit
    } else {
      classMatched = char._isNewline && passesASCIICheck
    }
  case .whitespace:
    classMatched = (isScalarSemantics
      ? scalar.properties.isWhitespace
      : char.isWhitespace) && passesASCIICheck
  case .word:
    classMatched = (isScalarSemantics
      ? scalar.properties.isAlphabetic
      : char.isWordCharacter) && passesASCIICheck
  }

  // Inversion flips the outcome: succeed exactly when the class result and
  // the inversion flag disagree.
  return classMatched != isInverted ? next : nil
}
273+
0 commit comments