1212@_implementationOnly import _RegexParser
1313@_spi ( RegexBuilder) import _StringProcessing
1414
15+ /// A class of characters that match in a regex.
16+ ///
17+ /// A character class can represent individual characters, a group of
17+ /// characters, the set of characters that match some set of criteria, or
18+ /// a set-algebraic combination of all of the above.
1520@available ( SwiftStdlib 5 . 7 , * )
1621public struct CharacterClass {
1722 internal var ccc : DSLTree . CustomCharacterClass
@@ -42,6 +47,20 @@ extension CharacterClass: RegexComponent {
4247
4348@available ( SwiftStdlib 5 . 7 , * )
4449extension CharacterClass {
50+ /// A character class that matches any character that does not match this
51+ /// character class.
52+ ///
53+ /// For example, you can use the `inverted` property to create a character
54+ /// class that excludes a specific group of characters:
55+ ///
56+ /// let validCharacters = CharacterClass("a"..."z", .anyOf("-_"))
57+ /// let invalidCharacters = validCharacters.inverted
58+ ///
59+ /// let username = "user123"
60+ /// if username.contains(invalidCharacters) {
61+ /// print("Invalid username: '\(username)'")
62+ /// }
63+ /// // Prints "Invalid username: 'user123'"
4564 public var inverted : CharacterClass {
4665 if let inv = builtin? . inverted {
4766 return CharacterClass ( builtin: inv)
@@ -53,26 +72,50 @@ extension CharacterClass {
5372
5473@available ( SwiftStdlib 5 . 7 , * )
5574extension RegexComponent where Self == CharacterClass {
/// A character class that matches every element, newlines included.
///
/// The `dotMatchesNewlines()` method has no effect on this character class.
/// Use ``anyNonNewline`` to match any character other than a newline.
///
/// In regex syntax, this corresponds to the "dot" metacharacter in
/// single-line mode: `(?s:.)`.
public static var any: CharacterClass {
  let anyElement = DSLTree.CustomCharacterClass(members: [.atom(.any)])
  return CharacterClass(anyElement)
}
5986
/// A character class that matches every element except a newline.
///
/// The `dotMatchesNewlines()` method has no effect on this character class.
/// Use ``any`` to match any character, newlines included.
///
/// In regex syntax, this corresponds to the "dot" metacharacter with
/// single-line mode turned off: `(?-s:.)`.
public static var anyNonNewline: CharacterClass {
  let nonNewline = DSLTree.CustomCharacterClass(members: [.atom(.anyNonNewline)])
  return CharacterClass(nonNewline)
}
6397
/// A character class that matches one `Character` (an extended grapheme
/// cluster), whatever the current semantic level is.
///
/// In regex syntax, this corresponds to `\X`.
public static var anyGraphemeCluster: CharacterClass {
  return CharacterClass(builtin: .anyGrapheme)
}
67105
68- public static var whitespace : CharacterClass {
69- . init( builtin: . whitespace)
70- }
71-
/// A character class that matches a single digit.
///
/// In regex syntax, this corresponds to `\d`.
public static var digit: CharacterClass {
  return CharacterClass(builtin: .digit)
}
75112
113+ /// A character class that matches any hexadecimal digit.
114+ ///
115+ /// `hexDigit` matches the ASCII characters `0` through `9`, and upper- or
116+ /// lowercase `a` through `f`. The corresponding characters in the "Halfwidth
117+ /// and Fullwidth Forms" Unicode block are not matched by this character
118+ /// class.
76119 public static var hexDigit : CharacterClass {
77120 . init( DSLTree . CustomCharacterClass ( members: [
78121 . range( . char( " A " ) , . char( " F " ) ) ,
@@ -81,27 +124,56 @@ extension RegexComponent where Self == CharacterClass {
81124 ] ) )
82125 }
83126
/// A character class that matches a single "word character" element.
///
/// In regex syntax, this corresponds to `\w`.
public static var word: CharacterClass {
  return CharacterClass(builtin: .word)
}
133+
/// A character class that matches a single element classified as
/// whitespace.
///
/// In regex syntax, this corresponds to `\s`.
public static var whitespace: CharacterClass {
  return CharacterClass(builtin: .whitespace)
}
141+
/// A character class that matches a single element classified as
/// horizontal whitespace.
///
/// In regex syntax, this corresponds to `\h`.
public static var horizontalWhitespace: CharacterClass {
  return CharacterClass(builtin: .horizontalWhitespace)
}
87149
/// A character class that matches a newline sequence.
///
/// In regex syntax, this corresponds to `\R` or `\n`.
public static var newlineSequence: CharacterClass {
  return CharacterClass(builtin: .newlineSequence)
}
91156
/// A character class that matches a single element classified as
/// vertical whitespace.
///
/// In regex syntax, this corresponds to `\v`.
public static var verticalWhitespace: CharacterClass {
  return CharacterClass(builtin: .verticalWhitespace)
}
95-
96- public static var word : CharacterClass {
97- . init( builtin: . word)
98- }
99164}
100165
101166@available ( SwiftStdlib 5 . 7 , * )
102167extension RegexComponent where Self == CharacterClass {
103168 /// Returns a character class that matches any character in the given string
104169 /// or sequence.
170+ ///
171+ /// Calling this method with a group of characters is equivalent to listing
172+ /// those characters in a custom character class in regex syntax. For example,
173+ /// the two regexes in this example are equivalent:
174+ ///
175+ /// let regex1 = /[abcd]+/
176+ /// let regex2 = OneOrMore(.anyOf("abcd"))
105177 public static func anyOf< S: Sequence > ( _ s: S ) -> CharacterClass
106178 where S. Element == Character
107179 {
@@ -111,6 +183,9 @@ extension RegexComponent where Self == CharacterClass {
111183
112184 /// Returns a character class that matches any Unicode scalar in the given
113185 /// sequence.
186+ ///
187+ /// Calling this method with a group of Unicode scalars is equivalent to
188+ /// listing them in a custom character class in regex syntax.
114189 public static func anyOf< S: Sequence > ( _ s: S ) -> CharacterClass
115190 where S. Element == UnicodeScalar
116191 {
@@ -122,6 +197,11 @@ extension RegexComponent where Self == CharacterClass {
122197// Unicode properties
123198@available ( SwiftStdlib 5 . 7 , * )
124199extension CharacterClass {
/// Creates a character class matching every element whose Unicode general
/// category is `category`.
///
/// Passing `.uppercaseLetter`, for instance, gives the same result as
/// `/\p{Uppercase_Letter}/` or `/\p{Lu}/`.
///
/// - Parameter category: The Unicode general category to match.
/// - Returns: A character class for `category`.
public static func generalCategory(_ category: Unicode.GeneralCategory) -> CharacterClass {
  CharacterClass(.generalCategory(category))
}
@@ -148,6 +228,7 @@ public func ...(lhs: UnicodeScalar, rhs: UnicodeScalar) -> CharacterClass {
148228
149229@available ( SwiftStdlib 5 . 7 , * )
150230extension RegexComponent where Self == CharacterClass {
231+ /// Creates a character class that combines the given classes in a union.
151232 public init ( _ first: CharacterClass , _ rest: CharacterClass ... ) {
152233 if rest. isEmpty {
153234 self . init ( first. ccc)
@@ -161,24 +242,29 @@ extension RegexComponent where Self == CharacterClass {
161242
162243@available ( SwiftStdlib 5 . 7 , * )
163244extension CharacterClass {
/// Returns the union of this character class and `other`.
///
/// - Parameter other: The character class to combine with this one.
/// - Returns: A character class that has both classes as its members.
public func union(_ other: CharacterClass) -> CharacterClass {
  let combined = DSLTree.CustomCharacterClass(members: [.custom(ccc), .custom(other.ccc)])
  return CharacterClass(combined)
}
169251
/// Returns the intersection of this character class and `other`.
///
/// - Parameter other: The character class to intersect with this one.
/// - Returns: A character class whose single member is the intersection of
///   the two classes.
public func intersection(_ other: CharacterClass) -> CharacterClass {
  let overlap = DSLTree.CustomCharacterClass(members: [.intersection(ccc, other.ccc)])
  return CharacterClass(overlap)
}
175258
/// Returns this character class with `other` subtracted from it.
///
/// - Parameter other: The character class to subtract from this one.
/// - Returns: A character class whose single member is the subtraction of
///   `other` from this class.
public func subtracting(_ other: CharacterClass) -> CharacterClass {
  let remainder = DSLTree.CustomCharacterClass(members: [.subtraction(ccc, other.ccc)])
  return CharacterClass(remainder)
}
181265
266+ /// Returns a character class matching elements in one or the other, but not both,
267+ /// of this class and the given class.
182268 public func symmetricDifference( _ other: CharacterClass ) -> CharacterClass {
183269 CharacterClass ( . init( members: [
184270 . symmetricDifference( self . ccc, other. ccc)
0 commit comments