@@ -104,6 +104,12 @@ pub enum TokenKind {
104104 /// for emoji identifier recovery, as those are not meant to be ever accepted.
105105 InvalidPrefix ,
106106
107+ /// Guarded string literal prefix: `#"` or `##`.
108+ ///
109+ /// Used for reserving "guarded strings" (RFC 3598) in edition 2024.
110+ /// Split into the component tokens on older editions.
111+ GuardedStrPrefix ,
112+
107113 /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
108114 /// suffix, but may be present here on string and float literals. Users of
109115 /// this type will need to check for and reject that case.
@@ -191,30 +197,41 @@ pub enum DocStyle {
191197/// `rustc_ast::ast::LitKind`).
192198#[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
193199pub enum LiteralKind {
194- /// " 12_u8", " 0o100", " 0b120i99", " 1f32" .
200+ /// `12_u8`, `0o100`, `0b120i99`, `1f32`.
195201 Int { base : Base , empty_int : bool } ,
196- /// " 12.34f32", " 1e3" , but not " 1f32" .
202+ /// `12.34f32`, `1e3`, but not `1f32`.
197203 Float { base : Base , empty_exponent : bool } ,
198- /// " 'a'", " '\\'", " '''", "';"
204+ /// `'a'`, `'\\'`, `'''`, `';`
199205 Char { terminated : bool } ,
200- /// " b'a'", " b'\\'", " b'''", " b';"
206+ /// `b'a'`, `b'\\'`, `b'''`, `b';`
201207 Byte { terminated : bool } ,
202- /// "" abc"", "" abc"
208+ /// `"abc"`, `"abc`
203209 Str { terminated : bool } ,
204- /// " b"abc"", " b"abc"
210+ /// `b"abc"`, `b"abc`
205211 ByteStr { terminated : bool } ,
206212 /// `c"abc"`, `c"abc`
207213 CStr { terminated : bool } ,
208- /// " r"abc"", " r#"abc"#", " r####"ab"###"c"####", " r#"a" . `None` indicates
214+ /// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates
209215 /// an invalid literal.
210216 RawStr { n_hashes : Option < u8 > } ,
211- /// " br"abc"", " br#"abc"#", " br####"ab"###"c"####", " br#"a" . `None`
217+ /// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None`
212218 /// indicates an invalid literal.
213219 RawByteStr { n_hashes : Option < u8 > } ,
214220 /// `cr"abc"`, `cr#"abc"#`, `cr#"a`. `None` indicates an invalid literal.
215221 RawCStr { n_hashes : Option < u8 > } ,
216222}
217223
/// Result of lexing a guarded string literal with
/// `Cursor::guarded_double_quoted_string`.
///
224+ /// `#"abc"#`, `##"a"` (fewer closing), or even `#"a` (unterminated).
225+ ///
226+ /// Can capture fewer closing hashes than starting hashes,
227+ /// for more efficient lexing and better backwards diagnostics.
228+ #[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
229+ pub struct GuardedStr {
/// Number of `#` characters consumed before the opening `"`.
/// The number of closing `#` characters is not recorded.
230+ pub n_hashes : u32 ,
/// Outcome of lexing the body as a normal double-quoted string:
/// `true` if that string was terminated, `false` otherwise.
231+ pub terminated : bool ,
/// Value of the cursor's `pos_within_token()` taken after the guarded
/// string was lexed (after the closing hashes and literal suffix in the
/// terminated case), just before the cursor's token position was reset.
232+ pub token_len : u32 ,
233+ }
234+
218235#[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
219236pub enum RawStrError {
220237 /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
@@ -403,6 +420,12 @@ impl Cursor<'_> {
403420 TokenKind :: Literal { kind : literal_kind, suffix_start }
404421 }
405422
423+ // Guarded string literal prefix: `#"` or `##`
424+ '#' if matches ! ( self . first( ) , '"' | '#' ) => {
425+ self . bump ( ) ;
426+ TokenKind :: GuardedStrPrefix
427+ }
428+
406429 // One-symbol tokens.
407430 ';' => Semi ,
408431 ',' => Comma ,
@@ -780,6 +803,60 @@ impl Cursor<'_> {
780803 false
781804 }
782805
806+ /// Attempt to lex for a guarded string literal.
807+ ///
808+ /// Used by `rustc_parse::lexer` to lex for guarded strings
809+ /// conditionally based on edition.
810+ ///
811+ /// Note: this will not reset the `Cursor` when a
812+ /// guarded string is not found. It is the caller's
813+ /// responsibility to do so.
///
/// Returns `None` when the leading run of `#` characters (possibly empty)
/// is not followed by `"`; the hashes consumed so far stay consumed.
/// Otherwise returns `Some(GuardedStr)` where `n_hashes` is the number of
/// leading `#` characters, `terminated` is the result of lexing the body as
/// a normal double-quoted string, and `token_len` is `pos_within_token()`
/// at the point lexing stopped. On both `Some` paths
/// `reset_pos_within_token()` is called before returning.
814+ pub fn guarded_double_quoted_string ( & mut self ) -> Option < GuardedStr > {
// Debug-only check: the character before the cursor must not be a `#`.
815+ debug_assert ! ( self . prev( ) != '#' ) ;
816+
// Count and consume the opening `#` characters.
817+ let mut n_start_hashes: u32 = 0 ;
818+ while self . first ( ) == '#' {
819+ n_start_hashes += 1 ;
820+ self . bump ( ) ;
821+ }
822+
// Not a guarded string unless the hashes are followed by an opening quote.
823+ if self . first ( ) != '"' {
824+ return None ;
825+ }
826+ self . bump ( ) ;
827+ debug_assert ! ( self . prev( ) == '"' ) ;
828+
829+ // Lex the string itself as a normal string literal
830+ // so we can recover that for older editions later.
831+ let terminated = self . double_quoted_string ( ) ;
832+ if !terminated {
// Unterminated: no closing hashes or literal suffix are lexed;
// report the length consumed so far.
833+ let token_len = self . pos_within_token ( ) ;
834+ self . reset_pos_within_token ( ) ;
835+
836+ return Some ( GuardedStr { n_hashes : n_start_hashes, terminated : false , token_len } ) ;
837+ }
838+
839+ // Consume closing '#' symbols.
840+ // Note that this will not consume extra trailing `#` characters:
841+ // `###"abcde"####` is lexed as a `GuardedStr { n_hashes: 3, .. }`
842+ // followed by a `#` token.
// Fewer closing hashes than opening hashes are also accepted here.
843+ let mut n_end_hashes = 0 ;
844+ while self . first ( ) == '#' && n_end_hashes < n_start_hashes {
845+ n_end_hashes += 1 ;
846+ self . bump ( ) ;
847+ }
848+
849+ // Reserved syntax, always an error, so it doesn't matter if
850+ // `n_start_hashes != n_end_hashes`.
851+
// The literal suffix (if any) is consumed before `token_len` is read,
// so it is counted as part of the token.
852+ self . eat_literal_suffix ( ) ;
853+
854+ let token_len = self . pos_within_token ( ) ;
855+ self . reset_pos_within_token ( ) ;
856+
857+ Some ( GuardedStr { n_hashes : n_start_hashes, terminated : true , token_len } )
858+ }
859+
783860 /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
784861 fn raw_double_quoted_string ( & mut self , prefix_len : u32 ) -> Result < u8 , RawStrError > {
785862 // Wrap the actual function to handle the error with too many hashes.
0 commit comments