33
44use super :: StringReader ;
55use errors:: { Applicability , DiagnosticBuilder } ;
6- use syntax_pos:: { BytePos , Pos , Span , NO_EXPANSION } ;
6+ use syntax_pos:: { BytePos , Pos , Span , NO_EXPANSION , symbol :: kw } ;
77use crate :: parse:: token;
88
99#[ rustfmt:: skip] // for line breaks
@@ -298,18 +298,20 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[
298298 ( '>' , "Fullwidth Greater-Than Sign" , '>' ) ,
299299] ;
300300
301+ // FIXME: the lexer could be used to turn the ASCII version of Unicode homoglyphs into tokens,
302+ // instead of keeping the substitution token in this table. Ideally, this should be inside
303+ // `rustc_lexer`. However, we should first remove compound tokens like `<<` from `rustc_lexer`,
304+ // and then add fancier error recovery to it, as there will be less overall work to do this way.
301305const ASCII_ARRAY : & [ ( char , & str , Option < token:: TokenKind > ) ] = & [
302306 ( ' ' , "Space" , Some ( token:: Whitespace ) ) ,
303- ( '_' , "Underscore" , None ) ,
307+ ( '_' , "Underscore" , Some ( token :: Ident ( kw :: Underscore , false ) ) ) ,
304308 ( '-' , "Minus/Hyphen" , Some ( token:: BinOp ( token:: Minus ) ) ) ,
305309 ( ',' , "Comma" , Some ( token:: Comma ) ) ,
306310 ( ';' , "Semicolon" , Some ( token:: Semi ) ) ,
307311 ( ':' , "Colon" , Some ( token:: Colon ) ) ,
308312 ( '!' , "Exclamation Mark" , Some ( token:: Not ) ) ,
309313 ( '?' , "Question Mark" , Some ( token:: Question ) ) ,
310314 ( '.' , "Period" , Some ( token:: Dot ) ) ,
311- ( '\'' , "Single Quote" , None ) , // Literals are already lexed by this point, so we can't recover
312- ( '"' , "Quotation Mark" , None ) , // gracefully just by spitting the correct token out.
313315 ( '(' , "Left Parenthesis" , Some ( token:: OpenDelim ( token:: Paren ) ) ) ,
314316 ( ')' , "Right Parenthesis" , Some ( token:: CloseDelim ( token:: Paren ) ) ) ,
315317 ( '[' , "Left Square Bracket" , Some ( token:: OpenDelim ( token:: Bracket ) ) ) ,
@@ -324,6 +326,10 @@ const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
324326 ( '<' , "Less-Than Sign" , Some ( token:: Lt ) ) ,
325327 ( '=' , "Equals Sign" , Some ( token:: Eq ) ) ,
326328 ( '>' , "Greater-Than Sign" , Some ( token:: Gt ) ) ,
329+ // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
330+ // spitting the correct token out.
331+ ( '\'' , "Single Quote" , None ) ,
332+ ( '"' , "Quotation Mark" , None ) ,
327333] ;
328334
329335crate fn check_for_substitution < ' a > (
0 commit comments