diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index d10b192034354..c29ab569f4795 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -331,24 +331,37 @@ pub fn is_whitespace(c: char) -> bool { matches!( c, - // Usual ASCII suspects - '\u{0009}' // \t - | '\u{000A}' // \n + // End-of-line characters + | '\u{000A}' // line feed (\n) | '\u{000B}' // vertical tab | '\u{000C}' // form feed - | '\u{000D}' // \r - | '\u{0020}' // space - - // NEXT LINE from latin1 - | '\u{0085}' + | '\u{000D}' // carriage return (\r) + | '\u{0085}' // next line (from latin1) + | '\u{2028}' // LINE SEPARATOR + | '\u{2029}' // PARAGRAPH SEPARATOR - // Bidi markers + // `Default_Ignorable_Code_Point` characters | '\u{200E}' // LEFT-TO-RIGHT MARK | '\u{200F}' // RIGHT-TO-LEFT MARK - // Dedicated whitespace characters from Unicode - | '\u{2028}' // LINE SEPARATOR - | '\u{2029}' // PARAGRAPH SEPARATOR + // Horizontal space characters + | '\u{0009}' // tab (\t) + | '\u{0020}' // space + ) +} + +/// True if `c` is considered horizontal whitespace according to Rust language definition. +pub fn is_horizontal_whitespace(c: char) -> bool { + // This is the horizontal subset of Pattern_White_Space: tab and space only. + // + // Note that this set is stable (ie, it doesn't change with different + // Unicode versions), so it's ok to just hard-code the values. + + matches!( + c, + // Horizontal space characters + '\u{0009}' // tab (\t) + | '\u{0020}' // space ) } @@ -538,7 +551,7 @@ impl Cursor<'_> { debug_assert!(length_opening >= 3); // whitespace between the opening and the infostring. 
- self.eat_while(|ch| ch != '\n' && is_whitespace(ch)); + self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch)); // copied from `eat_identifier`, but allows `-` and `.` in infostring to allow something like // `---Cargo.toml` as a valid opener @@ -547,7 +560,7 @@ impl Cursor<'_> { self.eat_while(|c| is_id_continue(c) || c == '-' || c == '.'); } - self.eat_while(|ch| ch != '\n' && is_whitespace(ch)); + self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch)); let invalid_infostring = self.first() != '\n'; let mut found = false; @@ -588,7 +601,7 @@ impl Cursor<'_> { // on a standalone line. Might be wrong. while let Some(closing) = rest.find("---") { let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1); - if rest[preceding_chars_start..closing].chars().all(is_whitespace) { + if rest[preceding_chars_start..closing].chars().all(is_horizontal_whitespace) { // candidate found potential_closing = Some(closing); break; diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 9792240a54850..e3bd6a9a3270f 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -6,7 +6,7 @@ use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_contr use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; use rustc_lexer::{ - Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace, + Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_horizontal_whitespace, }; use rustc_literal_escaper::{EscapeError, Mode, check_for_errors}; use rustc_session::lint::BuiltinLintDiag; @@ -597,7 +597,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { let last_line_start = within.rfind('\n').map_or(0, |i| i + 1); let last_line = &within[last_line_start..]; - let last_line_trimmed = last_line.trim_start_matches(is_whitespace); + let last_line_trimmed = 
last_line.trim_start_matches(is_horizontal_whitespace); let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32); let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos); @@ -640,7 +640,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { }); } - if !rest.trim_matches(is_whitespace).is_empty() { + if !rest.trim_matches(is_horizontal_whitespace).is_empty() { let span = self.mk_sp(last_line_start_pos, self.pos); self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span }); } diff --git a/tests/ui/.gitattributes b/tests/ui/.gitattributes index 9ea3d3fb0e1f1..4923ac100e172 100644 --- a/tests/ui/.gitattributes +++ b/tests/ui/.gitattributes @@ -3,4 +3,5 @@ json-bom-plus-crlf.rs -text json-bom-plus-crlf-multifile.rs -text json-bom-plus-crlf-multifile-aux.rs -text trailing-carriage-return-in-string.rs -text +frontmatter-crlf.rs -text *.bin -text diff --git a/tests/ui/frontmatter/frontmatter-contains-whitespace.rs b/tests/ui/frontmatter/frontmatter-contains-whitespace.rs new file mode 100644 index 0000000000000..11df822f793cf --- /dev/null +++ b/tests/ui/frontmatter/frontmatter-contains-whitespace.rs @@ -0,0 +1,22 @@ +#!/usr/bin/env -S cargo -Zscript +---cargo +# Beware editing: it has numerous whitespace characters which are important. +# It contains one ranges from the 'PATTERN_WHITE_SPACE' property outlined in +# https://unicode.org/Public/UNIDATA/PropList.txt +# +# The characters in the first expression of the assertion can be generated +# from: "4\u{0C}+\n\t\r7\t*\u{20}2\u{85}/\u{200E}3\u{200F}*\u{2028}2\u{2029}" +package.description = """ +4 + + +7 * 2…/‎3‏*
2 +""" +--- + +//@ check-pass + +// Ensure the frontmatter can contain any whitespace + +#![feature(frontmatter)] + +fn main() {} diff --git a/tests/ui/frontmatter/frontmatter-crlf.rs b/tests/ui/frontmatter/frontmatter-crlf.rs new file mode 100644 index 0000000000000..b46cc9ddbb0e0 --- /dev/null +++ b/tests/ui/frontmatter/frontmatter-crlf.rs @@ -0,0 +1,14 @@ +#!/usr/bin/env -S cargo -Zscript +--- +[dependencies] +clap = "4" +--- + +//@ check-pass +// ignore-tidy-cr + +// crlf line endings should be accepted + +#![feature(frontmatter)] + +fn main() {} diff --git a/tests/ui/frontmatter/frontmatter-whitespace-3.rs b/tests/ui/frontmatter/frontmatter-whitespace-3.rs index 95e0981e2ae8b..6580514fba2d2 100644 --- a/tests/ui/frontmatter/frontmatter-whitespace-3.rs +++ b/tests/ui/frontmatter/frontmatter-whitespace-3.rs @@ -1,7 +1,7 @@ ----cargo ---- +---cargo +--- // please note the whitespace characters after the first four lines. // This ensures that we accept whitespaces before the frontmatter, after @@ -10,6 +10,7 @@ //@ check-pass // ignore-tidy-end-whitespace // ignore-tidy-leading-newlines +// ignore-tidy-tab #![feature(frontmatter)] diff --git a/tests/ui/frontmatter/frontmatter-whitespace-4.rs b/tests/ui/frontmatter/frontmatter-whitespace-4.rs index 3bda3227838c7..1e7c5556647b0 100644 --- a/tests/ui/frontmatter/frontmatter-whitespace-4.rs +++ b/tests/ui/frontmatter/frontmatter-whitespace-4.rs @@ -1,7 +1,8 @@ ---- cargo +--- cargo --- //@ check-pass +// ignore-tidy-tab // A frontmatter infostring can have leading whitespace. #![feature(frontmatter)]