From d15cd2e6a2f9345e00accf573f2c62c850176d1d Mon Sep 17 00:00:00 2001
From: Marijn Schouten
Date: Fri, 14 Feb 2025 13:42:56 +0100
Subject: [PATCH] Make skip_whitespace do a single pass

---
 compiler/rustc_lexer/src/unescape.rs | 45 ++++++++++++++++++----------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs
index d6ea4249247f3..6d8b64a06affc 100644
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs
@@ -356,8 +356,10 @@ where
         let start = src.len() - chars.as_str().len() - c.len_utf8();
         let res = match c {
             '\\' => {
-                match chars.clone().next() {
+                let mut chars_clone = chars.clone();
+                match chars_clone.next() {
                     Some('\n') => {
+                        chars = chars_clone;
                         // Rust language specification requires us to skip whitespaces
                         // if unescaped '\' character is followed by '\n'.
                         // For details see [Rust language reference]
@@ -379,30 +381,41 @@ where
     }
 }
 
+/// Skip ASCII whitespace, except for the formfeed character
+/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
+/// Warns on unescaped newline and following non-ASCII whitespace.
 fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
 where
     F: FnMut(Range<usize>, EscapeError),
 {
-    let tail = chars.as_str();
-    let first_non_space = tail
-        .bytes()
-        .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
-        .unwrap_or(tail.len());
-    if tail[1..first_non_space].contains('\n') {
-        // The +1 accounts for the escaping slash.
-        let end = start + first_non_space + 1;
+    // the escaping slash and newline characters add 2 bytes
+    let mut end = start + 2;
+    let mut contains_nl = false;
+
+    // manual next_if loop
+    let mut next_char;
+    loop {
+        let mut chars_clone = chars.clone();
+        next_char = chars_clone.next();
+        match next_char {
+            Some(c) if c.is_ascii_whitespace() && c != '\x0c' => {
+                *chars = chars_clone;
+                end += 1;
+                contains_nl |= c == '\n';
+            }
+            _ => break,
+        }
+    }
+
+    if contains_nl {
         callback(start..end, EscapeError::MultipleSkippedLinesWarning);
     }
-    let tail = &tail[first_non_space..];
-    if let Some(c) = tail.chars().next() {
+    if let Some(c) = next_char {
         if c.is_whitespace() {
-            // For error reporting, we would like the span to contain the character that was not
-            // skipped. The +1 is necessary to account for the leading \ that started the escape.
-            let end = start + first_non_space + c.len_utf8() + 1;
-            callback(start..end, EscapeError::UnskippedWhitespaceWarning);
+            // for error reporting, include the character that was not skipped in the span
+            callback(start..end + c.len_utf8(), EscapeError::UnskippedWhitespaceWarning);
         }
     }
-    *chars = tail.chars();
 }
 
 /// Takes a contents of a string literal (without quotes) and produces a