Skip to content

Commit 5fbbfbb

Browse files
Simplify raw string error reporting.
This makes `UnvalidatedRawStr` and `ValidatedRawStr` unnecessary and removes 70 lines.
1 parent b85e3fe commit 5fbbfbb

File tree

3 files changed

+85
-216
lines changed

3 files changed

+85
-216
lines changed

src/librustc_lexer/src/lib.rs

Lines changed: 45 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ mod tests;
2929
use self::LiteralKind::*;
3030
use self::TokenKind::*;
3131
use crate::cursor::{Cursor, EOF_CHAR};
32-
use std::convert::TryInto;
32+
use std::convert::TryFrom;
3333

3434
/// Parsed token.
3535
/// It doesn't contain information about data that has been parsed,
@@ -142,84 +142,24 @@ pub enum LiteralKind {
142142
/// "b"abc"", "b"abc"
143143
ByteStr { terminated: bool },
144144
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
145-
RawStr(UnvalidatedRawStr),
145+
RawStr { n_hashes: u16, err: Option<RawStrError> },
146146
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
147-
RawByteStr(UnvalidatedRawStr),
148-
}
149-
150-
/// Represents something that looks like a raw string, but may have some
151-
/// problems. Use `.validate()` to convert it into something
152-
/// usable.
153-
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
154-
pub struct UnvalidatedRawStr {
155-
/// The prefix (`r###"`) is valid
156-
valid_start: bool,
157-
158-
/// The postfix (`"###`) is valid
159-
valid_end: bool,
160-
161-
/// The number of leading `#`
162-
n_start_hashes: usize,
163-
/// The number of trailing `#`. `n_end_hashes` <= `n_start_hashes`
164-
n_end_hashes: usize,
165-
/// The offset starting at `r` or `br` where the user may have intended to end the string.
166-
/// Currently, it is the longest sequence of pattern `"#+"`.
167-
possible_terminator_offset: Option<usize>,
147+
RawByteStr { n_hashes: u16, err: Option<RawStrError> },
168148
}
169149

170150
/// Error produced validating a raw string. Represents cases like:
171-
/// - `r##~"abcde"##`: `LexRawStrError::InvalidStarter`
172-
/// - `r###"abcde"##`: `LexRawStrError::NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
173-
/// - Too many `#`s (>65536): `TooManyDelimiters`
151+
/// - `r##~"abcde"##`: `InvalidStarter`
152+
/// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11) }`
153+
/// - Too many `#`s (>65535): `TooManyDelimiters`
174154
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
175-
pub enum LexRawStrError {
155+
pub enum RawStrError {
176156
/// Non-`#` characters exist between `r` and `"`, e.g. `r#~"..`
177-
InvalidStarter,
157+
InvalidStarter { bad_char: char },
178158
/// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where the user
179159
/// may have intended to terminate it.
180160
NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
181-
/// More than 65536 `#`s exist.
182-
TooManyDelimiters,
183-
}
184-
185-
/// Raw String that contains a valid prefix (`#+"`) and postfix (`"#+`) where
186-
/// there are a matching number of `#` characters in both. Note that this will
187-
/// not consume extra trailing `#` characters: `r###"abcde"####` is lexed as a
188-
/// `ValidatedRawString { n_hashes: 3 }` followed by a `#` token.
189-
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
190-
pub struct ValidatedRawStr {
191-
n_hashes: u16,
192-
}
193-
194-
impl ValidatedRawStr {
195-
pub fn num_hashes(&self) -> u16 {
196-
self.n_hashes
197-
}
198-
}
199-
200-
impl UnvalidatedRawStr {
201-
pub fn validate(self) -> Result<ValidatedRawStr, LexRawStrError> {
202-
if !self.valid_start {
203-
return Err(LexRawStrError::InvalidStarter);
204-
}
205-
206-
// Only up to 65535 `#`s are allowed in raw strings
207-
let n_start_safe: u16 =
208-
self.n_start_hashes.try_into().map_err(|_| LexRawStrError::TooManyDelimiters)?;
209-
210-
if self.n_start_hashes > self.n_end_hashes || !self.valid_end {
211-
Err(LexRawStrError::NoTerminator {
212-
expected: self.n_start_hashes,
213-
found: self.n_end_hashes,
214-
possible_terminator_offset: self.possible_terminator_offset,
215-
})
216-
} else {
217-
// Since the lexer should never produce a literal with n_end > n_start, if n_start <= n_end,
218-
// they must be equal.
219-
debug_assert_eq!(self.n_start_hashes, self.n_end_hashes);
220-
Ok(ValidatedRawStr { n_hashes: n_start_safe })
221-
}
222-
}
161+
/// More than 65535 `#`s exist.
162+
TooManyDelimiters { found: usize },
223163
}
224164

225165
/// Base of numeric literal encoding according to its prefix.
@@ -354,12 +294,12 @@ impl Cursor<'_> {
354294
'r' => match (self.first(), self.second()) {
355295
('#', c1) if is_id_start(c1) => self.raw_ident(),
356296
('#', _) | ('"', _) => {
357-
let raw_str_i = self.raw_double_quoted_string(1);
297+
let (n_hashes, err) = self.raw_double_quoted_string(1);
358298
let suffix_start = self.len_consumed();
359-
if raw_str_i.n_end_hashes == raw_str_i.n_start_hashes {
299+
if err.is_none() {
360300
self.eat_literal_suffix();
361301
}
362-
let kind = RawStr(raw_str_i);
302+
let kind = RawStr { n_hashes, err };
363303
Literal { kind, suffix_start }
364304
}
365305
_ => self.ident(),
@@ -389,14 +329,12 @@ impl Cursor<'_> {
389329
}
390330
('r', '"') | ('r', '#') => {
391331
self.bump();
392-
let raw_str_i = self.raw_double_quoted_string(2);
332+
let (n_hashes, err) = self.raw_double_quoted_string(2);
393333
let suffix_start = self.len_consumed();
394-
let terminated = raw_str_i.n_start_hashes == raw_str_i.n_end_hashes;
395-
if terminated {
334+
if err.is_none() {
396335
self.eat_literal_suffix();
397336
}
398-
399-
let kind = RawByteStr(raw_str_i);
337+
let kind = RawByteStr { n_hashes, err };
400338
Literal { kind, suffix_start }
401339
}
402340
_ => self.ident(),
@@ -692,27 +630,34 @@ impl Cursor<'_> {
692630
false
693631
}
694632

695-
/// Eats the double-quoted string and returns an `UnvalidatedRawStr`.
696-
fn raw_double_quoted_string(&mut self, prefix_len: usize) -> UnvalidatedRawStr {
633+
/// Eats the double-quoted string and returns `n_hashes` together with an error, if one was encountered.
634+
fn raw_double_quoted_string(&mut self, prefix_len: usize) -> (u16, Option<RawStrError>) {
635+
// Wrap the actual function to handle the error with too many hashes.
636+
// This way, it eats the whole raw string.
637+
let (n_hashes, err) = self.raw_string_unvalidated(prefix_len);
638+
// Only up to 65535 `#`s are allowed in raw strings
639+
match u16::try_from(n_hashes) {
640+
Ok(num) => (num, err),
641+
// We lie about the number of hashes here :P
642+
Err(_) => (0, Some(RawStrError::TooManyDelimiters { found: n_hashes })),
643+
}
644+
}
645+
646+
fn raw_string_unvalidated(&mut self, prefix_len: usize) -> (usize, Option<RawStrError>) {
697647
debug_assert!(self.prev() == 'r');
698-
let mut valid_start: bool = false;
699648
let start_pos = self.len_consumed();
700-
let (mut possible_terminator_offset, mut max_hashes) = (None, 0);
649+
let mut possible_terminator_offset = None;
650+
let mut max_hashes = 0;
701651

702652
// Count opening '#' symbols.
703653
let n_start_hashes = self.eat_while(|c| c == '#');
704654

705655
// Check that string is started.
706656
match self.bump() {
707-
Some('"') => valid_start = true,
708-
_ => {
709-
return UnvalidatedRawStr {
710-
valid_start,
711-
valid_end: false,
712-
n_start_hashes,
713-
n_end_hashes: 0,
714-
possible_terminator_offset,
715-
};
657+
Some('"') => (),
658+
c => {
659+
let c = c.unwrap_or(EOF_CHAR);
660+
return (n_start_hashes, Some(RawStrError::InvalidStarter { bad_char: c }));
716661
}
717662
}
718663

@@ -722,13 +667,14 @@ impl Cursor<'_> {
722667
self.eat_while(|c| c != '"');
723668

724669
if self.is_eof() {
725-
return UnvalidatedRawStr {
726-
valid_start,
727-
valid_end: false,
670+
return (
728671
n_start_hashes,
729-
n_end_hashes: max_hashes,
730-
possible_terminator_offset,
731-
};
672+
Some(RawStrError::NoTerminator {
673+
expected: n_start_hashes,
674+
found: max_hashes,
675+
possible_terminator_offset,
676+
}),
677+
);
732678
}
733679

734680
// Eat closing double quote.
@@ -737,7 +683,7 @@ impl Cursor<'_> {
737683
// Check that the number of closing '#' symbols
738684
// is equal to the amount of opening ones.
739685
// Note that this will not consume extra trailing `#` characters:
740-
// `r###"abcde"####` is lexed as a `LexedRawString { n_hashes: 3 }`
686+
// `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
741687
// followed by a `#` token.
742688
let mut hashes_left = n_start_hashes;
743689
let is_closing_hash = |c| {
@@ -751,13 +697,7 @@ impl Cursor<'_> {
751697
let n_end_hashes = self.eat_while(is_closing_hash);
752698

753699
if n_end_hashes == n_start_hashes {
754-
return UnvalidatedRawStr {
755-
valid_start,
756-
valid_end: true,
757-
n_start_hashes,
758-
n_end_hashes,
759-
possible_terminator_offset: None,
760-
};
700+
return (n_start_hashes, None);
761701
} else if n_end_hashes > max_hashes {
762702
// Keep track of possible terminators to give a hint about
763703
// where there might be a missing terminator

src/librustc_lexer/src/tests.rs

Lines changed: 16 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -2,92 +2,46 @@
22
mod tests {
33
use crate::*;
44

5-
fn check_raw_str(
6-
s: &str,
7-
expected: UnvalidatedRawStr,
8-
validated: Result<ValidatedRawStr, LexRawStrError>,
9-
) {
5+
fn check_raw_str(s: &str, expected_hashes: u16, expected_err: Option<RawStrError>) {
106
let s = &format!("r{}", s);
117
let mut cursor = Cursor::new(s);
128
cursor.bump();
13-
let tok = cursor.raw_double_quoted_string(0);
14-
assert_eq!(tok, expected);
15-
assert_eq!(tok.validate(), validated);
9+
let (n_hashes, err) = cursor.raw_double_quoted_string(0);
10+
assert_eq!(n_hashes, expected_hashes);
11+
assert_eq!(err, expected_err);
1612
}
1713

1814
#[test]
1915
fn test_naked_raw_str() {
20-
check_raw_str(
21-
r#""abc""#,
22-
UnvalidatedRawStr {
23-
n_start_hashes: 0,
24-
n_end_hashes: 0,
25-
valid_start: true,
26-
valid_end: true,
27-
possible_terminator_offset: None,
28-
},
29-
Ok(ValidatedRawStr { n_hashes: 0 }),
30-
);
16+
check_raw_str(r#""abc""#, 0, None);
3117
}
3218

3319
#[test]
3420
fn test_raw_no_start() {
35-
check_raw_str(
36-
r##""abc"#"##,
37-
UnvalidatedRawStr {
38-
n_start_hashes: 0,
39-
n_end_hashes: 0,
40-
valid_start: true,
41-
valid_end: true,
42-
possible_terminator_offset: None,
43-
},
44-
Ok(ValidatedRawStr { n_hashes: 0 }),
45-
);
21+
check_raw_str(r##""abc"#"##, 0, None);
4622
}
4723

4824
#[test]
4925
fn test_too_many_terminators() {
5026
// this error is handled in the parser later
51-
check_raw_str(
52-
r###"#"abc"##"###,
53-
UnvalidatedRawStr {
54-
n_start_hashes: 1,
55-
n_end_hashes: 1,
56-
valid_end: true,
57-
valid_start: true,
58-
possible_terminator_offset: None,
59-
},
60-
Ok(ValidatedRawStr { n_hashes: 1 }),
61-
);
27+
check_raw_str(r###"#"abc"##"###, 1, None);
6228
}
6329

6430
#[test]
6531
fn test_unterminated() {
6632
check_raw_str(
6733
r#"#"abc"#,
68-
UnvalidatedRawStr {
69-
n_start_hashes: 1,
70-
n_end_hashes: 0,
71-
valid_end: false,
72-
valid_start: true,
73-
possible_terminator_offset: None,
74-
},
75-
Err(LexRawStrError::NoTerminator {
34+
1,
35+
Some(RawStrError::NoTerminator {
7636
expected: 1,
7737
found: 0,
7838
possible_terminator_offset: None,
7939
}),
8040
);
8141
check_raw_str(
8242
r###"##"abc"#"###,
83-
UnvalidatedRawStr {
84-
n_start_hashes: 2,
85-
n_end_hashes: 1,
86-
valid_start: true,
87-
valid_end: false,
88-
possible_terminator_offset: Some(7),
89-
},
90-
Err(LexRawStrError::NoTerminator {
43+
2,
44+
Some(RawStrError::NoTerminator {
9145
expected: 2,
9246
found: 1,
9347
possible_terminator_offset: Some(7),
@@ -96,14 +50,8 @@ mod tests {
9650
// We're looking for "# not just any #
9751
check_raw_str(
9852
r###"##"abc#"###,
99-
UnvalidatedRawStr {
100-
n_start_hashes: 2,
101-
n_end_hashes: 0,
102-
valid_start: true,
103-
valid_end: false,
104-
possible_terminator_offset: None,
105-
},
106-
Err(LexRawStrError::NoTerminator {
53+
2,
54+
Some(RawStrError::NoTerminator {
10755
expected: 2,
10856
found: 0,
10957
possible_terminator_offset: None,
@@ -113,32 +61,16 @@ mod tests {
11361

11462
#[test]
11563
fn test_invalid_start() {
116-
check_raw_str(
117-
r##"#~"abc"#"##,
118-
UnvalidatedRawStr {
119-
n_start_hashes: 1,
120-
n_end_hashes: 0,
121-
valid_start: false,
122-
valid_end: false,
123-
possible_terminator_offset: None,
124-
},
125-
Err(LexRawStrError::InvalidStarter),
126-
);
64+
check_raw_str(r##"#~"abc"#"##, 1, Some(RawStrError::InvalidStarter { bad_char: '~' }));
12765
}
12866

12967
#[test]
13068
fn test_unterminated_no_pound() {
13169
// https://github.com/rust-lang/rust/issues/70677
13270
check_raw_str(
13371
r#"""#,
134-
UnvalidatedRawStr {
135-
n_start_hashes: 0,
136-
n_end_hashes: 0,
137-
valid_start: true,
138-
valid_end: false,
139-
possible_terminator_offset: None,
140-
},
141-
Err(LexRawStrError::NoTerminator {
72+
0,
73+
Some(RawStrError::NoTerminator {
14274
expected: 0,
14375
found: 0,
14476
possible_terminator_offset: None,

0 commit comments

Comments
 (0)