LukasKalbertodt · LukasKalbertodt · Mar 5, 2023 · Mar 3, 2023 · Mar 3, 2023 · Mar 3, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -40,3 +40,10 @@ jobs:
       run: |
         cargo test --release --no-default-features --lib -- --include-ignored
         cargo test --doc --no-default-features
+
+    - name: Build with check_suffix
+      run: cargo build --features=check_suffix
+    - name: Run tests with check_suffix
+      run: |
+        cargo test --release --features=check_suffix --lib -- --include-ignored
+        cargo test --doc --features=check_suffix
diff --git a/Cargo.toml b/Cargo.toml
@@ -26,6 +26,8 @@ exclude = [".github"]
 
 [features]
 default = ["proc-macro2"]
+check_suffix = ["unicode-xid"]
 
 [dependencies]
 proc-macro2 = { version = "1", optional = true }
+unicode-xid = { version = "0.2.4", optional = true }
diff --git a/examples/procmacro/examples/main.rs b/examples/procmacro/examples/main.rs
@@ -1,4 +1,4 @@
-use procmacro_example::{concat, repeat};
+use procmacro_example::{concat, dbg_and_swallow, repeat};
 
 const FOO: &str = concat!(r#"Hello "# '🦊' "\nHere is a friend: \u{1F427}");
 // const FOO: &str = concat!(::);
@@ -8,6 +8,7 @@ const BAR: &str = repeat!(3 * "నా పిల్లి లావుగా ఉ
 const BAZ: &str = repeat!(0b101 * "🦀");
 // const BAZ: &str = repeat!(3.5 * "🦀");
 
+dbg_and_swallow!(16px);
 
 fn main() {
     println!("{}", FOO);

diff --git a/examples/procmacro/src/lib.rs b/examples/procmacro/src/lib.rs
@@ -3,6 +3,19 @@ use proc_macro::{Spacing, TokenStream, TokenTree};
 use litrs::{Literal, IntegerLit, StringLit};
 
 
+/// Prints every input token together with the result of converting it into a
+/// `litrs::Literal`, then expands to nothing.
+///
+/// The output is written with `println!` while the macro is being expanded;
+/// the returned token stream is always empty.
+#[proc_macro]
+pub fn dbg_and_swallow(input: TokenStream) -> TokenStream {
+    for token in input {
+        println!("{} -> {:#?}", token, Literal::try_from(&token));
+    }
+    TokenStream::new()
+}
+
-/// Concatinates all input string and char literals into a single output string
+/// Concatenates all input string and char literals into a single output string
 /// literal.
 #[proc_macro]

diff --git a/src/byte/mod.rs b/src/byte/mod.rs
@@ -4,6 +4,7 @@ use crate::{
     Buffer, ParseError,
     err::{perr, ParseErrorKind::*},
     escape::unescape,
+    parse::check_suffix,
 };
 
 
@@ -15,6 +16,8 @@ use crate::{
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct ByteLit<B: Buffer> {
     raw: B,
+    /// Start index of the suffix or `raw.len()` if there is no suffix.
+    start_suffix: usize,
     value: u8,
 }
 
@@ -29,15 +32,20 @@ impl<B: Buffer> ByteLit<B> {
             return Err(perr(None, InvalidByteLiteralStart));
         }
 
-        let value = parse_impl(&input)?;
-        Ok(Self { raw: input, value })
+        let (value, start_suffix) = parse_impl(&input)?;
+        Ok(Self { raw: input, value, start_suffix })
     }
 
     /// Returns the byte value that this literal represents.
     pub fn value(&self) -> u8 {
         self.value
     }
 
+    /// The optional suffix. Returns `""` if the suffix is empty/does not exist.
+    pub fn suffix(&self) -> &str {
+        &(*self.raw)[self.start_suffix..]
+    }
+
     /// Returns the raw input that was passed to `parse`.
     pub fn raw_input(&self) -> &str {
         &self.raw
@@ -56,6 +64,7 @@ impl ByteLit<&str> {
     pub fn to_owned(&self) -> ByteLit<String> {
         ByteLit {
             raw: self.raw.to_owned(),
+            start_suffix: self.start_suffix,
             value: self.value,
         }
     }
@@ -69,32 +78,35 @@ impl<B: Buffer> fmt::Display for ByteLit<B> {
 
 /// Precondition: must start with `b'`.
+///
+/// On success, returns the byte value and the index at which the suffix
+/// starts (the index right after the closing quote). The suffix itself is
+/// validated via `check_suffix`.
 #[inline(never)]
-pub(crate) fn parse_impl(input: &str) -> Result<u8, ParseError> {
-    if input.len() == 2 {
-        return Err(perr(None, UnterminatedByteLiteral));
-    }
-    if *input.as_bytes().last().unwrap() != b'\'' {
-        return Err(perr(None, UnterminatedByteLiteral));
-    }
-
-    let inner = &input[2..input.len() - 1];
-    let first = inner.as_bytes().get(0).ok_or(perr(None, EmptyByteLiteral))?;
+pub(crate) fn parse_impl(input: &str) -> Result<(u8, usize), ParseError> {
+    let input_bytes = input.as_bytes();
+    // Build the error lazily: it is only needed when there is no byte after `b'`.
+    let first = input_bytes.get(2).ok_or_else(|| perr(None, UnterminatedByteLiteral))?;
     let (c, len) = match first {
-        b'\'' => return Err(perr(2, UnescapedSingleQuote)),
-        b'\n' | b'\t' | b'\r'
-            => return Err(perr(2, UnescapedSpecialWhitespace)),
-
-        b'\\' => unescape::<u8>(inner, 2)?,
+        b'\'' if input_bytes.get(3) == Some(&b'\'') => return Err(perr(2, UnescapedSingleQuote)),
+        b'\'' => return Err(perr(None, EmptyByteLiteral)),
+        b'\n' | b'\t' | b'\r' => return Err(perr(2, UnescapedSpecialWhitespace)),
+        b'\\' => unescape::<u8>(&input[2..], 2)?,
         other if other.is_ascii() => (*other, 1),
         _ => return Err(perr(2, NonAsciiInByteLiteral)),
     };
-    let rest = &inner[len..];
 
-    if !rest.is_empty() {
-        return Err(perr(len + 2..input.len() - 1, OverlongByteLiteral));
+    // The closing quote must directly follow the byte (or escape) parsed above.
+    match input[2 + len..].find('\'') {
+        Some(0) => {}
+        Some(_) => return Err(perr(None, OverlongByteLiteral)),
+        None => return Err(perr(None, UnterminatedByteLiteral)),
     }
 
-    Ok(c)
+    let start_suffix = 2 + len + 1;
+    let suffix = &input[start_suffix..];
+    check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?;
+
+    Ok((c, start_suffix))
 }
 
 #[cfg(test)]

diff --git a/src/byte/tests.rs b/src/byte/tests.rs
@@ -3,16 +3,20 @@ use crate::{ByteLit, Literal, test_util::{assert_parse_ok_eq, assert_roundtrip}}
 // ===== Utility functions =======================================================================
 
 macro_rules! check {
-    ($lit:literal) => {
-        let input = stringify!($lit);
+    ($lit:literal) => { check!($lit, stringify!($lit), "") };
+    ($lit:literal, $input:expr, $suffix:literal) => {
+        let input = $input;
         let expected = ByteLit {
             raw: input,
+            start_suffix: input.len() - $suffix.len(),
             value: $lit,
         };
 
         assert_parse_ok_eq(input, ByteLit::parse(input), expected.clone(), "ByteLit::parse");
         assert_parse_ok_eq(input, Literal::parse(input), Literal::Byte(expected), "Literal::parse");
-        assert_eq!(ByteLit::parse(input).unwrap().value(), $lit);
+        let lit = ByteLit::parse(input).unwrap();
+        assert_eq!(lit.value(), $lit);
+        assert_eq!(lit.suffix(), $suffix);
         assert_roundtrip(expected.to_owned(), input);
     };
 }
@@ -113,13 +117,23 @@ fn byte_escapes() {
     check!(b'\xFF');
 }
 
+#[test]
+fn suffixes() {
+    check!(b'a', r##"b'a'peter"##, "peter");
+    check!(b'#', r##"b'#'peter"##, "peter");
+    check!(b'\n', r##"b'\n'peter"##, "peter");
+    check!(b'\'', r##"b'\''peter"##, "peter");
+    check!(b'\"', r##"b'\"'peter"##, "peter");
+    check!(b'\xFF', r##"b'\xFF'peter"##, "peter");
+}
+
 #[test]
-fn invald_escapes() {
+fn invalid_escapes() {
     assert_err!(ByteLit, r"b'\a'", UnknownEscape, 2..4);
     assert_err!(ByteLit, r"b'\y'", UnknownEscape, 2..4);
-    assert_err!(ByteLit, r"b'\", UnterminatedByteLiteral, None);
-    assert_err!(ByteLit, r"b'\x'", UnterminatedEscape, 2..4);
-    assert_err!(ByteLit, r"b'\x1'", UnterminatedEscape, 2..5);
+    assert_err!(ByteLit, r"b'\", UnterminatedEscape, 2..3);
+    assert_err!(ByteLit, r"b'\x'", UnterminatedEscape, 2..5);
+    assert_err!(ByteLit, r"b'\x1'", InvalidXEscape, 2..6);
     assert_err!(ByteLit, r"b'\xaj'", InvalidXEscape, 2..6);
     assert_err!(ByteLit, r"b'\xjb'", InvalidXEscape, 2..6);
 }
@@ -148,16 +162,16 @@ fn unicode_escape_not_allowed() {
 #[test]
 fn parse_err() {
     assert_err!(ByteLit, r"b''", EmptyByteLiteral, None);
-    assert_err!(ByteLit, r"b' ''", OverlongByteLiteral, 3..4);
+    assert_err!(ByteLit, r"b' ''", UnexpectedChar, 4..5);
 
     assert_err!(ByteLit, r"b'", UnterminatedByteLiteral, None);
     assert_err!(ByteLit, r"b'a", UnterminatedByteLiteral, None);
     assert_err!(ByteLit, r"b'\n", UnterminatedByteLiteral, None);
     assert_err!(ByteLit, r"b'\x35", UnterminatedByteLiteral, None);
 
-    assert_err!(ByteLit, r"b'ab'", OverlongByteLiteral, 3..4);
-    assert_err!(ByteLit, r"b'a _'", OverlongByteLiteral, 3..5);
-    assert_err!(ByteLit, r"b'\n3'", OverlongByteLiteral, 4..5);
+    assert_err!(ByteLit, r"b'ab'", OverlongByteLiteral, None);
+    assert_err!(ByteLit, r"b'a _'", OverlongByteLiteral, None);
+    assert_err!(ByteLit, r"b'\n3'", OverlongByteLiteral, None);
 
     assert_err!(ByteLit, r"", Empty, None);
 

diff --git a/src/bytestr/mod.rs b/src/bytestr/mod.rs
@@ -24,6 +24,9 @@ pub struct ByteStringLit<B: Buffer> {
     /// The number of hash signs in case of a raw string literal, or `None` if
     /// it's not a raw string literal.
     num_hashes: Option<u32>,
+
+    /// Start index of the suffix or `raw.len()` if there is no suffix.
+    start_suffix: usize,
 }
 
 impl<B: Buffer> ByteStringLit<B> {
@@ -37,7 +40,8 @@ impl<B: Buffer> ByteStringLit<B> {
             return Err(perr(None, InvalidByteStringLiteralStart));
         }
 
-        Self::parse_impl(input)
+        let (value, num_hashes, start_suffix) = parse_impl(&input)?;
+        Ok(Self { raw: input, value, num_hashes, start_suffix })
     }
 
     /// Returns the string value this literal represents (where all escapes have
@@ -56,6 +60,11 @@ impl<B: Buffer> ByteStringLit<B> {
         value.map(B::ByteCow::from).unwrap_or_else(|| raw.cut(inner_range).into_byte_cow())
     }
 
+    /// The optional suffix. Returns `""` if the suffix is empty/does not exist.
+    pub fn suffix(&self) -> &str {
+        &(*self.raw)[self.start_suffix..]
+    }
+
     /// Returns whether this literal is a raw string literal (starting with
     /// `r`).
     pub fn is_raw_byte_string(&self) -> bool {
@@ -75,27 +84,8 @@ impl<B: Buffer> ByteStringLit<B> {
     /// The range within `self.raw` that excludes the quotes and potential `r#`.
     fn inner_range(&self) -> Range<usize> {
         match self.num_hashes {
-            None => 2..self.raw.len() - 1,
-            Some(n) => 2 + n as usize + 1..self.raw.len() - n as usize - 1,
-        }
-    }
-
-    /// Precondition: input has to start with either `b"` or `br`.
-    pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> {
-        if input.starts_with(r"br") {
-            let (value, num_hashes) = scan_raw_string::<u8>(&input, 2)?;
-            Ok(Self {
-                raw: input,
-                value: value.map(|s| s.into_bytes()),
-                num_hashes: Some(num_hashes),
-            })
-        } else {
-            let value = unescape_string::<u8>(&input, 2)?.map(|s| s.into_bytes());
-            Ok(Self {
-                raw: input,
-                value,
-                num_hashes: None,
-            })
+            None => 2..self.start_suffix - 1,
+            Some(n) => 2 + n as usize + 1..self.start_suffix - n as usize - 1,
         }
     }
 }
@@ -108,6 +98,7 @@ impl ByteStringLit<&str> {
             raw: self.raw.to_owned(),
             value: self.value,
             num_hashes: self.num_hashes,
+            start_suffix: self.start_suffix,
         }
     }
 }
@@ -119,5 +110,23 @@ impl<B: Buffer> fmt::Display for ByteStringLit<B> {
 }
 
 
+/// Parses a byte string literal into the parts stored in `ByteStringLit`.
+///
+/// On success, returns `(value, num_hashes, start_suffix)`: the scanner's
+/// optional string value converted to bytes, the number of hashes (`Some`
+/// only for literals starting with `br`) and the start index of the suffix.
+///
+/// Precondition: input has to start with either `b"` or `br`.
+#[inline(never)]
+fn parse_impl(input: &str) -> Result<(Option<Vec<u8>>, Option<u32>, usize), ParseError> {
+    if input.starts_with("br") {
+        scan_raw_string::<u8>(input, 2)
+            .map(|(v, num, start_suffix)| (v.map(String::into_bytes), Some(num), start_suffix))
+    } else {
+        unescape_string::<u8>(input, 2)
+            .map(|(v, start_suffix)| (v.map(String::into_bytes), None, start_suffix))
+    }
+}
+
 #[cfg(test)]
 mod tests;
diff --git a/src/bytestr/tests.rs b/src/bytestr/tests.rs
@@ -4,19 +4,25 @@ use crate::{Literal, ByteStringLit, test_util::{assert_parse_ok_eq, assert_round
 
 macro_rules! check {
     ($lit:literal, $has_escapes:expr, $num_hashes:expr) => {
-        let input = stringify!($lit);
+        check!($lit, stringify!($lit), $has_escapes, $num_hashes, "")
+    };
+    ($lit:literal, $input:expr, $has_escapes:expr, $num_hashes:expr, $suffix:literal) => {
+        let input = $input;
         let expected = ByteStringLit {
             raw: input,
             value: if $has_escapes { Some($lit.to_vec()) } else { None },
             num_hashes: $num_hashes,
+            start_suffix: input.len() - $suffix.len(),
         };
 
         assert_parse_ok_eq(
             input, ByteStringLit::parse(input), expected.clone(), "ByteStringLit::parse");
         assert_parse_ok_eq(
             input, Literal::parse(input), Literal::ByteString(expected.clone()), "Literal::parse");
-        assert_eq!(ByteStringLit::parse(input).unwrap().value(), $lit);
-        assert_eq!(ByteStringLit::parse(input).unwrap().into_value().as_ref(), $lit);
+        let lit = ByteStringLit::parse(input).unwrap();
+        assert_eq!(lit.value(), $lit);
+        assert_eq!(lit.suffix(), $suffix);
+        assert_eq!(lit.into_value().as_ref(), $lit);
         assert_roundtrip(expected.into_owned(), input);
     };
 }
@@ -43,6 +49,7 @@ fn special_whitespace() {
                 raw: &*input,
                 value: None,
                 num_hashes,
+                start_suffix: input.len(),
             };
             assert_parse_ok_eq(
                 &input, ByteStringLit::parse(&*input), expected.clone(), "ByteStringLit::parse");
@@ -147,17 +154,23 @@ fn raw_byte_string() {
     check!(br#"cat\n\t\r\0\\x60\u{123}doggo"#, false, Some(1));
 }
 
+#[test]
+fn suffixes() {
+    check!(b"hello", r###"b"hello"suffix"###, false, None, "suffix");
+    check!(b"fox", r#"b"fox"peter"#, false, None, "peter");
+    check!(b"a\x0cb\\", r#"b"a\x0cb\\"_jürgen"#, true, None, "_jürgen");
+    check!(br"a\x0cb\\", r###"br#"a\x0cb\\"#_jürgen"###, false, Some(1), "_jürgen");
+}
+
 #[test]
 fn parse_err() {
     assert_err!(ByteStringLit, r#"b""#, UnterminatedString, None);
     assert_err!(ByteStringLit, r#"b"cat"#, UnterminatedString, None);
     assert_err!(ByteStringLit, r#"b"Jurgen"#, UnterminatedString, None);
     assert_err!(ByteStringLit, r#"b"foo bar baz"#, UnterminatedString, None);
 
-    assert_err!(ByteStringLit, r#"b"fox"peter"#, UnexpectedChar, 6..11);
-    assert_err!(ByteStringLit, r#"b"fox"peter""#, UnexpectedChar, 6..12);
-    assert_err!(ByteStringLit, r#"b"fox"bar"#, UnexpectedChar, 6..9);
-    assert_err!(ByteStringLit, r###"br#"foo "# bar"#"###, UnexpectedChar, 10..16);
+    assert_err!(ByteStringLit, r#"b"fox"peter""#, InvalidSuffix, 6);
+    assert_err!(ByteStringLit, r###"br#"foo "# bar"#"###, UnexpectedChar, 10);
 
     assert_err!(ByteStringLit, "b\"\r\"", IsolatedCr, 2);
     assert_err!(ByteStringLit, "b\"fo\rx\"", IsolatedCr, 4);
@@ -179,10 +192,10 @@ fn non_ascii() {
 }
 
 #[test]
-fn invald_escapes() {
+fn invalid_escapes() {
     assert_err!(ByteStringLit, r#"b"\a""#, UnknownEscape, 2..4);
     assert_err!(ByteStringLit, r#"b"foo\y""#, UnknownEscape, 5..7);
-    assert_err!(ByteStringLit, r#"b"\"#, UnterminatedString, None);
+    assert_err!(ByteStringLit, r#"b"\"#, UnterminatedEscape, 2);
     assert_err!(ByteStringLit, r#"b"\x""#, UnterminatedEscape, 2..4);
     assert_err!(ByteStringLit, r#"b"foo\x1""#, UnterminatedEscape, 5..8);
     assert_err!(ByteStringLit, r#"b" \xaj""#, InvalidXEscape, 3..7);