Skip to content

Implement arbitrary suffixes (for all literals) #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 5, 2023
7 changes: 7 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,10 @@ jobs:
run: |
cargo test --release --no-default-features --lib -- --include-ignored
cargo test --doc --no-default-features

- name: Build with check_suffix
run: cargo build --features=check_suffix
- name: Run tests with check_suffix
run: |
cargo test --release --features=check_suffix --lib -- --include-ignored
cargo test --doc --features=check_suffix
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ exclude = [".github"]

[features]
default = ["proc-macro2"]
check_suffix = ["unicode-xid"]

[dependencies]
proc-macro2 = { version = "1", optional = true }
unicode-xid = { version = "0.2.4", optional = true }
3 changes: 2 additions & 1 deletion examples/procmacro/examples/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use procmacro_example::{concat, repeat};
use procmacro_example::{concat, dbg_and_swallow, repeat};

const FOO: &str = concat!(r#"Hello "# '🦊' "\nHere is a friend: \u{1F427}");
// const FOO: &str = concat!(::);
Expand All @@ -8,6 +8,7 @@ const BAR: &str = repeat!(3 * "నా పిల్లి లావుగా ఉ
const BAZ: &str = repeat!(0b101 * "🦀");
// const BAZ: &str = repeat!(3.5 * "🦀");

dbg_and_swallow!(16px);

fn main() {
println!("{}", FOO);
Expand Down
8 changes: 8 additions & 0 deletions examples/procmacro/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ use proc_macro::{Spacing, TokenStream, TokenTree};
use litrs::{Literal, IntegerLit, StringLit};


/// Debugging helper: prints every input token together with the result of
/// converting it to a `litrs::Literal`, then expands to nothing.
#[proc_macro]
pub fn dbg_and_swallow(input: TokenStream) -> TokenStream {
    input.into_iter().for_each(|token| {
        let parsed = Literal::try_from(&token);
        println!("{} -> {:#?}", token, parsed);
    });

    // Swallow the input: the macro invocation produces no tokens.
    TokenStream::new()
}

/// Concatenates all input string and char literals into a single output string
/// literal.
#[proc_macro]
Expand Down
48 changes: 27 additions & 21 deletions src/byte/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::{
Buffer, ParseError,
err::{perr, ParseErrorKind::*},
escape::unescape,
parse::check_suffix,
};


Expand All @@ -15,6 +16,8 @@ use crate::{
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ByteLit<B: Buffer> {
/// The raw input this literal was parsed from, including any suffix.
raw: B,
/// Start index of the suffix or `raw.len()` if there is no suffix.
start_suffix: usize,
/// The byte value that this literal represents.
value: u8,
}

Expand All @@ -29,15 +32,20 @@ impl<B: Buffer> ByteLit<B> {
return Err(perr(None, InvalidByteLiteralStart));
}

let value = parse_impl(&input)?;
Ok(Self { raw: input, value })
let (value, start_suffix) = parse_impl(&input)?;
Ok(Self { raw: input, value, start_suffix })
}

/// Returns the byte value that this literal represents.
pub fn value(&self) -> u8 {
self.value
}

/// Returns the suffix of this literal: everything in the raw input from
/// `start_suffix` onwards. This is `""` if the literal has no suffix.
pub fn suffix(&self) -> &str {
    let raw: &str = &self.raw;
    &raw[self.start_suffix..]
}

/// Returns the raw input that was passed to `parse`.
pub fn raw_input(&self) -> &str {
&self.raw
Expand All @@ -56,6 +64,7 @@ impl ByteLit<&str> {
pub fn to_owned(&self) -> ByteLit<String> {
ByteLit {
raw: self.raw.to_owned(),
start_suffix: self.start_suffix,
value: self.value,
}
}
Expand All @@ -69,32 +78,29 @@ impl<B: Buffer> fmt::Display for ByteLit<B> {

/// Precondition: must start with `b'`.
#[inline(never)]
pub(crate) fn parse_impl(input: &str) -> Result<u8, ParseError> {
if input.len() == 2 {
return Err(perr(None, UnterminatedByteLiteral));
}
if *input.as_bytes().last().unwrap() != b'\'' {
return Err(perr(None, UnterminatedByteLiteral));
}

let inner = &input[2..input.len() - 1];
let first = inner.as_bytes().get(0).ok_or(perr(None, EmptyByteLiteral))?;
pub(crate) fn parse_impl(input: &str) -> Result<(u8, usize), ParseError> {
let input_bytes = input.as_bytes();
let first = input_bytes.get(2).ok_or(perr(None, UnterminatedByteLiteral))?;
let (c, len) = match first {
b'\'' => return Err(perr(2, UnescapedSingleQuote)),
b'\n' | b'\t' | b'\r'
=> return Err(perr(2, UnescapedSpecialWhitespace)),

b'\\' => unescape::<u8>(inner, 2)?,
b'\'' if input_bytes.get(3) == Some(&b'\'') => return Err(perr(2, UnescapedSingleQuote)),
b'\'' => return Err(perr(None, EmptyByteLiteral)),
b'\n' | b'\t' | b'\r' => return Err(perr(2, UnescapedSpecialWhitespace)),
b'\\' => unescape::<u8>(&input[2..], 2)?,
other if other.is_ascii() => (*other, 1),
_ => return Err(perr(2, NonAsciiInByteLiteral)),
};
let rest = &inner[len..];

if !rest.is_empty() {
return Err(perr(len + 2..input.len() - 1, OverlongByteLiteral));
match input[2 + len..].find('\'') {
Some(0) => {}
Some(_) => return Err(perr(None, OverlongByteLiteral)),
None => return Err(perr(None, UnterminatedByteLiteral)),
}

Ok(c)
let start_suffix = 2 + len + 1;
let suffix = &input[start_suffix..];
check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?;

Ok((c, start_suffix))
}

#[cfg(test)]
Expand Down
34 changes: 24 additions & 10 deletions src/byte/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@ use crate::{ByteLit, Literal, test_util::{assert_parse_ok_eq, assert_roundtrip}}
// ===== Utility functions =======================================================================

macro_rules! check {
($lit:literal) => {
let input = stringify!($lit);
($lit:literal) => { check!($lit, stringify!($lit), "") };
($lit:literal, $input:expr, $suffix:literal) => {
let input = $input;
let expected = ByteLit {
raw: input,
start_suffix: input.len() - $suffix.len(),
value: $lit,
};

assert_parse_ok_eq(input, ByteLit::parse(input), expected.clone(), "ByteLit::parse");
assert_parse_ok_eq(input, Literal::parse(input), Literal::Byte(expected), "Literal::parse");
assert_eq!(ByteLit::parse(input).unwrap().value(), $lit);
let lit = ByteLit::parse(input).unwrap();
assert_eq!(lit.value(), $lit);
assert_eq!(lit.suffix(), $suffix);
assert_roundtrip(expected.to_owned(), input);
};
}
Expand Down Expand Up @@ -113,13 +117,23 @@ fn byte_escapes() {
check!(b'\xFF');
}

// Byte literals followed by a suffix. For each case, `check!` parses the raw
// input given as second argument and verifies that it yields the byte value of
// the first argument and the suffix given as third argument. The inputs are
// raw strings, so quotes and backslashes appear exactly as the parser sees them.
#[test]
fn suffixes() {
check!(b'a', r##"b'a'peter"##, "peter");
check!(b'#', r##"b'#'peter"##, "peter");
check!(b'\n', r##"b'\n'peter"##, "peter");
check!(b'\'', r##"b'\''peter"##, "peter");
check!(b'\"', r##"b'\"'peter"##, "peter");
check!(b'\xFF', r##"b'\xFF'peter"##, "peter");
}

#[test]
fn invald_escapes() {
assert_err!(ByteLit, r"b'\a'", UnknownEscape, 2..4);
assert_err!(ByteLit, r"b'\y'", UnknownEscape, 2..4);
assert_err!(ByteLit, r"b'\", UnterminatedByteLiteral, None);
assert_err!(ByteLit, r"b'\x'", UnterminatedEscape, 2..4);
assert_err!(ByteLit, r"b'\x1'", UnterminatedEscape, 2..5);
assert_err!(ByteLit, r"b'\", UnterminatedEscape, 2..3);
assert_err!(ByteLit, r"b'\x'", UnterminatedEscape, 2..5);
assert_err!(ByteLit, r"b'\x1'", InvalidXEscape, 2..6);
assert_err!(ByteLit, r"b'\xaj'", InvalidXEscape, 2..6);
assert_err!(ByteLit, r"b'\xjb'", InvalidXEscape, 2..6);
}
Expand Down Expand Up @@ -148,16 +162,16 @@ fn unicode_escape_not_allowed() {
#[test]
fn parse_err() {
assert_err!(ByteLit, r"b''", EmptyByteLiteral, None);
assert_err!(ByteLit, r"b' ''", OverlongByteLiteral, 3..4);
assert_err!(ByteLit, r"b' ''", UnexpectedChar, 4..5);

assert_err!(ByteLit, r"b'", UnterminatedByteLiteral, None);
assert_err!(ByteLit, r"b'a", UnterminatedByteLiteral, None);
assert_err!(ByteLit, r"b'\n", UnterminatedByteLiteral, None);
assert_err!(ByteLit, r"b'\x35", UnterminatedByteLiteral, None);

assert_err!(ByteLit, r"b'ab'", OverlongByteLiteral, 3..4);
assert_err!(ByteLit, r"b'a _'", OverlongByteLiteral, 3..5);
assert_err!(ByteLit, r"b'\n3'", OverlongByteLiteral, 4..5);
assert_err!(ByteLit, r"b'ab'", OverlongByteLiteral, None);
assert_err!(ByteLit, r"b'a _'", OverlongByteLiteral, None);
assert_err!(ByteLit, r"b'\n3'", OverlongByteLiteral, None);

assert_err!(ByteLit, r"", Empty, None);

Expand Down
47 changes: 25 additions & 22 deletions src/bytestr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ pub struct ByteStringLit<B: Buffer> {
/// The number of hash signs in case of a raw string literal, or `None` if
/// it's not a raw string literal.
num_hashes: Option<u32>,

/// Start index of the suffix or `raw.len()` if there is no suffix.
start_suffix: usize,
}

impl<B: Buffer> ByteStringLit<B> {
Expand All @@ -37,7 +40,8 @@ impl<B: Buffer> ByteStringLit<B> {
return Err(perr(None, InvalidByteStringLiteralStart));
}

Self::parse_impl(input)
let (value, num_hashes, start_suffix) = parse_impl(&input)?;
Ok(Self { raw: input, value, num_hashes, start_suffix })
}

/// Returns the string value this literal represents (where all escapes have
Expand All @@ -56,6 +60,11 @@ impl<B: Buffer> ByteStringLit<B> {
value.map(B::ByteCow::from).unwrap_or_else(|| raw.cut(inner_range).into_byte_cow())
}

/// Returns the suffix of this literal: everything in the raw input from
/// `start_suffix` onwards. This is `""` if the literal has no suffix.
pub fn suffix(&self) -> &str {
    let raw: &str = &self.raw;
    &raw[self.start_suffix..]
}

/// Returns whether this literal is a raw string literal (starting with
/// `r`).
pub fn is_raw_byte_string(&self) -> bool {
Expand All @@ -75,27 +84,8 @@ impl<B: Buffer> ByteStringLit<B> {
/// The range within `self.raw` that excludes the quotes and potential `r#`.
fn inner_range(&self) -> Range<usize> {
match self.num_hashes {
None => 2..self.raw.len() - 1,
Some(n) => 2 + n as usize + 1..self.raw.len() - n as usize - 1,
}
}

/// Precondition: input has to start with either `b"` or `br`.
pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> {
if input.starts_with(r"br") {
let (value, num_hashes) = scan_raw_string::<u8>(&input, 2)?;
Ok(Self {
raw: input,
value: value.map(|s| s.into_bytes()),
num_hashes: Some(num_hashes),
})
} else {
let value = unescape_string::<u8>(&input, 2)?.map(|s| s.into_bytes());
Ok(Self {
raw: input,
value,
num_hashes: None,
})
None => 2..self.start_suffix - 1,
Some(n) => 2 + n as usize + 1..self.start_suffix - n as usize - 1,
}
}
}
Expand All @@ -108,6 +98,7 @@ impl ByteStringLit<&str> {
raw: self.raw.to_owned(),
value: self.value,
num_hashes: self.num_hashes,
start_suffix: self.start_suffix,
}
}
}
Expand All @@ -119,5 +110,17 @@ impl<B: Buffer> fmt::Display for ByteStringLit<B> {
}


/// Parses a byte string literal, dispatching on whether it is a raw literal.
///
/// Precondition: input has to start with either `b"` or `br`.
///
/// On success returns `(value, num_hashes, start_suffix)`:
/// - `value`: the optional string produced by the scanner/unescaper,
///   converted to bytes,
/// - `num_hashes`: `Some(n)` for inputs starting with `br`, `None` otherwise,
/// - `start_suffix`: the start index of the suffix within `input`, as
///   reported by the scanner/unescaper.
#[inline(never)]
fn parse_impl(input: &str) -> Result<(Option<Vec<u8>>, Option<u32>, usize), ParseError> {
    // `input` is already a `&str`, so it is passed on directly instead of
    // borrowing it a second time.
    if input.starts_with("br") {
        let (value, num_hashes, start_suffix) = scan_raw_string::<u8>(input, 2)?;
        Ok((value.map(String::into_bytes), Some(num_hashes), start_suffix))
    } else {
        let (value, start_suffix) = unescape_string::<u8>(input, 2)?;
        Ok((value.map(String::into_bytes), None, start_suffix))
    }
}

#[cfg(test)]
mod tests;
31 changes: 22 additions & 9 deletions src/bytestr/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,25 @@ use crate::{Literal, ByteStringLit, test_util::{assert_parse_ok_eq, assert_round

macro_rules! check {
($lit:literal, $has_escapes:expr, $num_hashes:expr) => {
let input = stringify!($lit);
check!($lit, stringify!($lit), $has_escapes, $num_hashes, "")
};
($lit:literal, $input:expr, $has_escapes:expr, $num_hashes:expr, $suffix:literal) => {
let input = $input;
let expected = ByteStringLit {
raw: input,
value: if $has_escapes { Some($lit.to_vec()) } else { None },
num_hashes: $num_hashes,
start_suffix: input.len() - $suffix.len(),
};

assert_parse_ok_eq(
input, ByteStringLit::parse(input), expected.clone(), "ByteStringLit::parse");
assert_parse_ok_eq(
input, Literal::parse(input), Literal::ByteString(expected.clone()), "Literal::parse");
assert_eq!(ByteStringLit::parse(input).unwrap().value(), $lit);
assert_eq!(ByteStringLit::parse(input).unwrap().into_value().as_ref(), $lit);
let lit = ByteStringLit::parse(input).unwrap();
assert_eq!(lit.value(), $lit);
assert_eq!(lit.suffix(), $suffix);
assert_eq!(lit.into_value().as_ref(), $lit);
assert_roundtrip(expected.into_owned(), input);
};
}
Expand All @@ -43,6 +49,7 @@ fn special_whitespace() {
raw: &*input,
value: None,
num_hashes,
start_suffix: input.len(),
};
assert_parse_ok_eq(
&input, ByteStringLit::parse(&*input), expected.clone(), "ByteStringLit::parse");
Expand Down Expand Up @@ -147,17 +154,23 @@ fn raw_byte_string() {
check!(br#"cat\n\t\r\0\\x60\u{123}doggo"#, false, Some(1));
}

// Byte string literals followed by a suffix. The `check!` arguments are: the
// expected value, the raw input to parse, whether the parsed literal stores a
// separate value, the expected number of hashes (`None` for non-raw literals)
// and the expected suffix. The last two cases use a suffix containing a
// non-ASCII character.
#[test]
fn suffixes() {
check!(b"hello", r###"b"hello"suffix"###, false, None, "suffix");
check!(b"fox", r#"b"fox"peter"#, false, None, "peter");
check!(b"a\x0cb\\", r#"b"a\x0cb\\"_jürgen"#, true, None, "_jürgen");
check!(br"a\x0cb\\", r###"br#"a\x0cb\\"#_jürgen"###, false, Some(1), "_jürgen");
}

#[test]
fn parse_err() {
assert_err!(ByteStringLit, r#"b""#, UnterminatedString, None);
assert_err!(ByteStringLit, r#"b"cat"#, UnterminatedString, None);
assert_err!(ByteStringLit, r#"b"Jurgen"#, UnterminatedString, None);
assert_err!(ByteStringLit, r#"b"foo bar baz"#, UnterminatedString, None);

assert_err!(ByteStringLit, r#"b"fox"peter"#, UnexpectedChar, 6..11);
assert_err!(ByteStringLit, r#"b"fox"peter""#, UnexpectedChar, 6..12);
assert_err!(ByteStringLit, r#"b"fox"bar"#, UnexpectedChar, 6..9);
assert_err!(ByteStringLit, r###"br#"foo "# bar"#"###, UnexpectedChar, 10..16);
assert_err!(ByteStringLit, r#"b"fox"peter""#, InvalidSuffix, 6);
assert_err!(ByteStringLit, r###"br#"foo "# bar"#"###, UnexpectedChar, 10);

assert_err!(ByteStringLit, "b\"\r\"", IsolatedCr, 2);
assert_err!(ByteStringLit, "b\"fo\rx\"", IsolatedCr, 4);
Expand All @@ -179,10 +192,10 @@ fn non_ascii() {
}

#[test]
fn invald_escapes() {
fn invalid_escapes() {
assert_err!(ByteStringLit, r#"b"\a""#, UnknownEscape, 2..4);
assert_err!(ByteStringLit, r#"b"foo\y""#, UnknownEscape, 5..7);
assert_err!(ByteStringLit, r#"b"\"#, UnterminatedString, None);
assert_err!(ByteStringLit, r#"b"\"#, UnterminatedEscape, 2);
assert_err!(ByteStringLit, r#"b"\x""#, UnterminatedEscape, 2..4);
assert_err!(ByteStringLit, r#"b"foo\x1""#, UnterminatedEscape, 5..8);
assert_err!(ByteStringLit, r#"b" \xaj""#, InvalidXEscape, 3..7);
Expand Down
Loading