Commit fd53983

fix(parser): Better short base64 detection
Previously, we bailed out if the string was too short (<90 characters) and contained none of the non-alphanumeric base64 characters ('/' or '+'). What we ignored were the padding bytes. We now also key off padding bytes ('=') to detect that a string is in fact base64 encoded. As with the other cases, there can be false positives, but those strings should show up elsewhere or the compiler will fail. This was called out in #485.
1 parent: bd5048d · commit: fd53983
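
For context, the heuristic can be sketched in isolation. The helper below is illustrative only (the names are not the crate's internals); it mirrors the CHUNK arithmetic from the diff to compute how many '=' bytes the parser will require after a run of base64-alphabet characters, and shows why the sha512 digest in the new test is now recognized even though it is shorter than 90 characters and contains no '/' or '+':

// Standalone sketch of the padding arithmetic added in this commit.
// `expected_padding_len` is an illustrative name, not part of the crate.
fn expected_padding_len(data_len: usize) -> usize {
    const CHUNK: usize = 4;
    let padding_len = CHUNK - data_len % CHUNK;
    if padding_len == CHUNK {
        0
    } else {
        padding_len
    }
}

fn main() {
    // A 64-byte SHA-512 digest encodes to 86 base64 data characters plus "==",
    // so the parser now demands (and finds) exactly two '=' bytes and skips it.
    assert_eq!(expected_padding_len(86), 2);
    // A run whose length is a multiple of 4 needs no padding; if it is also
    // under 90 characters and free of '/' and '+', the parser still bails out
    // and lets it be tokenized normally.
    assert_eq!(expected_padding_len(88), 0);
}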

1 file changed
crates/typos/src/tokens.rs

Lines changed: 25 additions & 7 deletions
@@ -407,7 +407,16 @@ mod parser {
         <T as nom::InputIter>::Item: AsChar + Copy,
     {
         let (padding, captured) = take_while1(is_base64_digit)(input.clone())?;
+
+        const CHUNK: usize = 4;
+        let padding_offset = input.offset(&padding);
+        let mut padding_len = CHUNK - padding_offset % CHUNK;
+        if padding_len == CHUNK {
+            padding_len = 0;
+        }
+
         if captured.input_len() < 90
+            && padding_len == 0
             && captured
                 .iter_elements()
                 .all(|c| !['/', '+'].contains(&c.as_char()))
@@ -418,14 +427,8 @@ mod parser {
             )));
         }
 
-        const CHUNK: usize = 4;
-        let padding_offset = input.offset(&padding);
-        let mut padding_len = CHUNK - padding_offset % CHUNK;
-        if padding_len == CHUNK {
-            padding_len = 0;
-        }
-
         let (after, _) = take_while_m_n(padding_len, padding_len, is_base64_padding)(padding)?;
+
         let after_offset = input.offset(&after);
         Ok(input.take_split(after_offset))
     }
@@ -1207,6 +1210,21 @@ mod test {
         assert_eq!(expected, actual);
     }
 
+    #[test]
+    fn tokenize_ignore_base64_case_3() {
+        let parser = TokenizerBuilder::new().build();
+
+        let input = r#"       "integrity": "sha512-hCmlUAIlUiav8Xdqw3Io4LcpA1DOt7h3LSTAC4G6JGHFFaWzI6qvFt9oilvl8BmkbBRX1IhM90ZAmpk68zccQA==","#;
+        let expected: Vec<Identifier> = vec![
+            Identifier::new_unchecked("integrity", Case::None, 8),
+            Identifier::new_unchecked("sha512", Case::None, 21),
+        ];
+        let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
+        assert_eq!(expected, actual);
+        let actual: Vec<_> = parser.parse_str(input).collect();
+        assert_eq!(expected, actual);
+    }
+
     #[test]
     fn tokenize_ignore_email() {
         let parser = TokenizerBuilder::new().build();
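
To try the change outside the test suite, the snippet below reuses only calls that appear in the diff (TokenizerBuilder, parse_str, and the Debug formatting the tests rely on); the `use` path is an assumption about where the builder is exported:

// Hypothetical driver: tokenize the "integrity" line from the new test case.
use typos::tokens::TokenizerBuilder;

fn main() {
    let parser = TokenizerBuilder::new().build();
    let input = r#"       "integrity": "sha512-hCmlUAIlUiav8Xdqw3Io4LcpA1DOt7h3LSTAC4G6JGHFFaWzI6qvFt9oilvl8BmkbBRX1IhM90ZAmpk68zccQA==","#;
    for identifier in parser.parse_str(input) {
        // With this fix, only `integrity` and `sha512` are reported; the
        // digest is skipped because its length forces "==" padding.
        println!("{:?}", identifier);
    }
}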
