Skip to content

Commit c2d2fbd

Browse files
committed
temp
1 parent ac12797 commit c2d2fbd

File tree

3 files changed

+61
-14
lines changed

3 files changed

+61
-14
lines changed

Changelog.md

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
### Misc Changes
2323

2424
- [#379]: Added tests for attribute value normalization
25+
- [#379]: Fixed error messages for some variants of `EscapeError`
26+
which were incorrectly described as occurring during escaping (rather than unescaping).
2527
- [#650]: Change the type of `Event::PI` to a new dedicated `BytesPI` type.
2628

2729
[#379]: https://github.com/tafia/quick-xml/pull/379
@@ -293,11 +295,6 @@ serde >= 1.0.181
293295
- [#565]: Correctly set minimum required version of tokio dependency to 1.10
294296
- [#565]: Fix compilation error when build with serde <1.0.139
295297

296-
297-
### New Tests
298-
299-
- [#379]: Added tests for attribute value normalization
300-
301298
[externally tagged]: https://serde.rs/enum-representations.html#externally-tagged
302299
[#490]: https://github.com/tafia/quick-xml/pull/490
303300
[#510]: https://github.com/tafia/quick-xml/issues/510

src/escape.rs

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,24 +27,30 @@ pub enum EscapeError {
2727
InvalidDecimal(char),
2828
/// Not a valid unicode codepoint
2929
InvalidCodepoint(u32),
30+
/// The recursion limit was reached while attempting to unescape XML entities,
31+
/// or normalize an attribute value. This could indicate an entity loop.
32+
///
33+
/// Limiting recursion prevents Denial-of-Service type attacks
34+
/// such as the "billion laughs" [attack](https://en.wikipedia.org/wiki/Billion_laughs_attack).
35+
ReachedRecursionLimit(Range<usize>),
3036
}
3137

3238
impl std::fmt::Display for EscapeError {
3339
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
3440
match self {
3541
EscapeError::EntityWithNull(e) => write!(
3642
f,
37-
"Error while escaping character at range {:?}: Null character entity not allowed",
43+
"Error while unescaping character at range {:?}: Null character entity not allowed",
3844
e
3945
),
4046
EscapeError::UnrecognizedSymbol(rge, res) => write!(
4147
f,
42-
"Error while escaping character at range {:?}: Unrecognized escape symbol: {:?}",
48+
"Error while unescaping character at range {:?}: Unrecognized escape symbol: {:?}",
4349
rge, res
4450
),
4551
EscapeError::UnterminatedEntity(e) => write!(
4652
f,
47-
"Error while escaping character at range {:?}: Cannot find ';' after '&'",
53+
"Error while unescaping character at range {:?}: Cannot find ';' after '&'",
4854
e
4955
),
5056
EscapeError::TooLongHexadecimal => write!(f, "Cannot convert hexadecimal to utf8"),
@@ -54,6 +60,10 @@ impl std::fmt::Display for EscapeError {
5460
EscapeError::TooLongDecimal => write!(f, "Cannot convert decimal to utf8"),
5561
EscapeError::InvalidDecimal(e) => write!(f, "'{}' is not a valid decimal character", e),
5662
EscapeError::InvalidCodepoint(n) => write!(f, "'{}' is not a valid codepoint", n),
63+
EscapeError::ReachedRecursionLimit(e) => write!(
64+
f,
65+
"Error while unescaping entity at range {:?}: Recursion limit reached"
66+
),
5767
}
5868
}
5969
}
@@ -310,7 +320,7 @@ where
310320
let mut iter = bytes.iter();
311321

312322
if let Some(i) = iter.position(is_normalization_char) {
313-
let mut normalized = String::new();
323+
let mut normalized = String::with_capacity(value.len());
314324
let pos = normalize_step(
315325
&mut normalized,
316326
&mut iter,
@@ -405,7 +415,7 @@ where
405415
let pat = &input[start..end];
406416
// 1. For a character reference, append the referenced character
407417
// to the normalized value.
408-
if pat.starts_with('#') {
418+
if let Some(entity) = pat.strip_prefix('#') {
409419
let entity = &pat[1..]; // starts after the #
410420
let codepoint = parse_number(entity, start..end)?;
411421
normalized.push_str(codepoint.encode_utf8(&mut [0u8; 4]));
@@ -432,6 +442,7 @@ where
432442
Ok(index + 1) // +1 - skip character
433443
}
434444

445+
// SAFETY: We call normalize_step only when is_normalization_char() return true
435446
_ => unreachable!("Only '\\t', '\\n', '\\r', ' ', and '&' are possible here"),
436447
}
437448
}
@@ -2160,14 +2171,14 @@ mod normalization {
21602171
fn unclosed_entity() {
21612172
assert_eq!(
21622173
normalize_attribute_value("string with unclosed &entity reference", |_| {
2163-
// 0 ^ = 21 ^ = 38
2174+
// 0 ^ = 21 ^ = 38
21642175
Some("replacement")
21652176
}),
21662177
Err(EscapeError::UnterminatedEntity(21..38))
21672178
);
21682179
assert_eq!(
21692180
normalize_attribute_value("string with unclosed &#32 (character) reference", |_| {
2170-
// 0 ^ = 21 ^ = 47
2181+
// 0 ^ = 21 ^ = 47
21712182
None
21722183
}),
21732184
Err(EscapeError::UnterminatedEntity(21..47))
@@ -2178,7 +2189,7 @@ mod normalization {
21782189
fn unknown_entity() {
21792190
assert_eq!(
21802191
normalize_attribute_value("string with unknown &entity; reference", |_| { None }),
2181-
// 0 ^ ^ = 21..27
2192+
// 0 ^ 21 ^ 27
21822193
Err(EscapeError::UnrecognizedSymbol(
21832194
21..27,
21842195
"entity".to_string()

src/events/attributes.rs

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
//! Provides an iterator over attributes key/value pairs
44
55
use crate::errors::Result as XmlResult;
6-
use crate::escape::{self, escape, resolve_predefined_entity, unescape_with};
76
#[cfg(any(doc, not(feature = "encoding")))]
87
use crate::escape::EscapeError;
8+
use crate::escape::{self, escape, resolve_predefined_entity, unescape_with};
99
use crate::name::QName;
1010
use crate::reader::{is_whitespace, Reader};
1111
use crate::utils::{write_byte_string, write_cow_string, Bytes};
@@ -967,6 +967,45 @@ mod xml {
967967
);
968968
}
969969

970+
/// An obvious loop should be detected immediately
971+
#[test]
972+
#[ignore]
973+
fn test_entity_loop() {
974+
let raw_value = "&d;".as_bytes();
975+
let attr = Attribute::from(("foo".as_bytes(), raw_value));
976+
fn custom_resolver(ent: &str) -> Option<&'static str> {
977+
match ent {
978+
"d" => Some("&d;"),
979+
}
980+
}
981+
assert_eq!(
982+
attr.normalized_value_with(&custom_resolver),
983+
Err(EscapeError::ReachedRecursionLimit(1..2))
984+
);
985+
}
986+
987+
/// A more complex entity loop should hit the recursion limit.
988+
#[test]
989+
#[ignore]
990+
fn test_recursion_limit() {
991+
let raw_value = "&a;".as_bytes();
992+
let attr = Attribute::from(("foo".as_bytes(), raw_value));
993+
fn custom_resolver(ent: &str) -> Option<&'static str> {
994+
match ent {
995+
"a" => Some("&b;"),
996+
"b" => Some("&c;"),
997+
"c" => Some("&d;"),
998+
"d" => Some("&e;"),
999+
"e" => Some("&f;"),
1000+
"f" => Some("&a;"),
1001+
}
1002+
}
1003+
assert_eq!(
1004+
attr.normalized_value_with(&custom_resolver),
1005+
Err(EscapeError::ReachedRecursionLimit(1..2))
1006+
);
1007+
}
1008+
9701009
#[test]
9711010
fn test_char_references() {
9721011
// character literal references are substituted without being replaced by spaces

0 commit comments

Comments
 (0)