@@ -27,24 +27,30 @@ pub enum EscapeError {
27
27
InvalidDecimal ( char ) ,
28
28
/// Not a valid unicode codepoint
29
29
InvalidCodepoint ( u32 ) ,
30
+ /// The recursion limit was reached while attempting to unescape XML entities,
31
+ /// or normalize an attribute value. This could indicate an entity loop.
32
+ ///
33
+ /// Limiting recursion prevents Denial-of-Service type attacks
34
+ /// such as the "billion laughs" [attack](https://en.wikipedia.org/wiki/Billion_laughs_attack).
35
+ ReachedRecursionLimit ( Range < usize > ) ,
30
36
}
31
37
32
38
impl std:: fmt:: Display for EscapeError {
33
39
fn fmt ( & self , f : & mut std:: fmt:: Formatter ) -> std:: fmt:: Result {
34
40
match self {
35
41
EscapeError :: EntityWithNull ( e) => write ! (
36
42
f,
37
- "Error while escaping character at range {:?}: Null character entity not allowed" ,
43
+ "Error while unescaping character at range {:?}: Null character entity not allowed" ,
38
44
e
39
45
) ,
40
46
EscapeError :: UnrecognizedSymbol ( rge, res) => write ! (
41
47
f,
42
- "Error while escaping character at range {:?}: Unrecognized escape symbol: {:?}" ,
48
+ "Error while unescaping character at range {:?}: Unrecognized escape symbol: {:?}" ,
43
49
rge, res
44
50
) ,
45
51
EscapeError :: UnterminatedEntity ( e) => write ! (
46
52
f,
47
- "Error while escaping character at range {:?}: Cannot find ';' after '&'" ,
53
+ "Error while unescaping character at range {:?}: Cannot find ';' after '&'" ,
48
54
e
49
55
) ,
50
56
EscapeError :: TooLongHexadecimal => write ! ( f, "Cannot convert hexadecimal to utf8" ) ,
@@ -54,6 +60,10 @@ impl std::fmt::Display for EscapeError {
54
60
EscapeError :: TooLongDecimal => write ! ( f, "Cannot convert decimal to utf8" ) ,
55
61
EscapeError :: InvalidDecimal ( e) => write ! ( f, "'{}' is not a valid decimal character" , e) ,
56
62
EscapeError :: InvalidCodepoint ( n) => write ! ( f, "'{}' is not a valid codepoint" , n) ,
63
+ EscapeError :: ReachedRecursionLimit ( e) => write ! (
64
+ f,
65
+ "Error while unescaping entity at range {:?}: Recursion limit reached" , e
66
+ ) ,
57
67
}
58
68
}
59
69
}
@@ -310,7 +320,7 @@ where
310
320
let mut iter = bytes. iter ( ) ;
311
321
312
322
if let Some ( i) = iter. position ( is_normalization_char) {
313
- let mut normalized = String :: new ( ) ;
323
+ let mut normalized = String :: with_capacity ( value . len ( ) ) ;
314
324
let pos = normalize_step (
315
325
& mut normalized,
316
326
& mut iter,
@@ -405,7 +415,7 @@ where
405
415
let pat = & input[ start..end] ;
406
416
// 1. For a character reference, append the referenced character
407
417
// to the normalized value.
408
- if pat. starts_with ( '#' ) {
418
+ if let Some ( entity ) = pat. strip_prefix ( '#' ) {
409
419
let entity = & pat[ 1 ..] ; // starts after the #
410
420
let codepoint = parse_number ( entity, start..end) ?;
411
421
normalized. push_str ( codepoint. encode_utf8 ( & mut [ 0u8 ; 4 ] ) ) ;
@@ -432,6 +442,7 @@ where
432
442
Ok ( index + 1 ) // +1 - skip character
433
443
}
434
444
445
+ // SAFETY: normalize_step is only called when is_normalization_char() returns true
435
446
_ => unreachable ! ( "Only '\\ t', '\\ n', '\\ r', ' ', and '&' are possible here" ) ,
436
447
}
437
448
}
@@ -2160,14 +2171,14 @@ mod normalization {
2160
2171
fn unclosed_entity ( ) {
2161
2172
assert_eq ! (
2162
2173
normalize_attribute_value( "string with unclosed &entity reference" , |_| {
2163
- // 0 ^ = 21 ^ = 38
2174
+ // 0 ^ = 21 ^ = 38
2164
2175
Some ( "replacement" )
2165
2176
} ) ,
2166
2177
Err ( EscapeError :: UnterminatedEntity ( 21 ..38 ) )
2167
2178
) ;
2168
2179
assert_eq ! (
2169
2180
normalize_attribute_value( "string with unclosed &#32 (character) reference" , |_| {
2170
- // 0 ^ = 21 ^ = 47
2181
+ // 0 ^ = 21 ^ = 47
2171
2182
None
2172
2183
} ) ,
2173
2184
Err ( EscapeError :: UnterminatedEntity ( 21 ..47 ) )
@@ -2178,7 +2189,7 @@ mod normalization {
2178
2189
fn unknown_entity ( ) {
2179
2190
assert_eq ! (
2180
2191
normalize_attribute_value( "string with unknown &entity; reference" , |_| { None } ) ,
2181
- // 0 ^ ^ = 21.. 27
2192
+ // 0 ^ 21 ^ 27
2182
2193
Err ( EscapeError :: UnrecognizedSymbol (
2183
2194
21 ..27 ,
2184
2195
"entity" . to_string( )
0 commit comments