@@ -2005,7 +2005,8 @@ use crate::{
2005
2005
de:: map:: ElementMapAccess ,
2006
2006
encoding:: Decoder ,
2007
2007
errors:: Error ,
2008
- events:: { BytesCData , BytesEnd , BytesStart , BytesText , Event } ,
2008
+ escape:: { parse_number, EscapeError } ,
2009
+ events:: { BytesCData , BytesEnd , BytesRef , BytesStart , BytesText , Event } ,
2009
2010
name:: QName ,
2010
2011
reader:: Reader ,
2011
2012
} ;
@@ -2105,6 +2106,8 @@ pub enum PayloadEvent<'a> {
2105
2106
CData ( BytesCData < ' a > ) ,
2106
2107
/// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
2107
2108
DocType ( BytesText < ' a > ) ,
2109
+ /// Reference `&ref;` in the textual data.
2110
+ GeneralRef ( BytesRef < ' a > ) ,
2108
2111
/// End of XML document.
2109
2112
Eof ,
2110
2113
}
@@ -2119,6 +2122,7 @@ impl<'a> PayloadEvent<'a> {
2119
2122
PayloadEvent :: Text ( e) => PayloadEvent :: Text ( e. into_owned ( ) ) ,
2120
2123
PayloadEvent :: CData ( e) => PayloadEvent :: CData ( e. into_owned ( ) ) ,
2121
2124
PayloadEvent :: DocType ( e) => PayloadEvent :: DocType ( e. into_owned ( ) ) ,
2125
+ PayloadEvent :: GeneralRef ( e) => PayloadEvent :: GeneralRef ( e. into_owned ( ) ) ,
2122
2126
PayloadEvent :: Eof => PayloadEvent :: Eof ,
2123
2127
}
2124
2128
}
@@ -2173,7 +2177,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2173
2177
// If next event is a text, CDATA or a general reference, we should not trim trailing spaces
2174
2178
!matches ! (
2175
2179
self . lookahead,
2176
- Ok ( PayloadEvent :: Text ( _) ) | Ok ( PayloadEvent :: CData ( _) )
2180
+ Ok ( PayloadEvent :: Text ( _) ) | Ok ( PayloadEvent :: CData ( _) | PayloadEvent :: GeneralRef ( _ ) )
2177
2181
)
2178
2182
}
2179
2183
@@ -2198,9 +2202,10 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2198
2202
result. to_mut ( ) . push_str ( & e. decode ( ) ?) ;
2199
2203
}
2200
2204
PayloadEvent :: CData ( e) => result. to_mut ( ) . push_str ( & e. decode ( ) ?) ,
2205
+ PayloadEvent :: GeneralRef ( e) => self . resolve_reference ( result. to_mut ( ) , e) ?,
2201
2206
2202
- // SAFETY: current_event_is_last_text checks that event is Text or CData
2203
- _ => unreachable ! ( "Only `Text` and `CData` events can come here" ) ,
2207
+ // SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef
2208
+ _ => unreachable ! ( "Only `Text`, `CData` or `GeneralRef ` events can come here" ) ,
2204
2209
}
2205
2210
}
2206
2211
Ok ( DeEvent :: Text ( Text { text : result } ) )
@@ -2226,11 +2231,32 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2226
2231
. map_err ( |err| DeError :: Custom ( format ! ( "cannot parse DTD: {}" , err) ) ) ?;
2227
2232
continue ;
2228
2233
}
2234
+ PayloadEvent :: GeneralRef ( e) => {
2235
+ let mut text = String :: new ( ) ;
2236
+ self . resolve_reference ( & mut text, e) ?;
2237
+ self . drain_text ( text. into ( ) )
2238
+ }
2229
2239
PayloadEvent :: Eof => Ok ( DeEvent :: Eof ) ,
2230
2240
} ;
2231
2241
}
2232
2242
}
2233
2243
2244
+ fn resolve_reference ( & mut self , result : & mut String , event : BytesRef ) -> Result < ( ) , DeError > {
2245
+ let len = event. len ( ) ;
2246
+ let reference = self . decoder ( ) . decode ( & event) ?;
2247
+
2248
+ if let Some ( num) = reference. strip_prefix ( '#' ) {
2249
+ let codepoint = parse_number ( num) . map_err ( EscapeError :: InvalidCharRef ) ?;
2250
+ result. push_str ( codepoint. encode_utf8 ( & mut [ 0u8 ; 4 ] ) ) ;
2251
+ return Ok ( ( ) ) ;
2252
+ }
2253
+ if let Some ( value) = self . entity_resolver . resolve ( reference. as_ref ( ) ) {
2254
+ result. push_str ( value) ;
2255
+ return Ok ( ( ) ) ;
2256
+ }
2257
+ Err ( EscapeError :: UnrecognizedEntity ( 0 ..len, reference. to_string ( ) ) . into ( ) )
2258
+ }
2259
+
2234
2260
#[ inline]
2235
2261
fn read_to_end ( & mut self , name : QName ) -> Result < ( ) , DeError > {
2236
2262
match self . lookahead {
@@ -3029,7 +3055,7 @@ impl StartTrimmer {
3029
3055
Event :: End ( e) => ( PayloadEvent :: End ( e) , true ) ,
3030
3056
Event :: Eof => ( PayloadEvent :: Eof , true ) ,
3031
3057
3032
- // Do not trim next text event after Text or CDATA event
3058
+ // Do not trim next text event after Text, CDATA or reference event
3033
3059
Event :: CData ( e) => ( PayloadEvent :: CData ( e) , false ) ,
3034
3060
Event :: Text ( mut e) => {
3035
3061
// If event is empty after trimming, skip it
@@ -3038,6 +3064,7 @@ impl StartTrimmer {
3038
3064
}
3039
3065
( PayloadEvent :: Text ( e) , false )
3040
3066
}
3067
+ Event :: GeneralRef ( e) => ( PayloadEvent :: GeneralRef ( e) , false ) ,
3041
3068
3042
3069
_ => return None ,
3043
3070
} ;
0 commit comments