@@ -2003,7 +2003,8 @@ use crate::{
2003
2003
de:: map:: ElementMapAccess ,
2004
2004
encoding:: Decoder ,
2005
2005
errors:: Error ,
2006
- events:: { BytesCData , BytesEnd , BytesStart , BytesText , Event } ,
2006
+ escape:: { parse_number, EscapeError } ,
2007
+ events:: { BytesCData , BytesEnd , BytesRef , BytesStart , BytesText , Event } ,
2007
2008
name:: QName ,
2008
2009
reader:: Reader ,
2009
2010
} ;
@@ -2103,6 +2104,8 @@ pub enum PayloadEvent<'a> {
2103
2104
CData ( BytesCData < ' a > ) ,
2104
2105
/// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
2105
2106
DocType ( BytesText < ' a > ) ,
2107
+ /// Reference `&ref;` in the textual data.
2108
+ GeneralRef ( BytesRef < ' a > ) ,
2106
2109
/// End of XML document.
2107
2110
Eof ,
2108
2111
}
@@ -2117,6 +2120,7 @@ impl<'a> PayloadEvent<'a> {
2117
2120
PayloadEvent :: Text ( e) => PayloadEvent :: Text ( e. into_owned ( ) ) ,
2118
2121
PayloadEvent :: CData ( e) => PayloadEvent :: CData ( e. into_owned ( ) ) ,
2119
2122
PayloadEvent :: DocType ( e) => PayloadEvent :: DocType ( e. into_owned ( ) ) ,
2123
+ PayloadEvent :: GeneralRef ( e) => PayloadEvent :: GeneralRef ( e. into_owned ( ) ) ,
2120
2124
PayloadEvent :: Eof => PayloadEvent :: Eof ,
2121
2125
}
2122
2126
}
@@ -2171,7 +2175,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2171
2175
// If next event is a text, CDATA or general reference, we should not trim trailing spaces
2172
2176
!matches ! (
2173
2177
self . lookahead,
2174
- Ok ( PayloadEvent :: Text ( _) ) | Ok ( PayloadEvent :: CData ( _) )
2178
+ Ok ( PayloadEvent :: Text ( _) ) | Ok ( PayloadEvent :: CData ( _) | PayloadEvent :: GeneralRef ( _ ) )
2175
2179
)
2176
2180
}
2177
2181
@@ -2196,9 +2200,10 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2196
2200
result. to_mut ( ) . push_str ( & e. decode ( ) ?) ;
2197
2201
}
2198
2202
PayloadEvent :: CData ( e) => result. to_mut ( ) . push_str ( & e. decode ( ) ?) ,
2203
+ PayloadEvent :: GeneralRef ( e) => self . resolve_reference ( result. to_mut ( ) , e) ?,
2199
2204
2200
- // SAFETY: current_event_is_last_text checks that event is Text or CData
2201
- _ => unreachable ! ( "Only `Text` and `CData` events can come here" ) ,
2205
+ // SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef
2206
+ _ => unreachable ! ( "Only `Text`, `CData` or `GeneralRef ` events can come here" ) ,
2202
2207
}
2203
2208
}
2204
2209
Ok ( DeEvent :: Text ( Text { text : result } ) )
@@ -2224,11 +2229,32 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2224
2229
. map_err ( |err| DeError :: Custom ( format ! ( "cannot parse DTD: {}" , err) ) ) ?;
2225
2230
continue ;
2226
2231
}
2232
+ PayloadEvent :: GeneralRef ( e) => {
2233
+ let mut text = String :: new ( ) ;
2234
+ self . resolve_reference ( & mut text, e) ?;
2235
+ self . drain_text ( text. into ( ) )
2236
+ }
2227
2237
PayloadEvent :: Eof => Ok ( DeEvent :: Eof ) ,
2228
2238
} ;
2229
2239
}
2230
2240
}
2231
2241
2242
+ fn resolve_reference ( & mut self , result : & mut String , event : BytesRef ) -> Result < ( ) , DeError > {
2243
+ let len = event. len ( ) ;
2244
+ let reference = self . decoder ( ) . decode ( & event) ?;
2245
+
2246
+ if let Some ( num) = reference. strip_prefix ( '#' ) {
2247
+ let codepoint = parse_number ( num) . map_err ( EscapeError :: InvalidCharRef ) ?;
2248
+ result. push_str ( codepoint. encode_utf8 ( & mut [ 0u8 ; 4 ] ) ) ;
2249
+ return Ok ( ( ) ) ;
2250
+ }
2251
+ if let Some ( value) = self . entity_resolver . resolve ( reference. as_ref ( ) ) {
2252
+ result. push_str ( value) ;
2253
+ return Ok ( ( ) ) ;
2254
+ }
2255
+ Err ( EscapeError :: UnrecognizedEntity ( 0 ..len, reference. to_string ( ) ) . into ( ) )
2256
+ }
2257
+
2232
2258
#[ inline]
2233
2259
fn read_to_end ( & mut self , name : QName ) -> Result < ( ) , DeError > {
2234
2260
match self . lookahead {
@@ -3027,7 +3053,7 @@ impl StartTrimmer {
3027
3053
Event :: End ( e) => ( PayloadEvent :: End ( e) , true ) ,
3028
3054
Event :: Eof => ( PayloadEvent :: Eof , true ) ,
3029
3055
3030
- // Do not trim next text event after Text or CDATA event
3056
+ // Do not trim next text event after Text, CDATA or reference event
3031
3057
Event :: CData ( e) => ( PayloadEvent :: CData ( e) , false ) ,
3032
3058
Event :: Text ( mut e) => {
3033
3059
// If event is empty after trimming, skip it
@@ -3036,6 +3062,7 @@ impl StartTrimmer {
3036
3062
}
3037
3063
( PayloadEvent :: Text ( e) , false )
3038
3064
}
3065
+ Event :: GeneralRef ( e) => ( PayloadEvent :: GeneralRef ( e) , false ) ,
3039
3066
3040
3067
_ => return None ,
3041
3068
} ;
0 commit comments