Skip to content

Commit b500f48

Browse files
committed
Rework entity resolution in serde Deserializer
Fixed (18): serde-de (9): borrow::escaped::element borrow::escaped::top_level resolve::resolve_custom_entity trivial::text::byte_buf trivial::text::bytes trivial::text::string::field trivial::text::string::naked trivial::text::string::text xml_schema_lists::element::text::string serde-migrated (1): test_parse_string serde-se (5): with_root::char_amp with_root::char_gt with_root::char_lt with_root::str_escaped with_root::tuple --doc (3): src\de\resolver.rs - de::resolver::EntityResolver (line 13)
1 parent 91bef64 commit b500f48

File tree

1 file changed

+32
-5
lines changed

1 file changed

+32
-5
lines changed

src/de/mod.rs

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2005,7 +2005,8 @@ use crate::{
20052005
de::map::ElementMapAccess,
20062006
encoding::Decoder,
20072007
errors::Error,
2008-
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
2008+
escape::{parse_number, EscapeError},
2009+
events::{BytesCData, BytesEnd, BytesRef, BytesStart, BytesText, Event},
20092010
name::QName,
20102011
reader::Reader,
20112012
};
@@ -2105,6 +2106,8 @@ pub enum PayloadEvent<'a> {
21052106
CData(BytesCData<'a>),
21062107
/// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
21072108
DocType(BytesText<'a>),
2109+
/// Reference `&ref;` in the textual data.
2110+
GeneralRef(BytesRef<'a>),
21082111
/// End of XML document.
21092112
Eof,
21102113
}
@@ -2119,6 +2122,7 @@ impl<'a> PayloadEvent<'a> {
21192122
PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()),
21202123
PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()),
21212124
PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()),
2125+
PayloadEvent::GeneralRef(e) => PayloadEvent::GeneralRef(e.into_owned()),
21222126
PayloadEvent::Eof => PayloadEvent::Eof,
21232127
}
21242128
}
@@ -2173,7 +2177,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
21732177
// If next event is a text, CDATA or general reference, we should not trim trailing spaces
21742178
!matches!(
21752179
self.lookahead,
2176-
Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
2180+
Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_) | PayloadEvent::GeneralRef(_))
21772181
)
21782182
}
21792183

@@ -2198,9 +2202,10 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
21982202
result.to_mut().push_str(&e.decode()?);
21992203
}
22002204
PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
2205+
PayloadEvent::GeneralRef(e) => self.resolve_reference(result.to_mut(), e)?,
22012206

2202-
// SAFETY: current_event_is_last_text checks that event is Text or CData
2203-
_ => unreachable!("Only `Text` and `CData` events can come here"),
2207+
// SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef
2208+
_ => unreachable!("Only `Text`, `CData` or `GeneralRef` events can come here"),
22042209
}
22052210
}
22062211
Ok(DeEvent::Text(Text { text: result }))
@@ -2226,11 +2231,32 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22262231
.map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
22272232
continue;
22282233
}
2234+
PayloadEvent::GeneralRef(e) => {
2235+
let mut text = String::new();
2236+
self.resolve_reference(&mut text, e)?;
2237+
self.drain_text(text.into())
2238+
}
22292239
PayloadEvent::Eof => Ok(DeEvent::Eof),
22302240
};
22312241
}
22322242
}
22332243

2244+
fn resolve_reference(&mut self, result: &mut String, event: BytesRef) -> Result<(), DeError> {
2245+
let len = event.len();
2246+
let reference = self.decoder().decode(&event)?;
2247+
2248+
if let Some(num) = reference.strip_prefix('#') {
2249+
let codepoint = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
2250+
result.push_str(codepoint.encode_utf8(&mut [0u8; 4]));
2251+
return Ok(());
2252+
}
2253+
if let Some(value) = self.entity_resolver.resolve(reference.as_ref()) {
2254+
result.push_str(value);
2255+
return Ok(());
2256+
}
2257+
Err(EscapeError::UnrecognizedEntity(0..len, reference.to_string()).into())
2258+
}
2259+
22342260
#[inline]
22352261
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
22362262
match self.lookahead {
@@ -3029,7 +3055,7 @@ impl StartTrimmer {
30293055
Event::End(e) => (PayloadEvent::End(e), true),
30303056
Event::Eof => (PayloadEvent::Eof, true),
30313057

3032-
// Do not trim next text event after Text or CDATA event
3058+
// Do not trim next text event after Text, CDATA or reference event
30333059
Event::CData(e) => (PayloadEvent::CData(e), false),
30343060
Event::Text(mut e) => {
30353061
// If event is empty after trimming, skip it
@@ -3038,6 +3064,7 @@ impl StartTrimmer {
30383064
}
30393065
(PayloadEvent::Text(e), false)
30403066
}
3067+
Event::GeneralRef(e) => (PayloadEvent::GeneralRef(e), false),
30413068

30423069
_ => return None,
30433070
};

0 commit comments

Comments
 (0)