Skip to content

Commit db000c1

Browse files
committed
Rework entity resolution in serde Deserializer
Fixed (18): serde-de (9): borrow::escaped::element borrow::escaped::top_level resolve::resolve_custom_entity trivial::text::byte_buf trivial::text::bytes trivial::text::string::field trivial::text::string::naked trivial::text::string::text xml_schema_lists::element::text::string serde-migrated (1): test_parse_string serde-se (5): with_root::char_amp with_root::char_gt with_root::char_lt with_root::str_escaped with_root::tuple --doc (3): src\de\resolver.rs - de::resolver::EntityResolver (line 13)
1 parent 1a4f096 commit db000c1

File tree

1 file changed

+32
-5
lines changed

1 file changed

+32
-5
lines changed

src/de/mod.rs

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2003,7 +2003,8 @@ use crate::{
20032003
de::map::ElementMapAccess,
20042004
encoding::Decoder,
20052005
errors::Error,
2006-
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
2006+
escape::{parse_number, EscapeError},
2007+
events::{BytesCData, BytesEnd, BytesRef, BytesStart, BytesText, Event},
20072008
name::QName,
20082009
reader::Reader,
20092010
};
@@ -2103,6 +2104,8 @@ pub enum PayloadEvent<'a> {
21032104
CData(BytesCData<'a>),
21042105
/// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
21052106
DocType(BytesText<'a>),
2107+
/// Reference `&ref;` in the textual data.
2108+
GeneralRef(BytesRef<'a>),
21062109
/// End of XML document.
21072110
Eof,
21082111
}
@@ -2117,6 +2120,7 @@ impl<'a> PayloadEvent<'a> {
21172120
PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()),
21182121
PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()),
21192122
PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()),
2123+
PayloadEvent::GeneralRef(e) => PayloadEvent::GeneralRef(e.into_owned()),
21202124
PayloadEvent::Eof => PayloadEvent::Eof,
21212125
}
21222126
}
@@ -2171,7 +2175,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
21712175
// If next event is a text, CDATA or general reference, we should not trim trailing spaces
21722176
!matches!(
21732177
self.lookahead,
2174-
Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
2178+
Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_) | PayloadEvent::GeneralRef(_))
21752179
)
21762180
}
21772181

@@ -2196,9 +2200,10 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
21962200
result.to_mut().push_str(&e.decode()?);
21972201
}
21982202
PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
2203+
PayloadEvent::GeneralRef(e) => self.resolve_reference(result.to_mut(), e)?,
21992204

2200-
// SAFETY: current_event_is_last_text checks that event is Text or CData
2201-
_ => unreachable!("Only `Text` and `CData` events can come here"),
2205+
// SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef
2206+
_ => unreachable!("Only `Text`, `CData` or `GeneralRef` events can come here"),
22022207
}
22032208
}
22042209
Ok(DeEvent::Text(Text { text: result }))
@@ -2224,11 +2229,32 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22242229
.map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
22252230
continue;
22262231
}
2232+
PayloadEvent::GeneralRef(e) => {
2233+
let mut text = String::new();
2234+
self.resolve_reference(&mut text, e)?;
2235+
self.drain_text(text.into())
2236+
}
22272237
PayloadEvent::Eof => Ok(DeEvent::Eof),
22282238
};
22292239
}
22302240
}
22312241

2242+
fn resolve_reference(&mut self, result: &mut String, event: BytesRef) -> Result<(), DeError> {
2243+
let len = event.len();
2244+
let reference = self.decoder().decode(&event)?;
2245+
2246+
if let Some(num) = reference.strip_prefix('#') {
2247+
let codepoint = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
2248+
result.push_str(codepoint.encode_utf8(&mut [0u8; 4]));
2249+
return Ok(());
2250+
}
2251+
if let Some(value) = self.entity_resolver.resolve(reference.as_ref()) {
2252+
result.push_str(value);
2253+
return Ok(());
2254+
}
2255+
Err(EscapeError::UnrecognizedEntity(0..len, reference.to_string()).into())
2256+
}
2257+
22322258
#[inline]
22332259
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
22342260
match self.lookahead {
@@ -3027,7 +3053,7 @@ impl StartTrimmer {
30273053
Event::End(e) => (PayloadEvent::End(e), true),
30283054
Event::Eof => (PayloadEvent::Eof, true),
30293055

3030-
// Do not trim next text event after Text or CDATA event
3056+
// Do not trim next text event after Text, CDATA or reference event
30313057
Event::CData(e) => (PayloadEvent::CData(e), false),
30323058
Event::Text(mut e) => {
30333059
// If event is empty after trimming, skip it
@@ -3036,6 +3062,7 @@ impl StartTrimmer {
30363062
}
30373063
(PayloadEvent::Text(e), false)
30383064
}
3065+
Event::GeneralRef(e) => (PayloadEvent::GeneralRef(e), false),
30393066

30403067
_ => return None,
30413068
};

0 commit comments

Comments
 (0)