Skip to content

Commit 91bef64

Browse files
committed
Replace BytesText::unescape and unescape_with by decode
Text events produced by the Reader cannot contain escaped data anymore; all such data is now represented by Event::GeneralRef
1 parent 7e94386 commit 91bef64

15 files changed

+28
-48
lines changed

Changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ XML specification. See the updated `custom_entities` example!
4444
`Vec<String>` in `$value` fields. They cannot be deserialized back with the same result
4545
- [#827]: Make `escape` and its variants take an `impl Into<Cow<str>>` argument and implement
4646
`From<(&'a str, Cow<'a, str>)>` on `Attribute`
47+
- [#766]: `BytesText::unescape` and `BytesText::unescape_with` replaced by `BytesText::decode`.
48+
Now Text events do not contain escaped parts; those are reported as `Event::GeneralRef` instead.
4749

4850
[#227]: https://github.com/tafia/quick-xml/issues/227
4951
[#655]: https://github.com/tafia/quick-xml/issues/655

benches/macrobenches.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
5454
}
5555
}
5656
Event::Text(e) => {
57-
criterion::black_box(e.unescape()?);
57+
criterion::black_box(e.decode()?);
5858
}
5959
Event::CData(e) => {
6060
criterion::black_box(e.into_inner());
@@ -79,7 +79,7 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
7979
}
8080
}
8181
Event::Text(e) => {
82-
criterion::black_box(e.unescape()?);
82+
criterion::black_box(e.decode()?);
8383
}
8484
Event::CData(e) => {
8585
criterion::black_box(e.into_inner());
@@ -105,7 +105,7 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
105105
}
106106
}
107107
(resolved_ns, Event::Text(e)) => {
108-
criterion::black_box(e.unescape()?);
108+
criterion::black_box(e.decode()?);
109109
criterion::black_box(resolved_ns);
110110
}
111111
(resolved_ns, Event::CData(e)) => {
@@ -133,7 +133,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
133133
}
134134
}
135135
(resolved_ns, Event::Text(e)) => {
136-
criterion::black_box(e.unescape()?);
136+
criterion::black_box(e.decode()?);
137137
criterion::black_box(resolved_ns);
138138
}
139139
(resolved_ns, Event::CData(e)) => {

benches/microbenches.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ fn one_event(c: &mut Criterion) {
145145
config.trim_text(true);
146146
config.check_end_names = false;
147147
match r.read_event() {
148-
Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(),
148+
Ok(Event::Comment(e)) => nbtxt += e.decode().unwrap().len(),
149149
something_else => panic!("Did not expect {:?}", something_else),
150150
};
151151

fuzz/fuzz_targets/fuzz_target_1.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ where
4343
| Ok(Event::Comment(ref e))
4444
| Ok(Event::DocType(ref e)) => {
4545
debug_format!(e);
46-
if let Err(err) = e.unescape() {
46+
if let Err(err) = e.decode() {
4747
debug_format!(err);
4848
break;
4949
}

src/de/mod.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2195,9 +2195,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
21952195
// FIXME: Actually, we should trim after decoding text, but now we trim before
21962196
e.inplace_trim_end();
21972197
}
2198-
result
2199-
.to_mut()
2200-
.push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
2198+
result.to_mut().push_str(&e.decode()?);
22012199
}
22022200
PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
22032201

@@ -2219,7 +2217,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22192217
// FIXME: Actually, we should trim after decoding text, but now we trim before
22202218
continue;
22212219
}
2222-
self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2220+
self.drain_text(e.decode()?)
22232221
}
22242222
PayloadEvent::CData(e) => self.drain_text(e.decode()?),
22252223
PayloadEvent::DocType(e) => {

src/events/mod.rs

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,7 @@ use std::str::from_utf8;
4747

4848
use crate::encoding::{Decoder, EncodingError};
4949
use crate::errors::{Error, IllFormedError};
50-
use crate::escape::{
51-
escape, minimal_escape, parse_number, partial_escape, resolve_predefined_entity, unescape_with,
52-
EscapeError,
53-
};
50+
use crate::escape::{escape, minimal_escape, parse_number, partial_escape, EscapeError};
5451
use crate::name::{LocalName, QName};
5552
#[cfg(feature = "serialize")]
5653
use crate::utils::CowRef;
@@ -580,29 +577,12 @@ impl<'a> BytesText<'a> {
580577
}
581578
}
582579

583-
/// Decodes then unescapes the content of the event.
584-
///
585-
/// This will allocate if the value contains any escape sequences or in
586-
/// non-UTF-8 encoding.
587-
pub fn unescape(&self) -> Result<Cow<'a, str>, Error> {
588-
self.unescape_with(resolve_predefined_entity)
589-
}
590-
591-
/// Decodes then unescapes the content of the event with custom entities.
580+
/// Decodes the content of the event.
592581
///
593582
/// This will allocate if the value contains any escape sequences or in
594583
/// non-UTF-8 encoding.
595-
pub fn unescape_with<'entity>(
596-
&self,
597-
resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
598-
) -> Result<Cow<'a, str>, Error> {
599-
let decoded = self.decoder.decode_cow(&self.content)?;
600-
601-
match unescape_with(&decoded, resolve_entity)? {
602-
// Because result is borrowed, no replacements was done and we can use original string
603-
Cow::Borrowed(_) => Ok(decoded),
604-
Cow::Owned(s) => Ok(s.into()),
605-
}
584+
pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
585+
self.decoder.decode_cow(&self.content)
606586
}
607587

608588
/// Removes leading XML whitespace bytes from text content.

src/reader/async_tokio.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
103103
/// loop {
104104
/// match reader.read_event_into_async(&mut buf).await {
105105
/// Ok(Event::Start(_)) => count += 1,
106-
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
106+
/// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
107107
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
108108
/// Ok(Event::Eof) => break,
109109
/// _ => (),
@@ -237,7 +237,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
237237
/// }
238238
/// }
239239
/// Event::Text(e) => {
240-
/// txt.push(e.unescape().unwrap().into_owned())
240+
/// txt.push(e.decode().unwrap().into_owned())
241241
/// }
242242
/// Event::Eof => break,
243243
/// _ => (),
@@ -373,7 +373,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
373373
/// (_, Event::Start(_)) => unreachable!(),
374374
///
375375
/// (_, Event::Text(e)) => {
376-
/// txt.push(e.unescape().unwrap().into_owned())
376+
/// txt.push(e.decode().unwrap().into_owned())
377377
/// }
378378
/// (_, Event::Eof) => break,
379379
/// _ => (),

src/reader/buffered_reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ impl<R: BufRead> Reader<R> {
372372
/// loop {
373373
/// match reader.read_event_into(&mut buf) {
374374
/// Ok(Event::Start(_)) => count += 1,
375-
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
375+
/// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
376376
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
377377
/// Ok(Event::Eof) => break,
378378
/// _ => (),

src/reader/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,7 @@ where
718718
/// _ => (),
719719
/// }
720720
/// }
721-
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
721+
/// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
722722
///
723723
/// // There are several other `Event`s we do not consider here
724724
/// _ => (),

src/reader/ns_reader.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ impl<R: BufRead> NsReader<R> {
419419
/// }
420420
/// }
421421
/// Event::Text(e) => {
422-
/// txt.push(e.unescape().unwrap().into_owned())
422+
/// txt.push(e.decode().unwrap().into_owned())
423423
/// }
424424
/// Event::Eof => break,
425425
/// _ => (),
@@ -478,7 +478,7 @@ impl<R: BufRead> NsReader<R> {
478478
/// (_, Event::Start(_)) => unreachable!(),
479479
///
480480
/// (_, Event::Text(e)) => {
481-
/// txt.push(e.unescape().unwrap().into_owned())
481+
/// txt.push(e.decode().unwrap().into_owned())
482482
/// }
483483
/// (_, Event::Eof) => break,
484484
/// _ => (),
@@ -664,7 +664,7 @@ impl<'i> NsReader<&'i [u8]> {
664664
/// }
665665
/// }
666666
/// Event::Text(e) => {
667-
/// txt.push(e.unescape().unwrap().into_owned())
667+
/// txt.push(e.decode().unwrap().into_owned())
668668
/// }
669669
/// Event::Eof => break,
670670
/// _ => (),
@@ -726,7 +726,7 @@ impl<'i> NsReader<&'i [u8]> {
726726
/// (_, Event::Start(_)) => unreachable!(),
727727
///
728728
/// (_, Event::Text(e)) => {
729-
/// txt.push(e.unescape().unwrap().into_owned())
729+
/// txt.push(e.decode().unwrap().into_owned())
730730
/// }
731731
/// (_, Event::Eof) => break,
732732
/// _ => (),

src/reader/slice_reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ impl<'a> Reader<&'a [u8]> {
6262
/// loop {
6363
/// match reader.read_event().unwrap() {
6464
/// Event::Start(e) => count += 1,
65-
/// Event::Text(e) => txt.push(e.unescape().unwrap().into_owned()),
65+
/// Event::Text(e) => txt.push(e.decode().unwrap().into_owned()),
6666
/// Event::Eof => break,
6767
/// _ => (),
6868
/// }

tests/encodings.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ fn test_koi8_r_encoding() {
3737
loop {
3838
match r.read_event_into(&mut buf) {
3939
Ok(Text(e)) => {
40-
e.unescape().unwrap();
40+
e.decode().unwrap();
4141
}
4242
Ok(Eof) => break,
4343
_ => (),

tests/fuzzing.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ fn fuzz_101() {
3838
}
3939
}
4040
Ok(Event::Text(e)) => {
41-
if e.unescape().is_err() {
41+
if e.decode().is_err() {
4242
break;
4343
}
4444
}

tests/reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ fn test_escaped_content() {
172172
"content unexpected: expecting 'test', got '{:?}'",
173173
from_utf8(&e)
174174
);
175-
match e.unescape() {
175+
match e.decode() {
176176
Ok(c) => assert_eq!(c, "test"),
177177
Err(e) => panic!(
178178
"cannot escape content at position {}: {:?}",

tests/roundtrip.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ fn reescape_text() {
236236
match reader.read_event().unwrap() {
237237
Eof => break,
238238
Text(e) => {
239-
let t = e.unescape().unwrap();
239+
let t = e.decode().unwrap();
240240
assert!(writer.write_event(Text(BytesText::new(&t))).is_ok());
241241
}
242242
e => assert!(writer.write_event(e).is_ok()),

0 commit comments

Comments
 (0)