Skip to content

Change all event and Attributes constructors to accept strings #428

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jul 24, 2022
24 changes: 17 additions & 7 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,12 @@

- [#415]: Changed custom entity unescaping API to accept closures rather than a mapping of entity to
replacement text. This avoids needing to allocate a map and provides the user with more flexibility.
- [#415]: Renamed many functions following the pattern `unescape_and_decode*` to `decode_and_unescape*`
to better communicate their function. Renamed functions following the pattern `*_with_custom_entities`
to `decode_and_unescape_with` to be more consistent across the API.
- [#415]: `BytesText::escaped()` renamed to `BytesText::escape()`, `BytesText::unescaped()` renamed to
`BytesText::unescape()`, `BytesText::unescaped_with()` renamed to `BytesText::unescape_with()`,
`Attribute::escaped_value()` renamed to `Attribute::escape_value()`, and `Attribute::escaped_value_with()`
renamed to `Attribute::escape_value_with()` for consistency across the API.
- [#415]: Renamed functions for consistency across the API:
|Old Name |New Name
|------------------------|-------------------------------------------
|`*_with_custom_entities`|`*_with`
|`BytesText::unescaped()`|`BytesText::unescape()`
|`Attribute::unescaped_*`|`Attribute::unescape_*`

- [#416]: `BytesStart::to_borrowed` renamed to `BytesStart::borrow`, the same method
added to all events
Expand All @@ -150,6 +149,16 @@
- [#423]: Removed `BytesText::from_plain` because it internally did escaping of a byte array,
but since now escaping works on strings. Use `BytesText::from_plain_str` instead

- [#428]: Removed `BytesText::escaped()`. Use `.as_ref()` provided by `Deref` impl instead.
- [#428]: Removed `BytesText::from_escaped()`. Use constructors from strings instead,
because writer anyway works in UTF-8 only
- [#428]: Removed `BytesCData::new()`. Use constructors from strings instead,
because writer anyway works in UTF-8 only
- [#428]: Changed the event and `Attributes` constructors to accept a `&str` slices instead of `&[u8]` slices.
Handmade events has always been assumed to store their content UTF-8 encoded.
- [#428]: Removed `Decoder` parameter from `_and_decode` versions of functions for
`BytesText` (remember, that those functions was renamed in #415).

### New Tests

- [#9]: Added tests for incorrect nested tags in input
Expand Down Expand Up @@ -180,6 +189,7 @@
[#418]: https://github.com/tafia/quick-xml/pull/418
[#421]: https://github.com/tafia/quick-xml/pull/421
[#423]: https://github.com/tafia/quick-xml/pull/423
[#428]: https://github.com/tafia/quick-xml/pull/428
[#434]: https://github.com/tafia/quick-xml/pull/434
[#437]: https://github.com/tafia/quick-xml/pull/437

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ loop {
_ => (),
}
}
Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),

// There are several other `Event`s we do not consider here
_ => (),
Expand All @@ -80,7 +80,7 @@ loop {

// crates a new element ... alternatively we could reuse `e` by calling
// `e.into_owned()`
let mut elem = BytesStart::owned_name(b"my_elem".to_vec());
let mut elem = BytesStart::owned_name("my_elem");

// collect existing attributes
elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
Expand All @@ -92,7 +92,7 @@ loop {
assert!(writer.write_event(Event::Start(elem)).is_ok());
},
Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
assert!(writer.write_event(Event::End(BytesEnd::borrowed("my_elem"))).is_ok());
},
Ok(Event::Eof) => break,
// we can either move or borrow the event to write, depending on your use-case
Expand Down
2 changes: 1 addition & 1 deletion benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> {
}
}
Event::Text(e) => {
criterion::black_box(e.decode_and_unescape(&r)?);
criterion::black_box(e.unescape()?);
}
Event::CData(e) => {
criterion::black_box(e.into_inner());
Expand Down
2 changes: 1 addition & 1 deletion benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ fn one_event(c: &mut Criterion) {
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
Ok(Event::Comment(e)) => nbtxt += e.decode_and_unescape(&r).unwrap().len(),
Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};

Expand Down
4 changes: 1 addition & 3 deletions examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
Ok(Event::Text(ref e)) => {
println!(
"text value: {}",
e.decode_and_unescape_with(&reader, |ent| custom_entities
.get(ent)
.map(|s| s.as_str()))
e.unescape_with(|ent| custom_entities.get(ent).map(|s| s.as_str()))
.unwrap()
);
}
Expand Down
132 changes: 58 additions & 74 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -612,16 +612,15 @@ where
unescape: bool,
allow_start: bool,
) -> Result<Cow<'de, str>, DeError> {
let decoder = self.reader.decoder();
match self.next()? {
DeEvent::Text(e) => Ok(e.decode(decoder, unescape)?),
DeEvent::CData(e) => Ok(e.decode(decoder)?),
DeEvent::Text(e) => Ok(e.decode(unescape)?),
DeEvent::CData(e) => Ok(e.decode()?),
DeEvent::Start(e) if allow_start => {
// allow one nested level
let inner = self.next()?;
let t = match inner {
DeEvent::Text(t) => t.decode(decoder, unescape)?,
DeEvent::CData(t) => t.decode(decoder)?,
DeEvent::Text(t) => t.decode(unescape)?,
DeEvent::CData(t) => t.decode()?,
DeEvent::Start(s) => {
return Err(DeError::UnexpectedStart(s.name().as_ref().to_owned()))
}
Expand Down Expand Up @@ -1042,13 +1041,10 @@ mod tests {
assert_eq!(de.read, vec![]);
assert_eq!(de.write, vec![]);

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"root"))
);
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));
assert_eq!(
de.peek().unwrap(),
&Start(BytesStart::borrowed_name(b"inner"))
&Start(BytesStart::borrowed_name("inner"))
);

// Should skip first <inner> tree
Expand All @@ -1057,11 +1053,11 @@ mod tests {
assert_eq!(
de.write,
vec![
Start(BytesStart::borrowed_name(b"inner")),
Start(BytesStart::borrowed_name("inner")),
Text(BytesText::from_escaped_str("text")),
Start(BytesStart::borrowed_name(b"inner")),
End(BytesEnd::borrowed(b"inner")),
End(BytesEnd::borrowed(b"inner")),
Start(BytesStart::borrowed_name("inner")),
End(BytesEnd::borrowed("inner")),
End(BytesEnd::borrowed("inner")),
]
);

Expand All @@ -1073,11 +1069,8 @@ mod tests {
// </inner>
// <target/>
// </root>
assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"next"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"next")));
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("next")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("next")));

// We finish writing. Next call to `next()` should start replay that messages:
//
Expand All @@ -1094,27 +1087,27 @@ mod tests {
assert_eq!(
de.read,
vec![
Start(BytesStart::borrowed_name(b"inner")),
Start(BytesStart::borrowed_name("inner")),
Text(BytesText::from_escaped_str("text")),
Start(BytesStart::borrowed_name(b"inner")),
End(BytesEnd::borrowed(b"inner")),
End(BytesEnd::borrowed(b"inner")),
Start(BytesStart::borrowed_name("inner")),
End(BytesEnd::borrowed("inner")),
End(BytesEnd::borrowed("inner")),
]
);
assert_eq!(de.write, vec![]);
assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"inner"))
Start(BytesStart::borrowed_name("inner"))
);

// Skip `#text` node and consume <inner/> after it
de.skip().unwrap();
assert_eq!(
de.read,
vec![
Start(BytesStart::borrowed_name(b"inner")),
End(BytesEnd::borrowed(b"inner")),
End(BytesEnd::borrowed(b"inner")),
Start(BytesStart::borrowed_name("inner")),
End(BytesEnd::borrowed("inner")),
End(BytesEnd::borrowed("inner")),
]
);
assert_eq!(
Expand All @@ -1128,9 +1121,9 @@ mod tests {

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"inner"))
Start(BytesStart::borrowed_name("inner"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("inner")));

// We finish writing. Next call to `next()` should start replay messages:
//
Expand All @@ -1146,21 +1139,21 @@ mod tests {
de.read,
vec![
Text(BytesText::from_escaped_str("text")),
End(BytesEnd::borrowed(b"inner")),
End(BytesEnd::borrowed("inner")),
]
);
assert_eq!(de.write, vec![]);
assert_eq!(
de.next().unwrap(),
Text(BytesText::from_escaped_str("text"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("inner")));
assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"target"))
Start(BytesStart::borrowed_name("target"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"target")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("target")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
}

/// Checks that `read_to_end()` behaves correctly after `skip()`
Expand All @@ -1184,22 +1177,19 @@ mod tests {
assert_eq!(de.read, vec![]);
assert_eq!(de.write, vec![]);

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"root"))
);
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));

// Skip the <skip> tree
de.skip().unwrap();
assert_eq!(de.read, vec![]);
assert_eq!(
de.write,
vec![
Start(BytesStart::borrowed_name(b"skip")),
Start(BytesStart::borrowed_name("skip")),
Text(BytesText::from_escaped_str("text")),
Start(BytesStart::borrowed_name(b"skip")),
End(BytesEnd::borrowed(b"skip")),
End(BytesEnd::borrowed(b"skip")),
Start(BytesStart::borrowed_name("skip")),
End(BytesEnd::borrowed("skip")),
End(BytesEnd::borrowed("skip")),
]
);

Expand All @@ -1212,18 +1202,18 @@ mod tests {
// </root>
assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"target"))
Start(BytesStart::borrowed_name("target"))
);
de.read_to_end(QName(b"target")).unwrap();
assert_eq!(de.read, vec![]);
assert_eq!(
de.write,
vec![
Start(BytesStart::borrowed_name(b"skip")),
Start(BytesStart::borrowed_name("skip")),
Text(BytesText::from_escaped_str("text")),
Start(BytesStart::borrowed_name(b"skip")),
End(BytesEnd::borrowed(b"skip")),
End(BytesEnd::borrowed(b"skip")),
Start(BytesStart::borrowed_name("skip")),
End(BytesEnd::borrowed("skip")),
End(BytesEnd::borrowed("skip")),
]
);

Expand All @@ -1241,22 +1231,19 @@ mod tests {
assert_eq!(
de.read,
vec![
Start(BytesStart::borrowed_name(b"skip")),
Start(BytesStart::borrowed_name("skip")),
Text(BytesText::from_escaped_str("text")),
Start(BytesStart::borrowed_name(b"skip")),
End(BytesEnd::borrowed(b"skip")),
End(BytesEnd::borrowed(b"skip")),
Start(BytesStart::borrowed_name("skip")),
End(BytesEnd::borrowed("skip")),
End(BytesEnd::borrowed("skip")),
]
);
assert_eq!(de.write, vec![]);

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"skip"))
);
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("skip")));
de.read_to_end(QName(b"skip")).unwrap();

assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
}

/// Checks that limiting buffer size works correctly
Expand Down Expand Up @@ -1306,34 +1293,31 @@ mod tests {
"#,
);

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"root"))
);
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed(br#"tag a="1""#, 3))
Start(BytesStart::borrowed(r#"tag a="1""#, 3))
);
assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed(br#"tag a="2""#, 3))
Start(BytesStart::borrowed(r#"tag a="2""#, 3))
);
assert_eq!(
de.next().unwrap(),
CData(BytesCData::from_str("cdata content"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"tag")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("tag")));

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed(b"self-closed", 11))
Start(BytesStart::borrowed_name("self-closed"))
);
assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());

assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
assert_eq!(de.next().unwrap(), Eof);
}

Expand Down Expand Up @@ -1402,17 +1386,17 @@ mod tests {
events,
vec![
Start(BytesStart::borrowed(
br#"item name="hello" source="world.rs""#,
r#"item name="hello" source="world.rs""#,
4
)),
Text(BytesText::from_escaped_str("Some text")),
End(BytesEnd::borrowed(b"item")),
Start(BytesStart::borrowed(b"item2", 5)),
End(BytesEnd::borrowed(b"item2")),
Start(BytesStart::borrowed(b"item3", 5)),
End(BytesEnd::borrowed(b"item3")),
Start(BytesStart::borrowed(br#"item4 value="world" "#, 5)),
End(BytesEnd::borrowed(b"item4")),
End(BytesEnd::borrowed("item")),
Start(BytesStart::borrowed("item2", 5)),
End(BytesEnd::borrowed("item2")),
Start(BytesStart::borrowed("item3", 5)),
End(BytesEnd::borrowed("item3")),
Start(BytesStart::borrowed(r#"item4 value="world" "#, 5)),
End(BytesEnd::borrowed("item4")),
]
)
}
Expand All @@ -1432,7 +1416,7 @@ mod tests {

assert_eq!(
reader.next().unwrap(),
DeEvent::Start(BytesStart::borrowed(b"item ", 4))
DeEvent::Start(BytesStart::borrowed("item ", 4))
);
reader.read_to_end(QName(b"item")).unwrap();
assert_eq!(reader.next().unwrap(), DeEvent::Eof);
Expand Down
Loading