Skip to content

Use borrowing read_event instead of buffering read_event_into where possible #426

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 22 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,8 @@ use quick_xml::events::Event;

let xml = r#"<tag1 att1 = "test">
<tag2><!--Test comment-->Test</tag2>
<tag2>
Test 2
</tag2>
</tag1>"#;

<tag2>Test 2</tag2>
</tag1>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);

Expand All @@ -43,20 +40,24 @@ loop {
// when the input is a &str or a &[u8], we don't actually need to use another
// buffer, we could directly call `reader.read_event()`
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => {
match e.name() {
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
// exits the loop when reaching end of file
Ok(Event::Eof) => break,

Ok(Event::Start(e)) => {
match e.name().as_ref() {
b"tag1" => println!("attributes values: {:?}",
e.attributes().map(|a| a.unwrap().value).collect::<Vec<_>>()),
e.attributes().map(|a| a.unwrap().value)
.collect::<Vec<_>>()),
b"tag2" => count += 1,
_ => (),
}
},
Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).unwrap().into_owned()),
Ok(Event::Eof) => break, // exits the loop when reaching end of file
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
_ => (), // There are several other `Event`s we do not consider here
}
}
Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),

// There are several other `Event`s we do not consider here
_ => (),
}
// if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
buf.clear();
}
Expand All @@ -65,24 +66,21 @@ loop {
### Writer

```rust
use quick_xml::Writer;
use quick_xml::Reader;
use quick_xml::events::{Event, BytesEnd, BytesStart};
use quick_xml::{Reader, Writer};
use std::io::Cursor;
use std::iter;

let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
let mut writer = Writer::new(Cursor::new(Vec::new()));
let mut buf = Vec::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) if e.name() == b"this_tag" => {
match reader.read_event() {
Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => {

// creates a new element ... alternatively we could reuse `e` by calling
// `e.into_owned()`
let mut elem = BytesStart::owned(b"my_elem".to_vec(), "my_elem".len());
let mut elem = BytesStart::owned_name(b"my_elem".to_vec());

// collect existing attributes
elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
Expand All @@ -93,15 +91,14 @@ loop {
// writes the event to the writer
assert!(writer.write_event(Event::Start(elem)).is_ok());
},
Ok(Event::End(ref e)) if e.name() == b"this_tag" => {
Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
},
Ok(Event::Eof) => break,
// you can use either `e` or `&e` if you don't want to move the event
Ok(e) => assert!(writer.write_event(&e).is_ok()),
// we can either move or borrow the event to write, depending on your use-case
Ok(e) => assert!(writer.write_event(e).is_ok()),
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
}
buf.clear();
}

let result = writer.into_inner().into_inner();
Expand Down
3 changes: 1 addition & 2 deletions examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut reader = Reader::from_str(DATA);
reader.trim_text(true);

let mut buf = Vec::new();
let mut custom_entities: HashMap<String, String> = HashMap::new();
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;

loop {
match reader.read_event_into(&mut buf) {
match reader.read_event() {
Ok(Event::DocType(ref e)) => {
for cap in entity_re.captures_iter(&e) {
custom_entities.insert(
Expand Down
104 changes: 5 additions & 99 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
//! High performance XML reader/writer.
//!
//! ## Description
//! # Description
//!
//! quick-xml contains two modes of operation:
//!
//! A streaming API based on the [StAX] model. This is suited for larger XML documents which
//! cannot be completely read into memory at once.
//!
//! The user has to expicitely _ask_ for the next XML event, similar
//! The user has to explicitly _ask_ for the next XML event, similar
//! to a database cursor.
//! This is achieved by the following two structs:
//!
Expand All @@ -20,104 +20,10 @@
//! Furthermore, quick-xml also contains optional [Serde] support to directly serialize and deserialize from
//! structs, without having to deal with the XML events.
//!
//! ## Examples
//! # Examples
//!
//! ### Reader
//!
//! ```rust
//! use quick_xml::Reader;
//! use quick_xml::events::Event;
//!
//! let xml = r#"<tag1 att1 = "test">
//! <tag2><!--Test comment-->Test</tag2>
//! <tag2>
//! Test 2
//! </tag2>
//! </tag1>"#;
//!
//! let mut reader = Reader::from_str(xml);
//! reader.trim_text(true);
//!
//! let mut count = 0;
//! let mut txt = Vec::new();
//! let mut buf = Vec::new();
//!
//! // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
//! loop {
//! match reader.read_event_into(&mut buf) {
//! // for triggering namespaced events, use this instead:
//! // match reader.read_namespaced_event(&mut buf) {
//! Ok(Event::Start(ref e)) => {
//! // for namespaced:
//! // Ok((ref namespace_value, Event::Start(ref e)))
//! match e.name().as_ref() {
//! b"tag1" => println!("attributes values: {:?}",
//! e.attributes().map(|a| a.unwrap().value)
//! .collect::<Vec<_>>()),
//! b"tag2" => count += 1,
//! _ => (),
//! }
//! },
//! // unescape and decode the text event using the reader encoding
//! Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
//! Ok(Event::Eof) => break, // exits the loop when reaching end of file
//! Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
//! _ => (), // There are several other `Event`s we do not consider here
//! }
//!
//! // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
//! buf.clear();
//! }
//! ```
//!
//! ### Writer
//!
//! ```rust
//! # use pretty_assertions::assert_eq;
//! use quick_xml::Writer;
//! use quick_xml::events::{Event, BytesEnd, BytesStart};
//! use quick_xml::Reader;
//! use std::io::Cursor;
//! use std::iter;
//!
//! let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
//! let mut reader = Reader::from_str(xml);
//! reader.trim_text(true);
//! let mut writer = Writer::new(Cursor::new(Vec::new()));
//! let mut buf = Vec::new();
//! loop {
//! match reader.read_event_into(&mut buf) {
//! Ok(Event::Start(ref e)) if e.name().as_ref() == b"this_tag" => {
//!
//! // crates a new element ... alternatively we could reuse `e` by calling
//! // `e.into_owned()`
//! let mut elem = BytesStart::owned(b"my_elem".to_vec(), "my_elem".len());
//!
//! // collect existing attributes
//! elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
//!
//! // copy existing attributes, adds a new my-key="some value" attribute
//! elem.push_attribute(("my-key", "some value"));
//!
//! // writes the event to the writer
//! assert!(writer.write_event(Event::Start(elem)).is_ok());
//! },
//! Ok(Event::End(ref e)) if e.name().as_ref() == b"this_tag" => {
//! assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
//! },
//! Ok(Event::Eof) => break,
//! Ok(e) => assert!(writer.write_event(e).is_ok()),
//! // or using the buffer
//! // Ok(e) => assert!(writer.write(&buf).is_ok()),
//! Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
//! }
//! buf.clear();
//! }
//!
//! let result = writer.into_inner().into_inner();
//! let expected = r#"<my_elem k1="v1" k2="v2" my-key="some value"><child>text</child></my_elem>"#;
//! assert_eq!(result, expected.as_bytes());
//! ```
//! - For a reading example see [`Reader`]
//! - For a writing example see [`Writer`]
//!
//! # Features
//!
Expand Down
21 changes: 16 additions & 5 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,28 +118,39 @@ impl EncodingRef {
/// let xml = r#"<tag1 att1 = "test">
/// <tag2><!--Test comment-->Test</tag2>
/// <tag2>Test 2</tag2>
/// </tag1>"#;
/// </tag1>"#;
/// let mut reader = Reader::from_str(xml);
/// reader.trim_text(true);
///
/// let mut count = 0;
/// let mut txt = Vec::new();
/// let mut buf = Vec::new();
///
/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
/// loop {
/// // NOTE: this is the generic case when we don't know about the input BufRead.
/// // when the input is a &str or a &[u8], we don't actually need to use another
/// // buffer, we could directly call `reader.read_event()`
/// match reader.read_event_into(&mut buf) {
/// Ok(Event::Start(ref e)) => {
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
/// // exits the loop when reaching end of file
/// Ok(Event::Eof) => break,
///
/// Ok(Event::Start(e)) => {
/// match e.name().as_ref() {
/// b"tag1" => println!("attributes values: {:?}",
/// e.attributes().map(|a| a.unwrap().value)
/// .collect::<Vec<_>>()),
/// b"tag2" => count += 1,
/// _ => (),
/// }
/// },
/// }
/// Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
/// Ok(Event::Eof) => break,
///
/// // There are several other `Event`s we do not consider here
/// _ => (),
/// }
/// // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
/// buf.clear();
/// }
/// ```
Expand Down
18 changes: 8 additions & 10 deletions src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,23 @@ use std::io::Write;
///
/// # Examples
///
/// ```rust
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::{Reader, Writer};
/// use quick_xml::events::{Event, BytesEnd, BytesStart};
/// use quick_xml::{Reader, Writer};
/// use std::io::Cursor;
///
/// let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
/// let mut reader = Reader::from_str(xml);
/// reader.trim_text(true);
/// let mut writer = Writer::new(Cursor::new(Vec::new()));
/// let mut buf = Vec::new();
/// loop {
/// match reader.read_event_into(&mut buf) {
/// Ok(Event::Start(ref e)) if e.name().as_ref() == b"this_tag" => {
/// match reader.read_event() {
/// Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => {
///
/// // creates a new element ... alternatively we could reuse `e` by calling
/// // `e.into_owned()`
/// let mut elem = BytesStart::owned(b"my_elem".to_vec(), "my_elem".len());
/// let mut elem = BytesStart::owned_name(b"my_elem".to_vec());
///
/// // collect existing attributes
/// elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
Expand All @@ -38,15 +37,14 @@ use std::io::Write;
/// // writes the event to the writer
/// assert!(writer.write_event(Event::Start(elem)).is_ok());
/// },
/// Ok(Event::End(ref e)) if e.name().as_ref() == b"this_tag" => {
/// Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
/// assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
/// },
/// Ok(Event::Eof) => break,
/// // we can either move or borrow the event to write, depending on your use-case
/// Ok(e) => assert!(writer.write_event(&e).is_ok()),
/// Err(e) => panic!("{}", e),
/// Ok(e) => assert!(writer.write_event(e).is_ok()),
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
/// }
/// buf.clear();
/// }
///
/// let result = writer.into_inner().into_inner();
Expand Down
Loading