diff --git a/Cargo.toml b/Cargo.toml index f41bb994..af05e661 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,13 +8,14 @@ documentation = "https://docs.rs/quick-xml" repository = "https://github.com/tafia/quick-xml" keywords = ["xml", "serde", "parser", "writer", "html"] -categories = ["encoding", "parsing", "parser-implementations"] +categories = ["asynchronous", "encoding", "parsing", "parser-implementations"] license = "MIT" [dependencies] document-features = { version = "0.2", optional = true } encoding_rs = { version = "0.8", optional = true } serde = { version = "1.0", optional = true } +tokio = { version = "1.20", optional = true, default-features = false, features = ["io-util"] } memchr = "2.5" [dev-dependencies] @@ -23,6 +24,8 @@ pretty_assertions = "1.2" regex = "1" serde = { version = "1.0", features = ["derive"] } serde-value = "0.7" +tokio = { version = "1.20", default-features = false, features = ["macros", "rt"] } +tokio-test = "0.4" [lib] bench = false @@ -37,6 +40,13 @@ harness = false [features] default = [] + +## Enables support for asynchronous reading from `tokio`'s IO-Traits by enabling +## [reading events] from types implementing [`tokio::io::AsyncBufRead`]. +## +## [reading events]: crate::reader::Reader::read_event_into_async +async-tokio = ["tokio"] + ## Enables support of non-UTF-8 encoded documents. Encoding will be inferred from ## the XML declaration if it will be found, otherwise UTF-8 is assumed. ## @@ -123,3 +133,7 @@ required-features = ["serialize"] [[test]] name = "serde-migrated" required-features = ["serialize"] + +[[test]] +name = "async-tokio" +required-features = ["async-tokio"] diff --git a/Changelog.md b/Changelog.md index e81cab0d..23f5e72b 100644 --- a/Changelog.md +++ b/Changelog.md @@ -39,6 +39,7 @@ |`attribute_namespace` |`resolve_attribute` - [#439]: Added utilities `detect_encoding()`, `decode()`, and `decode_with_bom_removal()` under the `quick-xml::encoding` namespace. +- [#450]: Added support of asynchronous [tokio](https://tokio.rs/) readers ### Bug Fixes @@ -218,6 +219,7 @@ [#439]: https://github.com/tafia/quick-xml/pull/439 [#440]: https://github.com/tafia/quick-xml/pull/440 [#443]: https://github.com/tafia/quick-xml/pull/443 +[#450]: https://github.com/tafia/quick-xml/pull/450 ## 0.23.0 -- 2022-05-08 diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs new file mode 100644 index 00000000..1e06b0b8 --- /dev/null +++ b/src/reader/async_tokio.rs @@ -0,0 +1,388 @@ +//! This is an implementation of [`Reader`] for reading from a [`AsyncBufRead`] +//! as underlying byte stream. This reader fully implements async/await so reading +//! can use non-blocking I/O. + +use std::future::Future; +use std::pin::Pin; +use tokio::io::{self, AsyncBufRead, AsyncBufReadExt}; + +use crate::events::Event; +use crate::name::{QName, ResolveResult}; +use crate::reader::buffered_reader::impl_buffered_source; +use crate::reader::{is_whitespace, BangType, NsReader, ParseState, ReadElementState, Reader}; +use crate::{Error, Result}; + +/// A struct for read XML asynchronously from an [`AsyncBufRead`]. +/// +/// Having own struct allows us to implement anything without risk of name conflicts +/// and does not suffer from the impossibility of having `async` in traits. +struct TokioAdapter<'a, R>(&'a mut R); + +impl<'a, R: AsyncBufRead + Unpin> TokioAdapter<'a, R> { + impl_buffered_source!('b, 0, async, await); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +impl Reader { + /// An asynchronous version of [`read_event_into()`]. Reads the next event into + /// given buffer. + /// + /// > This function should be defined as + /// > ```ignore + /// > pub async fn read_event_into_async<'b>( + /// > &mut self, + /// > buf: &'b mut Vec + /// > ) -> Result>; + /// > ``` + /// > but Rust does not allow to write that for recursive asynchronous function + /// + /// This is the main entry point for reading XML `Event`s when using an async reader. + /// + /// See the documentation of [`read_event_into()`] for more information. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::Event; + /// use quick_xml::Reader; + /// + /// // This explicitly uses `from_reader("...".as_bytes())` to use a buffered + /// // reader instead of relying on the zero-copy optimizations for reading + /// // from byte slices, which is provides the sync interface anyway. + /// let mut reader = Reader::from_reader(r#" + /// + /// Test + /// Test 2 + /// + /// "#.as_bytes()); + /// reader.trim_text(true); + /// + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_event_into_async(&mut buf).await { + /// Ok(Event::Start(_)) => count += 1, + /// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), + /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), + /// Ok(Event::Eof) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); + /// # }) // tokio_test::block_on + /// ``` + /// + /// [`read_event_into()`]: Reader::read_event_into + pub fn read_event_into_async<'reader, 'b: 'reader>( + &'reader mut self, + buf: &'b mut Vec, + ) -> Pin>> + 'reader>> { + Box::pin(async move { + read_event_impl!(self, buf, read_until_open_async, read_until_close_async, await) + }) + } + + /// An asynchronous version of [`read_to_end_into()`]. + /// Reads asynchronously until end element is found using provided buffer as + /// intermediate storage for events content. This function is supposed to be + /// called after you already read a [`Start`] event. + /// + /// See the documentation of [`read_to_end_into()`] for more information. + /// + /// # Examples + /// + /// This example shows, how you can skip XML content after you read the + /// start event. + /// + /// ``` + /// # tokio_test::block_on(async { + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::Reader; + /// + /// let mut reader = Reader::from_reader(r#" + /// + /// + /// + /// + /// + /// + /// + /// + /// "#.as_bytes()); + /// reader.trim_text(true); + /// let mut buf = Vec::new(); + /// + /// let start = BytesStart::new("outer"); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... + /// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Start(start)); + /// + /// //...then, we could skip all events to the corresponding end event. + /// // This call will correctly handle nested elements. + /// // Note, however, that this method does not handle namespaces. + /// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap(); + /// + /// // At the end we should get an Eof event, because we ate the whole XML + /// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Eof); + /// # }) // tokio_test::block_on + /// ``` + /// + /// [`read_to_end_into()`]: Self::read_to_end_into + /// [`Start`]: Event::Start + pub async fn read_to_end_into_async<'n>( + &mut self, + // We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033` + end: QName<'n>, + buf: &mut Vec, + ) -> Result<()> { + read_to_end!(self, end, buf, read_event_into_async, { buf.clear(); }, await) + } + + /// Read until '<' is found and moves reader to an `Opened` state. + /// + /// Return a `StartText` event if `first` is `true` and a `Text` event otherwise + async fn read_until_open_async<'b>( + &mut self, + buf: &'b mut Vec, + first: bool, + ) -> Result> { + read_until_open!(self, buf, first, TokioAdapter(&mut self.reader), read_event_into_async, await) + } + + /// Private function to read until `>` is found. This function expects that + /// it was called just after encounter a `<` symbol. + async fn read_until_close_async<'b>(&mut self, buf: &'b mut Vec) -> Result> { + read_until_close!(self, buf, TokioAdapter(&mut self.reader), await) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +impl NsReader { + /// An asynchronous version of [`read_event_into()`]. Reads the next event into + /// given buffer. + /// + /// This method manages namespaces but doesn't resolve them automatically. + /// You should call [`resolve_element()`] if you want to get a namespace. + /// + /// You also can use [`read_resolved_event_into_async()`] instead if you want + /// to resolve namespace as soon as you get an event. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::Event; + /// use quick_xml::name::{Namespace, ResolveResult::*}; + /// use quick_xml::NsReader; + /// + /// let mut reader = NsReader::from_reader(r#" + /// + /// Test + /// Test 2 + /// + /// "#.as_bytes()); + /// reader.trim_text(true); + /// + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_event_into_async(&mut buf).await.unwrap() { + /// Event::Start(e) => { + /// count += 1; + /// let (ns, local) = reader.resolve_element(e.name()); + /// match local.as_ref() { + /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))), + /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))), + /// _ => unreachable!(), + /// } + /// } + /// Event::Text(e) => { + /// txt.push(e.unescape().unwrap().into_owned()) + /// } + /// Event::Eof => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); + /// # }) // tokio_test::block_on + /// ``` + /// + /// [`read_event_into()`]: NsReader::read_event_into + /// [`resolve_element()`]: Self::resolve_element + /// [`read_resolved_event_into_async()`]: Self::read_resolved_event_into_async + pub async fn read_event_into_async<'b>(&mut self, buf: &'b mut Vec) -> Result> { + self.pop(); + let event = self.reader.read_event_into_async(buf).await; + self.process_event(event) + } + + /// An asynchronous version of [`read_to_end_into()`]. + /// Reads asynchronously until end element is found using provided buffer as + /// intermediate storage for events content. This function is supposed to be + /// called after you already read a [`Start`] event. + /// + /// See the documentation of [`read_to_end_into()`] for more information. + /// + /// # Examples + /// + /// This example shows, how you can skip XML content after you read the + /// start event. + /// + /// ``` + /// # tokio_test::block_on(async { + /// # use pretty_assertions::assert_eq; + /// use quick_xml::name::{Namespace, ResolveResult}; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::NsReader; + /// + /// let mut reader = NsReader::from_reader(r#" + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// "#.as_bytes()); + /// reader.trim_text(true); + /// let mut buf = Vec::new(); + /// + /// let ns = Namespace(b"namespace 1"); + /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... + /// assert_eq!( + /// reader.read_resolved_event_into_async(&mut buf).await.unwrap(), + /// (ResolveResult::Bound(ns), Event::Start(start)) + /// ); + /// + /// //...then, we could skip all events to the corresponding end event. + /// // This call will correctly handle nested elements. + /// // Note, however, that this method does not handle namespaces. + /// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap(); + /// + /// // At the end we should get an Eof event, because we ate the whole XML + /// assert_eq!( + /// reader.read_resolved_event_into_async(&mut buf).await.unwrap(), + /// (ResolveResult::Unbound, Event::Eof) + /// ); + /// # }) // tokio_test::block_on + /// ``` + /// + /// [`read_to_end_into()`]: Self::read_to_end_into + /// [`Start`]: Event::Start + pub async fn read_to_end_into_async<'n>( + &mut self, + // We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033` + end: QName<'n>, + buf: &mut Vec, + ) -> Result<()> { + // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should + // match literally the start name. See `Reader::check_end_names` documentation + self.reader.read_to_end_into_async(end, buf).await + } + + /// An asynchronous version of [`read_resolved_event_into()`]. Reads the next + /// event into given buffer asynchronously and resolves its namespace (if applicable). + /// + /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events. + /// For all other events the concept of namespace is not defined, so + /// a [`ResolveResult::Unbound`] is returned. + /// + /// If you are not interested in namespaces, you can use [`read_event_into_async()`] + /// which will not automatically resolve namespaces for you. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::Event; + /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; + /// use quick_xml::NsReader; + /// + /// let mut reader = NsReader::from_reader(r#" + /// + /// Test + /// Test 2 + /// + /// "#.as_bytes()); + /// reader.trim_text(true); + /// + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_resolved_event_into_async(&mut buf).await.unwrap() { + /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => { + /// count += 1; + /// assert_eq!(e.local_name(), QName(b"tag1").into()); + /// } + /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => { + /// count += 1; + /// assert_eq!(e.local_name(), QName(b"tag2").into()); + /// } + /// (_, Event::Start(_)) => unreachable!(), + /// + /// (_, Event::Text(e)) => { + /// txt.push(e.unescape().unwrap().into_owned()) + /// } + /// (_, Event::Eof) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); + /// # }) // tokio_test::block_on + /// ``` + /// + /// [`read_resolved_event_into()`]: NsReader::read_resolved_event_into + /// [`Start`]: Event::Start + /// [`Empty`]: Event::Empty + /// [`End`]: Event::End + /// [`read_event_into_async()`]: Self::read_event_into_async + pub async fn read_resolved_event_into_async<'ns, 'b>( + // Name 'ns lifetime, because otherwise we get an error + // "implicit elided lifetime not allowed here" on ResolveResult + &'ns mut self, + buf: &'b mut Vec, + ) -> Result<(ResolveResult<'ns>, Event<'b>)> { + let event = self.read_event_into_async(buf).await; + self.resolve_event(event) + } +} + +#[cfg(test)] +mod test { + use super::TokioAdapter; + use crate::reader::test::check; + + check!( + #[tokio::test] + read_event_into_async, + read_until_close_async, + TokioAdapter, + &mut Vec::new(), + async, await + ); +} diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index fcc9ec38..f09ba706 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -12,6 +12,216 @@ use crate::events::Event; use crate::name::QName; use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, XmlSource}; +macro_rules! impl_buffered_source { + ($($lf:lifetime, $reader:tt, $async:ident, $await:ident)?) => { + #[inline] + $($async)? fn read_bytes_until $(<$lf>)? ( + &mut self, + byte: u8, + buf: &'b mut Vec, + position: &mut usize, + ) -> Result> { + // search byte must be within the ascii range + debug_assert!(byte.is_ascii()); + + let mut read = 0; + let mut done = false; + let start = buf.len(); + while !done { + let used = { + let available = match self $(.$reader)? .fill_buf() $(.$await)? { + Ok(n) if n.is_empty() => break, + Ok(n) => n, + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + }; + + match memchr::memchr(byte, available) { + Some(i) => { + buf.extend_from_slice(&available[..i]); + done = true; + i + 1 + } + None => { + buf.extend_from_slice(available); + available.len() + } + } + }; + self $(.$reader)? .consume(used); + read += used; + } + *position += read; + + if read == 0 { + Ok(None) + } else { + Ok(Some(&buf[start..])) + } + } + + $($async)? fn read_bang_element $(<$lf>)? ( + &mut self, + buf: &'b mut Vec, + position: &mut usize, + ) -> Result> { + // Peeked one bang ('!') before being called, so it's guaranteed to + // start with it. + let start = buf.len(); + let mut read = 1; + buf.push(b'!'); + self $(.$reader)? .consume(1); + + let bang_type = BangType::new(self.peek_one() $(.$await)? ?)?; + + loop { + match self $(.$reader)? .fill_buf() $(.$await)? { + // Note: Do not update position, so the error points to + // somewhere sane rather than at the EOF + Ok(n) if n.is_empty() => return Err(bang_type.to_err()), + Ok(available) => { + if let Some((consumed, used)) = bang_type.parse(available, read) { + buf.extend_from_slice(consumed); + + self $(.$reader)? .consume(used); + read += used; + + *position += read; + break; + } else { + buf.extend_from_slice(available); + + let used = available.len(); + self $(.$reader)? .consume(used); + read += used; + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + } + } + + if read == 0 { + Ok(None) + } else { + Ok(Some((bang_type, &buf[start..]))) + } + } + + #[inline] + $($async)? fn read_element $(<$lf>)? ( + &mut self, + buf: &'b mut Vec, + position: &mut usize, + ) -> Result> { + let mut state = ReadElementState::Elem; + let mut read = 0; + + let start = buf.len(); + loop { + match self $(.$reader)? .fill_buf() $(.$await)? { + Ok(n) if n.is_empty() => break, + Ok(available) => { + if let Some((consumed, used)) = state.change(available) { + buf.extend_from_slice(consumed); + + self $(.$reader)? .consume(used); + read += used; + + *position += read; + break; + } else { + buf.extend_from_slice(available); + + let used = available.len(); + self $(.$reader)? .consume(used); + read += used; + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + }; + } + + if read == 0 { + Ok(None) + } else { + Ok(Some(&buf[start..])) + } + } + + /// Consume and discard all the whitespace until the next non-whitespace + /// character or EOF. + $($async)? fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { + loop { + break match self $(.$reader)? .fill_buf() $(.$await)? { + Ok(n) => { + let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len()); + if count > 0 { + self $(.$reader)? .consume(count); + *position += count; + continue; + } else { + Ok(()) + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => Err(Error::Io(e)), + }; + } + } + + /// Consume and discard one character if it matches the given byte. Return + /// true if it matched. + $($async)? fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { + // search byte must be within the ascii range + debug_assert!(byte.is_ascii()); + + match self.peek_one() $(.$await)? ? { + Some(b) if b == byte => { + *position += 1; + self $(.$reader)? .consume(1); + Ok(true) + } + _ => Ok(false), + } + } + + /// Return one character without consuming it, so that future `read_*` calls + /// will still include it. On EOF, return None. + $($async)? fn peek_one(&mut self) -> Result> { + loop { + break match self $(.$reader)? .fill_buf() $(.$await)? { + Ok(n) if n.is_empty() => Ok(None), + Ok(n) => Ok(Some(n[0])), + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => Err(Error::Io(e)), + }; + } + } + }; +} + +// Make it public for use in async implementations +pub(super) use impl_buffered_source; + +/// Implementation of `XmlSource` for any `BufRead` reader using a user-given +/// `Vec` as buffer that will be borrowed by events. +impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { + impl_buffered_source!(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + /// This is an implementation of [`Reader`] for reading from a [`BufRead`] as /// underlying byte stream. impl Reader { @@ -146,26 +356,9 @@ impl Reader { /// [`check_end_names`]: Self::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result<()> { - let mut depth = 0; - loop { + read_to_end!(self, end, buf, read_event_impl, { buf.clear(); - match self.read_event_into(buf) { - Err(e) => return Err(e), - - Ok(Event::Start(e)) if e.name() == end => depth += 1, - Ok(Event::End(e)) if e.name() == end => { - if depth == 0 { - return Ok(()); - } - depth -= 1; - } - Ok(Event::Eof) => { - let name = self.decoder().decode(end.as_ref()); - return Err(Error::UnexpectedEof(format!("", name))); - } - _ => (), - } - } + }) } /// Reads optional text between start and end tags. @@ -226,213 +419,23 @@ impl Reader> { } } -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/// Implementation of `XmlSource` for any `BufRead` reader using a user-given -/// `Vec` as buffer that will be borrowed by events. -impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { - #[inline] - fn read_bytes_until( - &mut self, - byte: u8, - buf: &'b mut Vec, - position: &mut usize, - ) -> Result> { - // search byte must be within the ascii range - debug_assert!(byte.is_ascii()); - - let mut read = 0; - let mut done = false; - let start = buf.len(); - while !done { - let used = { - let available = match self.fill_buf() { - Ok(n) if n.is_empty() => break, - Ok(n) => n, - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => { - *position += read; - return Err(Error::Io(e)); - } - }; - - match memchr::memchr(byte, available) { - Some(i) => { - buf.extend_from_slice(&available[..i]); - done = true; - i + 1 - } - None => { - buf.extend_from_slice(available); - available.len() - } - } - }; - self.consume(used); - read += used; - } - *position += read; - - if read == 0 { - Ok(None) - } else { - Ok(Some(&buf[start..])) - } - } - - fn read_bang_element( - &mut self, - buf: &'b mut Vec, - position: &mut usize, - ) -> Result> { - // Peeked one bang ('!') before being called, so it's guaranteed to - // start with it. - let start = buf.len(); - let mut read = 1; - buf.push(b'!'); - self.consume(1); - - let bang_type = BangType::new(self.peek_one()?)?; - - loop { - match self.fill_buf() { - // Note: Do not update position, so the error points to - // somewhere sane rather than at the EOF - Ok(n) if n.is_empty() => return Err(bang_type.to_err()), - Ok(available) => { - if let Some((consumed, used)) = bang_type.parse(available, read) { - buf.extend_from_slice(consumed); - - self.consume(used); - read += used; - - *position += read; - break; - } else { - buf.extend_from_slice(available); - - let used = available.len(); - self.consume(used); - read += used; - } - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => { - *position += read; - return Err(Error::Io(e)); - } - } - } - - if read == 0 { - Ok(None) - } else { - Ok(Some((bang_type, &buf[start..]))) - } - } - - #[inline] - fn read_element( - &mut self, - buf: &'b mut Vec, - position: &mut usize, - ) -> Result> { - let mut state = ReadElementState::Elem; - let mut read = 0; - - let start = buf.len(); - loop { - match self.fill_buf() { - Ok(n) if n.is_empty() => break, - Ok(available) => { - if let Some((consumed, used)) = state.change(available) { - buf.extend_from_slice(consumed); - - self.consume(used); - read += used; - - *position += read; - break; - } else { - buf.extend_from_slice(available); - - let used = available.len(); - self.consume(used); - read += used; - } - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => { - *position += read; - return Err(Error::Io(e)); - } - }; - } - - if read == 0 { - Ok(None) - } else { - Ok(Some(&buf[start..])) - } - } - - /// Consume and discard all the whitespace until the next non-whitespace - /// character or EOF. - fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { - loop { - break match self.fill_buf() { - Ok(n) => { - let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len()); - if count > 0 { - self.consume(count); - *position += count; - continue; - } else { - Ok(()) - } - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => Err(Error::Io(e)), - }; - } - } - - /// Consume and discard one character if it matches the given byte. Return - /// true if it matched. - fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { - // search byte must be within the ascii range - debug_assert!(byte.is_ascii()); - - match self.peek_one()? { - Some(b) if b == byte => { - *position += 1; - self.consume(1); - Ok(true) - } - _ => Ok(false), - } - } - - /// Return one character without consuming it, so that future `read_*` calls - /// will still include it. On EOF, return None. - fn peek_one(&mut self) -> Result> { - loop { - break match self.fill_buf() { - Ok(n) if n.is_empty() => Ok(None), - Ok(n) => Ok(Some(n[0])), - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => Err(Error::Io(e)), - }; - } - } -} - #[cfg(test)] mod test { use crate::reader::test::check; use crate::reader::XmlSource; - check!(&mut Vec::new()); + /// Default buffer constructor just pass the byte array from the test + fn identity(input: T) -> T { + input + } + + check!( + #[test] + read_event_impl, + read_until_close, + identity, + &mut Vec::new() + ); #[cfg(feature = "encoding")] mod encoding { diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 3320acc4..73ff5061 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -127,6 +127,142 @@ macro_rules! configure_methods { }; } +macro_rules! read_event_impl { + ( + $self:ident, $buf:ident, + $read_until_open:ident, + $read_until_close:ident + $(, $await:ident)? + ) => {{ + let event = match $self.parser.state { + ParseState::Init => $self.$read_until_open($buf, true) $(.$await)?, + ParseState::ClosedTag => $self.$read_until_open($buf, false) $(.$await)?, + ParseState::OpenedTag => $self.$read_until_close($buf) $(.$await)?, + ParseState::Empty => $self.parser.close_expanded_empty(), + ParseState::Exit => return Ok(Event::Eof), + }; + match event { + Err(_) | Ok(Event::Eof) => $self.parser.state = ParseState::Exit, + _ => {} + } + event + }}; +} + +macro_rules! read_until_open { + ( + $self:ident, $buf:ident, $first:ident, + $reader:expr, + $read_event:ident + $(, $await:ident)? + ) => {{ + $self.parser.state = ParseState::OpenedTag; + + if $self.parser.trim_text_start { + $reader.skip_whitespace(&mut $self.parser.offset) $(.$await)? ?; + } + + // If we already at the `<` symbol, do not try to return an empty Text event + if $reader.skip_one(b'<', &mut $self.parser.offset) $(.$await)? ? { + return $self.$read_event($buf) $(.$await)?; + } + + match $reader + .read_bytes_until(b'<', $buf, &mut $self.parser.offset) + $(.$await)? + { + Ok(Some(bytes)) => $self.parser.read_text(bytes, $first), + Ok(None) => Ok(Event::Eof), + Err(e) => Err(e), + } + }}; +} + +macro_rules! read_until_close { + ( + $self:ident, $buf:ident, + $reader:expr + $(, $await:ident)? + ) => {{ + $self.parser.state = ParseState::ClosedTag; + + match $reader.peek_one() $(.$await)? { + // ` match $reader + .read_bang_element($buf, &mut $self.parser.offset) + $(.$await)? + { + Ok(None) => Ok(Event::Eof), + Ok(Some((bang_type, bytes))) => $self.parser.read_bang(bang_type, bytes), + Err(e) => Err(e), + }, + // ` match $reader + .read_bytes_until(b'>', $buf, &mut $self.parser.offset) + $(.$await)? + { + Ok(None) => Ok(Event::Eof), + Ok(Some(bytes)) => $self.parser.read_end(bytes), + Err(e) => Err(e), + }, + // ` match $reader + .read_bytes_until(b'>', $buf, &mut $self.parser.offset) + $(.$await)? + { + Ok(None) => Ok(Event::Eof), + Ok(Some(bytes)) => $self.parser.read_question_mark(bytes), + Err(e) => Err(e), + }, + // `<...` - opening or self-closed tag + Ok(Some(_)) => match $reader + .read_element($buf, &mut $self.parser.offset) + $(.$await)? + { + Ok(None) => Ok(Event::Eof), + Ok(Some(bytes)) => $self.parser.read_start(bytes), + Err(e) => Err(e), + }, + Ok(None) => Ok(Event::Eof), + Err(e) => Err(e), + } + }}; +} + +/// Generalization of `read_to_end` method for buffered and borrowed readers +macro_rules! read_to_end { + ( + $self:expr, $end:expr, $buf:expr, + $read_event:ident, + // Code block that performs clearing of internal buffer after read of each event + $clear:block + $(, $await:ident)? + ) => {{ + let mut depth = 0; + loop { + $clear + match $self.$read_event($buf) $(.$await)? { + Err(e) => return Err(e), + + Ok(Event::Start(e)) if e.name() == $end => depth += 1, + Ok(Event::End(e)) if e.name() == $end => { + if depth == 0 { + return Ok(()); + } + depth -= 1; + } + Ok(Event::Eof) => { + let name = $self.decoder().decode($end.as_ref()); + return Err(Error::UnexpectedEof(format!("", name))); + } + _ => (), + } + } + }}; +} + +#[cfg(feature = "async-tokio")] +mod async_tokio; mod buffered_reader; mod ns_reader; mod parser; @@ -318,7 +454,7 @@ impl Reader { /// let xml = r#" /// Test /// Test 2 - /// "#; + /// "#; /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes())); /// let mut buf = Vec::new(); /// @@ -405,18 +541,7 @@ impl Reader { where R: XmlSource<'i, B>, { - let event = match self.parser.state { - ParseState::Init => self.read_until_open(buf, true), - ParseState::ClosedTag => self.read_until_open(buf, false), - ParseState::OpenedTag => self.read_until_close(buf), - ParseState::Empty => self.parser.close_expanded_empty(), - ParseState::Exit => return Ok(Event::Eof), - }; - match event { - Err(_) | Ok(Event::Eof) => self.parser.state = ParseState::Exit, - _ => {} - } - event + read_event_impl!(self, buf, read_until_open, read_until_close) } /// Read until '<' is found and moves reader to an `OpenedTag` state. @@ -426,25 +551,7 @@ impl Reader { where R: XmlSource<'i, B>, { - self.parser.state = ParseState::OpenedTag; - - if self.parser.trim_text_start { - self.reader.skip_whitespace(&mut self.parser.offset)?; - } - - // If we already at the `<` symbol, do not try to return an empty Text event - if self.reader.skip_one(b'<', &mut self.parser.offset)? { - return self.read_event_impl(buf); - } - - match self - .reader - .read_bytes_until(b'<', buf, &mut self.parser.offset) - { - Ok(Some(bytes)) => self.parser.read_text(bytes, first), - Ok(None) => Ok(Event::Eof), - Err(e) => Err(e), - } + read_until_open!(self, buf, first, self.reader, read_event_impl) } /// Private function to read until `>` is found. This function expects that @@ -453,42 +560,7 @@ impl Reader { where R: XmlSource<'i, B>, { - self.parser.state = ParseState::ClosedTag; - - match self.reader.peek_one() { - // ` match self.reader.read_bang_element(buf, &mut self.parser.offset) { - Ok(None) => Ok(Event::Eof), - Ok(Some((bang_type, bytes))) => self.parser.read_bang(bang_type, bytes), - Err(e) => Err(e), - }, - // ` match self - .reader - .read_bytes_until(b'>', buf, &mut self.parser.offset) - { - Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => self.parser.read_end(bytes), - Err(e) => Err(e), - }, - // ` match self - .reader - .read_bytes_until(b'>', buf, &mut self.parser.offset) - { - Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => self.parser.read_question_mark(bytes), - Err(e) => Err(e), - }, - // `<...` - opening or self-closed tag - Ok(Some(_)) => match self.reader.read_element(buf, &mut self.parser.offset) { - Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => self.parser.read_start(bytes), - Err(e) => Err(e), - }, - Ok(None) => Ok(Event::Eof), - Err(e) => Err(e), - } + read_until_close!(self, buf, self.reader) } } @@ -712,7 +784,16 @@ pub(crate) fn is_whitespace(b: u8) -> bool { mod test { /// Checks the internal implementation of the various reader methods macro_rules! check { - ($buf:expr) => { + ( + #[$test:meta] + $read_event:ident, + $read_until_close:ident, + // constructor of the XML source on which internal functions will be called + $source:path, + // constructor of the buffer to which read data will stored + $buf:expr + $(, $async:ident, $await:ident)? + ) => { mod read_bytes_until { use super::*; // Use Bytes for printing bytes as strings for ASCII range @@ -720,16 +801,17 @@ mod test { use pretty_assertions::assert_eq; /// Checks that search in the empty buffer returns `None` - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"".as_ref(); // ^= 0 assert_eq!( - input + $source(&mut input) .read_bytes_until(b'*', buf, &mut position) + $(.$await)? .unwrap() .map(Bytes), None @@ -739,16 +821,17 @@ mod test { /// Checks that search in the buffer non-existent value returns entire buffer /// as a result and set `position` to `len()` - #[test] - fn non_existent() { + #[$test] + $($async)? fn non_existent() { let buf = $buf; let mut position = 0; let mut input = b"abcdef".as_ref(); // ^= 6 assert_eq!( - input + $source(&mut input) .read_bytes_until(b'*', buf, &mut position) + $(.$await)? .unwrap() .map(Bytes), Some(Bytes(b"abcdef")) @@ -759,16 +842,17 @@ mod test { /// Checks that search in the buffer an element that is located in the front of /// buffer returns empty slice as a result and set `position` to one symbol /// after match (`1`) - #[test] - fn at_the_start() { + #[$test] + $($async)? fn at_the_start() { let buf = $buf; let mut position = 0; let mut input = b"*abcdef".as_ref(); // ^= 1 assert_eq!( - input + $source(&mut input) .read_bytes_until(b'*', buf, &mut position) + $(.$await)? .unwrap() .map(Bytes), Some(Bytes(b"")) @@ -779,16 +863,17 @@ mod test { /// Checks that search in the buffer an element that is located in the middle of /// buffer returns slice before that symbol as a result and set `position` to one /// symbol after match - #[test] - fn inside() { + #[$test] + $($async)? fn inside() { let buf = $buf; let mut position = 0; let mut input = b"abc*def".as_ref(); // ^= 4 assert_eq!( - input + $source(&mut input) .read_bytes_until(b'*', buf, &mut position) + $(.$await)? .unwrap() .map(Bytes), Some(Bytes(b"abc")) @@ -799,16 +884,17 @@ mod test { /// Checks that search in the buffer an element that is located in the end of /// buffer returns slice before that symbol as a result and set `position` to one /// symbol after match (`len()`) - #[test] - fn in_the_end() { + #[$test] + $($async)? fn in_the_end() { let buf = $buf; let mut position = 0; let mut input = b"abcdef*".as_ref(); // ^= 7 assert_eq!( - input + $source(&mut input) .read_bytes_until(b'*', buf, &mut position) + $(.$await)? .unwrap() .map(Bytes), Some(Bytes(b"abcdef")) @@ -830,15 +916,15 @@ mod test { /// Checks that if input begins like CDATA element, but CDATA start sequence /// is not finished, parsing ends with an error - #[test] + #[$test] #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"] - fn not_properly_start() { + $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"![]]>other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, @@ -851,14 +937,14 @@ mod test { /// Checks that if CDATA startup sequence was matched, but an end sequence /// is not found, parsing ends with an error - #[test] - fn not_closed() { + #[$test] + $($async)? fn not_closed() { let buf = $buf; let mut position = 0; let mut input = b"![CDATA[other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, @@ -870,16 +956,17 @@ mod test { } /// Checks that CDATA element without content inside parsed successfully - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"![CDATA[]]>other content".as_ref(); // ^= 11 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::CData, Bytes(b"![CDATA["))) @@ -890,16 +977,17 @@ mod test { /// Checks that CDATA element with content parsed successfully. /// Additionally checks that sequences inside CDATA that may look like /// a CDATA end sequence do not interrupt CDATA parsing - #[test] - fn with_content() { + #[$test] + $($async)? fn with_content() { let buf = $buf; let mut position = 0; let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref(); // ^= 28 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::CData, Bytes(b"![CDATA[cdata]] ]>content"))) @@ -931,15 +1019,15 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] + #[$test] #[ignore = "start comment sequence fully checked outside of `read_bang_element`"] - fn not_properly_start() { + $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"!- -->other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -950,14 +1038,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_properly_end() { + #[$test] + $($async)? fn not_properly_end() { let buf = $buf; let mut position = 0; let mut input = b"!->other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -968,14 +1056,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_closed1() { + #[$test] + $($async)? fn not_closed1() { let buf = $buf; let mut position = 0; let mut input = b"!--other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -986,14 +1074,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_closed2() { + #[$test] + $($async)? fn not_closed2() { let buf = $buf; let mut position = 0; let mut input = b"!-->other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1004,14 +1092,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_closed3() { + #[$test] + $($async)? fn not_closed3() { let buf = $buf; let mut position = 0; let mut input = b"!--->other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1022,16 +1110,17 @@ mod test { assert_eq!(position, 0); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"!---->other content".as_ref(); // ^= 6 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::Comment, Bytes(b"!----"))) @@ -1039,16 +1128,17 @@ mod test { assert_eq!(position, 6); } - #[test] - fn with_content() { + #[$test] + $($async)? fn with_content() { let buf = $buf; let mut position = 0; let mut input = b"!--->comment<--->other content".as_ref(); // ^= 17 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::Comment, Bytes(b"!--->comment<---"))) @@ -1068,14 +1158,14 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn not_properly_start() { + #[$test] + $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"!D other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1086,14 +1176,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn without_space() { + #[$test] + $($async)? fn without_space() { let buf = $buf; let mut position = 0; let mut input = b"!DOCTYPEother content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1104,16 +1194,17 @@ mod test { assert_eq!(position, 0); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"!DOCTYPE>other content".as_ref(); // ^= 9 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::DocType, Bytes(b"!DOCTYPE"))) @@ -1121,14 +1212,14 @@ mod test { assert_eq!(position, 9); } - #[test] - fn not_closed() { + #[$test] + $($async)? fn not_closed() { let buf = $buf; let mut position = 0; let mut input = b"!DOCTYPE other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1147,14 +1238,14 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn not_properly_start() { + #[$test] + $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"!d other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1165,14 +1256,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn without_space() { + #[$test] + $($async)? fn without_space() { let buf = $buf; let mut position = 0; let mut input = b"!doctypeother content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1183,16 +1274,17 @@ mod test { assert_eq!(position, 0); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"!doctype>other content".as_ref(); // ^= 9 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::DocType, Bytes(b"!doctype"))) @@ -1200,14 +1292,14 @@ mod test { assert_eq!(position, 9); } - #[test] - fn not_closed() { + #[$test] + $($async)? fn not_closed() { let buf = $buf; let mut position = 0; let mut input = b"!doctype other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1227,14 +1319,17 @@ mod test { use pretty_assertions::assert_eq; /// Checks that nothing was read from empty buffer - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"".as_ref(); // ^= 0 - assert_eq!(input.read_element(buf, &mut position).unwrap().map(Bytes), None); + assert_eq!( + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), + None + ); assert_eq!(position, 0); } @@ -1243,71 +1338,71 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn empty_tag() { + #[$test] + $($async)? fn empty_tag() { let buf = $buf; let mut position = 0; let mut input = b">".as_ref(); // ^= 1 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b"")) ); assert_eq!(position, 1); } - #[test] - fn normal() { + #[$test] + $($async)? fn normal() { let buf = $buf; let mut position = 0; let mut input = b"tag>".as_ref(); // ^= 4 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b"tag")) ); assert_eq!(position, 4); } - #[test] - fn empty_ns_empty_tag() { + #[$test] + $($async)? fn empty_ns_empty_tag() { let buf = $buf; let mut position = 0; let mut input = b":>".as_ref(); // ^= 2 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":")) ); assert_eq!(position, 2); } - #[test] - fn empty_ns() { + #[$test] + $($async)? fn empty_ns() { let buf = $buf; let mut position = 0; let mut input = b":tag>".as_ref(); // ^= 5 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":tag")) ); assert_eq!(position, 5); } - #[test] - fn with_attributes() { + #[$test] + $($async)? fn with_attributes() { let buf = $buf; let mut position = 0; let mut input = br#"tag attr-1=">" attr2 = '>' 3attr>"#.as_ref(); // ^= 38 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#)) ); assert_eq!(position, 38); @@ -1319,71 +1414,71 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn empty_tag() { + #[$test] + $($async)? fn empty_tag() { let buf = $buf; let mut position = 0; let mut input = b"/>".as_ref(); // ^= 2 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b"/")) ); assert_eq!(position, 2); } - #[test] - fn normal() { + #[$test] + $($async)? fn normal() { let buf = $buf; let mut position = 0; let mut input = b"tag/>".as_ref(); // ^= 5 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b"tag/")) ); assert_eq!(position, 5); } - #[test] - fn empty_ns_empty_tag() { + #[$test] + $($async)? fn empty_ns_empty_tag() { let buf = $buf; let mut position = 0; let mut input = b":/>".as_ref(); // ^= 3 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":/")) ); assert_eq!(position, 3); } - #[test] - fn empty_ns() { + #[$test] + $($async)? fn empty_ns() { let buf = $buf; let mut position = 0; let mut input = b":tag/>".as_ref(); // ^= 6 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":tag/")) ); assert_eq!(position, 6); } - #[test] - fn with_attributes() { + #[$test] + $($async)? fn with_attributes() { let buf = $buf; let mut position = 0; let mut input = br#"tag attr-1="/>" attr2 = '/>' 3attr/>"#.as_ref(); // ^= 41 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#)) ); assert_eq!(position, 41); @@ -1394,12 +1489,12 @@ mod test { mod issue_344 { use crate::errors::Error; - #[test] - fn cdata() { + #[$test] + $($async)? fn cdata() { let doc = "![]]>"; let mut reader = crate::Reader::from_str(doc); - match reader.read_until_close($buf) { + match reader.$read_until_close($buf) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, @@ -1409,12 +1504,12 @@ mod test { } } - #[test] - fn comment() { + #[$test] + $($async)? fn comment() { let doc = "!- -->"; let mut reader = crate::Reader::from_str(doc); - match reader.read_until_close($buf) { + match reader.$read_until_close($buf) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1424,12 +1519,12 @@ mod test { } } - #[test] - fn doctype_uppercase() { + #[$test] + $($async)? fn doctype_uppercase() { let doc = "!D>"; let mut reader = crate::Reader::from_str(doc); - match reader.read_until_close($buf) { + match reader.$read_until_close($buf) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1439,12 +1534,12 @@ mod test { } } - #[test] - fn doctype_lowercase() { + #[$test] + $($async)? fn doctype_lowercase() { let doc = "!d>"; let mut reader = crate::Reader::from_str(doc); - match reader.read_until_close($buf) { + match reader.$read_until_close($buf) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1456,126 +1551,126 @@ mod test { } /// Ensures, that no empty `Text` events are generated - mod read_event_impl { + mod $read_event { use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use crate::reader::Reader; use pretty_assertions::assert_eq; - #[test] - fn start_text() { + #[$test] + $($async)? fn start_text() { let mut reader = Reader::from_str("bom"); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::StartText(BytesText::from_escaped("bom").into()) ); } - #[test] - fn declaration() { + #[$test] + $($async)? fn declaration() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) ); } - #[test] - fn doctype() { + #[$test] + $($async)? fn doctype() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::DocType(BytesText::from_escaped("x")) ); } - #[test] - fn processing_instruction() { + #[$test] + $($async)? fn processing_instruction() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::PI(BytesText::from_escaped("xml-stylesheet")) ); } - #[test] - fn start() { + #[$test] + $($async)? fn start() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Start(BytesStart::new("tag")) ); } - #[test] - fn end() { + #[$test] + $($async)? fn end() { let mut reader = Reader::from_str(""); // Because we expect invalid XML, do not check that // the end name paired with the start name reader.check_end_names(false); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::End(BytesEnd::new("tag")) ); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Empty(BytesStart::new("tag")) ); } /// Text event cannot be generated without preceding event of another type - #[test] - fn text() { + #[$test] + $($async)? fn text() { let mut reader = Reader::from_str("text"); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Empty(BytesStart::new("tag")) ); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Text(BytesText::from_escaped("text")) ); } - #[test] - fn cdata() { + #[$test] + $($async)? fn cdata() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::CData(BytesCData::new("")) ); } - #[test] - fn comment() { + #[$test] + $($async)? fn comment() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Comment(BytesText::from_escaped("")) ); } - #[test] - fn eof() { + #[$test] + $($async)? fn eof() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Eof ); } diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index a7ecc0a6..3f56d248 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -19,7 +19,7 @@ use crate::reader::{Reader, XmlSource}; /// Consumes a [`BufRead`] and streams XML `Event`s. pub struct NsReader { /// An XML reader - reader: Reader, + pub(super) reader: Reader, /// Buffer that contains names of namespace prefixes (the part between `xmlns:` /// and an `=`) and namespace values. buffer: Vec, @@ -63,14 +63,14 @@ impl NsReader { self.process_event(event) } - fn pop(&mut self) { + pub(super) fn pop(&mut self) { if self.pending_pop { self.ns_resolver.pop(&mut self.buffer); self.pending_pop = false; } } - fn process_event<'i>(&mut self, event: Result>) -> Result> { + pub(super) fn process_event<'i>(&mut self, event: Result>) -> Result> { match event { Ok(Event::Start(e)) => { self.ns_resolver.push(&e, &mut self.buffer); @@ -93,7 +93,7 @@ impl NsReader { } } - fn resolve_event<'i>( + pub(super) fn resolve_event<'i>( &mut self, event: Result>, ) -> Result<(ResolveResult, Event<'i>)> { @@ -538,6 +538,10 @@ impl<'i> NsReader<&'i [u8]> { /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace /// as soon as you get an event. /// + /// There is no asynchronous `read_event_async()` version of this function, + /// because it is not necessary -- the contents are already in memory and no IO + /// is needed, therefore there is no potential for blocking. + /// /// # Examples /// /// ``` @@ -595,6 +599,10 @@ impl<'i> NsReader<&'i [u8]> { /// If you are not interested in namespaces, you can use [`read_event()`] /// which will not automatically resolve namespaces for you. /// + /// There is no asynchronous `read_resolved_event_async()` version of this function, + /// because it is not necessary -- the contents are already in memory and no IO + /// is needed, therefore there is no potential for blocking. + /// /// # Examples /// /// ``` @@ -661,6 +669,10 @@ impl<'i> NsReader<&'i [u8]> { /// encoding_. It is good practice to always get that parameter using /// [`BytesStart::to_end()`] method. /// + /// There is no asynchronous `read_to_end_async()` version of this function, + /// because it is not necessary -- the contents are already in memory and no IO + /// is needed, therefore there is no potential for blocking. + /// /// # Namespaces /// /// Unlike [`Reader::read_to_end()`], this method resolves namespace diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index f4bb8706..d4bf0d7e 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -34,6 +34,10 @@ impl<'a> Reader<&'a [u8]> { /// Read an event that borrows from the input rather than a buffer. /// + /// There is no asynchronous `read_event_async()` version of this function, + /// because it is not necessary -- the contents are already in memory and no IO + /// is needed, therefore there is no potential for blocking. + /// /// # Examples /// /// ``` @@ -83,6 +87,10 @@ impl<'a> Reader<&'a [u8]> { /// The correctness of the skipped events does not checked, if you disabled /// the [`check_end_names`] option. /// + /// There is no asynchronous `read_to_end_async()` version of this function, + /// because it is not necessary -- the contents are already in memory and no IO + /// is needed, therefore there is no potential for blocking. + /// /// # Namespaces /// /// While the [`Reader`] does not support namespace resolution, namespaces @@ -138,25 +146,7 @@ impl<'a> Reader<&'a [u8]> { /// [`check_end_names`]: Self::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end(&mut self, end: QName) -> Result<()> { - let mut depth = 0; - loop { - match self.read_event() { - Err(e) => return Err(e), - - Ok(Event::Start(e)) if e.name() == end => depth += 1, - Ok(Event::End(e)) if e.name() == end => { - if depth == 0 { - return Ok(()); - } - depth -= 1; - } - Ok(Event::Eof) => { - let name = self.decoder().decode(end.as_ref()); - return Err(Error::UnexpectedEof(format!("", name))); - } - _ => (), - } - } + read_to_end!(self, end, (), read_event_impl, {}) } } @@ -264,7 +254,18 @@ mod test { use crate::reader::test::check; use crate::reader::XmlSource; - check!(()); + /// Default buffer constructor just pass the byte array from the test + fn identity(input: T) -> T { + input + } + + check!( + #[test] + read_event_impl, + read_until_close, + identity, + () + ); #[cfg(feature = "encoding")] mod encoding { diff --git a/tests/async-tokio.rs b/tests/async-tokio.rs new file mode 100644 index 00000000..4d811580 --- /dev/null +++ b/tests/async-tokio.rs @@ -0,0 +1,20 @@ +use quick_xml::events::Event::*; +use quick_xml::Reader; + +#[tokio::test] +async fn test_sample() { + let src = include_str!("documents/sample_rss.xml"); + let mut reader = Reader::from_reader(src.as_bytes()); + let mut buf = Vec::new(); + let mut count = 0; + loop { + match reader.read_event_into_async(&mut buf).await.unwrap() { + Start(_) => count += 1, + Decl(e) => println!("{:?}", e.version()), + Eof => break, + _ => (), + } + buf.clear(); + } + println!("{}", count); +}