diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 4d96b9a8..ad159105 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -63,6 +63,10 @@ jobs: env: LLVM_PROFILE_FILE: coverage/serialize-escape-html-%p-%m.profraw run: cargo test --features serialize,escape-html + - name: Run tests (serialize+span) + env: + LLVM_PROFILE_FILE: coverage/serialize-span-%p-%m.profraw + run: cargo test --features serialize,span - name: Run tests (all features) env: LLVM_PROFILE_FILE: coverage/all-features-%p-%m.profraw diff --git a/Cargo.toml b/Cargo.toml index a1fe074d..eab8dd12 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -180,6 +180,10 @@ serde-types = ["serde/derive"] ## If you need that, use the `serde-types` feature. serialize = ["serde"] # "dep:" prefix only avalible from Rust 1.60 +## Enables gathering spans in events, which can slow down the parser a bit and increase +## memory consumption for events +span = [] + [package.metadata.docs.rs] # document all features all-features = true diff --git a/src/de/mod.rs b/src/de/mod.rs index 09f9c59f..bc3f2001 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -1885,6 +1885,11 @@ mod map; mod simple_type; mod var; +#[cfg(feature = "span")] +use crate::events::Spanned; +#[cfg(feature = "span")] +use crate::reader::Span; + pub use crate::errors::serialize::DeError; use crate::{ encoding::Decoder, @@ -1907,7 +1912,7 @@ pub(crate) const TEXT_KEY: &str = "$text"; pub(crate) const VALUE_KEY: &str = "$value"; /// Simplified event which contains only these variants that used by deserializer -#[derive(Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum DeEvent<'a> { /// Start tag (with attributes) ``. 
Start(BytesStart<'a>), @@ -1922,6 +1927,31 @@ pub enum DeEvent<'a> { Eof, } +#[cfg(feature = "span")] +impl<'a> Spanned for DeEvent<'a> { + #[inline] + fn span(&self) -> Span { + match self { + DeEvent::Start(e) => e.span(), + DeEvent::End(e) => e.span(), + DeEvent::Text(e) => e.span(), + DeEvent::CData(e) => e.span(), + DeEvent::Eof => Span::default(), + } + } + + #[inline] + fn with_span(self, span: Span) -> Self { + match self { + DeEvent::Start(e) => DeEvent::Start(e.with_span(span)), + DeEvent::End(e) => DeEvent::End(e.with_span(span)), + DeEvent::Text(e) => DeEvent::Text(e.with_span(span)), + DeEvent::CData(e) => DeEvent::CData(e.with_span(span)), + DeEvent::Eof => DeEvent::Eof, + } + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// /// A structure that deserializes XML into Rust values. @@ -2662,6 +2692,28 @@ mod tests { use super::*; use pretty_assertions::assert_eq; + /// Helper function that removes span information from the events + fn next<'de>(de: &mut Deserializer<'de, SliceReader<'de>>) -> DeEvent<'de> { + let event = de.next().unwrap(); + + // We do not test correctness of spans here so just clear them + #[cfg(feature = "span")] + let event = event.with_span(Span::default()); + + event + } + + /// Helper function that removes span information from the events + fn peek<'de>(de: &mut Deserializer<'de, SliceReader<'de>>) -> DeEvent<'de> { + let event = de.peek().unwrap().clone(); + + // We do not test correctness of spans here so just clear them + #[cfg(feature = "span")] + let event = event.with_span(Span::default()); + + event + } + #[cfg(feature = "overlapped-lists")] mod skip { use super::*; @@ -2669,6 +2721,21 @@ mod tests { use crate::events::{BytesEnd, BytesText}; use pretty_assertions::assert_eq; + fn clear<'de>(events: &VecDeque>) -> Vec> { + events + .iter() + .map(|event| { + let event = event.clone(); + + // We do not test correctness of spans here so just clear them + #[cfg(feature 
= "span")] + let event = event.with_span(Span::default()); + + event + }) + .collect() + } + /// Checks that `peek()` and `read()` behaves correctly after `skip()` #[test] fn read_and_peek() { @@ -2689,8 +2756,8 @@ mod tests { assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); - assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner"))); + assert_eq!(next(&mut de), Start(BytesStart::new("root"))); + assert_eq!(peek(&mut de), Start(BytesStart::new("inner"))); // Mark that start_replay() should begin replay from this point let checkpoint = de.skip_checkpoint(); @@ -2700,7 +2767,7 @@ mod tests { de.skip().unwrap(); assert_eq!(de.read, vec![]); assert_eq!( - de.write, + clear(&de.write), vec![ Start(BytesStart::new("inner")), Text(BytesText::from_escaped("text")), @@ -2718,8 +2785,8 @@ mod tests { // // // - assert_eq!(de.next().unwrap(), Start(BytesStart::new("next"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("next"))); + assert_eq!(next(&mut de), Start(BytesStart::new("next"))); + assert_eq!(next(&mut de), End(BytesEnd::new("next"))); // We finish writing. 
Next call to `next()` should start replay that messages: // @@ -2734,7 +2801,7 @@ mod tests { // de.start_replay(checkpoint); assert_eq!( - de.read, + clear(&de.read), vec![ Start(BytesStart::new("inner")), Text(BytesText::from_escaped("text")), @@ -2744,7 +2811,7 @@ mod tests { ] ); assert_eq!(de.write, vec![]); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner"))); + assert_eq!(next(&mut de), Start(BytesStart::new("inner"))); // Mark that start_replay() should begin replay from this point let checkpoint = de.skip_checkpoint(); @@ -2753,7 +2820,7 @@ mod tests { // Skip `$text` node and consume after it de.skip().unwrap(); assert_eq!( - de.read, + clear(&de.read), vec![ Start(BytesStart::new("inner")), End(BytesEnd::new("inner")), @@ -2761,7 +2828,7 @@ mod tests { ] ); assert_eq!( - de.write, + clear(&de.write), vec![ // This comment here to keep the same formatting of both arrays // otherwise rustfmt suggest one-line it @@ -2769,8 +2836,8 @@ mod tests { ] ); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner"))); + assert_eq!(next(&mut de), Start(BytesStart::new("inner"))); + assert_eq!(next(&mut de), End(BytesEnd::new("inner"))); // We finish writing. 
Next call to `next()` should start replay messages: // @@ -2783,19 +2850,19 @@ mod tests { // de.start_replay(checkpoint); assert_eq!( - de.read, + clear(&de.read), vec![ Text(BytesText::from_escaped("text")), End(BytesEnd::new("inner")), ] ); assert_eq!(de.write, vec![]); - assert_eq!(de.next().unwrap(), Text(BytesText::from_escaped("text"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner"))); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("target"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("target"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); - assert_eq!(de.next().unwrap(), Eof); + assert_eq!(next(&mut de), Text(BytesText::from_escaped("text"))); + assert_eq!(next(&mut de), End(BytesEnd::new("inner"))); + assert_eq!(next(&mut de), Start(BytesStart::new("target"))); + assert_eq!(next(&mut de), End(BytesEnd::new("target"))); + assert_eq!(next(&mut de), End(BytesEnd::new("root"))); + assert_eq!(next(&mut de), Eof); } /// Checks that `read_to_end()` behaves correctly after `skip()` @@ -2819,7 +2886,7 @@ mod tests { assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); + assert_eq!(next(&mut de), Start(BytesStart::new("root"))); // Mark that start_replay() should begin replay from this point let checkpoint = de.skip_checkpoint(); @@ -2829,7 +2896,7 @@ mod tests { de.skip().unwrap(); assert_eq!(de.read, vec![]); assert_eq!( - de.write, + clear(&de.write), vec![ Start(BytesStart::new("skip")), Text(BytesText::from_escaped("text")), @@ -2846,11 +2913,11 @@ mod tests { // // // - assert_eq!(de.next().unwrap(), Start(BytesStart::new("target"))); + assert_eq!(next(&mut de), Start(BytesStart::new("target"))); de.read_to_end(QName(b"target")).unwrap(); assert_eq!(de.read, vec![]); assert_eq!( - de.write, + clear(&de.write), vec![ Start(BytesStart::new("skip")), Text(BytesText::from_escaped("text")), @@ -2872,7 +2939,7 @@ mod tests { // 
de.start_replay(checkpoint); assert_eq!( - de.read, + clear(&de.read), vec![ Start(BytesStart::new("skip")), Text(BytesText::from_escaped("text")), @@ -2883,11 +2950,11 @@ mod tests { ); assert_eq!(de.write, vec![]); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip"))); + assert_eq!(next(&mut de), Start(BytesStart::new("skip"))); de.read_to_end(QName(b"skip")).unwrap(); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); - assert_eq!(de.next().unwrap(), Eof); + assert_eq!(next(&mut de), End(BytesEnd::new("root"))); + assert_eq!(next(&mut de), Eof); } /// Checks that replay replayes only part of events @@ -2913,7 +2980,7 @@ mod tests { assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); + assert_eq!(next(&mut de), Start(BytesStart::new("root"))); // start_replay() should start replay from this point let checkpoint1 = de.skip_checkpoint(); @@ -2924,7 +2991,7 @@ mod tests { de.skip().unwrap(); // skipped-2 assert_eq!(de.read, vec![]); assert_eq!( - de.write, + clear(&de.write), vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), @@ -2935,10 +3002,10 @@ mod tests { //////////////////////////////////////////////////////////////////////////////////////// - assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner"))); - assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3"))); + assert_eq!(next(&mut de), Start(BytesStart::new("inner"))); + assert_eq!(peek(&mut de), Start(BytesStart::new("skipped-3"))); assert_eq!( - de.read, + clear(&de.read), vec![ // This comment here to keep the same formatting of both arrays // otherwise rustfmt suggest one-line it @@ -2946,7 +3013,7 @@ mod tests { ] ); assert_eq!( - de.write, + clear(&de.write), vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), @@ -2964,7 +3031,7 @@ mod tests { de.skip().unwrap(); // skipped-4 assert_eq!(de.read, vec![]); assert_eq!( - de.write, + 
clear(&de.write), vec![ // checkpoint 1 Start(BytesStart::new("skipped-1")), @@ -2978,11 +3045,11 @@ mod tests { End(BytesEnd::new("skipped-4")), ] ); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2"))); - assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner"))); + assert_eq!(next(&mut de), Start(BytesStart::new("target-2"))); + assert_eq!(next(&mut de), End(BytesEnd::new("target-2"))); + assert_eq!(peek(&mut de), End(BytesEnd::new("inner"))); assert_eq!( - de.read, + clear(&de.read), vec![ // This comment here to keep the same formatting of both arrays // otherwise rustfmt suggest one-line it @@ -2990,7 +3057,7 @@ mod tests { ] ); assert_eq!( - de.write, + clear(&de.write), vec![ // checkpoint 1 Start(BytesStart::new("skipped-1")), @@ -3008,7 +3075,7 @@ mod tests { // Start replay events from checkpoint 2 de.start_replay(checkpoint2); assert_eq!( - de.read, + clear(&de.read), vec![ Start(BytesStart::new("skipped-3")), End(BytesEnd::new("skipped-3")), @@ -3018,7 +3085,7 @@ mod tests { ] ); assert_eq!( - de.write, + clear(&de.write), vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), @@ -3028,15 +3095,15 @@ mod tests { ); // Replayed events - assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3"))); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4"))); + assert_eq!(next(&mut de), Start(BytesStart::new("skipped-3"))); + assert_eq!(next(&mut de), End(BytesEnd::new("skipped-3"))); + assert_eq!(next(&mut de), Start(BytesStart::new("skipped-4"))); + assert_eq!(next(&mut de), End(BytesEnd::new("skipped-4"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner"))); + assert_eq!(next(&mut de), End(BytesEnd::new("inner"))); assert_eq!(de.read, vec![]); assert_eq!( - de.write, + clear(&de.write), 
vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), @@ -3048,12 +3115,12 @@ mod tests { //////////////////////////////////////////////////////////////////////////////////////// // New events - assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1"))); + assert_eq!(next(&mut de), Start(BytesStart::new("target-1"))); + assert_eq!(next(&mut de), End(BytesEnd::new("target-1"))); assert_eq!(de.read, vec![]); assert_eq!( - de.write, + clear(&de.write), vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), @@ -3065,7 +3132,7 @@ mod tests { // Start replay events from checkpoint 1 de.start_replay(checkpoint1); assert_eq!( - de.read, + clear(&de.read), vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), @@ -3076,17 +3143,17 @@ mod tests { assert_eq!(de.write, vec![]); // Replayed events - assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1"))); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2"))); + assert_eq!(next(&mut de), Start(BytesStart::new("skipped-1"))); + assert_eq!(next(&mut de), End(BytesEnd::new("skipped-1"))); + assert_eq!(next(&mut de), Start(BytesStart::new("skipped-2"))); + assert_eq!(next(&mut de), End(BytesEnd::new("skipped-2"))); assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); // New events - assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); - assert_eq!(de.next().unwrap(), Eof); + assert_eq!(next(&mut de), End(BytesEnd::new("root"))); + assert_eq!(next(&mut de), Eof); } /// Checks that limiting buffer size works correctly @@ -3132,7 +3199,7 @@ mod tests { let checkpoint = de.skip_checkpoint(); de.skip().unwrap(); de.start_replay(checkpoint); - assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]); + 
assert_eq!(clear(&de.read), vec![Start(BytesStart::new("root")), Eof]); } } @@ -3153,34 +3220,34 @@ mod tests { "#, ); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); + assert_eq!(next(&mut de), Start(BytesStart::new("root"))); assert_eq!( - de.next().unwrap(), + next(&mut de), Start(BytesStart::from_content(r#"tag a="1""#, 3)) ); assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ()); assert_eq!( - de.next().unwrap(), + next(&mut de), Start(BytesStart::from_content(r#"tag a="2""#, 3)) ); - assert_eq!(de.next().unwrap(), CData(BytesCData::new("cdata content"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag"))); + assert_eq!(next(&mut de), CData(BytesCData::new("cdata content"))); + assert_eq!(next(&mut de), End(BytesEnd::new("tag"))); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed"))); + assert_eq!(next(&mut de), Start(BytesStart::new("self-closed"))); assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ()); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); - assert_eq!(de.next().unwrap(), Eof); + assert_eq!(next(&mut de), End(BytesEnd::new("root"))); + assert_eq!(next(&mut de), Eof); } #[test] fn invalid_xml() { let mut de = Deserializer::from_str(""); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag"))); - assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag"))); + assert_eq!(next(&mut de), Start(BytesStart::new("tag"))); + assert_eq!(peek(&mut de), Start(BytesStart::new("tag"))); match de.read_to_end(QName(b"tag")) { Err(DeError::UnexpectedEof) => (), @@ -3241,6 +3308,11 @@ mod tests { loop { let event = reader.next().unwrap(); + + // We do not test correctness of spans here so just clear them + #[cfg(feature = "span")] + let event = event.with_span(Span::default()); + if let DeEvent::Eof = event { break; } diff --git a/src/events/mod.rs b/src/events/mod.rs index 535032a9..dcb7c953 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -48,9 +48,23 @@ use 
crate::encoding::Decoder; use crate::errors::{Error, Result}; use crate::escape::{escape, partial_escape, unescape_with}; use crate::name::{LocalName, QName}; +#[cfg(feature = "span")] +use crate::reader::Span; use crate::utils::write_cow_string; use attributes::{Attribute, Attributes}; +/// A trait for acquiring the start and end locations of a parsing event in an input +#[cfg(feature = "span")] +pub trait Spanned { + /// Returns a span over the location of a parsing event + fn span(&self) -> Span; + + /// Sets the span of this parsing event to a given value + fn with_span(self, span: Span) -> Self; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + /// Opening tag data (`Event::Start`), with optional attributes. /// /// ``. @@ -67,6 +81,11 @@ pub struct BytesStart<'a> { pub(crate) buf: Cow<'a, [u8]>, /// end of the element name, the name starts at that the start of `buf` pub(crate) name_len: usize, + + /// A span that covers event from beginning `<` to the end `>` (i.e. 
[`Span::end`] + /// is one byte after `>`) + #[cfg(feature = "span")] + span: Span, } impl<'a> BytesStart<'a> { @@ -76,6 +95,9 @@ impl<'a> BytesStart<'a> { BytesStart { buf: Cow::Borrowed(content), name_len, + + #[cfg(feature = "span")] + span: Span::default(), } } @@ -90,6 +112,9 @@ impl<'a> BytesStart<'a> { BytesStart { name_len: buf.len(), buf, + + #[cfg(feature = "span")] + span: Span::default(), } } @@ -105,6 +130,9 @@ impl<'a> BytesStart<'a> { BytesStart { buf: str_cow_to_bytes(content), name_len, + + #[cfg(feature = "span")] + span: Span::default(), } } @@ -113,6 +141,9 @@ impl<'a> BytesStart<'a> { BytesStart { buf: Cow::Owned(self.buf.into_owned()), name_len: self.name_len, + + #[cfg(feature = "span")] + span: self.span, } } @@ -121,6 +152,9 @@ impl<'a> BytesStart<'a> { BytesStart { buf: Cow::Owned(self.buf.to_owned().into()), name_len: self.name_len, + + #[cfg(feature = "span")] + span: self.span.clone(), } } @@ -153,6 +187,9 @@ impl<'a> BytesStart<'a> { BytesStart { buf: Cow::Borrowed(&self.buf), name_len: self.name_len, + + #[cfg(feature = "span")] + span: self.span.clone(), } } @@ -267,13 +304,26 @@ impl<'a> BytesStart<'a> { } Ok(None) } + + /// Returns a span for a tag name + #[cfg(feature = "span")] + pub fn name_span(&self) -> Span { + // +1: skip `<` + let start = self.span.start + 1; + start..start + self.name_len + } } impl<'a> Debug for BytesStart<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesStart {{ buf: ")?; write_cow_string(f, &self.buf)?; - write!(f, ", name_len: {} }}", self.name_len) + write!(f, ", name_len: {}", self.name_len)?; + + #[cfg(feature = "span")] + write!(f, ", span: {:?}", &self.span)?; + + write!(f, " }}") } } @@ -285,6 +335,20 @@ impl<'a> Deref for BytesStart<'a> { } } +#[cfg(feature = "span")] +impl<'a> Spanned for BytesStart<'a> { + #[inline] + fn span(&self) -> Span { + self.span.clone() + } + + #[inline] + fn with_span(mut self, span: Span) -> Self { + self.span = span; + self + } +} + 
//////////////////////////////////////////////////////////////////////////////////////////////////// /// An XML declaration (`Event::Decl`). @@ -531,19 +595,43 @@ impl<'a> Deref for BytesDecl<'a> { } } +#[cfg(feature = "span")] +impl<'a> Spanned for BytesDecl<'a> { + #[inline] + fn span(&self) -> Span { + self.content.span() + } + + #[inline] + fn with_span(mut self, span: Span) -> Self { + self.content.span = span; + self + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// /// A struct to manage `Event::End` events #[derive(Clone, Eq, PartialEq)] pub struct BytesEnd<'a> { name: Cow<'a, [u8]>, + + /// A span that covers event from beginning `<` to the end `>` (i.e. [`Span::end`] + /// is one byte after `>`) + #[cfg(feature = "span")] + span: Span, } impl<'a> BytesEnd<'a> { /// Internal constructor, used by `Reader`. Supplies data in reader's encoding #[inline] pub(crate) fn wrap(name: Cow<'a, [u8]>) -> Self { - BytesEnd { name } + Self { + name, + + #[cfg(feature = "span")] + span: Span::default(), + } } /// Creates a new `BytesEnd` borrowing a slice. 
@@ -560,6 +648,9 @@ impl<'a> BytesEnd<'a> { pub fn into_owned(self) -> BytesEnd<'static> { BytesEnd { name: Cow::Owned(self.name.into_owned()), + + #[cfg(feature = "span")] + span: self.span, } } @@ -568,6 +659,9 @@ impl<'a> BytesEnd<'a> { pub fn borrow(&self) -> BytesEnd { BytesEnd { name: Cow::Borrowed(&self.name), + + #[cfg(feature = "span")] + span: self.span.clone(), } } @@ -585,12 +679,24 @@ impl<'a> BytesEnd<'a> { pub fn local_name(&self) -> LocalName { self.name().into() } + + /// Returns a span for a tag name + #[cfg(feature = "span")] + pub fn name_span(&self) -> Span { + // +2: skip ` Debug for BytesEnd<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesEnd {{ name: ")?; write_cow_string(f, &self.name)?; + + #[cfg(feature = "span")] + write!(f, ", span: {:?}", &self.span)?; + write!(f, " }}") } } @@ -603,6 +709,20 @@ impl<'a> Deref for BytesEnd<'a> { } } +#[cfg(feature = "span")] +impl<'a> Spanned for BytesEnd<'a> { + #[inline] + fn span(&self) -> Span { + self.span.clone() + } + + #[inline] + fn with_span(mut self, span: Span) -> Self { + self.span = span; + self + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// /// Data from various events (most notably, `Event::Text`) that stored in XML @@ -615,6 +735,10 @@ pub struct BytesText<'a> { content: Cow<'a, [u8]>, /// Encoding in which the `content` is stored inside the event decoder: Decoder, + + /// A span that covers event + #[cfg(feature = "span")] + span: Span, } impl<'a> BytesText<'a> { @@ -624,6 +748,9 @@ impl<'a> BytesText<'a> { Self { content: content.into(), decoder, + + #[cfg(feature = "span")] + span: Span::default(), } } @@ -647,6 +774,9 @@ impl<'a> BytesText<'a> { BytesText { content: self.content.into_owned().into(), decoder: self.decoder, + + #[cfg(feature = "span")] + span: self.span, } } @@ -662,6 +792,9 @@ impl<'a> BytesText<'a> { BytesText { content: Cow::Borrowed(&self.content), decoder: self.decoder, + + 
#[cfg(feature = "span")] + span: self.span.clone(), } } @@ -721,6 +854,10 @@ impl<'a> Debug for BytesText<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesText {{ content: ")?; write_cow_string(f, &self.content)?; + + #[cfg(feature = "span")] + write!(f, ", span: {:?}", &self.span)?; + write!(f, " }}") } } @@ -733,6 +870,20 @@ impl<'a> Deref for BytesText<'a> { } } +#[cfg(feature = "span")] +impl<'a> Spanned for BytesText<'a> { + #[inline] + fn span(&self) -> Span { + self.span.clone() + } + + #[inline] + fn with_span(mut self, span: Span) -> Self { + self.span = span; + self + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// /// CDATA content contains unescaped data from the reader. If you want to write them as a text, @@ -742,6 +893,11 @@ pub struct BytesCData<'a> { content: Cow<'a, [u8]>, /// Encoding in which the `content` is stored inside the event decoder: Decoder, + + /// A span that covers event from beginning `<` to the end `>` (i.e. 
[`Span::end`] + /// is one byte after `>`) + #[cfg(feature = "span")] + span: Span, } impl<'a> BytesCData<'a> { @@ -751,6 +907,9 @@ impl<'a> BytesCData<'a> { Self { content: content.into(), decoder, + + #[cfg(feature = "span")] + span: Span::default(), } } @@ -771,6 +930,9 @@ impl<'a> BytesCData<'a> { BytesCData { content: self.content.into_owned().into(), decoder: self.decoder, + + #[cfg(feature = "span")] + span: self.span, } } @@ -786,6 +948,9 @@ impl<'a> BytesCData<'a> { BytesCData { content: Cow::Borrowed(&self.content), decoder: self.decoder, + + #[cfg(feature = "span")] + span: self.span.clone(), } } @@ -852,6 +1017,10 @@ impl<'a> Debug for BytesCData<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesCData {{ content: ")?; write_cow_string(f, &self.content)?; + + #[cfg(feature = "span")] + write!(f, ", span: {:?}", &self.span)?; + write!(f, " }}") } } @@ -864,6 +1033,20 @@ impl<'a> Deref for BytesCData<'a> { } } +#[cfg(feature = "span")] +impl<'a> Spanned for BytesCData<'a> { + #[inline] + fn span(&self) -> Span { + self.span.clone() + } + + #[inline] + fn with_span(mut self, span: Span) -> Self { + self.span = span; + self + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// /// Event emitted by [`Reader::read_event_into`]. 
@@ -953,6 +1136,41 @@ impl<'a> AsRef> for Event<'a> { } } +#[cfg(feature = "span")] +impl<'a> Spanned for Event<'a> { + #[inline] + fn span(&self) -> Span { + match self { + Event::Start(e) => e.span(), + Event::End(e) => e.span(), + Event::Empty(e) => e.span(), + Event::Text(e) => e.span(), + Event::Comment(e) => e.span(), + Event::CData(e) => e.span(), + Event::Decl(e) => e.span(), + Event::PI(e) => e.span(), + Event::DocType(e) => e.span(), + Event::Eof => Span::default(), + } + } + + #[inline] + fn with_span(self, span: Span) -> Self { + match self { + Event::Start(e) => Event::Start(e.with_span(span)), + Event::End(e) => Event::End(e.with_span(span)), + Event::Empty(e) => Event::Empty(e.with_span(span)), + Event::Text(e) => Event::Text(e.with_span(span)), + Event::Comment(e) => Event::Comment(e.with_span(span)), + Event::CData(e) => Event::CData(e.with_span(span)), + Event::Decl(e) => Event::Decl(e.with_span(span)), + Event::PI(e) => Event::PI(e.with_span(span)), + Event::DocType(e) => Event::DocType(e.with_span(span)), + Event::Eof => Event::Eof, + } + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// #[inline] diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs index ba8ef70a..264532d1 100644 --- a/src/reader/async_tokio.rs +++ b/src/reader/async_tokio.rs @@ -111,6 +111,8 @@ impl Reader { /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; + /// # #[cfg(feature = "span")] + /// # use quick_xml::events::Spanned; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// @@ -130,6 +132,9 @@ impl Reader { /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// + /// # #[cfg(feature = "span")] + /// # let start = start.with_span(5..12); + /// /// // First, we read a start event... 
/// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Start(start)); /// @@ -246,6 +251,8 @@ impl NsReader { /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; + /// # #[cfg(feature = "span")] + /// # use quick_xml::events::Spanned; /// use quick_xml::name::{Namespace, ResolveResult}; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::NsReader; @@ -271,6 +278,9 @@ impl NsReader { /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// + /// # #[cfg(feature = "span")] + /// # let start = start.with_span(5..32); + /// /// // First, we read a start event... /// assert_eq!( /// reader.read_resolved_event_into_async(&mut buf).await.unwrap(), diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index d17946de..7dba51ff 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -350,6 +350,8 @@ impl Reader { /// /// ``` /// # use pretty_assertions::assert_eq; + /// # #[cfg(feature = "span")] + /// # use quick_xml::events::Spanned; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// @@ -369,6 +371,9 @@ impl Reader { /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// + /// # #[cfg(feature = "span")] + /// # let start = start.with_span(5..12); + /// /// // First, we read a start event... /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start)); /// diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 77ed187c..1fc216b3 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -165,7 +165,7 @@ macro_rules! 
read_event_impl { }, ParseState::ClosedTag => $self.$read_until_open($buf) $(.$await)?, ParseState::OpenedTag => $self.$read_until_close($buf) $(.$await)?, - ParseState::Empty => $self.parser.close_expanded_empty(), + ParseState::Empty(start) => $self.parser.close_expanded_empty(start), ParseState::Exit => return Ok(Event::Eof), }; match event { @@ -194,11 +194,14 @@ macro_rules! read_until_open { return $self.$read_event($buf) $(.$await)?; } + // Position just after `>` + let start = $self.parser.offset; + match $reader .read_bytes_until(b'<', $buf, &mut $self.parser.offset) $(.$await)? { - Ok(Some(bytes)) => $self.parser.read_text(bytes), + Ok(Some(bytes)) => $self.parser.read_text(start, bytes), Ok(None) => Ok(Event::Eof), Err(e) => Err(e), } @@ -213,6 +216,8 @@ macro_rules! read_until_close { ) => {{ $self.parser.state = ParseState::ClosedTag; + // Cannot subtract 1 here because in case of malformed documents `offset` is zero + let start = $self.parser.offset; match $reader.peek_one() $(.$await)? { // ` match $reader @@ -220,7 +225,7 @@ macro_rules! read_until_close { $(.$await)? { Ok(None) => Ok(Event::Eof), - Ok(Some((bang_type, bytes))) => $self.parser.read_bang(bang_type, bytes), + Ok(Some((bang_type, bytes))) => $self.parser.read_bang(bang_type, start, bytes), Err(e) => Err(e), }, // ` Ok(Event::Eof), - Ok(Some(bytes)) => $self.parser.read_end(bytes), + Ok(Some(bytes)) => $self.parser.read_end(start - 1, bytes), Err(e) => Err(e), }, // ` Ok(Event::Eof), - Ok(Some(bytes)) => $self.parser.read_question_mark(bytes), + Ok(Some(bytes)) => $self.parser.read_question_mark(start - 1, bytes), Err(e) => Err(e), }, // `<...` - opening or self-closed tag @@ -247,7 +252,7 @@ macro_rules! read_until_close { $(.$await)? 
{ Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => $self.parser.read_start(bytes), + Ok(Some(bytes)) => $self.parser.read_start(start - 1, bytes), Err(e) => Err(e), }, Ok(None) => Ok(Event::Eof), @@ -341,7 +346,9 @@ enum ParseState { /// Reader enters to this state when it is in a `ClosedTag` state and emits an /// [`Event::Start`] event. The next event emitted will be an [`Event::End`], /// after which reader returned to the `ClosedTag` state. - Empty, + /// + /// Contains start offset of the buffer, used to close empty tags + Empty(usize), /// Reader enters this state when `Eof` event generated or an error occurred. /// This is the last state, the reader stay in it forever. Exit, @@ -1627,6 +1634,8 @@ mod test { /// Ensures, that no empty `Text` events are generated mod $read_event { use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; + #[cfg(feature = "span")] + use crate::events::Spanned; use crate::reader::Reader; use pretty_assertions::assert_eq; @@ -1639,9 +1648,13 @@ mod test { $($async)? fn bom_from_reader() { let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes()); + let expected = BytesText::from_escaped("\u{feff}"); + #[cfg(feature = "span")] + let expected = expected.with_span(0..3); + assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::Text(BytesText::from_escaped("\u{feff}")) + Event::Text(expected) ); assert_eq!( @@ -1659,9 +1672,13 @@ mod test { $($async)? fn bom_from_str() { let mut reader = Reader::from_str("\u{feff}\u{feff}"); + let expected = BytesText::from_escaped("\u{feff}"); + #[cfg(feature = "span")] + let expected = expected.with_span(0..3); + assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::Text(BytesText::from_escaped("\u{feff}")) + Event::Text(expected) ); assert_eq!( @@ -1674,9 +1691,14 @@ mod test { $($async)? 
fn declaration() { let mut reader = Reader::from_str(""); + let expected = BytesDecl::from_start(BytesStart::from_content("xml ", 3)); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..8); + assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) + Event::Decl(expected) ); } @@ -1684,9 +1706,14 @@ mod test { $($async)? fn doctype() { let mut reader = Reader::from_str(""); + let expected = BytesText::from_escaped("x"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..12); + assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::DocType(BytesText::from_escaped("x")) + Event::DocType(expected) ); } @@ -1694,9 +1721,14 @@ mod test { $($async)? fn processing_instruction() { let mut reader = Reader::from_str(""); + let expected = BytesText::from_escaped("xml-stylesheet"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..18); + assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::PI(BytesText::from_escaped("xml-stylesheet")) + Event::PI(expected) ); } @@ -1704,9 +1736,14 @@ mod test { $($async)? fn start() { let mut reader = Reader::from_str(""); + let expected = BytesStart::new("tag"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..5); + assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::Start(BytesStart::new("tag")) + Event::Start(expected) ); } @@ -1717,9 +1754,14 @@ mod test { // the end name paired with the start name reader.check_end_names(false); + let expected = BytesEnd::new("tag"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..6); + assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::End(BytesEnd::new("tag")) + Event::End(expected) ); } @@ -1727,9 +1769,14 @@ mod test { $($async)? 
fn empty() { let mut reader = Reader::from_str(""); + let expected = BytesStart::new("tag"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..6); + assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::Empty(BytesStart::new("tag")) + Event::Empty(expected) ); } @@ -1737,9 +1784,14 @@ mod test { $($async)? fn text() { let mut reader = Reader::from_str("text"); + let expected = BytesText::from_escaped("text"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..4); + assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::Text(BytesText::from_escaped("text")) + Event::Text(expected) ); } @@ -1747,9 +1799,14 @@ mod test { $($async)? fn cdata() { let mut reader = Reader::from_str(""); + let expected = BytesCData::new(""); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..12); + assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::CData(BytesCData::new("")) + Event::CData(expected) ); } @@ -1757,9 +1814,14 @@ mod test { $($async)? fn comment() { let mut reader = Reader::from_str(""); + let expected = BytesText::from_escaped(""); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..7); + assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), - Event::Comment(BytesText::from_escaped("")) + Event::Comment(expected) ); } @@ -1785,6 +1847,8 @@ mod test { ) => { mod small_buffers { use crate::events::{BytesCData, BytesDecl, BytesStart, BytesText, Event}; + #[cfg(feature = "span")] + use crate::events::Spanned; use crate::reader::Reader; use pretty_assertions::assert_eq; @@ -1797,9 +1861,15 @@ mod test { let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); + let expected = BytesDecl::from_start(BytesStart::from_content("xml ", 3)); + + // We do not test correctness of spans here so just clear them + #[cfg(feature = "span")] + let expected = expected.with_span(0..8); + assert_eq!( reader.$read_event(&mut buf) $(.$await)? 
.unwrap(), - Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) + Event::Decl(expected) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), @@ -1816,9 +1886,14 @@ mod test { let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); + let expected = BytesText::new("pi"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..6); + assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::PI(BytesText::new("pi")) + Event::PI(expected) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), @@ -1835,9 +1910,14 @@ mod test { let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); + let expected = BytesStart::new("empty"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..8); + assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Empty(BytesStart::new("empty")) + Event::Empty(expected) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), @@ -1854,9 +1934,14 @@ mod test { let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); + let expected = BytesCData::new("cdata"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..17); + assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::CData(BytesCData::new("cdata")) + Event::CData(expected) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), @@ -1873,9 +1958,14 @@ mod test { let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); + let expected = BytesCData::new("cdata"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..17); + assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::CData(BytesCData::new("cdata")) + Event::CData(expected) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? 
.unwrap(), @@ -1892,9 +1982,14 @@ mod test { let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); + let expected = BytesText::new("comment"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..14); + assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Comment(BytesText::new("comment")) + Event::Comment(expected) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), @@ -1911,9 +2006,14 @@ mod test { let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); + let expected = BytesText::new("comment"); + + #[cfg(feature = "span")] + let expected = expected.with_span(0..14); + assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Comment(BytesText::new("comment")) + Event::Comment(expected) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 4ce816fa..a59a940d 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -470,6 +470,8 @@ impl NsReader { /// /// ``` /// # use pretty_assertions::assert_eq; + /// # #[cfg(feature = "span")] + /// # use quick_xml::events::Spanned; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::name::{Namespace, ResolveResult}; /// use quick_xml::reader::NsReader; @@ -495,6 +497,9 @@ impl NsReader { /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// + /// # #[cfg(feature = "span")] + /// # let start = start.with_span(5..32); + /// /// // First, we read a start event... 
/// assert_eq!( /// reader.read_resolved_event_into(&mut buf).unwrap(), @@ -707,6 +712,8 @@ impl<'i> NsReader<&'i [u8]> { /// /// ``` /// # use pretty_assertions::assert_eq; + /// # #[cfg(feature = "span")] + /// # use quick_xml::events::Spanned; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::name::{Namespace, ResolveResult}; /// use quick_xml::reader::NsReader; @@ -731,6 +738,9 @@ impl<'i> NsReader<&'i [u8]> { /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// + /// # #[cfg(feature = "span")] + /// # let start = start.with_span(5..32); + /// /// // First, we read a start event... /// assert_eq!( /// reader.read_resolved_event().unwrap(), @@ -789,6 +799,8 @@ impl<'i> NsReader<&'i [u8]> { /// ``` /// # use pretty_assertions::assert_eq; /// # use std::borrow::Cow; + /// # #[cfg(feature = "span")] + /// # use quick_xml::events::Spanned; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::NsReader; /// @@ -804,6 +816,9 @@ impl<'i> NsReader<&'i [u8]> { /// let start = BytesStart::new("html"); /// let end = start.to_end().into_owned(); /// + /// # #[cfg(feature = "span")] + /// # let start = start.with_span(5..11); + /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// // ...and disable checking of end names because we expect HTML further... diff --git a/src/reader/parser.rs b/src/reader/parser.rs index 38b6f49a..b6f01744 100644 --- a/src/reader/parser.rs +++ b/src/reader/parser.rs @@ -8,6 +8,9 @@ use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Even use crate::reader::EncodingRef; use crate::reader::{is_whitespace, BangType, ParseState}; +#[cfg(feature = "span")] +use crate::events::Spanned; + use memchr; /// A struct that holds a current parse state and a parser configuration. 
@@ -65,7 +68,7 @@ impl Parser { /// - `bytes`: data from the start of stream to the first `<` or from `>` to `<` /// /// [`Text`]: Event::Text - pub fn read_text<'b>(&mut self, bytes: &'b [u8]) -> Result> { + pub fn read_text<'b>(&mut self, _start: usize, bytes: &'b [u8]) -> Result> { let mut content = bytes; if self.trim_text_end { @@ -77,12 +80,23 @@ impl Parser { content = &bytes[..len]; } - Ok(Event::Text(BytesText::wrap(content, self.decoder()))) + let event = BytesText::wrap(content, self.decoder()); + + // Represents text content without trimmed spaces + #[cfg(feature = "span")] + let event = event.with_span(_start.._start + content.len()); + + Ok(Event::Text(event)) } /// reads `BytesElement` starting with a `!`, /// return `Comment`, `CData` or `DocType` event - pub fn read_bang<'b>(&mut self, bang_type: BangType, buf: &'b [u8]) -> Result> { + pub fn read_bang<'b>( + &mut self, + bang_type: BangType, + _start: usize, + buf: &'b [u8], + ) -> Result> { let uncased_starts_with = |string: &[u8], prefix: &[u8]| { string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix) }; @@ -100,17 +114,22 @@ impl Parser { return Err(Error::UnexpectedToken("--".to_string())); } } - Ok(Event::Comment(BytesText::wrap( - &buf[3..len - 2], - self.decoder(), - ))) + + let event = BytesText::wrap(&buf[3..len - 2], self.decoder()); + + #[cfg(feature = "span")] + let event = event.with_span(_start - 1..self.offset); + + Ok(Event::Comment(event)) } BangType::CData if uncased_starts_with(buf, b"![CDATA[") => { debug_assert!(buf.ends_with(b"]]")); - Ok(Event::CData(BytesCData::wrap( - &buf[8..len - 2], - self.decoder(), - ))) + let event = BytesCData::wrap(&buf[8..len - 2], self.decoder()); + + #[cfg(feature = "span")] + let event = event.with_span(_start - 1..self.offset); + + Ok(Event::CData(event)) } BangType::DocType if uncased_starts_with(buf, b"!DOCTYPE") => { let start = buf[8..] 
@@ -118,10 +137,12 @@ impl Parser { .position(|b| !is_whitespace(*b)) .unwrap_or(len - 8); debug_assert!(start < len - 8, "DocType must have a name"); - Ok(Event::DocType(BytesText::wrap( - &buf[8 + start..], - self.decoder(), - ))) + let event = BytesText::wrap(&buf[8 + start..], self.decoder()); + + #[cfg(feature = "span")] + let event = event.with_span(_start - 1..self.offset); + + Ok(Event::DocType(event)) } _ => Err(bang_type.to_err()), } @@ -129,7 +150,7 @@ impl Parser { /// Wraps content of `buf` into the [`Event::End`] event. Does the check that /// end name matches the last opened start name if `self.check_end_names` is set. - pub fn read_end<'b>(&mut self, buf: &'b [u8]) -> Result> { + pub fn read_end<'b>(&mut self, _start: usize, buf: &'b [u8]) -> Result> { // XML standard permits whitespaces after the markup name in closing tags. // Let's strip them from the buffer before comparing tag names. let name = if self.trim_markup_names_in_closing_tags { @@ -175,17 +196,25 @@ impl Parser { } } - Ok(Event::End(BytesEnd::wrap(name.into()))) + let event = BytesEnd::wrap(name.into()); + + #[cfg(feature = "span")] + let event = event.with_span(_start..self.offset); + + Ok(Event::End(event)) } /// reads `BytesElement` starting with a `?`, /// return `Decl` or `PI` event - pub fn read_question_mark<'b>(&mut self, buf: &'b [u8]) -> Result> { + pub fn read_question_mark<'b>(&mut self, _start: usize, buf: &'b [u8]) -> Result> { let len = buf.len(); if len > 2 && buf[len - 1] == b'?' 
{ if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) { let event = BytesDecl::from_start(BytesStart::wrap(&buf[1..len - 1], 3)); + #[cfg(feature = "span")] + let event = event.with_span(_start..self.offset); + // Try getting encoding from the declaration event #[cfg(feature = "encoding")] if self.encoding.can_be_refined() { @@ -196,7 +225,12 @@ impl Parser { Ok(Event::Decl(event)) } else { - Ok(Event::PI(BytesText::wrap(&buf[1..len - 1], self.decoder()))) + let event = BytesText::wrap(&buf[1..len - 1], self.decoder()); + + #[cfg(feature = "span")] + let event = event.with_span(_start..self.offset); + + Ok(Event::PI(event)) } } else { self.offset -= len; @@ -206,19 +240,24 @@ impl Parser { /// reads `BytesElement` starting with any character except `/`, `!` or ``?` /// return `Start` or `Empty` event - pub fn read_start<'b>(&mut self, buf: &'b [u8]) -> Result> { + pub fn read_start<'b>(&mut self, start: usize, buf: &'b [u8]) -> Result> { // TODO: do this directly when reading bufreader ... 
let len = buf.len(); let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len); if let Some(&b'/') = buf.last() { let end = if name_end < len { name_end } else { len - 1 }; + let event = BytesStart::wrap(&buf[..len - 1], end); + + #[cfg(feature = "span")] + let event = event.with_span(start..self.offset); + if self.expand_empty_elements { - self.state = ParseState::Empty; + self.state = ParseState::Empty(start); self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(&buf[..end]); - Ok(Event::Start(BytesStart::wrap(&buf[..len - 1], end))) + Ok(Event::Start(event)) } else { - Ok(Event::Empty(BytesStart::wrap(&buf[..len - 1], end))) + Ok(Event::Empty(event)) } } else { // #514: Always store names event when .check_end_names == false, @@ -226,17 +265,29 @@ impl Parser { // enabled, we should have that information self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(&buf[..name_end]); - Ok(Event::Start(BytesStart::wrap(buf, name_end))) + let event = BytesStart::wrap(buf, name_end); + + // Represents ``, including `<` and `>` + #[cfg(feature = "span")] + let event = event.with_span(start..self.offset); + + Ok(Event::Start(event)) } } #[inline] - pub fn close_expanded_empty(&mut self) -> Result> { + pub fn close_expanded_empty(&mut self, _start: usize) -> Result> { self.state = ParseState::ClosedTag; let name = self .opened_buffer .split_off(self.opened_starts.pop().unwrap()); - Ok(Event::End(BytesEnd::wrap(name.into()))) + let event = BytesEnd::wrap(name.into()); + + // `offset` unchanged since emitting `Start` event + #[cfg(feature = "span")] + let event = event.with_span(_start..self.offset); + + Ok(Event::End(event)) } /// Get the decoder, used to decode bytes, read by this reader, to the strings. 
diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index f9ff503a..8354478f 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -117,6 +117,8 @@ impl<'a> Reader<&'a [u8]> { /// /// ``` /// # use pretty_assertions::assert_eq; + /// # #[cfg(feature = "span")] + /// # use quick_xml::events::Spanned; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// @@ -135,6 +137,9 @@ impl<'a> Reader<&'a [u8]> { /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// + /// # #[cfg(feature = "span")] + /// # let start = start.with_span(5..12); + /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// @@ -184,6 +189,8 @@ impl<'a> Reader<&'a [u8]> { /// ``` /// # use pretty_assertions::assert_eq; /// # use std::borrow::Cow; + /// # #[cfg(feature = "span")] + /// # use quick_xml::events::Spanned; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// @@ -199,6 +206,9 @@ impl<'a> Reader<&'a [u8]> { /// let start = BytesStart::new("html"); /// let end = start.to_end().into_owned(); /// + /// # #[cfg(feature = "span")] + /// # let start = start.with_span(5..11); + /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// // ...and disable checking of end names because we expect HTML further... 
diff --git a/tests/issues.rs b/tests/issues.rs index a19355ba..a9306d97 100644 --- a/tests/issues.rs +++ b/tests/issues.rs @@ -49,6 +49,20 @@ mod issue514 { use super::*; use pretty_assertions::assert_eq; + #[cfg(not(feature = "span"))] + fn read_event<'a>(reader: &'a mut Reader<&[u8]>) -> Event<'a> { + reader.read_event().unwrap() + } + + // We do not test correctness of spans here so just clear them + #[cfg(feature = "span")] + fn read_event<'a>(reader: &'a mut Reader<&[u8]>) -> Event<'a> { + use quick_xml::events::Spanned; + use quick_xml::reader::Span; + + reader.read_event().unwrap().with_span(Span::default()) + } + /// Check that there is no unexpected error #[test] fn no_mismatch() { @@ -60,8 +74,8 @@ mod issue514 { let html_start = BytesStart::new("html"); let html_end = html_start.to_end().into_owned(); - assert_eq!(reader.read_event().unwrap(), Event::Start(outer_start)); - assert_eq!(reader.read_event().unwrap(), Event::Start(html_start)); + assert_eq!(read_event(&mut reader), Event::Start(outer_start)); + assert_eq!(read_event(&mut reader), Event::Start(html_start)); reader.check_end_names(false); @@ -69,8 +83,8 @@ mod issue514 { reader.check_end_names(true); - assert_eq!(reader.read_event().unwrap(), Event::End(outer_end)); - assert_eq!(reader.read_event().unwrap(), Event::Eof); + assert_eq!(read_event(&mut reader), Event::End(outer_end)); + assert_eq!(read_event(&mut reader), Event::Eof); } /// Canary check that legitimate error is reported @@ -83,8 +97,8 @@ mod issue514 { let html_start = BytesStart::new("html"); let html_end = html_start.to_end().into_owned(); - assert_eq!(reader.read_event().unwrap(), Event::Start(outer_start)); - assert_eq!(reader.read_event().unwrap(), Event::Start(html_start)); + assert_eq!(read_event(&mut reader), Event::Start(outer_start)); + assert_eq!(read_event(&mut reader), Event::Start(html_start)); reader.check_end_names(false);