diff --git a/Changelog.md b/Changelog.md index 4e05eef2..cf7bdfea 100644 --- a/Changelog.md +++ b/Changelog.md @@ -37,6 +37,7 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition. - [#649]: Make features linkable and reference them in the docs. - [#619]: Allow to raise application errors in `ElementWriter::write_inner_content` (and newly added `ElementWriter::write_inner_content_async` of course). +- [#662]: Get rid of some allocations during serde deserialization. [#545]: https://github.com/tafia/quick-xml/pull/545 [#580]: https://github.com/tafia/quick-xml/issues/580 @@ -47,6 +48,7 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition. [#651]: https://github.com/tafia/quick-xml/pull/651 [#660]: https://github.com/tafia/quick-xml/pull/660 [#661]: https://github.com/tafia/quick-xml/pull/661 +[#662]: https://github.com/tafia/quick-xml/pull/662 ## 0.30.0 -- 2023-07-23 diff --git a/src/de/map.rs b/src/de/map.rs index 2202194d..3e4af396 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -12,13 +12,13 @@ use crate::{ name::QName, }; use serde::de::value::BorrowedStrDeserializer; -use serde::de::{self, DeserializeSeed, SeqAccess, Visitor}; +use serde::de::{self, DeserializeSeed, MapAccess, SeqAccess, Visitor}; use serde::serde_if_integer128; use std::borrow::Cow; use std::ops::Range; /// Defines a source that should be used to deserialize a value in the next call -/// to [`next_value_seed()`](de::MapAccess::next_value_seed) +/// to [`next_value_seed()`](MapAccess::next_value_seed) #[derive(Debug, PartialEq)] enum ValueSource { /// Source are not specified, because [`next_key_seed()`] not yet called. @@ -28,8 +28,8 @@ enum ValueSource { /// Attempt to call [`next_value_seed()`] while accessor in this state would /// return a [`DeError::KeyNotRead`] error. /// - /// [`next_key_seed()`]: de::MapAccess::next_key_seed - /// [`next_value_seed()`]: de::MapAccess::next_value_seed + /// [`next_key_seed()`]: MapAccess::next_key_seed + /// [`next_value_seed()`]: MapAccess::next_value_seed Unknown, /// Next value should be deserialized from an attribute value; value is located /// at specified span. @@ -62,7 +62,7 @@ enum ValueSource { /// When in this state, next event, returned by [`next()`], will be a [`Start`], /// which represents both a key, and a value. Value would be deserialized from /// the whole element and how is will be done determined by the value deserializer. - /// The [`MapAccess`] do not consume any events in that state. + /// The [`ElementMapAccess`] do not consume any events in that state. /// /// Because in that state any encountered `` is mapped to the [`VALUE_KEY`] /// field, it is possible to use tag name as an enum discriminator, so `enum`s @@ -105,7 +105,7 @@ enum ValueSource { /// [`next()`]: Deserializer::next() /// [`name()`]: BytesStart::name() /// [`Text`]: Self::Text - /// [list of known fields]: MapAccess::fields + /// [list of known fields]: ElementMapAccess::fields Content, /// Next value should be deserialized from an element with a dedicated name. /// If deserialized type is a sequence, then that sequence will collect all @@ -118,7 +118,7 @@ enum ValueSource { /// When in this state, next event, returned by [`next()`], will be a [`Start`], /// which represents both a key, and a value. Value would be deserialized from /// the whole element and how is will be done determined by the value deserializer. - /// The [`MapAccess`] do not consume any events in that state. + /// The [`ElementMapAccess`] do not consume any events in that state. /// /// An illustration below shows, what data is used to deserialize key and value: /// ```xml @@ -164,16 +164,16 @@ enum ValueSource { /// internal buffer of deserializer (i.e. deserializer itself) or an input /// (in that case it is possible to approach zero-copy deserialization). /// -/// - `'a` lifetime represents a parent deserializer, which could own the data +/// - `'d` lifetime represents a parent deserializer, which could own the data /// buffer. -pub(crate) struct MapAccess<'de, 'a, R, E> +pub(crate) struct ElementMapAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Tag -- owner of attributes start: BytesStart<'de>, - de: &'a mut Deserializer<'de, R, E>, + de: &'d mut Deserializer<'de, R, E>, /// State of the iterator over attributes. Contains the next position in the /// inner `start` slice, from which next attribute should be parsed. iter: IterState, @@ -192,18 +192,18 @@ where has_value_field: bool, } -impl<'de, 'a, R, E> MapAccess<'de, 'a, R, E> +impl<'de, 'd, R, E> ElementMapAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { - /// Create a new MapAccess + /// Create a new ElementMapAccess pub fn new( - de: &'a mut Deserializer<'de, R, E>, + de: &'d mut Deserializer<'de, R, E>, start: BytesStart<'de>, fields: &'static [&'static str], ) -> Result { - Ok(MapAccess { + Ok(Self { de, iter: IterState::new(start.name().as_ref().len(), false), start, @@ -214,7 +214,7 @@ where } } -impl<'de, 'a, R, E> de::MapAccess<'de> for MapAccess<'de, 'a, R, E> +impl<'de, 'd, R, E> MapAccess<'de> for ElementMapAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, @@ -289,9 +289,14 @@ where seed.deserialize(de).map(Some) } // Stop iteration after reaching a closing tag - DeEvent::End(e) if e.name() == self.start.name() => Ok(None), - // This is a unmatched closing tag, so the XML is invalid - DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())), + // The matching tag name is guaranteed by the reader if our + // deserializer implementation is correct + DeEvent::End(e) => { + debug_assert_eq!(self.start.name(), e.name()); + // Consume End + self.de.next()?; + Ok(None) + } // We cannot get `Eof` legally, because we always inside of the // opened tag `self.start` DeEvent::Eof => Err(DeError::UnexpectedEof), @@ -403,7 +408,7 @@ macro_rules! forward { /// with the same deserializer; /// - sequences, tuples and tuple structs are deserialized by iterating within the /// parent tag and deserializing each tag or text content using [`SeqItemDeserializer`]; -/// - structs and maps are deserialized using new instance of [`MapAccess`]; +/// - structs and maps are deserialized using new instance of [`ElementMapAccess`]; /// - enums: /// - in case of [`DeEvent::Text`] event the text content is deserialized as /// a `$text` variant. Enum content is deserialized from the text using @@ -419,14 +424,14 @@ macro_rules! forward { /// /// [`deserialize_tuple`]: #method.deserialize_tuple /// [`deserialize_struct`]: #method.deserialize_struct -struct MapValueDeserializer<'de, 'a, 'm, R, E> +struct MapValueDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Access to the map that created this deserializer. Gives access to the /// context, such as list of fields, that current map known about. - map: &'m mut MapAccess<'de, 'a, R, E>, + map: &'m mut ElementMapAccess<'de, 'd, R, E>, /// Determines, should [`Deserializer::read_string_impl()`] expand the second /// level of tags or not. /// @@ -504,7 +509,7 @@ where allow_start: bool, } -impl<'de, 'a, 'm, R, E> MapValueDeserializer<'de, 'a, 'm, R, E> +impl<'de, 'd, 'm, R, E> MapValueDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, @@ -520,7 +525,7 @@ where } } -impl<'de, 'a, 'm, R, E> de::Deserializer<'de> for MapValueDeserializer<'de, 'a, 'm, R, E> +impl<'de, 'd, 'm, R, E> de::Deserializer<'de> for MapValueDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, @@ -543,11 +548,14 @@ where forward!(deserialize_any); - fn deserialize_option(self, visitor: V) -> Result + fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { - deserialize_option!(self.map.de, self, visitor) + match self.map.de.peek()? { + DeEvent::Text(t) if t.is_empty() => visitor.visit_none(), + _ => visitor.visit_some(self), + } } /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] @@ -582,7 +590,7 @@ where // Clone is cheap if event borrows from the input DeEvent::Start(e) => TagFilter::Include(e.clone()), // SAFETY: we use that deserializer with `allow_start == true` - // only from the `MapAccess::next_value_seed` and only when we + // only from the `ElementMapAccess::next_value_seed` and only when we // peeked `Start` event _ => unreachable!(), } @@ -597,11 +605,6 @@ where filter, }) } - - #[inline] - fn is_human_readable(&self) -> bool { - self.map.de.is_human_readable() - } } //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -693,14 +696,14 @@ impl<'de> TagFilter<'de> { /// /// [`Text`]: crate::events::Event::Text /// [`CData`]: crate::events::Event::CData -struct MapValueSeqAccess<'de, 'a, 'm, R, E> +struct MapValueSeqAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Accessor to a map that creates this accessor and to a deserializer for /// a sequence items. - map: &'m mut MapAccess<'de, 'a, R, E>, + map: &'m mut ElementMapAccess<'de, 'd, R, E>, /// Filter that determines whether a tag is a part of this sequence. /// /// When feature [`overlapped-lists`] is not activated, iteration will stop @@ -720,7 +723,7 @@ where } #[cfg(feature = "overlapped-lists")] -impl<'de, 'a, 'm, R, E> Drop for MapValueSeqAccess<'de, 'a, 'm, R, E> +impl<'de, 'd, 'm, R, E> Drop for MapValueSeqAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, @@ -730,7 +733,7 @@ where } } -impl<'de, 'a, 'm, R, E> SeqAccess<'de> for MapValueSeqAccess<'de, 'a, 'm, R, E> +impl<'de, 'd, 'm, R, E> SeqAccess<'de> for MapValueSeqAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, @@ -810,7 +813,7 @@ where /// contains something else other than text, an error is returned, but if it /// contains a text and something else (for example, `text`), /// then the trail is just ignored; -/// - structs and maps are deserialized using new [`MapAccess`]; +/// - structs and maps are deserialized using new [`ElementMapAccess`]; /// - enums: /// - in case of [`DeEvent::Text`] event the text content is deserialized as /// a `$text` variant. Enum content is deserialized from the text using @@ -826,17 +829,17 @@ where /// /// [`deserialize_tuple`]: #method.deserialize_tuple /// [`deserialize_struct`]: #method.deserialize_struct -struct SeqItemDeserializer<'de, 'a, 'm, R, E> +struct SeqItemDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Access to the map that created this deserializer. Gives access to the /// context, such as list of fields, that current map known about. - map: &'m mut MapAccess<'de, 'a, R, E>, + map: &'m mut ElementMapAccess<'de, 'd, R, E>, } -impl<'de, 'a, 'm, R, E> SeqItemDeserializer<'de, 'a, 'm, R, E> +impl<'de, 'd, 'm, R, E> SeqItemDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, @@ -852,7 +855,7 @@ where } } -impl<'de, 'a, 'm, R, E> de::Deserializer<'de> for SeqItemDeserializer<'de, 'a, 'm, R, E> +impl<'de, 'd, 'm, R, E> de::Deserializer<'de> for SeqItemDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, @@ -875,25 +878,27 @@ where forward!(deserialize_any); - fn deserialize_option(self, visitor: V) -> Result + fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { - deserialize_option!(self.map.de, self, visitor) + match self.map.de.peek()? { + DeEvent::Text(t) if t.is_empty() => visitor.visit_none(), + _ => visitor.visit_some(self), + } } /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] - /// with the [`SimpleTypeDeserializer`]. + /// with this deserializer. fn deserialize_newtype_struct( - mut self, + self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { - let text = self.read_string()?; - visitor.visit_newtype_struct(SimpleTypeDeserializer::from_text(text)) + visitor.visit_newtype_struct(self) } /// This method deserializes a sequence inside of element that itself is a @@ -915,11 +920,6 @@ where let text = self.read_string()?; SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor) } - - #[inline] - fn is_human_readable(&self) -> bool { - self.map.de.is_human_readable() - } } //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/de/mod.rs b/src/de/mod.rs index e3e37171..7306f1db 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -1954,16 +1954,6 @@ macro_rules! deserialize_primitives { }; } -macro_rules! deserialize_option { - ($de:expr, $deserializer:ident, $visitor:ident) => { - match $de.peek()? { - DeEvent::Text(t) if t.is_empty() => $visitor.visit_none(), - DeEvent::Eof => $visitor.visit_none(), - _ => $visitor.visit_some($deserializer), - } - }; -} - mod key; mod map; mod resolver; @@ -1974,6 +1964,7 @@ pub use crate::errors::serialize::DeError; pub use resolver::{EntityResolver, NoEntityResolver}; use crate::{ + de::map::ElementMapAccess, encoding::Decoder, errors::Error, events::{BytesCData, BytesEnd, BytesStart, BytesText, Event}, @@ -2124,6 +2115,11 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> { } } + /// Returns `true` if all events was consumed + fn is_empty(&self) -> bool { + matches!(self.lookahead, Ok(PayloadEvent::Eof)) + } + /// Read next event and put it in lookahead, return the current lookahead #[inline(always)] fn next_impl(&mut self) -> Result, DeError> { @@ -2220,8 +2216,8 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> { let result1 = self.reader.read_to_end(name); let result2 = self.reader.read_to_end(name); - // In case of error `next` returns `Eof` - self.lookahead = self.reader.next(); + // In case of error `next_impl` returns `Eof` + let _ = self.next_impl(); result1?; result2?; } @@ -2229,13 +2225,13 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> { // Because this is end event, we already consume the whole tree, so // nothing to do, just update lookahead Ok(PayloadEvent::End(ref e)) if e.name() == name => { - self.lookahead = self.reader.next(); + let _ = self.next_impl(); } Ok(_) => { let result = self.reader.read_to_end(name); - // In case of error `next` returns `Eof` - self.lookahead = self.reader.next(); + // In case of error `next_impl` returns `Eof` + let _ = self.next_impl(); result?; } // Read next lookahead event, unpack error from the current lookahead @@ -2383,6 +2379,19 @@ where } } + /// Returns `true` if all events was consumed. + pub fn is_empty(&self) -> bool { + #[cfg(feature = "overlapped-lists")] + if self.read.is_empty() { + return self.reader.is_empty(); + } + #[cfg(not(feature = "overlapped-lists"))] + if self.peek.is_none() { + return self.reader.is_empty(); + } + false + } + /// Set the maximum number of events that could be skipped during deserialization /// of sequences. /// @@ -2794,13 +2803,7 @@ where V: Visitor<'de>, { match self.next()? { - DeEvent::Start(e) => { - let name = e.name().as_ref().to_vec(); - let map = map::MapAccess::new(self, e, fields)?; - let value = visitor.visit_map(map)?; - self.read_to_end(QName(&name))?; - Ok(value) - } + DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self, e, fields)?), DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())), DeEvent::Text(_) => Err(DeError::ExpectedStart), DeEvent::Eof => Err(DeError::UnexpectedEof), @@ -2875,7 +2878,11 @@ where where V: Visitor<'de>, { - deserialize_option!(self, self, visitor) + match self.peek()? { + DeEvent::Text(t) if t.is_empty() => visitor.visit_none(), + DeEvent::Eof => visitor.visit_none(), + _ => visitor.visit_some(self), + } } fn deserialize_any(self, visitor: V) -> Result @@ -2906,7 +2913,11 @@ where T: DeserializeSeed<'de>, { match self.peek()? { - DeEvent::Eof => Ok(None), + DeEvent::Eof => { + // We need to consume event in order to self.is_empty() worked + self.next()?; + Ok(None) + } // Start(tag), End(tag), Text _ => seed.deserialize(&mut **self).map(Some), diff --git a/src/de/var.rs b/src/de/var.rs index 9c97a9c1..fb12bcac 100644 --- a/src/de/var.rs +++ b/src/de/var.rs @@ -9,33 +9,33 @@ use serde::de::value::BorrowedStrDeserializer; use serde::de::{self, DeserializeSeed, Deserializer as _, Visitor}; /// An enum access -pub struct EnumAccess<'de, 'a, R, E> +pub struct EnumAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { - de: &'a mut Deserializer<'de, R, E>, + de: &'d mut Deserializer<'de, R, E>, } -impl<'de, 'a, R, E> EnumAccess<'de, 'a, R, E> +impl<'de, 'd, R, E> EnumAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { - pub fn new(de: &'a mut Deserializer<'de, R, E>) -> Self { + pub fn new(de: &'d mut Deserializer<'de, R, E>) -> Self { EnumAccess { de } } } -impl<'de, 'a, R, E> de::EnumAccess<'de> for EnumAccess<'de, 'a, R, E> +impl<'de, 'd, R, E> de::EnumAccess<'de> for EnumAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; - type Variant = VariantAccess<'de, 'a, R, E>; + type Variant = VariantAccess<'de, 'd, R, E>; - fn variant_seed(self, seed: V) -> Result<(V::Value, VariantAccess<'de, 'a, R, E>), DeError> + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { @@ -62,25 +62,25 @@ where } } -pub struct VariantAccess<'de, 'a, R, E> +pub struct VariantAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { - de: &'a mut Deserializer<'de, R, E>, + de: &'d mut Deserializer<'de, R, E>, /// `true` if variant should be deserialized from a textual content /// and `false` if from tag is_text: bool, } -impl<'de, 'a, R, E> de::VariantAccess<'de> for VariantAccess<'de, 'a, R, E> +impl<'de, 'd, R, E> de::VariantAccess<'de> for VariantAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; - fn unit_variant(self) -> Result<(), DeError> { + fn unit_variant(self) -> Result<(), Self::Error> { match self.de.next()? { // Consume subtree DeEvent::Start(e) => self.de.read_to_end(e.name()), @@ -92,7 +92,7 @@ where } } - fn newtype_variant_seed(self, seed: T) -> Result + fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { @@ -107,7 +107,7 @@ where } } - fn tuple_variant(self, len: usize, visitor: V) -> Result + fn tuple_variant(self, len: usize, visitor: V) -> Result where V: Visitor<'de>, { @@ -128,7 +128,7 @@ where self, fields: &'static [&'static str], visitor: V, - ) -> Result + ) -> Result where V: Visitor<'de>, { diff --git a/tests/helpers/mod.rs b/tests/helpers/mod.rs index 6d956768..1af3ea74 100644 --- a/tests/helpers/mod.rs +++ b/tests/helpers/mod.rs @@ -17,10 +17,7 @@ where // If type was deserialized, the whole XML document should be consumed if let Ok(_) = result { - match <()>::deserialize(&mut de) { - Err(DeError::UnexpectedEof) => (), - e => panic!("Expected end `UnexpectedEof`, but got {:?}", e), - } + assert!(de.is_empty(), "the whole XML document should be consumed"); } result diff --git a/tests/serde-issues.rs b/tests/serde-issues.rs index 188c3fea..3f3cb7ab 100644 --- a/tests/serde-issues.rs +++ b/tests/serde-issues.rs @@ -5,7 +5,8 @@ use pretty_assertions::assert_eq; use quick_xml::de::from_str; use quick_xml::se::{to_string, to_string_with_root}; -use serde::{Deserialize, Deserializer, Serialize}; +use serde::de::{Deserializer, IgnoredAny}; +use serde::{Deserialize, Serialize}; use std::collections::HashMap; /// Regression tests for https://github.com/tafia/quick-xml/issues/252. @@ -393,10 +394,13 @@ fn issue580() { #[derive(Debug, PartialEq, Eq)] struct Item; impl Item { - fn parse<'de, D>(_deserializer: D) -> Result + fn parse<'de, D>(deserializer: D) -> Result where D: Deserializer<'de>, { + // We should consume something from the deserializer, otherwise this + // leads to infinity loop + IgnoredAny::deserialize(deserializer)?; Ok(Item) } }