Skip to content

Commit ad89a00

Browse files
committed
Properly use xsi:nil to deserialize null values via serde
This commit fixes an issue that caused `quick-xml` to try to deserialize empty tags via the serde interface even if these tags were explicitly marked as `xsi:nil="true"`.

For example, the following XML failed to deserialize before this commit:

```xml
<bar>
    <foo xsi:nil="true"/>
</bar>
```

into the following Rust type:

```rust
#[derive(Deserialize)]
struct Bar {
    foo: Option<Foo>,
}

#[derive(Deserialize)]
struct Foo {
    baz: String,
}
```

Before this commit, deserialization failed with an error message complaining that the `baz` field was missing. After this commit, the `xsi:nil` attribute is used to deserialize this into `foo: None` instead. The standard (https://www.w3.org/TR/xmlschema-1/#xsi_nil) seems to support this behaviour.

Fix #497
1 parent 8f91a9c commit ad89a00

File tree

4 files changed

+249
-13
lines changed

4 files changed

+249
-13
lines changed

src/de/map.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -540,8 +540,16 @@ where
540540
where
541541
V: Visitor<'de>,
542542
{
543-
match self.map.de.peek()? {
543+
let _ = self.map.de.peek()?;
544+
match self.map.de.readable_peek().expect("This exists as we called peek before") {
544545
DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
546+
DeEvent::Start(start)
547+
// if the `xsi:nil` attribute is set to true we got a none value
548+
if start.has_nil_attr(&self.map.de.reader.reader) =>
549+
{
550+
self.map.de.skip_nil_tag()?;
551+
visitor.visit_none()
552+
}
545553
_ => visitor.visit_some(self),
546554
}
547555
}

src/de/mod.rs

Lines changed: 75 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2010,14 +2010,15 @@ pub use self::resolver::{EntityResolver, PredefinedEntityResolver};
20102010
pub use self::simple_type::SimpleTypeDeserializer;
20112011
pub use crate::errors::serialize::DeError;
20122012

2013+
use crate::name::{LocalName, Namespace, PrefixDeclaration, ResolveResult};
20132014
use crate::{
20142015
de::map::ElementMapAccess,
20152016
encoding::Decoder,
20162017
errors::Error,
20172018
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
20182019
name::QName,
2019-
reader::Reader,
20202020
utils::CowRef,
2021+
NsReader,
20212022
};
20222023
use serde::de::{
20232024
self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
@@ -2534,6 +2535,17 @@ where
25342535
}
25352536
}
25362537

2538+
fn readable_peek(&self) -> Option<&DeEvent<'de>> {
2539+
#[cfg(not(feature = "overlapped-lists"))]
2540+
{
2541+
self.peek.as_ref()
2542+
}
2543+
#[cfg(feature = "overlapped-lists")]
2544+
{
2545+
self.read.front()
2546+
}
2547+
}
2548+
25372549
fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
25382550
// Replay skipped or peeked events
25392551
#[cfg(feature = "overlapped-lists")]
@@ -2764,6 +2776,14 @@ where
27642776
}
27652777
self.reader.read_to_end(name)
27662778
}
2779+
2780+
fn skip_nil_tag(&mut self) -> Result<(), DeError> {
2781+
let DeEvent::Start(start) = self.next()? else {
2782+
unreachable!("Only call this if the next event is a start event")
2783+
};
2784+
let name = start.name();
2785+
self.read_to_end(name)
2786+
}
27672787
}
27682788

27692789
impl<'de> Deserializer<'de, SliceReader<'de>> {
@@ -2783,7 +2803,7 @@ where
27832803
/// Create new deserializer that will borrow data from the specified string
27842804
/// and use specified entity resolver.
27852805
pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
2786-
let mut reader = Reader::from_str(source);
2806+
let mut reader = NsReader::from_str(source);
27872807
let config = reader.config_mut();
27882808
config.expand_empty_elements = true;
27892809

@@ -2826,7 +2846,7 @@ where
28262846
/// will borrow instead of copy. If you have `&[u8]` which is known to represent
28272847
/// UTF-8, you can decode it first before using [`from_str`].
28282848
pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
2829-
let mut reader = Reader::from_reader(reader);
2849+
let mut reader = NsReader::from_reader(reader);
28302850
let config = reader.config_mut();
28312851
config.expand_empty_elements = true;
28322852

@@ -2945,9 +2965,17 @@ where
29452965
where
29462966
V: Visitor<'de>,
29472967
{
2948-
match self.peek()? {
2968+
let _ = self.peek()?;
2969+
match self.readable_peek().expect("This exists as we called peek before") {
29492970
DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
29502971
DeEvent::Eof => visitor.visit_none(),
2972+
DeEvent::Start(start)
2973+
// if the `xsi:nil` attribute is set to true we got a none value
2974+
if start.has_nil_attr(&self.reader.reader) =>
2975+
{
2976+
self.skip_nil_tag()?;
2977+
visitor.visit_none()
2978+
}
29512979
_ => visitor.visit_some(self),
29522980
}
29532981
}
@@ -3071,14 +3099,22 @@ pub trait XmlRead<'i> {
30713099

30723100
/// A copy of the reader's decoder used to decode strings.
30733101
fn decoder(&self) -> Decoder;
3102+
3103+
/// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
3104+
///
3105+
/// See [`NsReader::resolve_attribute`] for details
3106+
fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>);
3107+
3108+
/// Get the current default namespace
3109+
fn default_namespace(&self) -> Option<Namespace<'_>>;
30743110
}
30753111

30763112
/// XML input source that reads from a std::io input stream.
30773113
///
30783114
/// You cannot create it, it is created automatically when you call
30793115
/// [`Deserializer::from_reader`]
30803116
pub struct IoReader<R: BufRead> {
3081-
reader: Reader<R>,
3117+
reader: NsReader<R>,
30823118
start_trimmer: StartTrimmer,
30833119
buf: Vec<u8>,
30843120
}
@@ -3113,7 +3149,7 @@ impl<R: BufRead> IoReader<R> {
31133149
/// assert_eq!(reader.error_position(), 28);
31143150
/// assert_eq!(reader.buffer_position(), 41);
31153151
/// ```
3116-
pub const fn get_ref(&self) -> &Reader<R> {
3152+
pub const fn get_ref(&self) -> &NsReader<R> {
31173153
&self.reader
31183154
}
31193155
}
@@ -3140,14 +3176,28 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
31403176
fn decoder(&self) -> Decoder {
31413177
self.reader.decoder()
31423178
}
3179+
3180+
fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
3181+
self.reader.resolve_attribute(name)
3182+
}
3183+
3184+
fn default_namespace(&self) -> Option<Namespace<'_>> {
3185+
self.reader.prefixes().find_map(|(key, value)| {
3186+
if PrefixDeclaration::Default == key {
3187+
Some(value)
3188+
} else {
3189+
None
3190+
}
3191+
})
3192+
}
31433193
}
31443194

31453195
/// XML input source that reads from a slice of bytes and can borrow from it.
31463196
///
31473197
/// You cannot create it, it is created automatically when you call
31483198
/// [`Deserializer::from_str`].
31493199
pub struct SliceReader<'de> {
3150-
reader: Reader<&'de [u8]>,
3200+
reader: NsReader<&'de [u8]>,
31513201
start_trimmer: StartTrimmer,
31523202
}
31533203

@@ -3180,7 +3230,7 @@ impl<'de> SliceReader<'de> {
31803230
/// assert_eq!(reader.error_position(), 28);
31813231
/// assert_eq!(reader.buffer_position(), 41);
31823232
/// ```
3183-
pub const fn get_ref(&self) -> &Reader<&'de [u8]> {
3233+
pub const fn get_ref(&self) -> &NsReader<&'de [u8]> {
31843234
&self.reader
31853235
}
31863236
}
@@ -3205,6 +3255,20 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
32053255
fn decoder(&self) -> Decoder {
32063256
self.reader.decoder()
32073257
}
3258+
3259+
fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
3260+
self.reader.resolve_attribute(name)
3261+
}
3262+
3263+
fn default_namespace(&self) -> Option<Namespace<'_>> {
3264+
self.reader.prefixes().find_map(|(key, value)| {
3265+
if PrefixDeclaration::Default == key {
3266+
Some(value)
3267+
} else {
3268+
None
3269+
}
3270+
})
3271+
}
32083272
}
32093273

32103274
#[cfg(test)]
@@ -3781,12 +3845,12 @@ mod tests {
37813845
"#;
37823846

37833847
let mut reader1 = IoReader {
3784-
reader: Reader::from_reader(s.as_bytes()),
3848+
reader: NsReader::from_reader(s.as_bytes()),
37853849
start_trimmer: StartTrimmer::default(),
37863850
buf: Vec::new(),
37873851
};
37883852
let mut reader2 = SliceReader {
3789-
reader: Reader::from_str(s),
3853+
reader: NsReader::from_str(s),
37903854
start_trimmer: StartTrimmer::default(),
37913855
};
37923856

@@ -3812,7 +3876,7 @@ mod tests {
38123876
"#;
38133877

38143878
let mut reader = SliceReader {
3815-
reader: Reader::from_str(s),
3879+
reader: NsReader::from_str(s),
38163880
start_trimmer: StartTrimmer::default(),
38173881
};
38183882

src/events/mod.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,17 @@ use crate::errors::{Error, IllFormedError};
5151
use crate::escape::{
5252
escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
5353
};
54+
#[cfg(feature = "serialize")]
55+
use crate::name::Namespace;
5456
use crate::name::{LocalName, QName};
5557
#[cfg(feature = "serialize")]
5658
use crate::utils::CowRef;
5759
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
5860
use attributes::{AttrError, Attribute, Attributes};
5961

62+
#[cfg(feature = "serialize")]
63+
const XSI_NAMESPACE_URL: Namespace = Namespace(b"http://www.w3.org/2001/XMLSchema-instance");
64+
6065
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
6166
///
6267
/// The name can be accessed using the [`name`] or [`local_name`] methods.
@@ -232,6 +237,34 @@ impl<'a> BytesStart<'a> {
232237
Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
233238
}
234239
}
240+
241+
/// This method checks if the current tag has an `xsi:nil` attribute set to `true` (or `1`)
242+
///
243+
/// This attribute should be used for deciding if the value is not set
244+
/// according to <https://www.w3.org/TR/xmlschema-1/#xsi_nil>
245+
#[cfg(feature = "serialize")]
246+
pub(crate) fn has_nil_attr(&self, reader: &dyn crate::de::XmlRead) -> bool {
247+
use crate::name::ResolveResult;
248+
249+
let default_ns = reader.default_namespace();
250+
self.attributes().any(|attr| {
251+
if let Ok(attr) = attr {
252+
let value_is_true = &*attr.value == b"true" || &*attr.value == b"1";
253+
let might_be_nil_attr = attr.key.0.ends_with(b"nil");
254+
if value_is_true && might_be_nil_attr {
255+
let (res, local_name) = reader.resolve_attribute(attr.key);
256+
(matches!(res, ResolveResult::Bound(XSI_NAMESPACE_URL))
257+
|| (matches!(res, ResolveResult::Unbound)
258+
&& default_ns == Some(XSI_NAMESPACE_URL)))
259+
&& local_name.as_ref() == b"nil"
260+
} else {
261+
false
262+
}
263+
} else {
264+
false
265+
}
266+
})
267+
}
235268
}
236269

237270
/// Attribute-related methods
@@ -278,7 +311,7 @@ impl<'a> BytesStart<'a> {
278311
}
279312

280313
/// Returns an iterator over the attributes of this tag.
281-
pub fn attributes(&self) -> Attributes {
314+
pub fn attributes<'b>(&'b self) -> Attributes<'b> {
282315
Attributes::wrap(&self.buf, self.name_len, false)
283316
}
284317

0 commit comments

Comments
 (0)