Skip to content

Commit a44792f

Browse files
authored
Merge pull request #756 from Mingun/pi-type
Introduce dedicated type for processing instruction
2 parents 8d38e4c + 875a10f commit a44792f

File tree

10 files changed

+204
-43
lines changed

10 files changed

+204
-43
lines changed

Changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616

1717
### Misc Changes
1818

19+
- [#650]: Change the type of `Event::PI` to a new dedicated `BytesPI` type.
20+
21+
[#650]: https://github.com/tafia/quick-xml/issues/650
22+
1923

2024
## 0.32.0 -- 2024-06-10
2125

fuzz/fuzz_targets/fuzz_target_1.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ where
4141
}
4242
Ok(Event::Text(ref e))
4343
| Ok(Event::Comment(ref e))
44-
| Ok(Event::PI(ref e))
4544
| Ok(Event::DocType(ref e)) => {
4645
debug_format!(e);
4746
if let Err(err) = e.unescape() {
@@ -56,6 +55,9 @@ where
5655
break;
5756
}
5857
}
58+
Ok(Event::PI(ref e)) => {
59+
debug_format!(e);
60+
}
5961
Ok(Event::Decl(ref e)) => {
6062
debug_format!(e);
6163
let _ = black_box(e.version());

fuzz/fuzz_targets/structured_roundtrip.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
use arbitrary::{Arbitrary, Unstructured};
44
use libfuzzer_sys::fuzz_target;
5-
use quick_xml::events::{BytesCData, BytesText, Event};
5+
use quick_xml::events::{BytesCData, BytesPI, BytesText, Event};
66
use quick_xml::reader::{Config, NsReader, Reader};
77
use quick_xml::writer::Writer;
88
use std::{hint::black_box, io::Cursor};
@@ -71,7 +71,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
7171
_ = element_writer.write_cdata_content(BytesCData::new(*text))?;
7272
}
7373
WritePiContent(text) => {
74-
_ = element_writer.write_pi_content(BytesText::from_escaped(*text))?;
74+
_ = element_writer.write_pi_content(BytesPI::new(*text))?;
7575
}
7676
WriteEmpty => {
7777
_ = element_writer.write_empty()?;

src/events/mod.rs

Lines changed: 153 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ use crate::escape::{
5050
escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
5151
};
5252
use crate::name::{LocalName, QName};
53-
use crate::reader::is_whitespace;
53+
use crate::reader::{is_whitespace, name_len};
5454
use crate::utils::write_cow_string;
5555
#[cfg(feature = "serialize")]
5656
use crate::utils::CowRef;
@@ -992,6 +992,157 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
992992

993993
////////////////////////////////////////////////////////////////////////////////////////////////////
994994

995+
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
996+
///
997+
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
998+
#[derive(Clone, Eq, PartialEq)]
999+
pub struct BytesPI<'a> {
1000+
content: BytesStart<'a>,
1001+
}
1002+
1003+
impl<'a> BytesPI<'a> {
1004+
/// Creates a new `BytesPI` from a byte sequence in the specified encoding.
1005+
#[inline]
1006+
pub(crate) fn wrap(content: &'a [u8], target_len: usize) -> Self {
1007+
Self {
1008+
content: BytesStart::wrap(content, target_len),
1009+
}
1010+
}
1011+
1012+
/// Creates a new `BytesPI` from a string.
1013+
///
1014+
/// # Warning
1015+
///
1016+
/// `content` must not contain the `?>` sequence.
1017+
#[inline]
1018+
pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
1019+
let buf = str_cow_to_bytes(content);
1020+
let name_len = name_len(&buf);
1021+
Self {
1022+
content: BytesStart { buf, name_len },
1023+
}
1024+
}
1025+
1026+
/// Ensures that all data is owned to extend the object's lifetime if
1027+
/// necessary.
1028+
#[inline]
1029+
pub fn into_owned(self) -> BytesPI<'static> {
1030+
BytesPI {
1031+
content: self.content.into_owned().into(),
1032+
}
1033+
}
1034+
1035+
/// Extracts the inner `Cow` from the `BytesPI` event container.
1036+
#[inline]
1037+
pub fn into_inner(self) -> Cow<'a, [u8]> {
1038+
self.content.buf
1039+
}
1040+
1041+
/// Converts the event into a borrowed event.
1042+
#[inline]
1043+
pub fn borrow(&self) -> BytesPI {
1044+
BytesPI {
1045+
content: self.content.borrow(),
1046+
}
1047+
}
1048+
1049+
/// A target used to identify the application to which the instruction is directed.
1050+
///
1051+
/// # Example
1052+
///
1053+
/// ```
1054+
/// # use pretty_assertions::assert_eq;
1055+
/// use quick_xml::events::BytesPI;
1056+
///
1057+
/// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1058+
/// assert_eq!(instruction.target(), b"xml-stylesheet");
1059+
/// ```
1060+
#[inline]
1061+
pub fn target(&self) -> &[u8] {
1062+
self.content.name().0
1063+
}
1064+
1065+
/// Content of the processing instruction. Contains everything between target
1066+
/// name and the end of the instruction. A direct consequence is that the first
1067+
/// character is always a space character.
1068+
///
1069+
/// # Example
1070+
///
1071+
/// ```
1072+
/// # use pretty_assertions::assert_eq;
1073+
/// use quick_xml::events::BytesPI;
1074+
///
1075+
/// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1076+
/// assert_eq!(instruction.content(), br#" href="style.css""#);
1077+
/// ```
1078+
#[inline]
1079+
pub fn content(&self) -> &[u8] {
1080+
self.content.attributes_raw()
1081+
}
1082+
1083+
/// A view of the processing instructions' content as a list of key-value pairs.
1084+
///
1085+
/// Key-value pairs are used in some processing instructions, for example in
1086+
/// `<?xml-stylesheet?>`.
1087+
///
1088+
/// Returned iterator does not validate attribute values as may required by
1089+
/// target's rules. For example, it doesn't check that substring `?>` is not
1090+
/// present in the attribute value. That shouldn't be the problem when event
1091+
/// is produced by the reader, because reader detects end of processing instruction
1092+
/// by the first `?>` sequence, as required by the specification, and therefore
1093+
/// this sequence cannot appear inside it.
1094+
///
1095+
/// # Example
1096+
///
1097+
/// ```
1098+
/// # use pretty_assertions::assert_eq;
1099+
/// use std::borrow::Cow;
1100+
/// use quick_xml::events::attributes::Attribute;
1101+
/// use quick_xml::events::BytesPI;
1102+
/// use quick_xml::name::QName;
1103+
///
1104+
/// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1105+
/// for attr in instruction.attributes() {
1106+
/// assert_eq!(attr, Ok(Attribute {
1107+
/// key: QName(b"href"),
1108+
/// value: Cow::Borrowed(b"style.css"),
1109+
/// }));
1110+
/// }
1111+
/// ```
1112+
#[inline]
1113+
pub fn attributes(&self) -> Attributes {
1114+
self.content.attributes()
1115+
}
1116+
}
1117+
1118+
impl<'a> Debug for BytesPI<'a> {
1119+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1120+
write!(f, "BytesPI {{ content: ")?;
1121+
write_cow_string(f, &self.content.buf)?;
1122+
write!(f, " }}")
1123+
}
1124+
}
1125+
1126+
impl<'a> Deref for BytesPI<'a> {
1127+
type Target = [u8];
1128+
1129+
fn deref(&self) -> &[u8] {
1130+
&self.content
1131+
}
1132+
}
1133+
1134+
#[cfg(feature = "arbitrary")]
1135+
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
1136+
fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
1137+
Ok(Self::new(<&str>::arbitrary(u)?))
1138+
}
1139+
fn size_hint(depth: usize) -> (usize, Option<usize>) {
1140+
return <&str as arbitrary::Arbitrary>::size_hint(depth);
1141+
}
1142+
}
1143+
1144+
////////////////////////////////////////////////////////////////////////////////////////////////////
1145+
9951146
/// Event emitted by [`Reader::read_event_into`].
9961147
///
9971148
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
@@ -1013,7 +1164,7 @@ pub enum Event<'a> {
10131164
/// XML declaration `<?xml ...?>`.
10141165
Decl(BytesDecl<'a>),
10151166
/// Processing instruction `<?...?>`.
1016-
PI(BytesText<'a>),
1167+
PI(BytesPI<'a>),
10171168
/// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
10181169
DocType(BytesText<'a>),
10191170
/// End of XML document.

src/reader/mod.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -996,6 +996,16 @@ pub(crate) const fn is_whitespace(b: u8) -> bool {
996996
matches!(b, b' ' | b'\r' | b'\n' | b'\t')
997997
}
998998

999+
/// Calculates name from an element-like content. Name is the first word in `content`,
1000+
/// where word boundaries is XML space characters.
1001+
#[inline]
1002+
pub(crate) fn name_len(content: &[u8]) -> usize {
1003+
content
1004+
.iter()
1005+
.position(|&b| is_whitespace(b))
1006+
.unwrap_or(content.len())
1007+
}
1008+
9991009
////////////////////////////////////////////////////////////////////////////////////////////////////
10001010

10011011
#[cfg(test)]
@@ -1687,7 +1697,7 @@ mod test {
16871697

16881698
/// Ensures, that no empty `Text` events are generated
16891699
mod $read_event {
1690-
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
1700+
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
16911701
use crate::reader::Reader;
16921702
use pretty_assertions::assert_eq;
16931703

@@ -1757,7 +1767,7 @@ mod test {
17571767

17581768
assert_eq!(
17591769
reader.$read_event($buf) $(.$await)? .unwrap(),
1760-
Event::PI(BytesText::from_escaped("xml-stylesheet '? >\" "))
1770+
Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
17611771
);
17621772
}
17631773

@@ -1838,7 +1848,7 @@ mod test {
18381848
$(, $async:ident, $await:ident)?
18391849
) => {
18401850
mod small_buffers {
1841-
use crate::events::{BytesCData, BytesDecl, BytesStart, BytesText, Event};
1851+
use crate::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event};
18421852
use crate::reader::Reader;
18431853
use pretty_assertions::assert_eq;
18441854

@@ -1872,7 +1882,7 @@ mod test {
18721882

18731883
assert_eq!(
18741884
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1875-
Event::PI(BytesText::new("pi"))
1885+
Event::PI(BytesPI::new("pi"))
18761886
);
18771887
assert_eq!(
18781888
reader.$read_event(&mut buf) $(.$await)? .unwrap(),

src/reader/state.rs

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ use encoding_rs::UTF_8;
33

44
use crate::encoding::Decoder;
55
use crate::errors::{Error, IllFormedError, Result, SyntaxError};
6-
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
6+
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
77
#[cfg(feature = "encoding")]
88
use crate::reader::EncodingRef;
9-
use crate::reader::{is_whitespace, BangType, Config, ParseState};
9+
use crate::reader::{is_whitespace, name_len, BangType, Config, ParseState};
1010

1111
/// A struct that holds a current reader state and a parser configuration.
1212
/// It is independent on a way of reading data: the reader feed data into it and
@@ -242,7 +242,7 @@ impl ReaderState {
242242

243243
Ok(Event::Decl(event))
244244
} else {
245-
Ok(Event::PI(BytesText::wrap(content, self.decoder())))
245+
Ok(Event::PI(BytesPI::wrap(content, name_len(content))))
246246
}
247247
} else {
248248
// <?....EOF
@@ -258,31 +258,27 @@ impl ReaderState {
258258
/// # Parameters
259259
/// - `content`: Content of a tag between `<` and `>`
260260
pub fn emit_start<'b>(&mut self, content: &'b [u8]) -> Result<Event<'b>> {
261-
let len = content.len();
262-
let name_end = content
263-
.iter()
264-
.position(|&b| is_whitespace(b))
265-
.unwrap_or(len);
266-
if let Some(&b'/') = content.last() {
261+
if let Some(content) = content.strip_suffix(b"/") {
267262
// This is self-closed tag `<something/>`
268-
let name_len = if name_end < len { name_end } else { len - 1 };
269-
let event = BytesStart::wrap(&content[..len - 1], name_len);
263+
let event = BytesStart::wrap(content, name_len(content));
270264

271265
if self.config.expand_empty_elements {
272266
self.state = ParseState::Empty;
273267
self.opened_starts.push(self.opened_buffer.len());
274-
self.opened_buffer.extend(&content[..name_len]);
268+
self.opened_buffer.extend(event.name().as_ref());
275269
Ok(Event::Start(event))
276270
} else {
277271
Ok(Event::Empty(event))
278272
}
279273
} else {
274+
let event = BytesStart::wrap(content, name_len(content));
275+
280276
// #514: Always store names event when .check_end_names == false,
281277
// because checks can be temporary disabled and when they would be
282278
// enabled, we should have that information
283279
self.opened_starts.push(self.opened_buffer.len());
284-
self.opened_buffer.extend(&content[..name_end]);
285-
Ok(Event::Start(BytesStart::wrap(content, name_end)))
280+
self.opened_buffer.extend(event.name().as_ref());
281+
Ok(Event::Start(event))
286282
}
287283
}
288284

src/writer.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::result::Result as StdResult;
66

77
use crate::encoding::UTF8_BOM;
88
use crate::errors::{Error, Result};
9-
use crate::events::{attributes::Attribute, BytesCData, BytesStart, BytesText, Event};
9+
use crate::events::{attributes::Attribute, BytesCData, BytesPI, BytesStart, BytesText, Event};
1010

1111
#[cfg(feature = "async-tokio")]
1212
mod async_tokio;
@@ -551,10 +551,10 @@ impl<'a, W: Write> ElementWriter<'a, W> {
551551
}
552552

553553
/// Write a processing instruction `<?...?>` inside the current element.
554-
pub fn write_pi_content(self, text: BytesText) -> Result<&'a mut Writer<W>> {
554+
pub fn write_pi_content(self, pi: BytesPI) -> Result<&'a mut Writer<W>> {
555555
self.writer
556556
.write_event(Event::Start(self.start_tag.borrow()))?;
557-
self.writer.write_event(Event::PI(text))?;
557+
self.writer.write_event(Event::PI(pi))?;
558558
self.writer
559559
.write_event(Event::End(self.start_tag.to_end()))?;
560560
Ok(self.writer)

0 commit comments

Comments
 (0)