Skip to content

Commit 908ac15

Browse files
committed
Add new example which demonstrates new Reader API
1 parent 0d8ac7b commit 908ac15

File tree

2 files changed

+193
-1
lines changed

2 files changed

+193
-1
lines changed

examples/high-level-entities.rs

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
//! This example demonstrates how custom entities can be extracted from the DOCTYPE
2+
//! and usage of the high-level `Reader` API.
3+
//!
4+
//! NB: this example is deliberately kept simple:
5+
//! * the regex in this example is simple but brittle;
6+
//! * it does not support the use of entities in entity declaration.
7+
8+
use std::borrow::Cow;
9+
use std::collections::HashMap;
10+
use std::convert::Infallible;
11+
use std::io::{BufRead, Cursor};
12+
13+
use quick_xml::events::{BytesEnd, BytesStart, BytesText};
14+
use quick_xml::reader::{Event, Entity, EntityResolver, Reader, RawReader};
15+
use regex::bytes::Regex;
16+
17+
use pretty_assertions::assert_eq;
18+
19+
/// Main document parsed by both `borrowed()` and `buffered()`.
///
/// Its DOCTYPE declares the entities `text`, `element1` and `element2`. The body
/// references `&text;` inside an attribute value, `&element2;` as element content
/// and `&external;` after the root element. `external` is not declared in the
/// DOCTYPE; `MyResolver::resolve` handles that name specially.
const XML1: &str = r#"
20+
<!DOCTYPE test [
21+
<!ENTITY text "hello world" >
22+
<!ENTITY element1 "<dtd attr = 'Message: &text;'/>" >
23+
<!ENTITY element2 "<a>&element1;</a>" >
24+
]>
25+
<test label="Message: &text;">&element2;</test>
26+
&external;
27+
"#;
28+
29+
/// Additional document which in reality would be referenced by
30+
/// `<!ENTITY external SYSTEM "URI to the document, for example, relative file path" >`
31+
const XML2: &str = r#"
32+
<?xml version='1.0'?>
33+
<external>text</external>
34+
"#;
35+
36+
struct MyResolver<'i> {
37+
/// Map of captured internal _parsed general entities_. _Parsed_ means that
38+
/// value of the entity is parsed by XML reader.
39+
entities: HashMap<Cow<'i, [u8]>, Cow<'i, [u8]>>,
40+
/// In this example we use simple regular expression to capture entities from DTD.
41+
/// In real application you should use DTD parser.
42+
entity_re: Regex,
43+
}
44+
impl<'i> MyResolver<'i> {
45+
fn new() -> Result<Self, regex::Error> {
46+
Ok(Self {
47+
entities: Default::default(),
48+
// Capture "name" and "content" from such string:
49+
// <!ENTITY name "content" >
50+
entity_re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?,
51+
})
52+
}
53+
fn capture_borrowed(&mut self, doctype: &'i [u8]) {
54+
for cap in self.entity_re.captures_iter(doctype) {
55+
self.entities.insert(
56+
cap.get(1).unwrap().as_bytes().into(),
57+
cap.get(2).unwrap().as_bytes().into(),
58+
);
59+
}
60+
}
61+
fn capture_owned(&mut self, doctype: Vec<u8>) {
62+
for cap in self.entity_re.captures_iter(&doctype) {
63+
self.entities.insert(
64+
cap.get(1).unwrap().as_bytes().to_owned().into(),
65+
cap.get(2).unwrap().as_bytes().to_owned().into(),
66+
);
67+
}
68+
}
69+
}
70+
impl<'i> EntityResolver<'i> for MyResolver<'i> {
71+
type Error = Infallible;
72+
73+
fn capture(&mut self, doctype: BytesText<'i>) -> Result<(), Self::Error> {
74+
match doctype.into_inner() {
75+
Cow::Borrowed(doctype) => self.capture_borrowed(doctype),
76+
Cow::Owned(doctype) => self.capture_owned(doctype),
77+
}
78+
Ok(())
79+
}
80+
81+
fn resolve(&self, entity: &str) -> Option<Entity<'i>> {
82+
if entity == "external" {
83+
return Some(Entity::External(Box::new(Cursor::new(XML2.as_bytes()))));
84+
}
85+
match self.entities.get(entity.as_bytes()) {
86+
Some(Cow::Borrowed(replacement)) => Some(Entity::Internal(replacement)),
87+
Some(Cow::Owned(replacement)) => {
88+
Some(Entity::External(Box::new(Cursor::new(replacement.clone()))))
89+
}
90+
None => None,
91+
}
92+
}
93+
}
94+
95+
/// In this example the events will borrow from the first document
96+
fn borrowed() -> Result<(), Box<dyn std::error::Error>> {
97+
let mut reader = RawReader::from_str(XML1);
98+
reader.config_mut().trim_text(true);
99+
100+
let mut r = Reader::borrowed(reader, MyResolver::new()?);
101+
102+
assert_eq!(
103+
r.read_event()?,
104+
Event::Start(BytesStart::from_content(
105+
r#"test label="Message: &text;""#,
106+
4
107+
))
108+
);
109+
110+
//--------------------------------------------------------------------------
111+
// This part was inserted into original document from entity defined in DTD
112+
assert_eq!(r.read_event()?, Event::Start(BytesStart::new("a")));
113+
assert_eq!(
114+
r.read_event()?,
115+
Event::Empty(BytesStart::from_content(
116+
r#"dtd attr = 'Message: &text;'"#,
117+
3
118+
))
119+
);
120+
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("a")));
121+
//--------------------------------------------------------------------------
122+
123+
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("test")));
124+
125+
//--------------------------------------------------------------------------
126+
// Start of external document
127+
assert_eq!(
128+
r.read_event()?,
129+
Event::Start(BytesStart::new("external"))
130+
);
131+
assert_eq!(r.read_event()?, Event::Text(BytesText::new("text")));
132+
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("external")));
133+
//--------------------------------------------------------------------------
134+
135+
assert_eq!(r.read_event()?, Event::Eof);
136+
137+
Ok(())
138+
}
139+
140+
/// In this example the events will always copy data
141+
fn buffered() -> Result<(), Box<dyn std::error::Error>> {
142+
let boxed: Box<dyn BufRead> = Box::new(Cursor::new(XML1.as_bytes()));
143+
let mut reader = RawReader::from_reader(boxed);
144+
reader.config_mut().trim_text(true);
145+
146+
let mut r = Reader::buffered(reader, MyResolver::new()?);
147+
148+
assert_eq!(
149+
r.read_event()?,
150+
Event::Start(BytesStart::from_content(
151+
r#"test label="Message: &text;""#,
152+
4
153+
))
154+
);
155+
156+
//--------------------------------------------------------------------------
157+
// This part was inserted into original document from entity defined in DTD
158+
assert_eq!(r.read_event()?, Event::Start(BytesStart::new("a")));
159+
assert_eq!(
160+
r.read_event()?,
161+
Event::Empty(BytesStart::from_content(
162+
r#"dtd attr = 'Message: &text;'"#,
163+
3
164+
))
165+
);
166+
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("a")));
167+
//--------------------------------------------------------------------------
168+
169+
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("test")));
170+
171+
//--------------------------------------------------------------------------
172+
// Start of external document
173+
assert_eq!(
174+
r.read_event()?,
175+
Event::Start(BytesStart::new("external"))
176+
);
177+
assert_eq!(r.read_event()?, Event::Text(BytesText::new("text")));
178+
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("external")));
179+
//--------------------------------------------------------------------------
180+
181+
assert_eq!(r.read_event()?, Event::Eof);
182+
183+
Ok(())
184+
}
185+
186+
fn main() -> Result<(), Box<dyn std::error::Error>> {
187+
// In this example the events will borrow from the first document
188+
borrowed()?;
189+
// In this example the events will always copy data
190+
buffered()?;
191+
Ok(())
192+
}

examples/custom_entities.rs renamed to examples/low-level-entities.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//! This example demonstrates how custom entities can be extracted from the DOCTYPE,
2-
//! and later use to:
2+
//! the usage of the low-level `RawReader` API, and how the entities are later used to:
33
//! - insert new pieces of document (particular case - insert only textual content)
44
//! - decode attribute values
55
//!

0 commit comments

Comments
 (0)