|
| 1 | +//! This example demonstrates how custom entities can be extracted from the DOCTYPE |
| 2 | +//! and how to use the high-level `Reader` API. |
| 3 | +//! |
| 4 | +//! NB: this example is deliberately kept simple: |
| 5 | +//! * the regex in this example is simple but brittle; |
| 6 | +//! * it does not support the use of entities in entity declarations. |
| 7 | +
|
| 8 | +use std::borrow::Cow; |
| 9 | +use std::collections::HashMap; |
| 10 | +use std::convert::Infallible; |
| 11 | +use std::io::{BufRead, Cursor}; |
| 12 | + |
| 13 | +use quick_xml::events::{BytesEnd, BytesStart, BytesText}; |
| 14 | +use quick_xml::reader::{Event, Entity, EntityResolver, Reader, RawReader}; |
| 15 | +use regex::bytes::Regex; |
| 16 | + |
| 17 | +use pretty_assertions::assert_eq; |
| 18 | + |
/// Main document read by both `borrowed()` and `buffered()`.
///
/// Its DOCTYPE declares three internal entities (`text`, `element1` and
/// `element2`). The `&external;` reference after the root element is not
/// declared in the DTD; `MyResolver::resolve` answers it with the content
/// of `XML2`.
const XML1: &str = r#"
<!DOCTYPE test [
<!ENTITY text "hello world" >
<!ENTITY element1 "<dtd attr = 'Message: &text;'/>" >
<!ENTITY element2 "<a>&element1;</a>" >
]>
<test label="Message: &text;">&element2;</test>
&external;
"#;
| 28 | + |
/// Additional document which, in a real application, would be referenced by
/// `<!ENTITY external SYSTEM "URI to the document, for example, relative file path" >`.
///
/// In this example it is returned directly by `MyResolver::resolve` whenever
/// the entity named `external` is requested.
const XML2: &str = r#"
<?xml version='1.0'?>
<external>text</external>
"#;
| 35 | + |
/// Entity resolver used by this example.
///
/// It remembers the entity declarations found in the DOCTYPE (extracted with
/// a regular expression) and hands their replacement text back when asked to
/// resolve an entity by name. The lifetime `'i` is the lifetime of the input
/// that captured names and values may borrow from.
struct MyResolver<'i> {
    /// Map of captured internal _parsed general entities_. _Parsed_ means that
    /// value of the entity is parsed by XML reader.
    ///
    /// Keys are entity names, values are the replacement text; both are either
    /// borrowed from the input or owned copies, depending on how the DOCTYPE
    /// was delivered.
    entities: HashMap<Cow<'i, [u8]>, Cow<'i, [u8]>>,
    /// In this example we use simple regular expression to capture entities from DTD.
    /// In real application you should use DTD parser.
    entity_re: Regex,
}
| 44 | +impl<'i> MyResolver<'i> { |
| 45 | + fn new() -> Result<Self, regex::Error> { |
| 46 | + Ok(Self { |
| 47 | + entities: Default::default(), |
| 48 | + // Capture "name" and "content" from such string: |
| 49 | + // <!ENTITY name "content" > |
| 50 | + entity_re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?, |
| 51 | + }) |
| 52 | + } |
| 53 | + fn capture_borrowed(&mut self, doctype: &'i [u8]) { |
| 54 | + for cap in self.entity_re.captures_iter(doctype) { |
| 55 | + self.entities.insert( |
| 56 | + cap.get(1).unwrap().as_bytes().into(), |
| 57 | + cap.get(2).unwrap().as_bytes().into(), |
| 58 | + ); |
| 59 | + } |
| 60 | + } |
| 61 | + fn capture_owned(&mut self, doctype: Vec<u8>) { |
| 62 | + for cap in self.entity_re.captures_iter(&doctype) { |
| 63 | + self.entities.insert( |
| 64 | + cap.get(1).unwrap().as_bytes().to_owned().into(), |
| 65 | + cap.get(2).unwrap().as_bytes().to_owned().into(), |
| 66 | + ); |
| 67 | + } |
| 68 | + } |
| 69 | +} |
| 70 | +impl<'i> EntityResolver<'i> for MyResolver<'i> { |
| 71 | + type Error = Infallible; |
| 72 | + |
| 73 | + fn capture(&mut self, doctype: BytesText<'i>) -> Result<(), Self::Error> { |
| 74 | + match doctype.into_inner() { |
| 75 | + Cow::Borrowed(doctype) => self.capture_borrowed(doctype), |
| 76 | + Cow::Owned(doctype) => self.capture_owned(doctype), |
| 77 | + } |
| 78 | + Ok(()) |
| 79 | + } |
| 80 | + |
| 81 | + fn resolve(&self, entity: &str) -> Option<Entity<'i>> { |
| 82 | + if entity == "external" { |
| 83 | + return Some(Entity::External(Box::new(Cursor::new(XML2.as_bytes())))); |
| 84 | + } |
| 85 | + match self.entities.get(entity.as_bytes()) { |
| 86 | + Some(Cow::Borrowed(replacement)) => Some(Entity::Internal(replacement)), |
| 87 | + Some(Cow::Owned(replacement)) => { |
| 88 | + Some(Entity::External(Box::new(Cursor::new(replacement.clone())))) |
| 89 | + } |
| 90 | + None => None, |
| 91 | + } |
| 92 | + } |
| 93 | +} |
| 94 | + |
| 95 | +/// In this example the events will borrow from the first document |
| 96 | +fn borrowed() -> Result<(), Box<dyn std::error::Error>> { |
| 97 | + let mut reader = RawReader::from_str(XML1); |
| 98 | + reader.config_mut().trim_text(true); |
| 99 | + |
| 100 | + let mut r = Reader::borrowed(reader, MyResolver::new()?); |
| 101 | + |
| 102 | + assert_eq!( |
| 103 | + r.read_event()?, |
| 104 | + Event::Start(BytesStart::from_content( |
| 105 | + r#"test label="Message: &text;""#, |
| 106 | + 4 |
| 107 | + )) |
| 108 | + ); |
| 109 | + |
| 110 | + //-------------------------------------------------------------------------- |
| 111 | + // This part was inserted into original document from entity defined in DTD |
| 112 | + assert_eq!(r.read_event()?, Event::Start(BytesStart::new("a"))); |
| 113 | + assert_eq!( |
| 114 | + r.read_event()?, |
| 115 | + Event::Empty(BytesStart::from_content( |
| 116 | + r#"dtd attr = 'Message: &text;'"#, |
| 117 | + 3 |
| 118 | + )) |
| 119 | + ); |
| 120 | + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("a"))); |
| 121 | + //-------------------------------------------------------------------------- |
| 122 | + |
| 123 | + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("test"))); |
| 124 | + |
| 125 | + //-------------------------------------------------------------------------- |
| 126 | + // Start of external document |
| 127 | + assert_eq!( |
| 128 | + r.read_event()?, |
| 129 | + Event::Start(BytesStart::new("external")) |
| 130 | + ); |
| 131 | + assert_eq!(r.read_event()?, Event::Text(BytesText::new("text"))); |
| 132 | + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("external"))); |
| 133 | + //-------------------------------------------------------------------------- |
| 134 | + |
| 135 | + assert_eq!(r.read_event()?, Event::Eof); |
| 136 | + |
| 137 | + Ok(()) |
| 138 | +} |
| 139 | + |
| 140 | +/// In this example the events will always copy data |
| 141 | +fn buffered() -> Result<(), Box<dyn std::error::Error>> { |
| 142 | + let boxed: Box<dyn BufRead> = Box::new(Cursor::new(XML1.as_bytes())); |
| 143 | + let mut reader = RawReader::from_reader(boxed); |
| 144 | + reader.config_mut().trim_text(true); |
| 145 | + |
| 146 | + let mut r = Reader::buffered(reader, MyResolver::new()?); |
| 147 | + |
| 148 | + assert_eq!( |
| 149 | + r.read_event()?, |
| 150 | + Event::Start(BytesStart::from_content( |
| 151 | + r#"test label="Message: &text;""#, |
| 152 | + 4 |
| 153 | + )) |
| 154 | + ); |
| 155 | + |
| 156 | + //-------------------------------------------------------------------------- |
| 157 | + // This part was inserted into original document from entity defined in DTD |
| 158 | + assert_eq!(r.read_event()?, Event::Start(BytesStart::new("a"))); |
| 159 | + assert_eq!( |
| 160 | + r.read_event()?, |
| 161 | + Event::Empty(BytesStart::from_content( |
| 162 | + r#"dtd attr = 'Message: &text;'"#, |
| 163 | + 3 |
| 164 | + )) |
| 165 | + ); |
| 166 | + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("a"))); |
| 167 | + //-------------------------------------------------------------------------- |
| 168 | + |
| 169 | + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("test"))); |
| 170 | + |
| 171 | + //-------------------------------------------------------------------------- |
| 172 | + // Start of external document |
| 173 | + assert_eq!( |
| 174 | + r.read_event()?, |
| 175 | + Event::Start(BytesStart::new("external")) |
| 176 | + ); |
| 177 | + assert_eq!(r.read_event()?, Event::Text(BytesText::new("text"))); |
| 178 | + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("external"))); |
| 179 | + //-------------------------------------------------------------------------- |
| 180 | + |
| 181 | + assert_eq!(r.read_event()?, Event::Eof); |
| 182 | + |
| 183 | + Ok(()) |
| 184 | +} |
| 185 | + |
| 186 | +fn main() -> Result<(), Box<dyn std::error::Error>> { |
| 187 | + // In this example the events will borrow from the first document |
| 188 | + borrowed()?; |
| 189 | + // In this example the events will always copy data |
| 190 | + buffered()?; |
| 191 | + Ok(()) |
| 192 | +} |
0 commit comments