Skip to content

Commit 7531971

Browse files
committed
Split off most encoding-related tests to a separate file
Write a few new ones. Fix up descriptions on some decoding functions.
1 parent 8f60b58 commit 7531971

File tree

5 files changed

+95
-73
lines changed

5 files changed

+95
-73
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,6 @@ Note that despite not focusing on performance (there are several unnecessary cop
270270
Benchmarking is hard and the results depend on your input file and your machine.
271271

272272
Here on my particular file, quick-xml is around **50 times faster** than [xml-rs](https://crates.io/crates/xml-rs) crate.
273-
_(measurements were done while this crate was named quick-xml)_
274273

275274
```
276275
// quick-xml benches

src/encoding.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,9 @@ impl Decoder {
105105
}
106106
}
107107

108-
/// Decodes the provided bytes using the specified encoding, ignoring the BOM
109-
/// if it is present in the `bytes`.
108+
/// Decodes the provided bytes using the specified encoding.
110109
///
111-
/// Returns an error in case of malformed sequences in the `bytes`.
110+
/// Returns an error in case of malformed or non-representable sequences in the `bytes`.
112111
#[cfg(feature = "encoding")]
113112
pub fn decode<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> Result<Cow<'b, str>> {
114113
encoding
@@ -119,7 +118,7 @@ pub fn decode<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> Result<Cow<'b
119118
/// Decodes a slice with an unknown encoding, removing the BOM if it is present
120119
/// in the bytes.
121120
///
122-
/// Returns an error in case of malformed sequences in the `bytes`.
121+
/// Returns an error in case of malformed or non-representable sequences in the `bytes`.
123122
#[cfg(feature = "encoding")]
124123
pub fn decode_with_bom_removal<'b>(bytes: &'b [u8]) -> Result<Cow<'b, str>> {
125124
if let Some(encoding) = detect_encoding(bytes) {
@@ -185,5 +184,3 @@ pub fn detect_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
185184
_ => None,
186185
}
187186
}
188-
189-
// TODO: add some tests for functions

tests/documents/utf8.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<?xml version="1.0"?>
2+
<project name="project-name">
3+
</project>

tests/encodings.rs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
use quick_xml::events::Event;
2+
use quick_xml::Reader;
3+
4+
#[cfg(feature = "encoding")]
5+
mod decode {
6+
use encoding_rs::{UTF_16BE, UTF_16LE, UTF_8};
7+
use quick_xml::encoding::*;
8+
use std::borrow::Cow;
9+
10+
static UTF16BE_TEXT_WITH_BOM: &[u8] = include_bytes!("./documents/utf16be.xml");
11+
static UTF16LE_TEXT_WITH_BOM: &[u8] = include_bytes!("./documents/utf16le.xml");
12+
static UTF8_TEXT_WITH_BOM: &[u8] = include_bytes!("./documents/utf8.xml");
13+
14+
static UTF8_TEXT: &str = r#"<?xml version="1.0"?>
15+
<project name="project-name">
16+
</project>
17+
"#;
18+
19+
#[test]
20+
fn test_removes_bom() {
21+
// No BOM
22+
assert_eq!(
23+
decode_with_bom_removal(UTF8_TEXT.as_bytes()).unwrap(),
24+
Cow::Borrowed(UTF8_TEXT)
25+
);
26+
// BOM
27+
assert_eq!(
28+
decode_with_bom_removal(UTF8_TEXT_WITH_BOM).unwrap(),
29+
Cow::Borrowed(UTF8_TEXT)
30+
);
31+
assert_eq!(
32+
decode_with_bom_removal(UTF16BE_TEXT_WITH_BOM).unwrap(),
33+
Cow::Borrowed(UTF8_TEXT).into_owned()
34+
);
35+
assert_eq!(
36+
decode_with_bom_removal(UTF16LE_TEXT_WITH_BOM).unwrap(),
37+
Cow::Borrowed(UTF8_TEXT).into_owned()
38+
);
39+
}
40+
41+
#[test]
42+
fn test_detect_encoding() {
43+
// No BOM
44+
assert_eq!(detect_encoding(UTF8_TEXT.as_bytes()), Some(UTF_8));
45+
// BOM
46+
assert_eq!(detect_encoding(UTF8_TEXT_WITH_BOM), Some(UTF_8));
47+
assert_eq!(detect_encoding(UTF16BE_TEXT_WITH_BOM), Some(UTF_16BE));
48+
assert_eq!(detect_encoding(UTF16LE_TEXT_WITH_BOM), Some(UTF_16LE));
49+
}
50+
}
51+
52+
#[test]
53+
#[cfg(feature = "encoding")]
54+
fn test_koi8_r_encoding() {
55+
let src = include_bytes!("documents/opennews_all.rss").as_ref();
56+
let mut buf = vec![];
57+
let mut r = Reader::from_reader(src);
58+
r.trim_text(true).expand_empty_elements(false);
59+
loop {
60+
match r.read_event_into(&mut buf) {
61+
Ok(Event::Text(e)) => {
62+
e.unescape().unwrap();
63+
}
64+
Ok(Event::Eof) => break,
65+
_ => (),
66+
}
67+
}
68+
}
69+
70+
#[test]
71+
#[cfg(feature = "encoding")]
72+
fn fuzz_53() {
73+
use std::io::Cursor;
74+
75+
let data: &[u8] = b"\xe9\x00\x00\x00\x00\x00\x00\x00\x00\
76+
\x00\x00\x00\x00\n(\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\
77+
\x00<>\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00<<\x00\x00\x00";
78+
let cursor = Cursor::new(data);
79+
let mut reader = Reader::from_reader(cursor);
80+
let mut buf = vec![];
81+
loop {
82+
match reader.read_event_into(&mut buf) {
83+
Ok(Event::Eof) | Err(..) => break,
84+
_ => buf.clear(),
85+
}
86+
}
87+
}

tests/test.rs

Lines changed: 2 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use quick_xml::name::QName;
22
use quick_xml::{events::attributes::Attribute, events::Event::*, Error, Reader};
3-
use std::{borrow::Cow, io::Cursor};
3+
4+
use std::borrow::Cow;
45

56
#[cfg(feature = "serialize")]
67
use serde::{Deserialize, Serialize};
@@ -89,40 +90,6 @@ fn test_comment_starting_with_gt() {
8990
}
9091
}
9192

92-
#[test]
93-
#[cfg(feature = "encoding")]
94-
fn test_koi8_r_encoding() {
95-
let src = include_bytes!("documents/opennews_all.rss").as_ref();
96-
let mut buf = vec![];
97-
let mut r = Reader::from_reader(src);
98-
r.trim_text(true).expand_empty_elements(false);
99-
loop {
100-
match r.read_event_into(&mut buf) {
101-
Ok(Text(e)) => {
102-
e.unescape().unwrap();
103-
}
104-
Ok(Eof) => break,
105-
_ => (),
106-
}
107-
}
108-
}
109-
110-
#[test]
111-
fn fuzz_53() {
112-
let data: &[u8] = b"\xe9\x00\x00\x00\x00\x00\x00\x00\x00\
113-
\x00\x00\x00\x00\n(\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\
114-
\x00<>\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00<<\x00\x00\x00";
115-
let cursor = Cursor::new(data);
116-
let mut reader = Reader::from_reader(cursor);
117-
let mut buf = vec![];
118-
loop {
119-
match reader.read_event_into(&mut buf) {
120-
Ok(Eof) | Err(..) => break,
121-
_ => buf.clear(),
122-
}
123-
}
124-
}
125-
12693
#[test]
12794
fn test_issue94() {
12895
let data = br#"<Run>
@@ -138,37 +105,6 @@ fn test_issue94() {
138105
}
139106
}
140107

141-
#[test]
142-
fn fuzz_101() {
143-
let data: &[u8] = b"\x00\x00<\x00\x00\x0a>&#44444444401?#\x0a413518\
144-
#\x0a\x0a\x0a;<:<)(<:\x0a\x0a\x0a\x0a;<:\x0a\x0a\
145-
<:\x0a\x0a\x0a\x0a\x0a<\x00*\x00\x00\x00\x00";
146-
let cursor = Cursor::new(data);
147-
let mut reader = Reader::from_reader(cursor);
148-
let mut buf = vec![];
149-
loop {
150-
match reader.read_event_into(&mut buf) {
151-
Ok(Start(e)) | Ok(Empty(e)) => {
152-
for a in e.attributes() {
153-
if a.ok()
154-
.map_or(true, |a| a.decode_and_unescape_value(&reader).is_err())
155-
{
156-
break;
157-
}
158-
}
159-
}
160-
Ok(Text(e)) => {
161-
if e.unescape().is_err() {
162-
break;
163-
}
164-
}
165-
Ok(Eof) | Err(..) => break,
166-
_ => (),
167-
}
168-
buf.clear();
169-
}
170-
}
171-
172108
#[test]
173109
fn test_no_trim() {
174110
let mut reader = Reader::from_str(" <tag> text </tag> ");

0 commit comments

Comments
 (0)