Skip to content

Commit 466e100

Browse files
authored
Merge pull request #307 from JasperDeSutter/dedupe-unescape
2 parents 2aa38ae + b17e47f commit 466e100

File tree

2 files changed

+45
-50
lines changed

2 files changed

+45
-50
lines changed

fluent-syntax/src/unicode.rs

Lines changed: 22 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,16 @@ fn encode_unicode(s: Option<&str>) -> char {
6666
/// assert_eq!(s, "Foo 😊 Bar");
6767
/// ```
6868
pub fn unescape_unicode<W>(w: &mut W, input: &str) -> fmt::Result
69+
where
70+
W: fmt::Write,
71+
{
72+
if unescape(w, input)? {
73+
return Ok(());
74+
}
75+
w.write_str(input)
76+
}
77+
78+
fn unescape<W>(w: &mut W, input: &str) -> Result<bool, std::fmt::Error>
6979
where
7080
W: fmt::Write,
7181
{
@@ -100,10 +110,15 @@ where
100110
w.write_char(new_char)?;
101111
start = ptr;
102112
}
113+
114+
if start == 0 {
115+
return Ok(false);
116+
}
117+
103118
if start != ptr {
104119
w.write_str(&input[start..ptr])?;
105120
}
106-
Ok(())
121+
Ok(true)
107122
}
108123

109124
/// Unescapes to a `Cow<str>`, borrowing the input when it contains no escape sequences and allocating a new `String` only when it does.
@@ -119,41 +134,11 @@ where
119134
/// );
120135
/// ```
121136
pub fn unescape_unicode_to_string(input: &str) -> Cow<str> {
122-
let bytes = input.as_bytes();
123-
let mut result = Cow::from(input);
124-
125-
let mut ptr = 0;
126-
127-
while let Some(b) = bytes.get(ptr) {
128-
if b != &b'\\' {
129-
if let Cow::Owned(ref mut s) = result {
130-
s.push(*b as char);
131-
}
132-
ptr += 1;
133-
continue;
134-
}
135-
136-
if let Cow::Borrowed(_) = result {
137-
result = Cow::from(&input[0..ptr]);
138-
}
139-
140-
ptr += 1;
141-
142-
let new_char = match bytes.get(ptr) {
143-
Some(b'\\') => '\\',
144-
Some(b'"') => '"',
145-
Some(u @ b'u') | Some(u @ b'U') => {
146-
let start = ptr + 1;
147-
let len = if u == &b'u' { 4 } else { 6 };
148-
ptr += len;
149-
input
150-
.get(start..(start + len))
151-
.map_or(UNKNOWN_CHAR, |slice| encode_unicode(Some(slice)))
152-
}
153-
_ => UNKNOWN_CHAR,
154-
};
155-
result.to_mut().push(new_char);
156-
ptr += 1;
137+
let mut result = String::new();
138+
let owned = unescape(&mut result, input).expect("String write methods don't Err");
139+
if owned {
140+
Cow::Owned(result)
141+
} else {
142+
Cow::Borrowed(input)
157143
}
158-
result
159144
}

fluent-syntax/tests/unicode.rs

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,33 @@
1+
use std::borrow::Cow;
2+
13
use fluent_syntax::unicode::{unescape_unicode, unescape_unicode_to_string};
24

3-
fn test_unescape_unicode(input: &str, output: &str) {
5+
/// Asserts that decoding unicode escape sequences in `input` matches `output`.
6+
/// When `borrowed` is true, asserts that the unescaped result is returned as `Cow::Borrowed` (no allocation); when false, asserts that it is not borrowed.
7+
fn test_unescape_unicode(input: &str, output: &str, borrowed: bool) {
48
let mut s = String::new();
59
unescape_unicode(&mut s, input).expect("Failed to write.");
6-
assert_eq!(&s, output);
10+
assert_eq!(s, output);
711
let result = unescape_unicode_to_string(input);
8-
assert_eq!(&result, output);
12+
assert_eq!(result, output);
13+
14+
assert_eq!(matches!(result, Cow::Borrowed(_)), borrowed);
915
}
1016

1117
#[test]
1218
fn unescape_unicode_test() {
13-
test_unescape_unicode("foo", "foo");
14-
test_unescape_unicode("foo \\\\", "foo \\");
15-
test_unescape_unicode("foo \\\"", "foo \"");
16-
test_unescape_unicode("foo \\\\ faa", "foo \\ faa");
17-
test_unescape_unicode("foo \\\\ faa \\\\ fii", "foo \\ faa \\ fii");
18-
test_unescape_unicode("foo \\\\\\\" faa \\\"\\\\ fii", "foo \\\" faa \"\\ fii");
19-
test_unescape_unicode("\\u0041\\u004F", "AO");
20-
test_unescape_unicode("\\uA", "�");
21-
test_unescape_unicode("\\uA0Pl", "�");
22-
test_unescape_unicode("\\d Foo", "� Foo");
19+
test_unescape_unicode("foo", "foo", true);
20+
test_unescape_unicode("foo \\\\", "foo \\", false);
21+
test_unescape_unicode("foo \\\"", "foo \"", false);
22+
test_unescape_unicode("foo \\\\ faa", "foo \\ faa", false);
23+
test_unescape_unicode("foo \\\\ faa \\\\ fii", "foo \\ faa \\ fii", false);
24+
test_unescape_unicode(
25+
"foo \\\\\\\" faa \\\"\\\\ fii",
26+
"foo \\\" faa \"\\ fii",
27+
false,
28+
);
29+
test_unescape_unicode("\\u0041\\u004F", "AO", false);
30+
test_unescape_unicode("\\uA", "�", false);
31+
test_unescape_unicode("\\uA0Pl", "�", false);
32+
test_unescape_unicode("\\d Foo", "� Foo", false);
2333
}

0 commit comments

Comments
 (0)