Skip to content

Commit 5aba587

Browse files
committed
Add ensure_ascii option
1 parent 94ac927 commit 5aba587

File tree

6 files changed

+145
-11
lines changed

6 files changed

+145
-11
lines changed

python/pydantic_core/_pydantic_core.pyi

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,7 @@ class SchemaSerializer:
344344
value: Any,
345345
*,
346346
indent: int | None = None,
347+
ensure_ascii: bool = False,
347348
include: _IncEx | None = None,
348349
exclude: _IncEx | None = None,
349350
by_alias: bool | None = None,
@@ -362,6 +363,8 @@ class SchemaSerializer:
362363
Arguments:
363364
value: The Python object to serialize.
364365
indent: If `None`, the JSON will be compact, otherwise it will be pretty-printed with the indent provided.
366+
ensure_ascii: If `True`, the output is guaranteed to have all incoming non-ASCII characters escaped.
367+
If `False` (the default), non-ASCII characters are written to the output unescaped.
365368
include: A set of fields to include, if `None` all fields are included.
366369
exclude: A set of fields to exclude, if `None` no fields are excluded.
367370
by_alias: Whether to use the alias names of fields.
@@ -389,6 +392,7 @@ def to_json(
389392
value: Any,
390393
*,
391394
indent: int | None = None,
395+
ensure_ascii: bool = False,
392396
include: _IncEx | None = None,
393397
exclude: _IncEx | None = None,
394398
# Note: In Pydantic 2.11, the default value of `by_alias` on `SchemaSerializer` was changed from `True` to `None`,
@@ -413,6 +417,8 @@ def to_json(
413417
Arguments:
414418
value: The Python object to serialize.
415419
indent: If `None`, the JSON will be compact, otherwise it will be pretty-printed with the indent provided.
420+
ensure_ascii: If `True`, the output is guaranteed to have all incoming non-ASCII characters escaped.
421+
If `False` (the default), non-ASCII characters are written to the output unescaped.
416422
include: A set of fields to include, if `None` all fields are included.
417423
exclude: A set of fields to exclude, if `None` no fields are excluded.
418424
by_alias: Whether to use the alias names of fields.

src/serializers/mod.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,14 +155,15 @@ impl SchemaSerializer {
155155
}
156156

157157
#[allow(clippy::too_many_arguments)]
158-
#[pyo3(signature = (value, *, indent = None, include = None, exclude = None, by_alias = None,
158+
#[pyo3(signature = (value, *, indent = None, ensure_ascii = false, include = None, exclude = None, by_alias = None,
159159
exclude_unset = false, exclude_defaults = false, exclude_none = false, round_trip = false, warnings = WarningsArg::Bool(true),
160160
fallback = None, serialize_as_any = false, context = None))]
161161
pub fn to_json(
162162
&self,
163163
py: Python,
164164
value: &Bound<'_, PyAny>,
165165
indent: Option<usize>,
166+
ensure_ascii: Option<bool>,
166167
include: Option<&Bound<'_, PyAny>>,
167168
exclude: Option<&Bound<'_, PyAny>>,
168169
by_alias: Option<bool>,
@@ -203,6 +204,7 @@ impl SchemaSerializer {
203204
exclude,
204205
&extra,
205206
indent,
207+
ensure_ascii.unwrap_or(false),
206208
self.expected_json_size.load(Ordering::Relaxed),
207209
)?;
208210

@@ -238,14 +240,15 @@ impl SchemaSerializer {
238240

239241
#[allow(clippy::too_many_arguments)]
240242
#[pyfunction]
241-
#[pyo3(signature = (value, *, indent = None, include = None, exclude = None, by_alias = true,
243+
#[pyo3(signature = (value, *, indent = None, ensure_ascii = false, include = None, exclude = None, by_alias = true,
242244
exclude_none = false, round_trip = false, timedelta_mode = "iso8601", bytes_mode = "utf8",
243245
inf_nan_mode = "constants", serialize_unknown = false, fallback = None, serialize_as_any = false,
244246
context = None))]
245247
pub fn to_json(
246248
py: Python,
247249
value: &Bound<'_, PyAny>,
248250
indent: Option<usize>,
251+
ensure_ascii: Option<bool>,
249252
include: Option<&Bound<'_, PyAny>>,
250253
exclude: Option<&Bound<'_, PyAny>>,
251254
by_alias: bool,
@@ -271,7 +274,16 @@ pub fn to_json(
271274
serialize_as_any,
272275
context,
273276
);
274-
let bytes = to_json_bytes(value, AnySerializer::get(), include, exclude, &extra, indent, 1024)?;
277+
let bytes = to_json_bytes(
278+
value,
279+
AnySerializer::get(),
280+
include,
281+
exclude,
282+
&extra,
283+
indent,
284+
ensure_ascii.unwrap_or(false),
285+
1024,
286+
)?;
275287
state.final_check(py)?;
276288
let py_bytes = PyBytes::new(py, &bytes);
277289
Ok(py_bytes.into())

src/serializers/shared.rs

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::borrow::Cow;
22
use std::fmt::Debug;
3+
use std::io::{self, Write};
34

45
use pyo3::exceptions::PyTypeError;
56
use pyo3::prelude::*;
@@ -9,7 +10,7 @@ use pyo3::{intern, PyTraverseError, PyVisit};
910

1011
use enum_dispatch::enum_dispatch;
1112
use serde::Serialize;
12-
use serde_json::ser::PrettyFormatter;
13+
use serde_json::ser::{Formatter, PrettyFormatter};
1314

1415
use crate::build_tools::py_schema_err;
1516
use crate::build_tools::py_schema_error_type;
@@ -432,6 +433,71 @@ impl Serialize for PydanticSerializer<'_> {
432433
}
433434
}
434435

436+
struct EscapeNonAsciiFormatter;
437+
438+
impl Formatter for EscapeNonAsciiFormatter {
439+
fn write_string_fragment<W: ?Sized + Write>(&mut self, writer: &mut W, fragment: &str) -> io::Result<()> {
440+
for ch in fragment.chars() {
441+
if ch.is_ascii() {
442+
writer.write_all(ch.encode_utf8(&mut [0; 4]).as_bytes())?;
443+
} else {
444+
for escape in ch.encode_utf16(&mut [0; 2]) {
445+
write!(writer, "\\u{escape:04x}")?;
446+
}
447+
}
448+
}
449+
Ok(())
450+
}
451+
}
452+
453+
struct EscapeNonAsciiPrettyFormatter<'a> {
454+
pretty: PrettyFormatter<'a>,
455+
escape_non_ascii: EscapeNonAsciiFormatter,
456+
}
457+
458+
impl<'a> EscapeNonAsciiPrettyFormatter<'a> {
459+
pub fn with_indent(indent: &'a [u8]) -> Self {
460+
Self {
461+
pretty: PrettyFormatter::with_indent(indent),
462+
escape_non_ascii: EscapeNonAsciiFormatter,
463+
}
464+
}
465+
}
466+
467+
/// Generates a method that forwards to the same-named method on a field of `self`.
///
/// * `defer!(field, method)` forwards `method(writer)`.
/// * `defer!(field, method, Type)` forwards `method(writer, value)` where `value: Type`.
///
/// The generated method is generic over any `io::Write` writer (sized or not) and
/// returns the field's `io::Result<()>` untouched.
macro_rules! defer {
    // Forwarded method takes only the writer.
    ($field:ident, $method:ident) => {
        fn $method<W: ?Sized + io::Write>(&mut self, writer: &mut W) -> io::Result<()> {
            self.$field.$method(writer)
        }
    };
    // Forwarded method takes the writer plus one extra argument of type `$arg_ty`.
    ($field:ident, $method:ident, $arg_ty:ty) => {
        fn $method<W: ?Sized + io::Write>(&mut self, writer: &mut W, arg: $arg_ty) -> io::Result<()> {
            self.$field.$method(writer, arg)
        }
    };
}
485+
486+
#[allow(clippy::needless_lifetimes)]
487+
impl<'a> Formatter for EscapeNonAsciiPrettyFormatter<'a> {
488+
defer!(escape_non_ascii, write_string_fragment, &str);
489+
defer!(pretty, begin_array);
490+
defer!(pretty, end_array);
491+
defer!(pretty, begin_array_value, bool);
492+
defer!(pretty, end_array_value);
493+
defer!(pretty, begin_object);
494+
defer!(pretty, end_object);
495+
defer!(pretty, begin_object_key, bool);
496+
defer!(pretty, end_object_key);
497+
defer!(pretty, begin_object_value);
498+
defer!(pretty, end_object_value);
499+
}
500+
435501
#[allow(clippy::too_many_arguments)]
436502
pub(crate) fn to_json_bytes(
437503
value: &Bound<'_, PyAny>,
@@ -440,25 +506,40 @@ pub(crate) fn to_json_bytes(
440506
exclude: Option<&Bound<'_, PyAny>>,
441507
extra: &Extra,
442508
indent: Option<usize>,
509+
ensure_ascii: bool,
443510
expected_json_size: usize,
444511
) -> PyResult<Vec<u8>> {
445512
let serializer = PydanticSerializer::new(value, serializer, include, exclude, extra);
446513

447514
let writer: Vec<u8> = Vec::with_capacity(expected_json_size);
448-
let bytes = match indent {
449-
Some(indent) => {
515+
516+
let bytes = match (indent, ensure_ascii) {
517+
(Some(indent), true) => {
518+
let indent = vec![b' '; indent];
519+
let formatter = EscapeNonAsciiPrettyFormatter::with_indent(&indent);
520+
let mut ser = PythonSerializer::with_formatter(writer, formatter);
521+
serializer.serialize(&mut ser).map_err(se_err_py_err)?;
522+
ser.into_inner()
523+
}
524+
(Some(indent), false) => {
450525
let indent = vec![b' '; indent];
451526
let formatter = PrettyFormatter::with_indent(&indent);
452527
let mut ser = PythonSerializer::with_formatter(writer, formatter);
453528
serializer.serialize(&mut ser).map_err(se_err_py_err)?;
454529
ser.into_inner()
455530
}
456-
None => {
531+
(None, true) => {
532+
let mut ser = PythonSerializer::with_formatter(writer, EscapeNonAsciiFormatter);
533+
serializer.serialize(&mut ser).map_err(se_err_py_err)?;
534+
ser.into_inner()
535+
}
536+
(None, false) => {
457537
let mut ser = PythonSerializer::new(writer);
458538
serializer.serialize(&mut ser).map_err(se_err_py_err)?;
459539
ser.into_inner()
460540
}
461541
};
542+
462543
Ok(bytes)
463544
}
464545

src/serializers/type_serializers/json.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ impl TypeSerializer for JsonSerializer {
5454
extra: &Extra,
5555
) -> PyResult<PyObject> {
5656
if extra.round_trip {
57-
let bytes = to_json_bytes(value, &self.serializer, include, exclude, extra, None, 0)?;
57+
let bytes = to_json_bytes(value, &self.serializer, include, exclude, extra, None, false, 0)?;
5858
let py = value.py();
5959
let s = from_utf8(&bytes).map_err(|e| utf8_py_error(py, e, &bytes))?;
6060
Ok(PyString::new(py, s).into())
@@ -65,7 +65,7 @@ impl TypeSerializer for JsonSerializer {
6565

6666
fn json_key<'a>(&self, key: &'a Bound<'_, PyAny>, extra: &Extra) -> PyResult<Cow<'a, str>> {
6767
if extra.round_trip {
68-
let bytes = to_json_bytes(key, &self.serializer, None, None, extra, None, 0)?;
68+
let bytes = to_json_bytes(key, &self.serializer, None, None, extra, None, false, 0)?;
6969
let py = key.py();
7070
let s = from_utf8(&bytes).map_err(|e| utf8_py_error(py, e, &bytes))?;
7171
Ok(Cow::Owned(s.to_string()))
@@ -83,8 +83,8 @@ impl TypeSerializer for JsonSerializer {
8383
extra: &Extra,
8484
) -> Result<S::Ok, S::Error> {
8585
if extra.round_trip {
86-
let bytes =
87-
to_json_bytes(value, &self.serializer, include, exclude, extra, None, 0).map_err(py_err_se_err)?;
86+
let bytes = to_json_bytes(value, &self.serializer, include, exclude, extra, None, false, 0)
87+
.map_err(py_err_se_err)?;
8888
match from_utf8(&bytes) {
8989
Ok(s) => serializer.serialize_str(s),
9090
Err(e) => Err(Error::custom(e.to_string())),

tests/serializers/test_string.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,38 @@ def test_str():
2323
assert json.loads(json_emoji) == 'emoji 💩'
2424

2525

26+
# Cases adapted from CPython's own JSON test-suite:
# - https://github.com/python/cpython/blob/d87e7f35/Lib/test/test_json/test_encode_basestring_ascii.py
# - https://github.com/python/cpython/blob/d87e7f35/Lib/test/test_json/test_unicode.py
@pytest.mark.parametrize(
    'input,expected',
    [
        pytest.param(
            '/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?',
            '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"',
        ),
        pytest.param('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
        pytest.param('controls', '"controls"'),
        pytest.param('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
        pytest.param(
            '{"object with 1 member":["array with 1 element"]}',
            '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"',
        ),
        pytest.param(' s p a c e d ', '" s p a c e d "'),
        pytest.param('\U0001d120', '"\\ud834\\udd20"'),
        pytest.param('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
        pytest.param("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
        pytest.param('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
        pytest.param('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
        pytest.param('\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}', '"\\u03b1\\u03a9"'),
        pytest.param('\U0001d120', '"\\ud834\\udd20"'),
    ],
)
def test_str_ensure_ascii(input: str, expected: str) -> None:
    # Serialize through a plain `str` schema with `ensure_ascii=True` and compare
    # the decoded JSON text with the expected escaped form.
    serializer = SchemaSerializer(core_schema.str_schema())
    json_bytes = serializer.to_json(input, ensure_ascii=True)
    assert json_bytes.decode('utf-8') == expected
56+
57+
2658
def test_huge_str():
2759
v = SchemaSerializer(core_schema.int_schema())
2860
msg = r"Expected `int` - serialized value may not be as expected \[input_value='123456789012345678901234...89012345678901234567890', input_type=str\]"

tests/test_json.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,9 @@ def test_to_json():
218218
assert to_json([1, 2]) == b'[1,2]'
219219
assert to_json([1, 2], indent=2) == b'[\n 1,\n 2\n]'
220220
assert to_json([1, b'x']) == b'[1,"x"]'
221+
assert to_json(['à', 'é']).decode('utf-8') == '["à","é"]'
222+
assert to_json(['à', 'é'], indent=2).decode('utf-8') == '[\n "à",\n "é"\n]'
223+
assert to_json(['à', 'é'], indent=2, ensure_ascii=True).decode('utf-8') == '[\n "\\u00e0",\n "\\u00e9"\n]'
221224

222225
# kwargs required
223226
with pytest.raises(TypeError, match=r'to_json\(\) takes 1 positional arguments but 2 were given'):

0 commit comments

Comments
 (0)