diff --git a/Cargo.toml b/Cargo.toml index 13af5c552..b7020159e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,11 +29,18 @@ rust-version = "1.75" [dependencies] # TODO it would be very nice to remove the "py-clone" feature as it can panic, # but needs a bit of work to make sure it's not used in the codebase -pyo3 = { version = "0.23.5", features = ["generate-import-lib", "num-bigint", "py-clone"] } +pyo3 = { version = "0.23.5", features = [ + "generate-import-lib", + "num-bigint", + "py-clone", +] } regex = "1.11.1" strum = { version = "0.26.3", features = ["derive"] } strum_macros = "0.26.4" -serde_json = {version = "1.0.138", features = ["arbitrary_precision", "preserve_order"]} +serde_json = { version = "1.0.138", features = [ + "arbitrary_precision", + "preserve_order", +] } enum_dispatch = "0.3.13" serde = { version = "1.0.218", features = ["derive"] } speedate = "0.15.0" diff --git a/python/pydantic_core/_pydantic_core.pyi b/python/pydantic_core/_pydantic_core.pyi index 3220b3ef8..8fc9491a1 100644 --- a/python/pydantic_core/_pydantic_core.pyi +++ b/python/pydantic_core/_pydantic_core.pyi @@ -402,6 +402,7 @@ def to_json( fallback: Callable[[Any], Any] | None = None, serialize_as_any: bool = False, context: Any | None = None, + sort_keys: bool = False, ) -> bytes: """ Serialize a Python object to JSON including transforming and filtering data. @@ -426,6 +427,7 @@ def to_json( serialize_as_any: Whether to serialize fields with duck-typing serialization behavior. context: The context to use for serialization, this is passed to functional serializers as [`info.context`][pydantic_core.core_schema.SerializationInfo.context]. + sort_keys: Whether to sort the keys of the serialized object. Raises: PydanticSerializationError: If serialization fails and no `fallback` function is provided. 
@@ -478,6 +480,7 @@ def to_jsonable_python( fallback: Callable[[Any], Any] | None = None, serialize_as_any: bool = False, context: Any | None = None, + sort_keys: bool = False, ) -> Any: """ Serialize/marshal a Python object to a JSON-serializable Python object including transforming and filtering data. @@ -502,6 +505,7 @@ def to_jsonable_python( serialize_as_any: Whether to serialize fields with duck-typing serialization behavior. context: The context to use for serialization, this is passed to functional serializers as [`info.context`][pydantic_core.core_schema.SerializationInfo.context]. + sort_keys: Whether to sort the keys of the serialized object. Raises: PydanticSerializationError: If serialization fails and no `fallback` function is provided. diff --git a/src/errors/validation_exception.rs b/src/errors/validation_exception.rs index 16e89eb68..93bff00b2 100644 --- a/src/errors/validation_exception.rs +++ b/src/errors/validation_exception.rs @@ -351,6 +351,7 @@ impl ValidationError { None, DuckTypingSerMode::SchemaBased, None, + false, ); let serializer = ValidationErrorSerializer { py, diff --git a/src/serializers/extra.rs b/src/serializers/extra.rs index 82c432342..18242de57 100644 --- a/src/serializers/extra.rs +++ b/src/serializers/extra.rs @@ -87,9 +87,10 @@ impl SerializationState { exclude_none: bool, round_trip: bool, serialize_unknown: bool, - fallback: Option<&'py Bound<'_, PyAny>>, + fallback: Option<&'py Bound<'py, PyAny>>, duck_typing_ser_mode: DuckTypingSerMode, - context: Option<&'py Bound<'_, PyAny>>, + context: Option<&'py Bound<'py, PyAny>>, + sort_keys: bool, ) -> Extra<'py> { Extra::new( py, @@ -106,6 +107,7 @@ impl SerializationState { fallback, duck_typing_ser_mode, context, + sort_keys, ) } @@ -139,6 +141,7 @@ pub(crate) struct Extra<'a> { pub fallback: Option<&'a Bound<'a, PyAny>>, pub duck_typing_ser_mode: DuckTypingSerMode, pub context: Option<&'a Bound<'a, PyAny>>, + pub sort_keys: bool, } impl<'a> Extra<'a> { @@ -158,6 +161,7 @@ 
impl<'a> Extra<'a> { fallback: Option<&'a Bound<'a, PyAny>>, duck_typing_ser_mode: DuckTypingSerMode, context: Option<&'a Bound<'a, PyAny>>, + sort_keys: bool, ) -> Self { Self { mode, @@ -177,6 +181,7 @@ impl<'a> Extra<'a> { fallback, duck_typing_ser_mode, context, + sort_keys, } } @@ -288,11 +293,12 @@ impl ExtraOwned { fallback: self.fallback.as_ref().map(|m| m.bind(py)), duck_typing_ser_mode: self.duck_typing_ser_mode, context: self.context.as_ref().map(|m| m.bind(py)), + sort_keys: false, } } } -#[derive(Clone)] +#[derive(Clone, PartialEq)] #[cfg_attr(debug_assertions, derive(Debug))] pub(crate) enum SerMode { Python, diff --git a/src/serializers/infer.rs b/src/serializers/infer.rs index 33f53b290..0569125b0 100644 --- a/src/serializers/infer.rs +++ b/src/serializers/infer.rs @@ -108,6 +108,7 @@ pub(crate) fn infer_to_python_known( extra.fallback, extra.duck_typing_ser_mode, extra.context, + extra.sort_keys, ); serializer.serializer.to_python(value, include, exclude, &extra) }; @@ -265,10 +266,16 @@ pub(crate) fn infer_to_python_known( } ObType::Dict => { let dict = value.downcast::()?; - serialize_pairs_python(py, dict.iter().map(Ok), include, exclude, extra, Ok)? + serialize_pairs_python(py, dict.iter().map(Ok), include, exclude, extra, |k| { + Ok(PyString::new(py, &infer_json_key(&k, extra)?).into_any()) + })? } ObType::PydanticSerializable => serialize_with_serializer()?, - ObType::Dataclass => serialize_pairs_python(py, any_dataclass_iter(value)?.0, include, exclude, extra, Ok)?, + ObType::Dataclass => { + serialize_pairs_python(py, any_dataclass_iter(value)?.0, include, exclude, extra, |k| { + Ok(PyString::new(py, &infer_json_key(&k, extra)?).into_any()) + })? 
+ } ObType::Generator => { let iter = super::type_serializers::generator::SerializationIterator::new( value.downcast()?, @@ -497,6 +504,7 @@ pub(crate) fn infer_serialize_known( extra.fallback, extra.duck_typing_ser_mode, extra.context, + extra.sort_keys, ); let pydantic_serializer = PydanticSerializer::new(value, &extracted_serializer.serializer, include, exclude, &extra); @@ -708,15 +716,36 @@ fn serialize_pairs_python<'py>( let new_dict = PyDict::new(py); let filter = AnyFilter::new(); + // Collect pairs if we need to sort + let mut pairs = Vec::new(); for result in pairs_iter { let (k, v) = result?; let op_next = filter.key_filter(&k, include, exclude)?; if let Some((next_include, next_exclude)) = op_next { - let k = key_transform(k)?; + let k = if *extra.mode == SerMode::Json { + key_transform(k)? + } else { + k + }; let v = infer_to_python(&v, next_include.as_ref(), next_exclude.as_ref(), extra)?; - new_dict.set_item(k, v)?; + pairs.push((k, v)); } } + + // Sort if requested and in JSON mode + if extra.sort_keys && *extra.mode == SerMode::Json { + pairs.sort_by(|(a, _), (b, _)| { + a.str() + .ok() + .and_then(|s| s.to_str().ok().map(ToString::to_string)) + .cmp(&b.str().ok().and_then(|s| s.to_str().ok().map(ToString::to_string))) + }); + } + + // Add to dictionary + for (k, v) in pairs { + new_dict.set_item(k, v)?; + } Ok(new_dict.into()) } @@ -731,15 +760,26 @@ fn serialize_pairs_json<'py, S: Serializer>( let mut map = serializer.serialize_map(Some(iter_size))?; let filter = AnyFilter::new(); + // If sort_keys is true, collect and sort the pairs first + let mut pairs: Vec<_> = Vec::new(); for result in pairs_iter { let (key, value) = result.map_err(py_err_se_err)?; - let op_next = filter.key_filter(&key, include, exclude).map_err(py_err_se_err)?; if let Some((next_include, next_exclude)) = op_next { - let key = infer_json_key(&key, extra).map_err(py_err_se_err)?; - let value_serializer = SerializeInfer::new(&value, next_include.as_ref(), 
next_exclude.as_ref(), extra); - map.serialize_entry(&key, &value_serializer)?; + let key_str = infer_json_key(&key, extra).map_err(py_err_se_err)?.into_owned(); + pairs.push((key_str, (value, next_include, next_exclude))); } } + + if extra.sort_keys { + pairs.sort_by(|(a, _), (b, _)| a.cmp(b)); + } + + // Serialize the pairs in order + for (key, (value, next_include, next_exclude)) in pairs { + let value_serializer = SerializeInfer::new(&value, next_include.as_ref(), next_exclude.as_ref(), extra); + map.serialize_entry(&key, &value_serializer)?; + } + map.end() } diff --git a/src/serializers/mod.rs b/src/serializers/mod.rs index 6ed496aab..1df0d14c0 100644 --- a/src/serializers/mod.rs +++ b/src/serializers/mod.rs @@ -65,6 +65,7 @@ impl SchemaSerializer { fallback: Option<&'a Bound<'a, PyAny>>, duck_typing_ser_mode: DuckTypingSerMode, context: Option<&'a Bound<'a, PyAny>>, + sort_keys: bool, ) -> Extra<'b> { Extra::new( py, @@ -81,6 +82,7 @@ impl SchemaSerializer { fallback, duck_typing_ser_mode, context, + sort_keys, ) } } @@ -148,6 +150,7 @@ impl SchemaSerializer { fallback, duck_typing_ser_mode, context, + false, ); let v = self.serializer.to_python(value, include, exclude, &extra)?; warnings.final_check(py)?; @@ -157,7 +160,7 @@ impl SchemaSerializer { #[allow(clippy::too_many_arguments)] #[pyo3(signature = (value, *, indent = None, include = None, exclude = None, by_alias = None, exclude_unset = false, exclude_defaults = false, exclude_none = false, round_trip = false, warnings = WarningsArg::Bool(true), - fallback = None, serialize_as_any = false, context = None))] + fallback = None, serialize_as_any = false, context = None, sort_keys = false))] pub fn to_json( &self, py: Python, @@ -174,6 +177,7 @@ impl SchemaSerializer { fallback: Option<&Bound<'_, PyAny>>, serialize_as_any: bool, context: Option<&Bound<'_, PyAny>>, + sort_keys: bool, ) -> PyResult { let warnings_mode = match warnings { WarningsArg::Bool(b) => b.into(), @@ -196,6 +200,7 @@ impl 
SchemaSerializer { fallback, duck_typing_ser_mode, context, + sort_keys, ); let bytes = to_json_bytes( value, @@ -242,7 +247,7 @@ impl SchemaSerializer { #[pyo3(signature = (value, *, indent = None, include = None, exclude = None, by_alias = None, exclude_none = false, round_trip = false, timedelta_mode = "iso8601", bytes_mode = "utf8", inf_nan_mode = "constants", serialize_unknown = false, fallback = None, serialize_as_any = false, - context = None))] + context = None, sort_keys = false))] pub fn to_json( py: Python, value: &Bound<'_, PyAny>, @@ -259,6 +264,7 @@ pub fn to_json( fallback: Option<&Bound<'_, PyAny>>, serialize_as_any: bool, context: Option<&Bound<'_, PyAny>>, + sort_keys: bool, ) -> PyResult { let state = SerializationState::new(timedelta_mode, bytes_mode, inf_nan_mode)?; let duck_typing_ser_mode = DuckTypingSerMode::from_bool(serialize_as_any); @@ -272,6 +278,7 @@ pub fn to_json( fallback, duck_typing_ser_mode, context, + sort_keys, ); let serializer = type_serializers::any::AnySerializer.into(); let bytes = to_json_bytes(value, &serializer, include, exclude, &extra, indent, 1024)?; @@ -284,7 +291,7 @@ pub fn to_json( #[pyfunction] #[pyo3(signature = (value, *, include = None, exclude = None, by_alias = None, exclude_none = false, round_trip = false, timedelta_mode = "iso8601", bytes_mode = "utf8", inf_nan_mode = "constants", serialize_unknown = false, fallback = None, - serialize_as_any = false, context = None))] + serialize_as_any = false, context = None, sort_keys = false))] pub fn to_jsonable_python( py: Python, value: &Bound<'_, PyAny>, @@ -300,6 +307,7 @@ pub fn to_jsonable_python( fallback: Option<&Bound<'_, PyAny>>, serialize_as_any: bool, context: Option<&Bound<'_, PyAny>>, + sort_keys: bool, ) -> PyResult { let state = SerializationState::new(timedelta_mode, bytes_mode, inf_nan_mode)?; let duck_typing_ser_mode = DuckTypingSerMode::from_bool(serialize_as_any); @@ -313,6 +321,7 @@ pub fn to_jsonable_python( fallback, duck_typing_ser_mode, 
context, + sort_keys, ); let v = infer::infer_to_python(value, include, exclude, &extra)?; state.final_check(py)?; diff --git a/tests/test.rs b/tests/test.rs index 58e2904f5..2c6414c95 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -104,6 +104,7 @@ a = A() None, false, None, + false, ) .unwrap(); let serialized: &[u8] = serialized.extract(py).unwrap(); @@ -212,6 +213,7 @@ dump_json_input_2 = {'a': 'something'} None, false, None, + false, ) .unwrap(); let repr = format!("{}", serialization_result.bind(py).repr().unwrap()); @@ -233,6 +235,7 @@ dump_json_input_2 = {'a': 'something'} None, false, None, + false, ) .unwrap(); let repr = format!("{}", serialization_result.bind(py).repr().unwrap()); diff --git a/tests/test_json.py b/tests/test_json.py index 4d40ceb16..0f76aa2f9 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -1,6 +1,7 @@ import json import platform import re +from typing import Any import pytest from dirty_equals import IsFloatNan, IsList @@ -233,6 +234,31 @@ def test_to_json_fallback(): assert to_json(Foobar(), fallback=fallback_func) == b'"fallback:Foobar"' +@pytest.mark.parametrize( + 'input_value,unsorted_output_value,sorted_output_value', + [ + ( + {'b': 2, 'a': 1}, + b'{"b":2,"a":1}', + b'{"a":1,"b":2}', + ), + ( + {'b': {'d': 4, 'c': 3}}, + b'{"b":{"d":4,"c":3}}', + b'{"b":{"c":3,"d":4}}', + ), + ( + {'b': {'d': 4, 'c': 3}, 'a': 1}, + b'{"b":{"d":4,"c":3},"a":1}', + b'{"a":1,"b":{"c":3,"d":4}}', + ), + ], +) +def test_to_json_sort_keys(input_value: dict[str, Any], unsorted_output_value: bytes, sorted_output_value: bytes): + assert to_json(input_value) == unsorted_output_value + assert to_json(input_value, sort_keys=True) == sorted_output_value + + def test_to_jsonable_python(): assert to_jsonable_python([1, 2]) == [1, 2] assert to_jsonable_python({1, 2}) == IsList(1, 2, check_order=False) @@ -249,6 +275,21 @@ def test_to_jsonable_python_fallback(): assert to_jsonable_python(Foobar(), fallback=fallback_func) == 'fallback:Foobar' 
+@pytest.mark.parametrize(
+    'input_value,unsorted_output_value,sorted_output_value',
+    [
+        ({'b': 2, 'a': 1}, {'b': 2, 'a': 1}, {'a': 1, 'b': 2}),
+        ({'b': {'d': 4, 'c': 3}}, {'b': {'d': 4, 'c': 3}}, {'b': {'c': 3, 'd': 4}}),
+        ({'b': {'d': 4, 'c': 3}, 'a': 1}, {'b': {'d': 4, 'c': 3}, 'a': 1}, {'a': 1, 'b': {'c': 3, 'd': 4}}),
+    ],
+)
+def test_to_jsonable_python_sort_keys(
+    input_value: dict[str, Any], unsorted_output_value: dict[str, Any], sorted_output_value: dict[str, Any]
+):
+    assert json.dumps(to_jsonable_python(input_value)) == json.dumps(unsorted_output_value)  # dict == is order-blind
+    assert json.dumps(to_jsonable_python(input_value, sort_keys=True)) == json.dumps(sorted_output_value)
+
+
 def test_to_jsonable_python_schema_serializer():
     class Foobar:
         def __init__(self, my_foo: int, my_inners: list['Foobar']):