Skip to content

Freeze metadata #3140

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,18 @@
[0.6.2] - 2025-04-01
--------------------


**Bugfixes**

- Metadata.schema was returning a modified schema; this is fixed to return a copy of
the original schema instead (:user:`benjeffery`, :issue:`3129`, :pr:`3130`)

**Breaking Changes**

- To avoid confusion, metadata returned from ``.metadata`` accessors is now a ``FrozenDict``
that does not allow mutation of its contents. This is because mutating the object doesn't
update the underlying metadata. (:user:`benjeffery`, :issue:`993`, :pr:`3140`)

--------------------
[0.6.1] - 2025-03-31
--------------------
Expand Down
183 changes: 183 additions & 0 deletions python/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2515,3 +2515,186 @@ def test_explicit_ordering(self):

dtype = metadata.MetadataSchema(schema).numpy_dtype()
assert dtype.names == ("id", "name", "age")


class TestFrozenMetadata:
    """
    Tests that decoded metadata is read-only at every nesting level.

    Attempts to assign or delete items are expected to raise ``TypeError``
    and attempts to call ``append`` on decoded lists are expected to raise
    ``AttributeError``.
    """

    def verify_simple_frozen(self, decoded):
        """
        Check that ``decoded`` rejects mutation of the top-level mapping, the
        nested list under ``"c"`` and the nested mapping under ``"d"``.

        ``decoded`` must be the decoded form of
        ``{"a": 1, "b": 2, "c": [1, 2, 3], "d": {"x": 10, "y": 20}}``.
        Shared by the JSON and struct codec tests so that both codecs are
        held to exactly the same checks.
        """
        # Check that we can't modify the top-level dict
        with pytest.raises(TypeError):
            decoded["e"] = 5

        with pytest.raises(TypeError):
            decoded["a"] = 10

        with pytest.raises(TypeError):
            del decoded["a"]

        # Check that we can't modify nested lists
        with pytest.raises(AttributeError):
            decoded["c"].append(4)

        with pytest.raises(TypeError):
            decoded["c"][0] = 10

        # Check that we can't modify nested dictionaries
        with pytest.raises(TypeError):
            decoded["d"]["z"] = 30

        with pytest.raises(TypeError):
            decoded["d"]["x"] = 100

        with pytest.raises(TypeError):
            del decoded["d"]["x"]

    def test_json_simple_frozen(self):
        """A row round-tripped through the JSON codec cannot be mutated."""
        schema = metadata.MetadataSchema({"codec": "json"})
        data = {"a": 1, "b": 2, "c": [1, 2, 3], "d": {"x": 10, "y": 20}}
        encoded = schema.validate_and_encode_row(data)
        decoded = schema.decode_row(encoded)

        self.verify_simple_frozen(decoded)

    def test_struct_simple_frozen(self):
        """A row round-tripped through the struct codec cannot be mutated."""
        schema = metadata.MetadataSchema(
            {
                "codec": "struct",
                "type": "object",
                "properties": {
                    "a": {"type": "number", "binaryFormat": "i"},
                    "b": {"type": "number", "binaryFormat": "i"},
                    "c": {
                        "type": "array",
                        "items": {"type": "number", "binaryFormat": "i"},
                    },
                    "d": {
                        "type": "object",
                        "properties": {
                            "x": {"type": "number", "binaryFormat": "i"},
                            "y": {"type": "number", "binaryFormat": "i"},
                        },
                    },
                },
            }
        )

        data = {"a": 1, "b": 2, "c": [1, 2, 3], "d": {"x": 10, "y": 20}}
        encoded = schema.validate_and_encode_row(data)
        decoded = schema.decode_row(encoded)

        self.verify_simple_frozen(decoded)

    def test_complex_nested_frozen(self):
        """Freezing reaches dicts and lists nested inside each other."""
        schema = metadata.MetadataSchema(
            {"codec": "json", "type": "object", "properties": {}}
        )

        data = {
            "level1": {
                "level2": {"level3": {"array": [1, 2, [3, 4, {"a": 5}]]}},
                "items": [
                    {"name": "item1", "values": [1, 2, 3]},
                    {"name": "item2", "values": [4, 5, 6]},
                ],
            }
        }

        encoded = schema.validate_and_encode_row(data)
        decoded = schema.decode_row(encoded)

        # Test deeply nested dictionary modification
        with pytest.raises(TypeError):
            decoded["level1"]["level2"]["level3"]["new"] = "value"

        # Test deeply nested array modification
        with pytest.raises(AttributeError):
            decoded["level1"]["level2"]["level3"]["array"].append(5)

        # Test nested array in array modification
        with pytest.raises(AttributeError):
            decoded["level1"]["level2"]["level3"]["array"][2].append(6)

        # Test dictionary in nested array modification
        with pytest.raises(TypeError):
            decoded["level1"]["level2"]["level3"]["array"][2][2]["b"] = 6

        # Test object in array modification
        with pytest.raises(AttributeError):
            decoded["level1"]["items"][0]["values"].append(4)

        # Test replacing object in array
        with pytest.raises(TypeError):
            decoded["level1"]["items"][1] = {"name": "new", "values": []}

    def test_json_empty_values_frozen(self):
        """Empty nested containers are frozen too."""
        schema = metadata.MetadataSchema({"codec": "json"})

        data = {"empty_dict": {}, "empty_list": []}
        encoded = schema.validate_and_encode_row(data)
        decoded = schema.decode_row(encoded)

        with pytest.raises(TypeError):
            decoded["empty_dict"]["x"] = 1

        with pytest.raises(AttributeError):
            decoded["empty_list"].append(1)

    def test_tree_sequence_metadata_frozen(self):
        """Node and top-level metadata read from a tree sequence are frozen."""
        ts = msprime.simulate(10, random_seed=42)
        tables = ts.dump_tables()

        # Set metadata schema and add metadata to a node, and at top-level
        tables.nodes.metadata_schema = metadata.MetadataSchema({"codec": "json"})
        node_data = {"value": 42, "name": "test", "list": [1, 2, 3]}
        tables.nodes[0] = tables.nodes[0].replace(metadata=node_data)
        tables.metadata_schema = metadata.MetadataSchema({"codec": "json"})
        tables.metadata = {"key": "value"}

        # Create new tree sequence and verify the metadata is frozen
        new_ts = tables.tree_sequence()
        node_metadata = new_ts.node(0).metadata

        with pytest.raises(TypeError):
            node_metadata["new_field"] = "value"

        with pytest.raises(AttributeError):
            node_metadata["list"].append(4)

        top_level_metadata = new_ts.metadata
        with pytest.raises(TypeError):
            top_level_metadata["new_key"] = "new_value"

        with pytest.raises(TypeError):
            top_level_metadata["key"] = "new_value"

    def test_frozen_dict_repr(self):
        """Frozen containers print exactly like the plain dict/list they wrap."""
        schema = metadata.MetadataSchema({"codec": "json"})
        data = {"a": 1, "b": [1, 2], "c": {"x": 10}}
        encoded = schema.validate_and_encode_row(data)
        decoded = schema.decode_row(encoded)

        # String representation should be like a normal dict/list
        assert str(decoded) == "{'a': 1, 'b': [1, 2], 'c': {'x': 10}}"
        assert str(decoded["b"]) == "[1, 2]"
        assert str(decoded["c"]) == "{'x': 10}"
65 changes: 61 additions & 4 deletions python/tskit/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,61 @@ def replace_root_refs(obj):
TSKITMetadataSchemaValidator.META_SCHEMA = deref_meta_schema


class FrozenDict(collections.abc.Mapping):
    """
    Read-only mapping backed by a private ``dict``.

    Only the read side of the mapping protocol is implemented, so item
    assignment and deletion raise ``TypeError``. ``repr`` and ``str`` are
    those of the wrapped ``dict``, so instances print like a plain dict.
    """

    def __init__(self, *args, **kwargs):
        # Takes the same arguments as ``dict``. The contents are copied
        # (shallowly) into a new dict owned by this instance.
        self._data = dict(*args, **kwargs)

    def __getitem__(self, key):
        return self._data[key]

    def __iter__(self):
        return iter(self._data)

    def __len__(self):
        return len(self._data)

    def __repr__(self):
        return repr(self._data)

    def __str__(self):
        return str(self._data)

    def __eq__(self, other):
        """
        Compare by contents with another ``FrozenDict`` or any mapping.

        Returns ``NotImplemented`` for anything else so that Python can try
        the reflected comparison on ``other``. Unrelated objects are never
        treated as equal merely because they carry a ``_data`` attribute.
        """
        if isinstance(other, FrozenDict):
            return self._data == other._data
        if isinstance(other, collections.abc.Mapping):
            return self._data == other
        return NotImplemented


class FrozenList(collections.abc.Sequence):
    """
    Read-only sequence backed by a private ``list``.

    Only the read side of the sequence protocol is implemented: there is no
    ``append`` (calling it raises ``AttributeError``) and item assignment
    raises ``TypeError``. ``repr`` and ``str`` are those of the wrapped
    ``list``, so instances print like a plain list.
    """

    def __init__(self, iterable):
        # The items are copied into a new list owned by this instance.
        self._data = list(iterable)

    def __getitem__(self, index):
        # Slices are delegated to the wrapped list and therefore come back
        # as a new plain ``list`` holding the selected items.
        return self._data[index]

    def __len__(self):
        return len(self._data)

    def __repr__(self):
        return repr(self._data)

    def __str__(self):
        return str(self._data)

    def __eq__(self, other):
        """
        Compare by contents with another ``FrozenList`` or a ``list``.

        Returns ``NotImplemented`` for anything else so that Python can try
        the reflected comparison on ``other``. Unrelated objects are never
        treated as equal merely because they carry a ``_data`` attribute.
        """
        if isinstance(other, FrozenList):
            return self._data == other._data
        if isinstance(other, list):
            return self._data == other
        return NotImplemented


def freeze(obj):
    """
    Return a read-only equivalent of ``obj``.

    A ``dict`` becomes a ``FrozenDict`` and a ``list`` becomes a
    ``FrozenList``, with their values/items frozen recursively. Any other
    object is returned unchanged.
    """
    if isinstance(obj, list):
        frozen_items = [freeze(element) for element in obj]
        return FrozenList(frozen_items)
    if isinstance(obj, dict):
        frozen_pairs = {key: freeze(value) for key, value in obj.items()}
        return FrozenDict(frozen_pairs)
    # Anything that is not a dict or list is passed through as-is.
    return obj


class AbstractMetadataCodec(metaclass=abc.ABCMeta):
"""
Superclass of all MetadataCodecs.
Expand Down Expand Up @@ -173,9 +228,10 @@ def decode(self, encoded: bytes) -> Any:

# Assign default values
if isinstance(result, dict):
return dict(self.defaults, **result)
else:
return result
result = dict(self.defaults, **result)

# Freeze the result to prevent mutation
return freeze(result)


register_metadata_codec(JSONCodec, "json")
Expand Down Expand Up @@ -662,7 +718,8 @@ def __init__(self, schema: Mapping[str, Any]) -> None:

self.encode = StructCodec.make_encode(schema)
decoder = StructCodec.make_decode(schema)
self.decode = lambda buffer: decoder(iter(buffer))
# Wrap the decoder with freeze to make results immutable
self.decode = lambda buffer: freeze(decoder(iter(buffer)))

def encode(self, obj: Any) -> bytes:
# Set by __init__
Expand Down
Loading