diff --git a/changes/3100.bugfix.rst b/changes/3100.bugfix.rst new file mode 100644 index 0000000000..11f06628c0 --- /dev/null +++ b/changes/3100.bugfix.rst @@ -0,0 +1,3 @@ +For Zarr format 2, allow fixed-length string arrays to be created without automatically inserting a +``Vlen-UTF8`` codec in the array of filters. Fixed-length string arrays do not need this codec. This +change fixes a regression where fixed-length string arrays created with Zarr Python 3 could not be read with Zarr Python 2.18. \ No newline at end of file diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 62af644f7d..b4e8ac0ff6 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -768,14 +768,6 @@ def _create_metadata_v2( dtype = parse_dtype(dtype, zarr_format=2) - # inject VLenUTF8 for str dtype if not already present - if np.issubdtype(dtype, np.str_): - filters = filters or [] - from numcodecs.vlen import VLenUTF8 - - if not any(isinstance(x, VLenUTF8) or x["id"] == "vlen-utf8" for x in filters): - filters = list(filters) + [VLenUTF8()] - return ArrayV2Metadata( shape=shape, dtype=np.dtype(dtype), diff --git a/tests/test_array.py b/tests/test_array.py index a6bcd17c4b..3fc7b3938c 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1245,7 +1245,7 @@ async def test_invalid_v3_arguments( zarr.create(store=store, dtype="uint8", shape=(10,), zarr_format=3, **kwargs) @staticmethod - @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) + @pytest.mark.parametrize("dtype", ["uint8", "float32", "str", "U10", "S10", ">M8[10s]"]) @pytest.mark.parametrize( "compressors", [