From 7077fae5bf3e0552065f5440512731b8856a8415 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 6 Jan 2025 14:54:19 -0700 Subject: [PATCH 1/6] Fix zarr upstream tests --- xarray/backends/zarr.py | 20 +++++------ xarray/tests/test_backends.py | 64 +++++++++++++++-------------------- 2 files changed, 35 insertions(+), 49 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index f7f30272941..495cfddd92e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -447,10 +447,11 @@ def extract_zarr_variable_encoding( safe_to_drop = {"source", "original_shape", "preferred_chunks"} valid_encodings = { - "codecs", "chunks", - "compressor", + "compressor", # TODO: delete when min zarr >=3 + "compressors", "filters", + "serializer", "cache_metadata", "write_empty_chunks", } @@ -480,7 +481,7 @@ def extract_zarr_variable_encoding( mode=mode, shape=shape, ) - encoding["chunks"] = chunks + encoding["chunks"] = chunks or "auto" return encoding @@ -816,22 +817,17 @@ def open_store_variable(self, name): ) attributes = dict(attributes) - # TODO: this should not be needed once - # https://github.com/zarr-developers/zarr-python/issues/1269 is resolved. - attributes.pop("filters", None) - encoding = { "chunks": zarr_array.chunks, "preferred_chunks": dict(zip(dimensions, zarr_array.chunks, strict=True)), } - if _zarr_v3() and zarr_array.metadata.zarr_format == 3: - encoding["codecs"] = [x.to_dict() for x in zarr_array.metadata.codecs] - elif _zarr_v3(): + if _zarr_v3(): encoding.update( { - "compressor": zarr_array.metadata.compressor, - "filters": zarr_array.metadata.filters, + "compressors": zarr_array.compressors, + "filters": zarr_array.filters, + # "serializer": zarr_array.serializer, } ) else: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 330dd1dac1f..c5e308215b5 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -134,21 +134,21 @@ @pytest.fixture(scope="module", params=ZARR_FORMATS) -def default_zarr_version(request) -> Generator[None, None]: +def default_zarr_format(request) -> Generator[None, None]: if has_zarr_v3: - with zarr.config.set(default_zarr_version=request.param): + with zarr.config.set(default_zarr_format=request.param): yield else: yield def skip_if_zarr_format_3(reason: str): - if has_zarr_v3 and zarr.config["default_zarr_version"] == 3: + if has_zarr_v3 and zarr.config["default_zarr_format"] == 3: pytest.skip(reason=f"Unsupported with zarr_format=3: {reason}") def skip_if_zarr_format_2(reason: str): - if not has_zarr_v3 or (zarr.config["default_zarr_version"] == 2): + if not has_zarr_v3 or (zarr.config["default_zarr_format"] == 2): pytest.skip(reason=f"Unsupported with zarr_format=2: {reason}") @@ -2270,7 +2270,7 @@ def test_roundtrip_coordinates(self) -> None: @requires_zarr -@pytest.mark.usefixtures("default_zarr_version") +@pytest.mark.usefixtures("default_zarr_format") class ZarrBase(CFEncodedBase): DIMENSION_KEY = "_ARRAY_DIMENSIONS" zarr_version = 2 @@ -2675,40 +2675,35 @@ def test_write_persistence_modes(self, group) -> None: assert_identical(original, actual) def test_compressor_encoding(self) -> None: + from numcodecs.blosc import Blosc + original = create_test_data() # specify a custom compressor - if has_zarr_v3 and zarr.config.config["default_zarr_version"] == 3: - encoding_key = "codecs" + if has_zarr_v3 and zarr.config.config["default_zarr_format"] == 3: + encoding_key = "compressors" # all parameters need to be explicitly specified in order for the comparison to pass below encoding = { + "serializer": zarr.codecs.BytesCodec(endian="little"), encoding_key: ( - zarr.codecs.BytesCodec(endian="little"), - zarr.codecs.BloscCodec( + Blosc( cname="zstd", clevel=3, shuffle="shuffle", typesize=8, blocksize=0, ), - ) + ), } else: - from numcodecs.blosc import Blosc - - encoding_key = "compressor" - encoding = {encoding_key: Blosc(cname="zstd", clevel=3, shuffle=2)} + encoding_key = "compressors" if has_zarr_v3 else "compressor" + encoding = {encoding_key: (Blosc(cname="zstd", clevel=3, shuffle=2),)} save_kwargs = dict(encoding={"var1": encoding}) with self.roundtrip(original, save_kwargs=save_kwargs) as ds: enc = ds["var1"].encoding[encoding_key] - if has_zarr_v3 and zarr.config.config["default_zarr_version"] == 3: - # TODO: figure out a cleaner way to do this comparison - codecs = zarr.core.metadata.v3.parse_codecs(enc) - assert codecs == encoding[encoding_key] - else: - assert enc == encoding[encoding_key] + assert enc == encoding[encoding_key] def test_group(self) -> None: original = create_test_data() @@ -2846,14 +2841,9 @@ def test_check_encoding_is_consistent_after_append(self) -> None: import numcodecs encoding_value: Any - if has_zarr_v3 and zarr.config.config["default_zarr_version"] == 3: - compressor = zarr.codecs.BloscCodec() - encoding_key = "codecs" - encoding_value = [zarr.codecs.BytesCodec(), compressor] - else: - compressor = numcodecs.Blosc() - encoding_key = "compressor" - encoding_value = compressor + compressor = numcodecs.Blosc() + encoding_key = "compressors" if has_zarr_v3 else "compressor" + encoding_value = compressor encoding = {"da": {encoding_key: encoding_value}} ds.to_zarr(store_target, mode="w", encoding=encoding, **self.version_kwargs) @@ -6092,14 +6082,14 @@ def test_raise_writing_to_nczarr(self, mode) -> None: @requires_netCDF4 @requires_dask -@pytest.mark.usefixtures("default_zarr_version") +@pytest.mark.usefixtures("default_zarr_format") def test_pickle_open_mfdataset_dataset(): with open_example_mfdataset(["bears.nc"]) as ds: assert_identical(ds, pickle.loads(pickle.dumps(ds))) @requires_zarr -@pytest.mark.usefixtures("default_zarr_version") +@pytest.mark.usefixtures("default_zarr_format") def test_zarr_closing_internal_zip_store(): store_name = "tmp.zarr.zip" original_da = DataArray(np.arange(12).reshape((3, 4))) @@ -6110,7 +6100,7 @@ def test_zarr_closing_internal_zip_store(): @requires_zarr -@pytest.mark.usefixtures("default_zarr_version") +@pytest.mark.usefixtures("default_zarr_format") class TestZarrRegionAuto: def test_zarr_region_auto_all(self, tmp_path): x = np.arange(0, 50, 10) @@ -6286,7 +6276,7 @@ def test_zarr_region_append(self, tmp_path): @requires_zarr -@pytest.mark.usefixtures("default_zarr_version") +@pytest.mark.usefixtures("default_zarr_format") def test_zarr_region(tmp_path): x = np.arange(0, 50, 10) y = np.arange(0, 20, 2) @@ -6315,7 +6305,7 @@ def test_zarr_region(tmp_path): @requires_zarr @requires_dask -@pytest.mark.usefixtures("default_zarr_version") +@pytest.mark.usefixtures("default_zarr_format") def test_zarr_region_chunk_partial(tmp_path): """ Check that writing to partial chunks with `region` fails, assuming `safe_chunks=False`. @@ -6336,7 +6326,7 @@ def test_zarr_region_chunk_partial(tmp_path): @requires_zarr @requires_dask -@pytest.mark.usefixtures("default_zarr_version") +@pytest.mark.usefixtures("default_zarr_format") def test_zarr_append_chunk_partial(tmp_path): t_coords = np.array([np.datetime64("2020-01-01").astype("datetime64[ns]")]) data = np.ones((10, 10)) @@ -6374,7 +6364,7 @@ def test_zarr_append_chunk_partial(tmp_path): @requires_zarr @requires_dask -@pytest.mark.usefixtures("default_zarr_version") +@pytest.mark.usefixtures("default_zarr_format") def test_zarr_region_chunk_partial_offset(tmp_path): # https://github.com/pydata/xarray/pull/8459#issuecomment-1819417545 store = tmp_path / "foo.zarr" @@ -6394,7 +6384,7 @@ def test_zarr_region_chunk_partial_offset(tmp_path): @requires_zarr @requires_dask -@pytest.mark.usefixtures("default_zarr_version") +@pytest.mark.usefixtures("default_zarr_format") def test_zarr_safe_chunk_append_dim(tmp_path): store = tmp_path / "foo.zarr" data = np.ones((20,)) @@ -6445,7 +6435,7 @@ def test_zarr_safe_chunk_append_dim(tmp_path): @requires_zarr @requires_dask -@pytest.mark.usefixtures("default_zarr_version") +@pytest.mark.usefixtures("default_zarr_format") def test_zarr_safe_chunk_region(tmp_path): store = tmp_path / "foo.zarr" From ffeb009228d3a89dad945c856f3be3956f7254ef Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 6 Jan 2025 15:33:59 -0700 Subject: [PATCH 2/6] fix two --- xarray/backends/zarr.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 495cfddd92e..383c385e1d5 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -481,7 +481,9 @@ def extract_zarr_variable_encoding( mode=mode, shape=shape, ) - encoding["chunks"] = chunks or "auto" + if _zarr_v3() and chunks is None: + chunks = "auto" + encoding["chunks"] = chunks return encoding @@ -827,9 +829,10 @@ def open_store_variable(self, name): { "compressors": zarr_array.compressors, "filters": zarr_array.filters, - # "serializer": zarr_array.serializer, } ) + if self.zarr_group.metadata.zarr_format == 3: + encoding.update({"serializer": zarr_array.serializer}) else: encoding.update( { From 4a85fcdb0962d575a5686bea6695cd8a709e42d7 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 6 Jan 2025 16:06:06 -0700 Subject: [PATCH 3/6] fix compressors Co-authored-by: Matthew Iannucci --- xarray/tests/test_backends.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c5e308215b5..9a2c2bd3137 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2675,18 +2675,15 @@ def test_write_persistence_modes(self, group) -> None: assert_identical(original, actual) def test_compressor_encoding(self) -> None: - from numcodecs.blosc import Blosc - - original = create_test_data() # specify a custom compressor - + original = create_test_data() if has_zarr_v3 and zarr.config.config["default_zarr_format"] == 3: encoding_key = "compressors" # all parameters need to be explicitly specified in order for the comparison to pass below encoding = { "serializer": zarr.codecs.BytesCodec(endian="little"), encoding_key: ( - Blosc( + zarr.codecs.BloscCodec( cname="zstd", clevel=3, shuffle="shuffle", @@ -2696,8 +2693,11 @@ def test_compressor_encoding(self) -> None: ), } else: + from numcodecs.blosc import Blosc + encoding_key = "compressors" if has_zarr_v3 else "compressor" - encoding = {encoding_key: (Blosc(cname="zstd", clevel=3, shuffle=2),)} + comp = Blosc(cname="zstd", clevel=3, shuffle=2) + encoding = {encoding_key: (comp,) if has_zarr_v3 else comp} save_kwargs = dict(encoding={"var1": encoding}) From 72fd9444726243817af963072253479550d74828 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 6 Jan 2025 16:19:37 -0700 Subject: [PATCH 4/6] one more compressor fix --- xarray/tests/test_backends.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 9a2c2bd3137..ba8bf92e7b3 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2841,9 +2841,12 @@ def test_check_encoding_is_consistent_after_append(self) -> None: import numcodecs encoding_value: Any - compressor = numcodecs.Blosc() + if has_zarr_v3 and zarr.config.config["default_zarr_format"] == 3: + compressor = zarr.codecs.BloscCodec() + else: + compressor = numcodecs.Blosc() encoding_key = "compressors" if has_zarr_v3 else "compressor" - encoding_value = compressor + encoding_value = (compressor,) if has_zarr_v3 else compressor encoding = {"da": {encoding_key: encoding_value}} ds.to_zarr(store_target, mode="w", encoding=encoding, **self.version_kwargs) @@ -5469,7 +5472,7 @@ def test_dataarray_to_netcdf_no_name_pathlib(self) -> None: @requires_zarr class TestDataArrayToZarr: def skip_if_zarr_python_3_and_zip_store(self, store) -> None: - if has_zarr_v3 and isinstance(store, zarr.storage.zip.ZipStore): + if has_zarr_v3 and isinstance(store, zarr.storage.ZipStore): pytest.skip( reason="zarr-python 3.x doesn't support reopening ZipStore with a new mode." ) From 5f510fb0bfdbea55a26a730004b134c8c83df885 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 6 Jan 2025 16:21:59 -0700 Subject: [PATCH 5/6] fix chunks test --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ba8bf92e7b3..37e2cc15416 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5779,7 +5779,7 @@ def test_extract_zarr_variable_encoding() -> None: var = xr.Variable("x", [1, 2]) actual = backends.zarr.extract_zarr_variable_encoding(var) assert "chunks" in actual - assert actual["chunks"] is None + assert actual["chunks"] == ("auto" if has_zarr_v3 else None) var = xr.Variable("x", [1, 2], encoding={"chunks": (1,)}) actual = backends.zarr.extract_zarr_variable_encoding(var) From 4b5def787c11ff85d33193c6bde2a994702424c1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 6 Jan 2025 16:23:47 -0700 Subject: [PATCH 6/6] [test-upstream] fix warning test --- xarray/tests/test_backends.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 37e2cc15416..cfca5e69048 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2439,7 +2439,7 @@ def test_warning_on_bad_chunks(self) -> None: with warnings.catch_warnings(): warnings.filterwarnings( "ignore", - message=".*Zarr version 3 specification.*", + message=".*Zarr format 3 specification.*", category=UserWarning, ) with self.roundtrip(original, open_kwargs=kwargs) as actual: @@ -2988,7 +2988,7 @@ def test_no_warning_from_open_emptydim_with_chunks(self) -> None: with warnings.catch_warnings(): warnings.filterwarnings( "ignore", - message=".*Zarr version 3 specification.*", + message=".*Zarr format 3 specification.*", category=UserWarning, ) with self.roundtrip(ds, open_kwargs=dict(chunks={"a": 1})) as ds_reload: