From 34f6d8e0da610bfaf750f60f621b8c1008be899d Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 6 Nov 2023 16:50:32 +0100 Subject: [PATCH] Support no compressor in `open_cupy_array()` (#312) ... also added some more examples. Authors: - Mads R. B. Kristensen (https://github.com/madsbk) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/kvikio/pull/312 --- python/examples/zarr_cupy_nvcomp.py | 30 +++++++++++++++++++++++++++++ python/kvikio/zarr.py | 19 +++++++++++++++++- python/tests/test_zarr.py | 16 +++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/python/examples/zarr_cupy_nvcomp.py b/python/examples/zarr_cupy_nvcomp.py index 766139b442..9f05f7874a 100644 --- a/python/examples/zarr_cupy_nvcomp.py +++ b/python/examples/zarr_cupy_nvcomp.py @@ -30,6 +30,10 @@ def main(path): # E.g., `open_cupy_array()` uses nvCOMP's Snappy GPU compression by default, # which, as far as we know, isn’t compatible with any CPU compressor. Thus, # let's re-write our Zarr array using a CPU and GPU compatible compressor. + # + # Warning: it isn't possible to use `CompatCompressor` as a compressor argument + # in Zarr directly. It is only meant for `open_cupy_array()`. However, + # in an example further down, we show how to write using regular Zarr. z = kvikio.zarr.open_cupy_array( store=path, mode="w", @@ -53,6 +57,32 @@ def main(path): assert isinstance(z[:], cupy.ndarray) assert (cupy.arange(20, 40) == z[:]).all() + # Similarly, we can also open a file written by regular Zarr. + # Let's write the file without any compressor. 
+ ary = numpy.arange(10) + z = zarr.open(store=path, mode="w", shape=ary.shape, compressor=None) + z[:] = ary + # This works as before where the file is read as a CuPy array + z = kvikio.zarr.open_cupy_array(store=path) + assert isinstance(z[:], cupy.ndarray) + assert (z[:] == cupy.asarray(ary)).all() + + # Using a compressor is a bit more tricky since not all CPU compressors + # are GPU compatible. To make sure we use a compatible compressor, we use + # the CPU-part of `CompatCompressor.lz4()`. + ary = numpy.arange(10) + z = zarr.open( + store=path, + mode="w", + shape=ary.shape, + compressor=kvikio.zarr.CompatCompressor.lz4().cpu, + ) + z[:] = ary + # This works as before where the file is read as a CuPy array + z = kvikio.zarr.open_cupy_array(store=path) + assert isinstance(z[:], cupy.ndarray) + assert (z[:] == cupy.asarray(ary)).all() + if __name__ == "__main__": main("/tmp/zarr-cupy-nvcomp") diff --git a/python/kvikio/zarr.py b/python/kvikio/zarr.py index c718c413a7..1a8e7f3848 100644 --- a/python/kvikio/zarr.py +++ b/python/kvikio/zarr.py @@ -274,7 +274,22 @@ def get_nvcomp_manager(self): class CompatCompressor: - """A pair of compatible compressors one using the CPU and one using the GPU""" + """A pair of compatible compressors one using the CPU and one using the GPU + + Warning + ------- + `CompatCompressor` is only supported by KvikIO's `open_cupy_array()` and + cannot be used as a compressor argument in Zarr functions like `open()` + and `open_array()` directly. However, it is possible to use its `.cpu` + like: `open(..., compressor=CompatCompressor.lz4().cpu)`. + + Parameters + ---------- + cpu + The CPU compressor. + gpu + The GPU compressor. 
+ """ def __init__(self, cpu: Codec, gpu: CudaCodec) -> None: self.cpu = cpu @@ -347,6 +362,8 @@ def open_cupy_array( if mode in ("r", "r+"): raise else: + if ret.compressor is None: + return ret # If we are reading a LZ4-CPU compressed file, we overwrite the # metadata on-the-fly to make Zarr use LZ4-GPU for both compression # and decompression. diff --git a/python/tests/test_zarr.py b/python/tests/test_zarr.py index 33e3b4588f..ba42e35a96 100644 --- a/python/tests/test_zarr.py +++ b/python/tests/test_zarr.py @@ -249,6 +249,22 @@ def test_open_cupy_array(tmp_path, write_mode, read_mode): numpy.testing.assert_array_equal(a.get(), z[:]) +@pytest.mark.parametrize("compressor", [None, kvikio_zarr.CompatCompressor.lz4().cpu]) +def test_open_cupy_array_written_by_zarr(tmp_path, compressor): + data = numpy.arange(100) + z = zarr.open_array( + tmp_path, + shape=data.shape, + mode="w", + compressor=compressor, + ) + z[:] = data + + z = kvikio_zarr.open_cupy_array(tmp_path, mode="r") + assert isinstance(z[:], cupy.ndarray) + cupy.testing.assert_array_equal(z[:], data) + + @pytest.mark.parametrize("mode", ["r", "r+", "a"]) def test_open_cupy_array_incompatible_compressor(tmp_path, mode): zarr.create((10,), store=tmp_path, compressor=numcodecs.Blosc())