From 5807cba192773630dfd172715558423d5a32bb01 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 22 Oct 2024 11:17:11 -0700 Subject: [PATCH 1/2] fix(remotestore): raise error if path includes scheme (#2348) * fix(remotestore): raise error if path includes scheme * fixup * fixup * strip scheme in from_url in case fsspec fails to * style: pre-commit fixes * disable cache in listing ops * update docs * use listings cache again * no refresh * style: pre-commit fixes --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/guide/storage.rst | 2 +- src/zarr/abc/store.py | 2 +- src/zarr/storage/remote.py | 42 +++++++++++++++++++++++++++++++-- tests/test_store/test_core.py | 5 +--- tests/test_store/test_remote.py | 35 ++++++++++++++++++++++----- 5 files changed, 72 insertions(+), 14 deletions(-) diff --git a/docs/guide/storage.rst b/docs/guide/storage.rst index dfda553c43..0019f993a2 100644 --- a/docs/guide/storage.rst +++ b/docs/guide/storage.rst @@ -72,7 +72,7 @@ that implements the `AbstractFileSystem` API, .. code-block:: python >>> import zarr - >>> store = zarr.storage.RemoteStore("gs://foo/bar", mode="r") + >>> store = zarr.storage.RemoteStore.from_url("gs://foo/bar", mode="r") >>> zarr.open(store=store) shape=(10, 20) dtype=float32> diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index a995a6bf38..045da7e84a 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -168,7 +168,7 @@ def with_mode(self, mode: AccessModeLiteral) -> Self: Returns ------- - store: + store A new store of the same type with the new mode. Examples diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 812b1e24f9..1f7d5f7a12 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from typing import TYPE_CHECKING, Any, Self from zarr.abc.store import ByteRangeRequest, Store @@ -32,7 +33,8 @@ class RemoteStore(Store): mode : AccessModeLiteral The access mode to use. path : str - The root path of the store. + The root path of the store. This should be a relative path and must not include the + filesystem scheme. allowed_exceptions : tuple[type[Exception], ...] When fetching data, these cases will be deemed to correspond to missing keys. @@ -44,6 +46,23 @@ class RemoteStore(Store): supports_deletes supports_partial_writes supports_listing + + Raises + ------ + TypeError + If the Filesystem does not support async operations. + ValueError + If the path argument includes a scheme. + + Warns + ----- + UserWarning + If the file system (fs) was not created with `asynchronous=True`. + + See Also + -------- + RemoteStore.from_upath + RemoteStore.from_url """ # based on FSSpec @@ -69,6 +88,15 @@ def __init__( if not self.fs.async_impl: raise TypeError("Filesystem needs to support async operations.") + if not self.fs.asynchronous: + warnings.warn( + f"fs ({fs}) was not created with `asynchronous=True`, this may lead to surprising behavior", + stacklevel=2, + ) + if "://" in path and not path.startswith("http"): + # `not path.startswith("http")` is a special case for the http filesystem (¯\_(ツ)_/¯) + scheme, _ = path.split("://", maxsplit=1) + raise ValueError(f"path argument to RemoteStore must not include scheme ({scheme}://)") @classmethod def from_upath( @@ -134,7 +162,17 @@ def from_url( # before fsspec==2024.3.1 from fsspec.core import url_to_fs - fs, path = url_to_fs(url, **storage_options) + opts = storage_options or {} + opts = {"asynchronous": True, **opts} + + fs, path = url_to_fs(url, **opts) + + # fsspec is not consistent about removing the scheme from the path, so check and strip it here + # https://github.com/fsspec/filesystem_spec/issues/1722 + if "://" in path and not path.startswith("http"): + # `not path.startswith("http")` is a special case for the http filesystem (¯\_(ツ)_/¯) + path = fs._strip_protocol(path) + return cls(fs=fs, path=path, mode=mode, allowed_exceptions=allowed_exceptions) async def clear(self) -> None: diff --git a/tests/test_store/test_core.py b/tests/test_store/test_core.py index 771dc3c43e..38292e23c9 100644 --- a/tests/test_store/test_core.py +++ b/tests/test_store/test_core.py @@ -71,10 +71,7 @@ async def test_make_store_path_invalid() -> None: async def test_make_store_path_fsspec(monkeypatch) -> None: - import fsspec.implementations.memory - - monkeypatch.setattr(fsspec.implementations.memory.MemoryFileSystem, "async_impl", True) - store_path = await make_store_path("memory://") + store_path = await make_store_path("http://foo.com/bar") assert isinstance(store_path.store, RemoteStore) diff --git a/tests/test_store/test_remote.py b/tests/test_store/test_remote.py index c8e9a162b0..2ad1fd787b 100644 --- a/tests/test_store/test_remote.py +++ b/tests/test_store/test_remote.py @@ -86,10 +86,12 @@ def s3(s3_base: None) -> Generator[s3fs.S3FileSystem, None, None]: async def test_basic() -> None: store = RemoteStore.from_url( - f"s3://{test_bucket_name}", + f"s3://{test_bucket_name}/foo/spam/", mode="w", storage_options={"endpoint_url": endpoint_url, "anon": False}, ) + assert store.fs.asynchronous + assert store.path == f"{test_bucket_name}/foo/spam" assert await _collect_aiterator(store.list()) == () assert not await store.exists("foo") data = b"hello" @@ -109,7 +111,7 @@ class TestRemoteStoreS3(StoreTests[RemoteStore, cpu.Buffer]): @pytest.fixture def store_kwargs(self, request) -> dict[str, str | bool]: fs, path = fsspec.url_to_fs( - f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False + f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=True ) return {"fs": fs, "path": path, "mode": "r+"} @@ -143,9 +145,7 @@ def test_store_supports_partial_writes(self, store: RemoteStore) -> None: def test_store_supports_listing(self, store: RemoteStore) -> None: assert store.supports_listing - async def test_remote_store_from_uri( - self, store: RemoteStore, store_kwargs: dict[str, str | bool] - ): + async def test_remote_store_from_uri(self, store: RemoteStore): storage_options = { "endpoint_url": endpoint_url, "anon": False, @@ -183,9 +183,32 @@ async def test_remote_store_from_uri( assert dict(group.attrs) == {"key": "value-3"} def test_from_upath(self) -> None: - path = UPath(f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False) + path = UPath( + f"s3://{test_bucket_name}/foo/bar/", + endpoint_url=endpoint_url, + anon=False, + asynchronous=True, + ) result = RemoteStore.from_upath(path) assert result.fs.endpoint_url == endpoint_url + assert result.fs.asynchronous + assert result.path == f"{test_bucket_name}/foo/bar" + + def test_init_raises_if_path_has_scheme(self, store_kwargs) -> None: + # regression test for https://github.com/zarr-developers/zarr-python/issues/2342 + store_kwargs["path"] = "s3://" + store_kwargs["path"] + with pytest.raises( + ValueError, match="path argument to RemoteStore must not include scheme .*" + ): + self.store_cls(**store_kwargs) + + def test_init_warns_if_fs_asynchronous_is_false(self) -> None: + fs, path = fsspec.url_to_fs( + f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=False + ) + store_kwargs = {"fs": fs, "path": path, "mode": "r+"} + with pytest.warns(UserWarning, match=r".* was not created with `asynchronous=True`.*"): + self.store_cls(**store_kwargs) async def test_empty_nonexistent_path(self, store_kwargs) -> None: # regression test for https://github.com/zarr-developers/zarr-python/pull/2343 From 8a33df7fb0568c92a40c57874e07b68e371a4a59 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 13:19:23 -0700 Subject: [PATCH 2/2] chore: update pre-commit hooks (#2427) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.9 → v0.7.0](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.9...v0.7.0) - [github.com/pre-commit/mirrors-mypy: v1.11.2 → v1.12.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.11.2...v1.12.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4667b20de1..55488c2372 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ default_language_version: python: python3 repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.9 + rev: v0.7.0 hooks: - id: ruff args: ["--fix", "--show-fixes"] @@ -22,7 +22,7 @@ repos: hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.11.2 + rev: v1.12.1 hooks: - id: mypy files: src|tests