diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a350fc097..b9e5d8fb1 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -20,6 +20,8 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install --upgrade tox + # Annotate codespell within PR + - uses: codespell-project/codespell-problem-matcher@v1 - name: Run linters run: | tox -e lint diff --git a/docs/source/cmdline/download.rst b/docs/source/cmdline/download.rst index bcdaf774d..32bf11cfe 100644 --- a/docs/source/cmdline/download.rst +++ b/docs/source/cmdline/download.rst @@ -46,6 +46,12 @@ Options Whether to interpret asset paths in URLs as exact matches or glob patterns +.. option:: --preserve-tree + + When downloading only part of a Dandiset, also download + :file:`dandiset.yaml` and do not strip leading directories from asset + paths. Implies ``--download all``. + .. option:: --sync Delete local assets that do not exist on the server after downloading diff --git a/lincbrain/cli/cmd_download.py b/lincbrain/cli/cmd_download.py index 509d24590..5c0bc5f22 100644 --- a/lincbrain/cli/cmd_download.py +++ b/lincbrain/cli/cmd_download.py @@ -11,6 +11,29 @@ from ..download import DownloadExisting, DownloadFormat, PathType from ..utils import get_instance, joinurl +_examples = """ +EXAMPLES: + +\b + - Download only the dandiset.yaml + dandi download --download dandiset.yaml DANDI:000027 +\b + - Download only dandiset.yaml if there is a newer version + dandi download https://identifiers.org/DANDI:000027 --existing refresh +\b + - Download only the assets + dandi download --download assets DANDI:000027 +\b + - Download all from a specific version + dandi download DANDI:000027/0.210831.2033 +\b + - Download a specific directory + dandi download dandi://DANDI/000027@0.210831.2033/sub-RAT123/ +\b + - Download a specific file + dandi download dandi://DANDI/000027@0.210831.2033/sub-RAT123/sub-RAT123.nwb +""" + # The use of f-strings apparently makes this not a proper docstring, and so # click doesn't use it unless we explicitly assign it to `help`: @@ -18,9 +41,15 @@ help=f"""\ Download files or entire folders from LINC. +\b +{_dandi_url_parser.resource_identifier_primer} + \b {_dandi_url_parser.known_patterns} - """ + +{_examples} + +""" ) @click.option( "-o", @@ -71,6 +100,15 @@ default="all", show_default=True, ) +@click.option( + "--preserve-tree", + is_flag=True, + help=( + "When downloading only part of a Dandiset, also download" + " `dandiset.yaml` and do not strip leading directories from asset" + " paths. Implies `--download all`." + ), +) @click.option( "--sync", is_flag=True, help="Delete local assets that do not exist on the server" ) @@ -109,6 +147,7 @@ def download( sync: bool, dandi_instance: str, path_type: PathType, + preserve_tree: bool, ) -> None: # We need to import the download module rather than the download function # so that the tests can properly patch the function with a mock. 
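A quick sketch of the behavior the new --preserve-tree flag is meant to produce, using a hypothetical Dandiset 000123 holding asset subdir2/coconut.txt (the layout mirrors the tests added to lincbrain/tests/test_download.py further below):

    # Default single-file download keeps only the basename:
    dandi download dandi://DANDI/000123/subdir2/coconut.txt
    #   -> coconut.txt

    # --preserve-tree implies --download all: dandiset.yaml is also fetched,
    # and leading directories are kept under the Dandiset's own directory:
    dandi download --preserve-tree dandi://DANDI/000123/subdir2/coconut.txt
    #   -> 000123/dandiset.yaml
    #   -> 000123/subdir2/coconut.txt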
@@ -142,8 +181,9 @@ def download( format=format, jobs=jobs[0], jobs_per_zarr=jobs[1], - get_metadata="dandiset.yaml" in download_types, - get_assets="assets" in download_types, + get_metadata="dandiset.yaml" in download_types or preserve_tree, + get_assets="assets" in download_types or preserve_tree, + preserve_tree=preserve_tree, sync=sync, path_type=path_type, # develop_debug=develop_debug diff --git a/lincbrain/cli/cmd_ls.py b/lincbrain/cli/cmd_ls.py index eeb3dbecd..9ac9ed42f 100644 --- a/lincbrain/cli/cmd_ls.py +++ b/lincbrain/cli/cmd_ls.py @@ -26,6 +26,8 @@ The arguments may be either resource identifiers or paths to local files/directories. +\b +{_dandi_url_parser.resource_identifier_primer} \b {_dandi_url_parser.known_patterns} """ diff --git a/lincbrain/cli/command.py b/lincbrain/cli/command.py index cb09e3f5e..03a193957 100644 --- a/lincbrain/cli/command.py +++ b/lincbrain/cli/command.py @@ -99,7 +99,7 @@ def main(ctx, log_level, pdb=False): logdir = platformdirs.user_log_dir("lincbrain-cli", "lincbrain") logfile = os.path.join( - logdir, f"{datetime.now(timezone.utc):%Y%m%d%H%M%SZ}-{os.getpid()}.log" + logdir, f"{datetime.now(timezone.utc):%Y.%m.%d-%H.%M.%SZ}-{os.getpid()}.log" ) os.makedirs(logdir, exist_ok=True) handler = logging.FileHandler(logfile, encoding="utf-8") diff --git a/lincbrain/cli/tests/data/update_dandiset_from_doi/biorxiv.json b/lincbrain/cli/tests/data/update_dandiset_from_doi/biorxiv.json index 6cb786cbd..1b7eaaed4 100644 --- a/lincbrain/cli/tests/data/update_dandiset_from_doi/biorxiv.json +++ b/lincbrain/cli/tests/data/update_dandiset_from_doi/biorxiv.json @@ -20,6 +20,7 @@ "contributor": [ { "name": "Tests, Dandi-Cli", + "email": "nemo@example.com", "roleName": [ "dcite:Author", "dcite:ContactPerson" diff --git a/lincbrain/cli/tests/data/update_dandiset_from_doi/elife.json b/lincbrain/cli/tests/data/update_dandiset_from_doi/elife.json index e4a55ac1d..dd4df7f46 100644 --- a/lincbrain/cli/tests/data/update_dandiset_from_doi/elife.json +++ b/lincbrain/cli/tests/data/update_dandiset_from_doi/elife.json @@ -20,6 +20,7 @@ "contributor": [ { "name": "Tests, Dandi-Cli", + "email": "nemo@example.com", "roleName": [ "dcite:Author", "dcite:ContactPerson" diff --git a/lincbrain/cli/tests/data/update_dandiset_from_doi/jneurosci.json b/lincbrain/cli/tests/data/update_dandiset_from_doi/jneurosci.json index caf16aba6..18ce9c631 100644 --- a/lincbrain/cli/tests/data/update_dandiset_from_doi/jneurosci.json +++ b/lincbrain/cli/tests/data/update_dandiset_from_doi/jneurosci.json @@ -20,6 +20,7 @@ "contributor": [ { "name": "Tests, Dandi-Cli", + "email": "nemo@example.com", "roleName": [ "dcite:Author", "dcite:ContactPerson" diff --git a/lincbrain/cli/tests/data/update_dandiset_from_doi/nature.json b/lincbrain/cli/tests/data/update_dandiset_from_doi/nature.json index 92588edf7..da8514fc0 100644 --- a/lincbrain/cli/tests/data/update_dandiset_from_doi/nature.json +++ b/lincbrain/cli/tests/data/update_dandiset_from_doi/nature.json @@ -20,6 +20,7 @@ "contributor": [ { "name": "Tests, Dandi-Cli", + "email": "nemo@example.com", "roleName": [ "dcite:Author", "dcite:ContactPerson" diff --git a/lincbrain/cli/tests/data/update_dandiset_from_doi/neuron.json b/lincbrain/cli/tests/data/update_dandiset_from_doi/neuron.json index 82a81a7fa..fe2874f42 100644 --- a/lincbrain/cli/tests/data/update_dandiset_from_doi/neuron.json +++ b/lincbrain/cli/tests/data/update_dandiset_from_doi/neuron.json @@ -20,6 +20,7 @@ "contributor": [ { "name": "Tests, Dandi-Cli", + "email": "nemo@example.com", 
"roleName": [ "dcite:Author", "dcite:ContactPerson" diff --git a/lincbrain/cli/tests/test_download.py b/lincbrain/cli/tests/test_download.py index fc5915ede..bc0235110 100644 --- a/lincbrain/cli/tests/test_download.py +++ b/lincbrain/cli/tests/test_download.py @@ -23,6 +23,7 @@ def test_download_defaults(mocker): jobs_per_zarr=None, get_metadata=True, get_assets=True, + preserve_tree=False, sync=False, path_type=PathType.EXACT, ) @@ -41,6 +42,7 @@ def test_download_all_types(mocker): jobs_per_zarr=None, get_metadata=True, get_assets=True, + preserve_tree=False, sync=False, path_type=PathType.EXACT, ) @@ -59,6 +61,7 @@ def test_download_metadata_only(mocker): jobs_per_zarr=None, get_metadata=True, get_assets=False, + preserve_tree=False, sync=False, path_type=PathType.EXACT, ) @@ -77,6 +80,7 @@ def test_download_assets_only(mocker): jobs_per_zarr=None, get_metadata=False, get_assets=True, + preserve_tree=False, sync=False, path_type=PathType.EXACT, ) @@ -110,6 +114,7 @@ def test_download_gui_instance_in_dandiset(mocker): jobs_per_zarr=None, get_metadata=True, get_assets=True, + preserve_tree=False, sync=False, path_type=PathType.EXACT, ) @@ -135,6 +140,7 @@ def test_download_api_instance_in_dandiset(mocker): jobs_per_zarr=None, get_metadata=True, get_assets=True, + preserve_tree=False, sync=False, path_type=PathType.EXACT, ) @@ -160,6 +166,7 @@ def test_download_url_instance_match(mocker): jobs_per_zarr=None, get_metadata=True, get_assets=True, + preserve_tree=False, sync=False, path_type=PathType.EXACT, ) diff --git a/lincbrain/conftest.py b/lincbrain/conftest.py index 61a3fc760..a59f14b3d 100644 --- a/lincbrain/conftest.py +++ b/lincbrain/conftest.py @@ -12,6 +12,12 @@ def pytest_addoption(parser: Parser) -> None: default=False, help="Only run tests of the new Django Dandi API", ) + parser.addoption( + "--scheduled", + action="store_true", + default=False, + help="Use configuration for a scheduled daily test run", + ) def pytest_collection_modifyitems(items: list[Item], config: Config) -> None: diff --git a/lincbrain/dandiapi.py b/lincbrain/dandiapi.py index e204c7b09..33e20ef99 100644 --- a/lincbrain/dandiapi.py +++ b/lincbrain/dandiapi.py @@ -10,6 +10,7 @@ import json import os.path from pathlib import Path, PurePosixPath +import posixpath import re from time import sleep, time from types import TracebackType @@ -233,6 +234,8 @@ def request( url, result.text, ) + if data is not None and hasattr(data, "seek"): + data.seek(0) result.raise_for_status() except Exception as e: if isinstance(e, requests.HTTPError): @@ -935,6 +938,29 @@ def from_data(cls, client: DandiAPIClient, data: dict[str, Any]) -> RemoteDandis client=client, identifier=data["identifier"], version=version, data=data ) + @staticmethod + def _normalize_path(path: str) -> str: + """ + Helper to normalize path before passing it to the server. + + We and API call it "path" but it is really a "prefix" with inherent + semantics of containing directory divider '/' and referring to a + directory when terminated with '/'. + """ + # Server (now) expects path to be a proper prefix, so to account for user + # possibly specifying ./ or some other relative paths etc, let's normalize + # the path. 
+        # Ref: https://github.com/dandi/dandi-cli/issues/1452
+        path_normed = posixpath.normpath(path)
+        if path_normed == ".":
+            path_normed = ""
+        elif path.endswith("/"):
+            # retain the trailing slash if the original path had one
+            path_normed += "/"
+        if path_normed != path:
+            lgr.debug("Normalized path %r to %r", path, path_normed)
+        return path_normed
+
     def json_dict(self) -> dict[str, Any]:
         """
         Convert to a JSONable `dict`, omitting the ``client`` attribute and
@@ -1152,6 +1178,9 @@ def get_assets_with_path_prefix(
         Returns an iterator of all assets in this version of the Dandiset
         whose `~RemoteAsset.path` attributes start with ``path``
 
+        ``path`` is normalized first to remove any leading ``./`` and to
+        collapse relative components (e.g., ``../``) within it.
+
         Assets can be sorted by a given field by passing the name of that
         field as the ``order`` parameter.  The accepted field names are
         ``"created"``, ``"modified"``, and ``"path"``.  Prepend a hyphen to the
@@ -1160,7 +1189,7 @@
         try:
             for a in self.client.paginate(
                 f"{self.version_api_path}assets/",
-                params={"path": path, "order": order},
+                params={"path": self._normalize_path(path), "order": order},
             ):
                 yield RemoteAsset.from_data(self, a)
         except HTTP404Error:
@@ -1198,7 +1227,11 @@ def get_asset_by_path(self, path: str) -> RemoteAsset:
         Fetch the asset in this version of the Dandiset whose
         `~RemoteAsset.path` equals ``path``.  If the given asset does not
         exist, a `NotFoundError` is raised.
+
+        ``path`` is normalized first to remove any leading ``./`` and to
+        collapse relative components (e.g., ``../``) within it.
         """
+        path = self._normalize_path(path)
         try:
             # Weed out any assets that happen to have the given path as a
             # proper prefix:
@@ -1219,9 +1252,15 @@ def download_directory(
         """
         Download all assets under the virtual directory ``assets_dirpath``
         to the directory ``dirpath``.  Downloads are synchronous.
+
+        ``assets_dirpath`` is normalized first to remove any leading ``./``
+        and to collapse relative components (e.g., ``../``) within it.
         """
         if assets_dirpath and not assets_dirpath.endswith("/"):
             assets_dirpath += "/"
+        # need to normalize explicitly since we also use it below to deduce
+        # the portion of the path to strip off
+        assets_dirpath = self._normalize_path(assets_dirpath)
         assets = list(self.get_assets_with_path_prefix(assets_dirpath))
         for a in assets:
             filepath = Path(dirpath, a.path[len(assets_dirpath) :])
diff --git a/lincbrain/dandiarchive.py b/lincbrain/dandiarchive.py
index cf7dec4c6..40eb5fb3f 100644
--- a/lincbrain/dandiarchive.py
+++ b/lincbrain/dandiarchive.py
@@ -183,11 +183,17 @@ def navigate(
         yield (client, dandiset, assets)
 
     @abstractmethod
-    def get_asset_download_path(self, asset: BaseRemoteAsset) -> str:
+    def get_asset_download_path(
+        self, asset: BaseRemoteAsset, preserve_tree: bool
+    ) -> str:
         """
         Returns the path (relative to the base download directory) at which
         the asset ``asset`` (assumed to have been returned by this object's
-        `get_assets()` method) should be downloaded
+        `get_assets()` method) should be downloaded.
+
+        If ``preserve_tree`` is `True`, then the download is being performed
+        with the ``--preserve-tree`` option, and the method's return value
+        should be adjusted accordingly.
        :meta private:
        """
 
@@ -231,7 +237,9 @@ def get_assets(
             assert d is not None
             yield from d.get_assets(order=order)
 
-    def get_asset_download_path(self, asset: BaseRemoteAsset) -> str:
+    def get_asset_download_path(
+        self, asset: BaseRemoteAsset, preserve_tree: bool
+    ) -> str:
         return asset.path.lstrip("/")
 
     def is_under_download_path(self, path: str) -> bool:
@@ -242,13 +250,17 @@ def is_under_download_path(self, path: str) -> bool:
 class SingleAssetURL(ParsedDandiURL):
     """Superclass for parsed URLs that refer to a single asset"""
 
-    def get_asset_download_path(self, asset: BaseRemoteAsset) -> str:
-        return posixpath.basename(asset.path.lstrip("/"))
+    def get_asset_download_path(
+        self, asset: BaseRemoteAsset, preserve_tree: bool
+    ) -> str:
+        path = asset.path.lstrip("/")
+        if preserve_tree:
+            return path
+        else:
+            return posixpath.basename(path)
 
     def is_under_download_path(self, path: str) -> bool:
-        raise TypeError(
-            f"{type(self).__name__}.is_under_download_path() should not be called"
-        )
+        return False
 
 
 @dataclass
@@ -257,8 +269,14 @@ class MultiAssetURL(ParsedDandiURL):
     path: str
 
-    def get_asset_download_path(self, asset: BaseRemoteAsset) -> str:
-        return multiasset_target(self.path, asset.path.lstrip("/"))
+    def get_asset_download_path(
+        self, asset: BaseRemoteAsset, preserve_tree: bool
+    ) -> str:
+        path = asset.path.lstrip("/")
+        if preserve_tree:
+            return path
+        else:
+            return multiasset_target(self.path, path)
 
     def is_under_download_path(self, path: str) -> bool:
         prefix = posixpath.dirname(self.path.strip("/"))
@@ -487,7 +505,9 @@ def get_assets(
         if strict and not any_assets:
             raise NotFoundError(f"No assets found matching glob {self.path!r}")
 
-    def get_asset_download_path(self, asset: BaseRemoteAsset) -> str:
+    def get_asset_download_path(
+        self, asset: BaseRemoteAsset, preserve_tree: bool
+    ) -> str:
         return asset.path.lstrip("/")
 
     def is_under_download_path(self, path: str) -> bool:
@@ -675,6 +695,16 @@ class _dandi_url_parser:
             "https:///...",
         ),
     ]
+    resource_identifier_primer = """RESOURCE ID/URLS:\n
+    dandi commands accept URLs and URL-like identifiers in the following
+    formats for identifying Dandisets, assets, and asset collections.
+
+    Text in [brackets] is optional.  A server field is a base API or GUI URL
+    for a DANDI Archive instance.  If an optional ``version`` field is
+    omitted from a URL, the given Dandiset's most recent published version
+    will be used if it has one, and its draft version will be used otherwise.
+    """
     known_patterns = "Accepted resource identifier patterns:" + "\n - ".join(
         [""] + [display for _, _, display in known_urls]
     )
diff --git a/lincbrain/download.py b/lincbrain/download.py
index 79e2f575f..2df711c78 100644
--- a/lincbrain/download.py
+++ b/lincbrain/download.py
@@ -93,6 +93,7 @@ def download(
     jobs_per_zarr: int | None = None,
     get_metadata: bool = True,
     get_assets: bool = True,
+    preserve_tree: bool = False,
     sync: bool = False,
     path_type: PathType = PathType.EXACT,
 ) -> None:
@@ -141,6 +142,7 @@ def download(
         existing=existing,
         get_metadata=get_metadata,
         get_assets=get_assets,
+        preserve_tree=preserve_tree,
         jobs_per_zarr=jobs_per_zarr,
         on_error="yield" if format is DownloadFormat.PYOUT else "raise",
         **kw,
@@ -201,6 +203,7 @@ class Downloader:
     existing: DownloadExisting
     get_metadata: bool
     get_assets: bool
+    preserve_tree: bool
     jobs_per_zarr: int | None
    on_error: Literal["raise", "yield"]
    #: which will be set .gen to assets.  Purpose is to make it possible to get
@@ -214,19 +217,24 @@ def __post_init__(self, output_dir: str | Path) -> None:
         # TODO: if we are ALREADY in a dandiset - we can validate that it is
         # the same dandiset and use that dandiset path as the one to download
         # under
-        if isinstance(self.url, DandisetURL):
+        if isinstance(self.url, DandisetURL) or (
+            self.preserve_tree and self.url.dandiset_id is not None
+        ):
             assert self.url.dandiset_id is not None
             self.output_prefix = Path(self.url.dandiset_id)
         else:
             self.output_prefix = Path()
         self.output_path = Path(output_dir, self.output_prefix)
 
+    def is_dandiset_yaml(self) -> bool:
+        return isinstance(self.url, AssetItemURL) and self.url.path == "dandiset.yaml"
+
     def download_generator(self) -> Iterator[dict]:
         """
         A generator for downloads of files, folders, or entire dandiset from
         DANDI (as identified by URL)
 
-        This function is a generator which would yield records on ongoing
+        This function is a generator which yields records on ongoing
         activities.  Activities include traversal of the remote resource (DANDI
         archive), download of individual assets while yielding records (TODO:
         schema) while validating their checksums "on the fly", etc.
@@ -235,10 +243,8 @@ def download_generator(self) -> Iterator[dict]:
         with self.url.navigate(strict=True) as (client, dandiset, assets):
             if (
                 isinstance(self.url, DandisetURL)
-                or (
-                    isinstance(self.url, AssetItemURL)
-                    and self.url.path == "dandiset.yaml"
-                )
+                or self.is_dandiset_yaml()
+                or self.preserve_tree
             ) and self.get_metadata:
                 assert dandiset is not None
                 for resp in _populate_dandiset_yaml(
@@ -248,7 +254,7 @@
                         "path": str(self.output_prefix / dandiset_metadata_file),
                         **resp,
                     }
-            if isinstance(self.url, AssetItemURL):
+            if self.is_dandiset_yaml():
                 return
 
             # TODO: do analysis of assets for early detection of needed renames
@@ -262,7 +268,9 @@
                 assets = self.assets_it.feed(assets)
             lock = Lock()
             for asset in assets:
-                path = self.url.get_asset_download_path(asset)
+                path = self.url.get_asset_download_path(
+                    asset, preserve_tree=self.preserve_tree
+                )
                 self.asset_download_paths.add(path)
                 download_path = Path(self.output_path, path)
                 path = str(self.output_prefix / path)
@@ -481,8 +489,8 @@ def agg_done(self, done_sizes: Iterator[int]) -> list[str]:
     return v
 
 
-def _skip_file(msg: Any) -> dict:
-    return {"status": "skipped", "message": str(msg)}
+def _skip_file(msg: Any, **kwargs: Any) -> dict:
+    return {"status": "skipped", "message": str(msg), **kwargs}
 
 
 def _populate_dandiset_yaml(
@@ -514,7 +522,7 @@
             existing is DownloadExisting.REFRESH
             and os.lstat(dandiset_yaml).st_mtime >= mtime.timestamp()
         ):
-            yield _skip_file("already exists")
+            yield _skip_file("already exists", size=os.lstat(dandiset_yaml).st_size)
             return
         ds = Dandiset(dandiset_path, allow_empty=True)
         ds.path_obj.mkdir(exist_ok=True)  # exist_ok in case of parallel race
@@ -637,7 +645,7 @@ def _download_file(
         # but mtime is different
         if same == ["mtime", "size"]:
             # TODO: add recording and handling of .nwb object_id
-            yield _skip_file("same time and size")
+            yield _skip_file("same time and size", size=size)
             return
         lgr.debug(f"{path!r} - same attributes: {same}.  Redownloading")
@@ -878,33 +886,40 @@ def _download_zarr(
     # Avoid heavy import by importing within function:
     from .support.digests import get_zarr_checksum
 
-    download_gens = {}
-    entries = list(asset.iterfiles())
+    # We collect the entries while the download is already underway,
+    # starting with the first page of entries received from the server.
+    entries = []
     digests = {}
+    pc = ProgressCombiner(zarr_size=asset.size)
 
     def digest_callback(path: str, algoname: str, d: str) -> None:
         if algoname == "md5":
             digests[path] = d
 
-    for entry in entries:
-        etag = entry.digest
-        assert etag.algorithm is DigestType.md5
-        download_gens[str(entry)] = _download_file(
-            entry.get_download_file_iter(),
-            download_path / str(entry),
-            toplevel_path=toplevel_path,
-            size=entry.size,
-            mtime=entry.modified,
-            existing=existing,
-            digests={"md5": etag.value},
-            lock=lock,
-            digest_callback=partial(digest_callback, str(entry)),
-        )
+    def downloads_gen():
+        for entry in asset.iterfiles():
+            entries.append(entry)
+            etag = entry.digest
+            assert etag.algorithm is DigestType.md5
+            yield pairing(
+                str(entry),
+                _download_file(
+                    entry.get_download_file_iter(),
+                    download_path / str(entry),
+                    toplevel_path=toplevel_path,
+                    size=entry.size,
+                    mtime=entry.modified,
+                    existing=existing,
+                    digests={"md5": etag.value},
+                    lock=lock,
+                    digest_callback=partial(digest_callback, str(entry)),
+                ),
+            )
+        pc.file_qty = len(entries)
 
-    pc = ProgressCombiner(zarr_size=asset.size, file_qty=len(download_gens))
     final_out: dict | None = None
     with interleave(
-        [pairing(p, gen) for p, gen in download_gens.items()],
+        downloads_gen(),
         onerror=FINISH_CURRENT,
         max_workers=jobs or 4,
     ) as it:
@@ -988,7 +1003,9 @@ class DownloadProgress:
 
 @dataclass
 class ProgressCombiner:
     zarr_size: int
-    file_qty: int
+    file_qty: int | None = (
+        None  # set to the known total once the full file listing has been traversed
+    )
     files: dict[str, DownloadProgress] = field(default_factory=dict)
     #: Total size of all files that were not skipped and did not error out
    #: during download
@@ -1021,18 +1038,25 @@ def get_done(self) -> dict:
         total_downloaded = sum(
             s.downloaded
             for s in self.files.values()
-            if s.state in (DLState.DOWNLOADING, DLState.CHECKSUM_ERROR, DLState.DONE)
+            if s.state
+            in (
+                DLState.DOWNLOADING,
+                DLState.CHECKSUM_ERROR,
+                DLState.SKIPPED,
+                DLState.DONE,
+            )
         )
         return {
             "done": total_downloaded,
-            "done%": total_downloaded / self.maxsize * 100,
+            "done%": total_downloaded / self.zarr_size * 100 if self.zarr_size else 0,
         }
 
     def set_status(self, statusdict: dict) -> None:
         state_qtys = Counter(s.state for s in self.files.values())
         total = len(self.files)
         if (
-            total == self.file_qty
+            self.file_qty is not None  # only once the full listing is known
+            and total == self.file_qty
             and state_qtys[DLState.STARTING] == state_qtys[DLState.DOWNLOADING] == 0
         ):
             # All files have finished
@@ -1053,16 +1077,25 @@ def set_status(self, statusdict: dict) -> None:
     def feed(self, path: str, status: dict) -> Iterator[dict]:
         keys = list(status.keys())
         self.files.setdefault(path, DownloadProgress())
+        size = status.get("size")
+        if size is not None:
+            if not self.yielded_size:
+                # only the first file to report a size announces the total
+                self.yielded_size = True
+                yield {"size": self.zarr_size}
         if status.get("status") == "skipped":
             self.files[path].state = DLState.SKIPPED
             out = {"message": self.message}
+            # Treat skipped files as "downloaded" for accounting purposes
+            if size is not None:
+                self.files[path].downloaded = size
+                self.maxsize += size
             self.set_status(out)
             yield out
+            if self.zarr_size:
+                yield self.get_done()
         elif keys == ["size"]:
-            if not self.yielded_size:
-                yield {"size": self.zarr_size}
-                self.yielded_size = True
-            self.files[path].size = status["size"]
+            self.files[path].size = size
             self.maxsize += status["size"]
             if any(s.state is DLState.DOWNLOADING for s in self.files.values()):
                 yield self.get_done()
diff --git a/lincbrain/metadata/util.py b/lincbrain/metadata/util.py
index 8849a9164..d3f8f95b4 100644
--- a/lincbrain/metadata/util.py
+++ b/lincbrain/metadata/util.py
@@ -11,6 +11,7 @@
 from dandischema import models
 import requests
 import tenacity
+from yarl import URL
 
 from .. import __version__
 from ..utils import ensure_datetime
@@ -583,6 +584,29 @@ def extract_digest(metadata: dict) -> dict[models.DigestType, str] | None:
         return None
 
 
+def extract_related_resource(metadata: dict) -> list[models.Resource] | None:
+    pubs = metadata.get("related_publications")
+    if not isinstance(pubs, (list, tuple)):
+        return None
+    related = []
+    for v in pubs:
+        if not isinstance(v, str):
+            continue
+        try:
+            u = URL(v)
+        except ValueError:
+            continue
+        if u.scheme not in ("http", "https") or u.host != "doi.org":
+            continue
+        related.append(
+            models.Resource(
+                identifier=v,
+                relation=models.RelationType.IsDescribedBy,
+            )
+        )
+    return related
+
+
 FIELD_EXTRACTORS: dict[str, Callable[[dict], Any]] = {
     "wasDerivedFrom": extract_wasDerivedFrom,
     "wasAttributedTo": extract_wasAttributedTo,
@@ -595,6 +619,7 @@ def extract_digest(metadata: dict) -> dict[models.DigestType, str] | None:
     "anatomy": extract_anatomy,
     "digest": extract_digest,
     "species": extract_species,
+    "relatedResource": extract_related_resource,
 }
 
 
diff --git a/lincbrain/tests/data/dandiarchive-docker/docker-compose.yml b/lincbrain/tests/data/dandiarchive-docker/docker-compose.yml
index 03ae76d6c..e4ba36c56 100644
--- a/lincbrain/tests/data/dandiarchive-docker/docker-compose.yml
+++ b/lincbrain/tests/data/dandiarchive-docker/docker-compose.yml
@@ -4,8 +4,6 @@
 # ,
 # but using images uploaded to Docker Hub instead of building them locally.
-version: '2.1' - services: django: image: dandiarchive/dandiarchive-api @@ -19,7 +17,7 @@ services: condition: service_healthy rabbitmq: condition: service_started - environment: + environment: &django_env DJANGO_CELERY_BROKER_URL: amqp://rabbitmq:5672/ DJANGO_CONFIGURATION: DevelopmentConfiguration DJANGO_DANDI_DANDISETS_BUCKET_NAME: dandi-dandisets @@ -32,13 +30,14 @@ services: DJANGO_MINIO_STORAGE_SECRET_KEY: minioSecretKey DJANGO_STORAGE_BUCKET_NAME: django-storage DJANGO_MINIO_STORAGE_MEDIA_URL: http://localhost:9000/django-storage - DJANGO_DANDI_SCHEMA_VERSION: + DJANGO_DANDI_SCHEMA_VERSION: ~ DJANGO_DANDI_WEB_APP_URL: http://localhost:8085 DJANGO_DANDI_API_URL: http://localhost:8000 DJANGO_DANDI_JUPYTERHUB_URL: https://hub.dandiarchive.org + DJANGO_DANDI_DEV_EMAIL: "test@example.com" DANDI_ALLOW_LOCALHOST_URLS: "1" ports: - - "8000:8000" + - "127.0.0.1:8000:8000" celery: image: dandiarchive/dandiarchive-api @@ -62,24 +61,8 @@ services: rabbitmq: condition: service_started environment: - DJANGO_CELERY_BROKER_URL: amqp://rabbitmq:5672/ - DJANGO_CONFIGURATION: DevelopmentConfiguration - DJANGO_DANDI_DANDISETS_BUCKET_NAME: dandi-dandisets - DJANGO_DANDI_DANDISETS_LOG_BUCKET_NAME: dandiapi-dandisets-logs - DJANGO_DANDI_DANDISETS_EMBARGO_BUCKET_NAME: dandi-embargoed-dandisets - DJANGO_DANDI_DANDISETS_EMBARGO_LOG_BUCKET_NAME: dandiapi-embargo-dandisets-logs - DJANGO_DATABASE_URL: postgres://postgres:postgres@postgres:5432/django - DJANGO_MINIO_STORAGE_ACCESS_KEY: minioAccessKey - DJANGO_MINIO_STORAGE_ENDPOINT: minio:9000 - DJANGO_MINIO_STORAGE_SECRET_KEY: minioSecretKey - DJANGO_STORAGE_BUCKET_NAME: django-storage - DJANGO_MINIO_STORAGE_MEDIA_URL: http://localhost:9000/django-storage - DJANGO_DANDI_SCHEMA_VERSION: + << : *django_env DJANGO_DANDI_VALIDATION_JOB_INTERVAL: "5" - DJANGO_DANDI_WEB_APP_URL: http://localhost:8085 - DJANGO_DANDI_API_URL: http://localhost:8000 - DJANGO_DANDI_JUPYTERHUB_URL: https://hub.dandiarchive.org - DANDI_ALLOW_LOCALHOST_URLS: "1" minio: image: minio/minio:RELEASE.2022-04-12T06-55-35Z @@ -87,7 +70,7 @@ services: tty: true command: ["server", "/data"] ports: - - "9000:9000" + - "127.0.0.1:9000:9000" environment: MINIO_ACCESS_KEY: minioAccessKey MINIO_SECRET_KEY: minioSecretKey @@ -102,8 +85,8 @@ services: POSTGRES_DB: django POSTGRES_PASSWORD: postgres image: postgres - ports: - - "5432:5432" + expose: + - "5432" healthcheck: test: ["CMD", "pg_isready", "-U", "postgres"] interval: 7s @@ -112,5 +95,5 @@ services: rabbitmq: image: rabbitmq:management - ports: - - "5672:5672" + expose: + - "5672" diff --git a/lincbrain/tests/data/metadata/metadata2asset_3.json b/lincbrain/tests/data/metadata/metadata2asset_3.json index f3e844133..44b809a69 100644 --- a/lincbrain/tests/data/metadata/metadata2asset_3.json +++ b/lincbrain/tests/data/metadata/metadata2asset_3.json @@ -92,5 +92,12 @@ "name": "Cyperus bulbosus" } } + ], + "relatedResource": [ + { + "schemaKey": "Resource", + "identifier": "https://doi.org/10.48324/dandi.000027/0.210831.2033", + "relation": "dcite:IsDescribedBy" + } ] } diff --git a/lincbrain/tests/data/metadata/metadata2asset_simple1.json b/lincbrain/tests/data/metadata/metadata2asset_simple1.json index 04babc1a1..931779db5 100644 --- a/lincbrain/tests/data/metadata/metadata2asset_simple1.json +++ b/lincbrain/tests/data/metadata/metadata2asset_simple1.json @@ -42,5 +42,6 @@ "schemaKey": "Participant", "identifier": "sub-01" } - ] + ], + "relatedResource": [] } diff --git a/lincbrain/tests/fixtures.py b/lincbrain/tests/fixtures.py index 
4320042ec..6f528b0ba 100644 --- a/lincbrain/tests/fixtures.py +++ b/lincbrain/tests/fixtures.py @@ -394,16 +394,41 @@ def docker_compose_setup() -> Iterator[dict[str, str]]: try: if create: if os.environ.get("DANDI_TESTS_PULL_DOCKER_COMPOSE", "1") not in ("", "0"): - run(["docker-compose", "pull"], cwd=str(LOCAL_DOCKER_DIR), check=True) + run( + ["docker", "compose", "pull"], cwd=str(LOCAL_DOCKER_DIR), check=True + ) run( - ["docker-compose", "run", "--rm", "django", "./manage.py", "migrate"], + [ + "docker", + "compose", + "run", + "--rm", + "django", + "./manage.py", + "migrate", + ], cwd=str(LOCAL_DOCKER_DIR), env=env, check=True, ) run( [ - "docker-compose", + "docker", + "compose", + "run", + "--rm", + "django", + "./manage.py", + "createcachetable", + ], + cwd=str(LOCAL_DOCKER_DIR), + env=env, + check=True, + ) + run( + [ + "docker", + "compose", "run", "--rm", "-e", @@ -422,7 +447,8 @@ def docker_compose_setup() -> Iterator[dict[str, str]]: r = check_output( [ - "docker-compose", + "docker", + "compose", "run", "--rm", "-T", @@ -444,7 +470,7 @@ def docker_compose_setup() -> Iterator[dict[str, str]]: if create: run( - ["docker-compose", "up", "-d", "django", "celery"], + ["docker", "compose", "up", "-d", "django", "celery"], cwd=str(LOCAL_DOCKER_DIR), env=env, check=True, @@ -462,7 +488,11 @@ def docker_compose_setup() -> Iterator[dict[str, str]]: yield {"django_api_key": django_api_key} finally: if persist in (None, "0"): - run(["docker-compose", "down", "-v"], cwd=str(LOCAL_DOCKER_DIR), check=True) + run( + ["docker", "compose", "down", "-v"], + cwd=str(LOCAL_DOCKER_DIR), + check=True, + ) @dataclass @@ -534,6 +564,7 @@ def mkdandiset(self, name: str, embargo: bool = False) -> SampleDandiset: { "schemaKey": "Person", "name": "Tests, Dandi-Cli", + "email": "nemo@example.com", "roleName": ["dcite:Author", "dcite:ContactPerson"], } ], diff --git a/lincbrain/tests/skip.py b/lincbrain/tests/skip.py index 0e08b5f99..54d864b76 100644 --- a/lincbrain/tests/skip.py +++ b/lincbrain/tests/skip.py @@ -101,7 +101,7 @@ def windows(): def no_docker_commands(): missing_cmds = [] - for cmd in ("docker", "docker-compose"): + for cmd in ("docker",): if shutil.which(cmd) is None: missing_cmds.append(cmd) msg = "missing Docker commands: {}".format(", ".join(missing_cmds)) diff --git a/lincbrain/tests/test_dandiapi.py b/lincbrain/tests/test_dandiapi.py index 18339907b..5a39cc953 100644 --- a/lincbrain/tests/test_dandiapi.py +++ b/lincbrain/tests/test_dandiapi.py @@ -7,6 +7,7 @@ import random import re from shutil import rmtree +from typing import Any import anys import click @@ -46,7 +47,7 @@ def test_upload( d.upload_raw_asset(simple1_nwb, {"path": "testing/simple1.nwb"}) (asset,) = d.get_assets() assert asset.path == "testing/simple1.nwb" - d.download_directory("", tmp_path) + d.download_directory("./", tmp_path) paths = list_paths(tmp_path) assert paths == [tmp_path / "testing" / "simple1.nwb"] assert paths[0].stat().st_size == simple1_nwb.stat().st_size @@ -678,26 +679,49 @@ def test_get_assets_order(text_dandiset: SampleDandiset) -> None: def test_get_assets_with_path_prefix(text_dandiset: SampleDandiset) -> None: - assert sorted( - asset.path - for asset in text_dandiset.dandiset.get_assets_with_path_prefix("subdir") - ) == ["subdir1/apple.txt", "subdir2/banana.txt", "subdir2/coconut.txt"] - assert sorted( - asset.path - for asset in text_dandiset.dandiset.get_assets_with_path_prefix("subdir2") - ) == ["subdir2/banana.txt", "subdir2/coconut.txt"] - assert [ - asset.path - for asset in 
text_dandiset.dandiset.get_assets_with_path_prefix( - "subdir", order="path" - ) - ] == ["subdir1/apple.txt", "subdir2/banana.txt", "subdir2/coconut.txt"] - assert [ - asset.path - for asset in text_dandiset.dandiset.get_assets_with_path_prefix( - "subdir", order="-path" - ) - ] == ["subdir2/coconut.txt", "subdir2/banana.txt", "subdir1/apple.txt"] + def _get_assets_with_path_prefix(prefix: str, **kw: Any) -> list[str]: + return [ + asset.path + for asset in text_dandiset.dandiset.get_assets_with_path_prefix( + prefix, **kw + ) + ] + + assert ( + sorted(_get_assets_with_path_prefix("subdir")) + == sorted(_get_assets_with_path_prefix("./subdir")) + == sorted(_get_assets_with_path_prefix("./subdir2/../sub")) + == [ + "subdir1/apple.txt", + "subdir2/banana.txt", + "subdir2/coconut.txt", + ] + ) + assert ( + _get_assets_with_path_prefix("subdir/") + == _get_assets_with_path_prefix("./subdir/") + == _get_assets_with_path_prefix("../subdir1/") + == [] + ) + assert ( + sorted(_get_assets_with_path_prefix("subdir2")) + == sorted(_get_assets_with_path_prefix("a/../subdir2")) + == sorted(_get_assets_with_path_prefix("./subdir2")) + == [ + "subdir2/banana.txt", + "subdir2/coconut.txt", + ] + ) + assert _get_assets_with_path_prefix("subdir", order="path") == [ + "subdir1/apple.txt", + "subdir2/banana.txt", + "subdir2/coconut.txt", + ] + assert _get_assets_with_path_prefix("subdir", order="-path") == [ + "subdir2/coconut.txt", + "subdir2/banana.txt", + "subdir1/apple.txt", + ] def test_get_assets_by_glob(text_dandiset: SampleDandiset) -> None: diff --git a/lincbrain/tests/test_download.py b/lincbrain/tests/test_download.py index 07caaf117..2a8c044ce 100644 --- a/lincbrain/tests/test_download.py +++ b/lincbrain/tests/test_download.py @@ -172,6 +172,28 @@ def test_download_folder(text_dandiset: SampleDandiset, tmp_path: Path) -> None: assert (tmp_path / "subdir2" / "coconut.txt").read_text() == "Coconut\n" +def test_download_folder_preserve_tree( + text_dandiset: SampleDandiset, tmp_path: Path +) -> None: + dandiset_id = text_dandiset.dandiset_id + download( + f"dandi://{text_dandiset.api.instance_id}/{dandiset_id}/subdir2/", + tmp_path, + preserve_tree=True, + ) + assert list_paths(tmp_path, dirs=True) == [ + tmp_path / dandiset_id, + tmp_path / dandiset_id / "dandiset.yaml", + tmp_path / dandiset_id / "subdir2", + tmp_path / dandiset_id / "subdir2" / "banana.txt", + tmp_path / dandiset_id / "subdir2" / "coconut.txt", + ] + assert (tmp_path / dandiset_id / "subdir2" / "banana.txt").read_text() == "Banana\n" + assert ( + tmp_path / dandiset_id / "subdir2" / "coconut.txt" + ).read_text() == "Coconut\n" + + def test_download_item(text_dandiset: SampleDandiset, tmp_path: Path) -> None: dandiset_id = text_dandiset.dandiset_id download( @@ -182,6 +204,26 @@ def test_download_item(text_dandiset: SampleDandiset, tmp_path: Path) -> None: assert (tmp_path / "coconut.txt").read_text() == "Coconut\n" +def test_download_item_preserve_tree( + text_dandiset: SampleDandiset, tmp_path: Path +) -> None: + dandiset_id = text_dandiset.dandiset_id + download( + f"dandi://{text_dandiset.api.instance_id}/{dandiset_id}/subdir2/coconut.txt", + tmp_path, + preserve_tree=True, + ) + assert list_paths(tmp_path, dirs=True) == [ + tmp_path / dandiset_id, + tmp_path / dandiset_id / "dandiset.yaml", + tmp_path / dandiset_id / "subdir2", + tmp_path / dandiset_id / "subdir2" / "coconut.txt", + ] + assert ( + tmp_path / dandiset_id / "subdir2" / "coconut.txt" + ).read_text() == "Coconut\n" + + def 
test_download_dandiset_yaml(text_dandiset: SampleDandiset, tmp_path: Path) -> None:
     dandiset_id = text_dandiset.dandiset_id
     download(
@@ -330,6 +372,7 @@ def test_download_metadata404(text_dandiset: SampleDandiset, tmp_path: Path) ->
         existing=DownloadExisting.ERROR,
         get_metadata=True,
         get_assets=True,
+        preserve_tree=False,
         jobs_per_zarr=None,
         on_error="raise",
     ).download_generator()
@@ -485,9 +528,10 @@ def test_download_zarr_subdir_has_only_subdirs(
 
 
 @pytest.mark.parametrize(
-    "file_qty,inputs,expected",
+    "zarr_size,file_qty,inputs,expected",
    [
-        (
+        (  # 0
+            42,
             1,
             [
                 ("lonely.txt", {"size": 42}),
@@ -501,7 +545,7 @@ def test_download_zarr_subdir_has_only_subdirs(
                 ("lonely.txt", {"status": "done"}),
             ],
             [
-                {"size": 69105},
+                {"size": 42},
                 {"status": "downloading"},
                 {"done": 0, "done%": 0.0},
                 {"done": 20, "done%": 20 / 42 * 100},
@@ -510,7 +554,8 @@ def test_download_zarr_subdir_has_only_subdirs(
                 {"status": "done", "message": "1 done"},
             ],
         ),
-        (
+        (  # 1
+            169,
             2,
             [
                 ("apple.txt", {"size": 42}),
@@ -534,7 +579,7 @@ def test_download_zarr_subdir_has_only_subdirs(
                 ("banana.txt", {"status": "done"}),
             ],
             [
-                {"size": 69105},
+                {"size": 169},
                 {"status": "downloading"},
                 {"done": 0, "done%": 0.0},
                 {"done": 0, "done%": 0.0},
@@ -549,7 +594,8 @@ def test_download_zarr_subdir_has_only_subdirs(
                 {"status": "done", "message": "2 done"},
             ],
         ),
-        (
+        (  # 2
+            169,
             2,
             [
                 ("apple.txt", {"size": 42}),
@@ -573,10 +619,10 @@ def test_download_zarr_subdir_has_only_subdirs(
                 ("banana.txt", {"status": "done"}),
             ],
             [
-                {"size": 69105},
+                {"size": 169},
                 {"status": "downloading"},
                 {"done": 0, "done%": 0.0},
-                {"done": 20, "done%": 20 / 42 * 100},
+                {"done": 20, "done%": 20 / 169 * 100},
                 {"done": 20, "done%": 20 / 169 * 100},
                 {"done": 40, "done%": 40 / 169 * 100},
                 {"done": 42, "done%": 42 / 169 * 100},
@@ -589,7 +635,8 @@ def test_download_zarr_subdir_has_only_subdirs( {"status": "done", "message": "2 done"}, ], ), - ( + ( # 3 + 169, 2, [ ("apple.txt", {"size": 42}), @@ -613,12 +660,12 @@ def test_download_zarr_subdir_has_only_subdirs( ("banana.txt", {"status": "done"}), ], [ - {"size": 69105}, + {"size": 169}, {"status": "downloading"}, {"done": 0, "done%": 0.0}, - {"done": 20, "done%": 20 / 42 * 100}, - {"done": 40, "done%": 40 / 42 * 100}, - {"done": 42, "done%": 42 / 42 * 100}, + {"done": 20, "done%": 20 / 169 * 100}, + {"done": 40, "done%": 40 / 169 * 100}, + {"done": 42, "done%": 42 / 169 * 100}, {"message": "1 done"}, {"done": 42, "done%": 42 / 169 * 100}, {"done": 82, "done%": 82 / 169 * 100}, @@ -628,7 +675,8 @@ def test_download_zarr_subdir_has_only_subdirs( {"status": "done", "message": "2 done"}, ], ), - ( + ( # 4 + 169, 2, [ ("apple.txt", {"size": 42}), @@ -647,7 +695,7 @@ def test_download_zarr_subdir_has_only_subdirs( ("apple.txt", {"status": "done"}), ], [ - {"size": 69105}, + {"size": 169}, {"status": "downloading"}, {"done": 0, "done%": 0.0}, {"done": 0, "done%": 0.0}, @@ -655,21 +703,26 @@ def test_download_zarr_subdir_has_only_subdirs( {"done": 60, "done%": 60 / 169 * 100}, {"done": 80, "done%": 80 / 169 * 100}, {"message": "1 errored"}, - {"done": 40, "done%": 40 / 42 * 100}, - {"done": 42, "done%": 100.0}, + {"done": 40, "done%": 40 / 169 * 100}, + {"done": 42, "done%": 42 / 169 * 100}, {"status": "error", "message": "1 done, 1 errored"}, ], ), - ( + ( # 5 + 0, 1, [("lonely.txt", {"status": "skipped", "message": "already exists"})], [{"status": "skipped", "message": "1 skipped"}], ), - ( + ( # 6 + 169, 2, [ ("apple.txt", {"size": 42}), - ("banana.txt", {"status": "skipped", "message": "already exists"}), + ( + "banana.txt", + {"size": 127, "status": "skipped", "message": "already exists"}, + ), ("apple.txt", {"status": "downloading"}), ("apple.txt", {"done": 0, "done%": 0.0}), ("apple.txt", {"done": 20, "done%": 20 / 42 * 100}), @@ -680,17 +733,19 @@ def test_download_zarr_subdir_has_only_subdirs( ("apple.txt", {"status": "done"}), ], [ - {"size": 69105}, + {"size": 169}, {"message": "1 skipped"}, + {"done": 127, "done%": (127 + 0) / 169 * 100}, {"status": "downloading"}, - {"done": 0, "done%": 0.0}, - {"done": 20, "done%": 20 / 42 * 100}, - {"done": 40, "done%": 40 / 42 * 100}, - {"done": 42, "done%": 100.0}, + {"done": 127 + 0, "done%": (127 + 0) / 169 * 100}, + {"done": 127 + 20, "done%": (127 + 20) / 169 * 100}, + {"done": 127 + 40, "done%": (127 + 40) / 169 * 100}, + {"done": 127 + 42, "done%": 100.0}, {"status": "done", "message": "1 done, 1 skipped"}, ], ), - ( + ( # 7 + 169, 2, [ ("apple.txt", {"size": 42}), @@ -719,7 +774,7 @@ def test_download_zarr_subdir_has_only_subdirs( ("apple.txt", {"status": "done"}), ], [ - {"size": 69105}, + {"size": 169}, {"status": "downloading"}, {"done": 0, "done%": 0.0}, {"done": 0, "done%": 0.0}, @@ -734,14 +789,18 @@ def test_download_zarr_subdir_has_only_subdirs( {"status": "error", "message": "1 done, 1 errored"}, ], ), - ( + ( # 8 + 179, 3, [ ("apple.txt", {"size": 42}), ("banana.txt", {"size": 127}), ("apple.txt", {"status": "downloading"}), ("banana.txt", {"status": "downloading"}), - ("coconut", {"status": "skipped", "message": "already exists"}), + ( + "coconut", + {"size": 10, "status": "skipped", "message": "already exists"}, + ), ("apple.txt", {"done": 0, "done%": 0.0}), ("banana.txt", {"done": 0, "done%": 0.0}), ("apple.txt", {"done": 20, "done%": 20 / 42 * 100}), @@ -764,28 +823,29 @@ def 
test_download_zarr_subdir_has_only_subdirs(
                 ("banana.txt", {"status": "done"}),
             ],
             [
-                {"size": 69105},
+                {"size": 179},
                 {"status": "downloading"},
                 {"message": "1 skipped"},
-                {"done": 0, "done%": 0.0},
-                {"done": 0, "done%": 0.0},
-                {"done": 20, "done%": 20 / 169 * 100},
-                {"done": 60, "done%": 60 / 169 * 100},
-                {"done": 80, "done%": 80 / 169 * 100},
-                {"done": 120, "done%": 120 / 169 * 100},
-                {"done": 122, "done%": 122 / 169 * 100},
+                {"done": 10, "done%": 10 / 179 * 100},
+                {"done": 10, "done%": 10 / 179 * 100},
+                {"done": 10, "done%": 10 / 179 * 100},
+                {"done": 10 + 20, "done%": (10 + 20) / 179 * 100},
+                {"done": 10 + 60, "done%": (10 + 60) / 179 * 100},
+                {"done": 10 + 80, "done%": (10 + 80) / 179 * 100},
+                {"done": 10 + 120, "done%": (10 + 120) / 179 * 100},
+                {"done": 10 + 122, "done%": (10 + 122) / 179 * 100},
                 {"message": "1 errored, 1 skipped"},
-                {"done": 162, "done%": 162 / 169 * 100},
-                {"done": 169, "done%": 100.0},
+                {"done": 10 + 162, "done%": (10 + 162) / 179 * 100},
+                {"done": 179, "done%": 100.0},
                 {"status": "error", "message": "1 done, 1 errored, 1 skipped"},
             ],
         ),
     ],
 )
 def test_progress_combiner(
-    file_qty: int, inputs: list[tuple[str, dict]], expected: list[dict]
+    zarr_size: int, file_qty: int, inputs: list[tuple[str, dict]], expected: list[dict]
 ) -> None:
-    pc = ProgressCombiner(zarr_size=69105, file_qty=file_qty)
+    pc = ProgressCombiner(zarr_size=zarr_size, file_qty=file_qty)
     outputs: list[dict] = []
     for path, status in inputs:
         outputs.extend(pc.feed(path, status))
@@ -968,4 +1028,4 @@ def test_pyouthelper_time_remaining_1339():
         # once done, dont print ETA
         assert len(done) == 2
     else:
-        assert done[-1] == f"ETA: {10-i} seconds<"  # noqa: E226
+        assert done[-1] == f"ETA: {10 - i} seconds<"
diff --git a/lincbrain/tests/test_metadata.py b/lincbrain/tests/test_metadata.py
index 07c19b24a..8c5229bd9 100644
--- a/lincbrain/tests/test_metadata.py
+++ b/lincbrain/tests/test_metadata.py
@@ -29,6 +29,7 @@
 from dateutil.tz import tzutc
 from pydantic import ByteSize
 import pytest
+import requests
 from semantic_version import Version
 
 from .fixtures import SampleDandiset
@@ -53,6 +54,13 @@
 METADATA_DIR = Path(__file__).with_name("data") / "metadata"
 
 
+mark_xfail_ontobee = pytest.mark.xfail(
+    condition="not config.getoption('--scheduled')",
+    reason="Flaky ontobee site",
+    strict=False,
+    raises=requests.RequestException,
+)
+
 
 def test_get_metadata(simple1_nwb: Path, simple1_nwb_metadata: dict[str, Any]) -> None:
     target_metadata = simple1_nwb_metadata.copy()
@@ -234,6 +242,7 @@ def test_timedelta2duration(td: timedelta, duration: str) -> None:
     assert timedelta2duration(td) == duration
 
 
+@mark_xfail_ontobee
 @mark.skipif_no_network
 @pytest.mark.parametrize(
     "filename, metadata",
@@ -323,7 +332,9 @@ def test_timedelta2duration(td: timedelta, duration: str) -> None:
                 "institution": "University College",
                 "keywords": ["test", "sample", "example", "test-case"],
                 "lab": "Retriever Laboratory",
-                "related_publications": "A Brief History of Test Cases",
+                "related_publications": [
+                    "https://doi.org/10.48324/dandi.000027/0.210831.2033"
+                ],
"session_description": "Some test data", "session_id": "XYZ789", "session_start_time": "2020-08-31T15:58:28-04:00", @@ -459,6 +470,7 @@ def test_time_extract_gest() -> None: ) +@mark_xfail_ontobee @mark.skipif_no_network @pytest.mark.obolibrary @pytest.mark.parametrize( @@ -489,6 +501,7 @@ def test_parseobourl(url, value): assert parse_purlobourl(url) == value +@mark_xfail_ontobee @pytest.mark.obolibrary @mark.skipif_no_network def test_species(): @@ -860,6 +873,7 @@ def test_nwb2asset(simple2_nwb: Path) -> None: variableMeasured=[], measurementTechnique=[], approach=[], + relatedResource=[], ) @@ -939,4 +953,5 @@ def test_nwb2asset_remote_asset(nwb_dandiset: SampleDandiset) -> None: variableMeasured=[], measurementTechnique=[], approach=[], + relatedResource=[], ) diff --git a/setup.cfg b/setup.cfg index 06177a72b..64eae7c39 100644 --- a/setup.cfg +++ b/setup.cfg @@ -89,6 +89,7 @@ test = pytest-cov pytest-mock pytest-rerunfailures + pytest-timeout responses != 0.24.0 vcrpy tools=