Skip to content

Commit

Permalink
fix: zenodo bundle download
Browse files Browse the repository at this point in the history
  • Loading branch information
makkus committed Apr 8, 2024
1 parent 845c5e2 commit ca734f9
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 159 deletions.
90 changes: 38 additions & 52 deletions src/kiara_plugin/onboarding/modules/download/zenodo.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
# -*- coding: utf-8 -*-
from typing import TYPE_CHECKING, Any, Dict, Union

from kiara.api import ValueMap
from kiara.exceptions import KiaraException
from kiara.models.values.value import ValueMap
from kiara_plugin.onboarding.modules import OnboardFileBundleModule, OnboardFileModule

if TYPE_CHECKING:
from kiara.models.filesystem import FolderImportConfig, KiaraFile, KiaraFileBundle


class DownloadZenodoFileModule(OnboardFileModule):
"""Download a single file from a github repo."""
"""Download a single file from a Zenodo record."""

_module_type_name = "download.file.from.zenodo"

def create_onboard_inputs_schema(self) -> Dict[str, Any]:

result: Dict[str, Dict[str, Any]] = {
"doi": {"type": "string", "doc": "The DOI."},
"version": {
"type": "string",
"doc": "The version of the record to download.",
"optional": True,
},
"path": {
"type": "string",
"doc": "The path to the file/file name within the dataset.",
Expand All @@ -35,6 +40,13 @@ def retrieve_file(
from kiara_plugin.onboarding.utils.download import download_file

doi = inputs.get_value_data("doi")

version = inputs.get_value_data("version")
if version:
raise NotImplementedError(
"Downloading versioned records is not yet supported."
)

file_path = inputs.get_value_data("path")

if "/zenodo." not in doi:
Expand All @@ -43,6 +55,18 @@ def retrieve_file(
zen = pyzenodo3.Zenodo()
record = zen.find_record_by_doi(doi)

if not file_path:
if len(record.data["files"]) == 1:
file_path = record.data["files"][0]["key"]
else:
msg = "Available files:\n"
for key in record.data["files"]:
msg += f" - {key['key']}\n"

raise KiaraException(
msg=f"Multiple files available in Zenodo record, please specify 'path' input.\n\n{msg}"
)

match = None
for _available_file in record.data["files"]:
if file_path == _available_file["key"]:
Expand Down Expand Up @@ -93,6 +117,11 @@ class DownloadZenodoFileBundleModule(OnboardFileBundleModule):
def create_onboard_inputs_schema(self) -> Dict[str, Any]:
    """Describe the inputs used to locate the Zenodo record to download.

    Returns:
        A mapping with two string-typed fields: ``doi`` and the optional
        ``version``.
    """
    doi_field: Dict[str, Any] = {"type": "string", "doc": "The DOI."}
    version_field: Dict[str, Any] = {
        "type": "string",
        "doc": "The version of the record to download. By default, the latest version will be used.",
        "optional": True,
    }
    return {"doi": doi_field, "version": version_field}

Expand All @@ -105,60 +134,17 @@ def retrieve_archive(
import_config: "FolderImportConfig",
) -> Union["KiaraFile", "KiaraFileBundle"]:

import pyzenodo3

from kiara.models.filesystem import KiaraFile, KiaraFileBundle
from kiara_plugin.onboarding.utils.download import download_file
from kiara_plugin.onboarding.utils.download import download_zenodo_file_bundle

doi = inputs.get_value_data("doi")
version = inputs.get_value_data("version")

if "/zenodo." not in doi:
doi = f"10.5281/zenodo.{doi}"

zen = pyzenodo3.Zenodo()
record = zen.find_record_by_doi(doi)

base_path = KiaraFileBundle.create_tmp_dir()

for _available_file in record.data["files"]:
match = _available_file

url = match["links"]["self"]
checksum = match["checksum"][4:]

file_path = _available_file["key"]
full_path = base_path / file_path

file_name = file_path.split("/")[-1]

# TODO: filter here already, so we don't need to download files we don't want

result_file: KiaraFile
result_file, result_checksum = download_file( # type: ignore
url=url,
target=full_path.as_posix(),
file_name=file_name,
attach_metadata=True,
return_md5_hash=True,
)

if checksum != result_checksum:
raise KiaraException(
msg=f"Can't download file '{file_name}' from zenodo, invalid checksum: {checksum} != {result_checksum}"
)

if not bundle_name:
bundle_name = doi
result = KiaraFileBundle.import_folder(
source=base_path.as_posix(),
result = download_zenodo_file_bundle(
doi=doi,
version=version,
attach_metadata_to_bundle=attach_metadata_to_bundle,
attach_metadata_to_files=attach_metadata_to_files,
bundle_name=bundle_name,
import_config=import_config,
)
if attach_metadata_to_bundle:
result.metadata["zenodo_record_data"] = record.data

if attach_metadata_to_files:
for file in result.included_files.values():
file.metadata["zenodo_record_data"] = record.data

return result
106 changes: 0 additions & 106 deletions src/kiara_plugin/onboarding/modules/zenodo.py

This file was deleted.

71 changes: 70 additions & 1 deletion src/kiara_plugin/onboarding/utils/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from typing import Dict, List, Mapping, Tuple, Type, Union
from typing import Any, Dict, List, Mapping, Tuple, Type, Union

from pydantic import BaseModel, Field

Expand Down Expand Up @@ -365,3 +365,72 @@ def onboard_file_bundle(
imported_bundle = result

return imported_bundle


def download_zenodo_file_bundle(
    doi: str,
    version: Union[None, str],
    attach_metadata_to_bundle: bool,
    attach_metadata_to_files: bool,
    bundle_name: Union[str, None] = None,
    import_config: Union[None, Mapping[str, Any], FolderImportConfig] = None,
) -> KiaraFileBundle:
    """Download all files of a Zenodo record and import them as one file bundle.

    Every file listed in the record is downloaded into a temporary directory,
    its MD5 checksum is compared against the one published in the record, and
    the directory is then imported via ``KiaraFileBundle.import_folder``.

    Args:
        doi: The DOI of the record. A value that does not contain
            ``"/zenodo."`` is expanded to ``"10.5281/zenodo.<value>"``.
        version: The record version to download. Not supported yet: any
            truthy value raises ``NotImplementedError``.
        attach_metadata_to_bundle: If true, store the raw record data under
            the ``zenodo_record_data`` key of the bundle metadata.
        attach_metadata_to_files: If true, store the raw record data under
            the ``zenodo_record_data`` key of every included file's metadata.
        bundle_name: Name for the resulting bundle; defaults to the
            (expanded) DOI when empty.
        import_config: Passed through unchanged to
            ``KiaraFileBundle.import_folder``.

    Returns:
        The bundle produced by ``KiaraFileBundle.import_folder``.

    Raises:
        NotImplementedError: If ``version`` is set.
        KiaraException: If a file's published checksum uses an algorithm other
            than MD5, or if a downloaded file does not match its checksum.
    """
    # Fail fast, before any client is created or network request is made.
    if version:
        raise NotImplementedError("Downloading versioned records is not yet supported.")

    import pyzenodo3

    from kiara.models.filesystem import KiaraFileBundle

    if "/zenodo." not in doi:
        doi = f"10.5281/zenodo.{doi}"

    zen = pyzenodo3.Zenodo()
    record = zen.find_record_by_doi(doi)

    base_path = KiaraFileBundle.create_tmp_dir()

    for file_info in record.data["files"]:
        url = file_info["links"]["self"]

        file_path = file_info["key"]
        full_path = base_path / file_path
        file_name = file_path.split("/")[-1]

        # The published checksum is expected to look like "<algorithm>:<digest>".
        # Parse it explicitly instead of blindly dropping the first four
        # characters, so an unexpected format produces a clear error.
        raw_checksum = file_info["checksum"]
        algorithm, separator, checksum = raw_checksum.partition(":")
        if not separator:
            # No algorithm prefix at all: treat the whole value as an MD5 digest.
            algorithm, checksum = "md5", raw_checksum
        if algorithm.lower() != "md5":
            # Only an MD5 hash is requested from `download_file` below.
            raise KiaraException(
                msg=f"Can't download file '{file_name}' from zenodo, unsupported checksum algorithm: {algorithm}"
            )

        # TODO: filter here already, so we don't need to download files we don't want

        _, result_checksum = download_file(  # type: ignore
            url=url,
            target=full_path.as_posix(),
            file_name=file_name,
            attach_metadata=True,
            return_md5_hash=True,
        )

        if checksum != result_checksum:
            raise KiaraException(
                msg=f"Can't download file '{file_name}' from zenodo, invalid checksum: {checksum} != {result_checksum}"
            )

    if not bundle_name:
        bundle_name = doi
    result = KiaraFileBundle.import_folder(
        source=base_path.as_posix(),
        bundle_name=bundle_name,
        import_config=import_config,
    )
    if attach_metadata_to_bundle:
        result.metadata["zenodo_record_data"] = record.data

    if attach_metadata_to_files:
        for file in result.included_files.values():
            file.metadata["zenodo_record_data"] = record.data

    return result

0 comments on commit ca734f9

Please sign in to comment.