Skip to content

Commit

Permalink
[CDF-23393] HTTP download support (#1241)
Browse files Browse the repository at this point in the history
* HTTP download support

* statement moved out from try/except
  • Loading branch information
ronpal authored Nov 28, 2024
1 parent a534e9b commit 041ccdd
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ tags = [
modules = []

[[data]]
repoType = "GitHub"
repo = "cognitedata/toolkit-data"
source = "data/publicdata/pi_timeseries.Table.csv"
repoType = "http"
repo = "https://apps-cdn.cogniteapp.com/toolkit"
source = "publicdata/pi_timeseries.Table.csv"
destination = "raw/timeseries.Table.csv"

[[extra_resources]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ tags = [
modules = []

[[data]]
repoType = "GitHub"
repo = "cognitedata/toolkit-data"
source = "data/publicdata/assets.Table.csv"
repoType = "http"
repo = "https://apps-cdn.cogniteapp.com/toolkit"
source = "publicdata/assets.Table.csv"
destination = "raw/dump.Table.csv"


[[extra_resources]]
location = "cdf_common/data_sets/demo.DataSet.yaml"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ tags = [
]

[[data]]
repoType = "GitHub"
repo = "cognitedata/toolkit-data"
source = "data/publicdata/[work]*.csv"
repoType = "http"
repo = "https://apps-cdn.cogniteapp.com/toolkit/publicdata"
source = "workitem.Table.csv;workorder.Table.csv;workpackage.Table.csv;worktask.Table.csv"
destination = "raw/"

[[extra_resources]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ tags = [
]

[[data]]
repoType = "GitHub"
repo = "cognitedata/toolkit-data"
source = "data/publicdata/files/*.pdf"
repoType = "http"
repo = "https://apps-cdn.cogniteapp.com/toolkit/publicdata/files"
source = "PH-25578-P-4110006-001.pdf;PH-25578-P-4110010-001.pdf;PH-25578-P-4110119-001.pdf;PH-ME-P-0003-001.pdf;PH-ME-P-0004-001.pdf;PH-ME-P-0151-001.pdf;PH-ME-P-0152-001.pdf;PH-ME-P-0153-001.pdf;PH-ME-P-0156-001.pdf;PH-ME-P-0156-002.pdf;PH-ME-P-0160-001.pdf;Processed-PH-25578-P-4110006-001.svg;Processed-PH-25578-P-4110010-001.svg;Processed-PH-ME-P-0152-001.svg;Processed-PH-ME-P-0153-001.svg;Processed-PH-ME-P-0156-001.svg;Processed-PH-ME-P-0156-002.svg;Processed-PH-ME-P-0160-001.svg"
destination = "files/"

[[data]]
repoType = "GitHub"
repo = "cognitedata/toolkit-data"
source = "data/publicdata/valhall_file_metadata.csv"
repoType = "http"
repo = "https://apps-cdn.cogniteapp.com/toolkit"
source = "publicdata/valhall_file_metadata.csv"
destination = "raw/files.Table.csv"

[[extra_resources]]
Expand Down
29 changes: 19 additions & 10 deletions cognite_toolkit/_cdf_tk/commands/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
from cognite_toolkit._cdf_tk.tk_warnings import MediumSeverityWarning
from cognite_toolkit._cdf_tk.utils import humanize_collection, read_yaml_file
from cognite_toolkit._cdf_tk.utils.modules import module_directory_from_path
from cognite_toolkit._cdf_tk.utils.repository import GitHubFileDownloader
from cognite_toolkit._cdf_tk.utils.repository import FileDownloader
from cognite_toolkit._version import __version__

custom_style_fancy = questionary.Style(
Expand All @@ -73,6 +73,12 @@
POINTER = INDENT + "▶"


# Registry used to pick a downloader class from a `repo_type` string: each
# class is keyed by its own `_type` tag. `__subclasses__()` only reports the
# direct subclasses of FileDownloader that are already defined when this
# module is imported.
_FILE_DOWNLOADERS_BY_TYPE: dict[str, type[FileDownloader]] = {
    subclass._type: subclass  # type: ignore
    for subclass in FileDownloader.__subclasses__()
}


class ModulesCommand(ToolkitCommand):
def __init__(self, print_warning: bool = True, skip_tracking: bool = False, silent: bool = False):
super().__init__(print_warning, skip_tracking, silent)
Expand Down Expand Up @@ -117,7 +123,8 @@ def _create(

seen_modules: set[Path] = set()
selected_paths: set[Path] = set()
downloader_by_repo: dict[str, GitHubFileDownloader] = {}
downloader_by_repo: dict[str, FileDownloader] = {}

extra_resources: set[Path] = set()
for package_name, package in selected_packages.items():
print(f"{INDENT}[{'yellow' if mode == 'clean' else 'green'}]Creating {package_name}[/]")
Expand Down Expand Up @@ -152,15 +159,17 @@ def _create(

if module.definition is not None and download_data:
for example_data in module.definition.data:
if example_data.repo_type.casefold() != "github":
self.warn(
MediumSeverityWarning(
f"Unsupported repo type for example data: {example_data.repo_type}"
)
)
continue
if example_data.repo not in downloader_by_repo:
downloader_by_repo[example_data.repo] = GitHubFileDownloader(example_data.repo)
try:
downloader_cls = _FILE_DOWNLOADERS_BY_TYPE[example_data.repo_type]
except KeyError:
self.warn(
MediumSeverityWarning(
f"Unsupported repo type for example data: {example_data.repo_type}"
)
)
continue
downloader_by_repo[example_data.repo] = downloader_cls(example_data.repo)

downloader = downloader_by_repo[example_data.repo]
downloader.copy(example_data.source, target_dir / example_data.destination)
Expand Down
46 changes: 44 additions & 2 deletions cognite_toolkit/_cdf_tk/utils/repository.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import fnmatch
from abc import ABC, abstractmethod
from collections.abc import Iterable
from functools import lru_cache
from pathlib import Path
from typing import Literal
from typing import ClassVar, Literal
from urllib.parse import urljoin

import requests
from requests import Response
Expand All @@ -13,7 +15,21 @@
from cognite_toolkit._cdf_tk.tk_warnings import HTTPWarning


class GitHubFileDownloader:
class FileDownloader(ABC):
    """Abstract interface for objects that copy files from a remote repository.

    Every concrete subclass overrides ``_type`` with the tag that identifies
    the kind of repository it handles and implements both abstract methods.
    """

    # Tag identifying the repository kind; concrete subclasses override it.
    _type: ClassVar[str] = "generic"

    @abstractmethod
    def __init__(self, repo: str, errors: Literal["continue", "raise"] = "continue") -> None:
        """Bind the downloader to a repository.

        Args:
            repo: Identifier of the repository to download from; its exact
                format is defined by the concrete subclass.
            errors: Either ``"continue"`` or ``"raise"``. How the value is
                acted upon is left to the concrete subclass.
        """

    @abstractmethod
    def copy(self, source: str, destination: Path) -> None:
        """Copy ``source`` from the repository to ``destination``.

        Args:
            source: What to fetch from the repository; the accepted syntax is
                defined by the concrete subclass.
            destination: Local path that receives the downloaded content.
        """


class GitHubFileDownloader(FileDownloader):
_type: ClassVar[str] = "github"

api_url = "https://api.github.com"

def __init__(self, repo: str, errors: Literal["continue", "raise"] = "continue") -> None:
Expand Down Expand Up @@ -70,3 +86,29 @@ def _download_file(self, url: str, source: Path, destination: Path) -> None:
destination_path = destination / source.name
destination_path.parent.mkdir(parents=True, exist_ok=True)
destination_path.write_bytes(response.content)


class HttpFileDownloader(FileDownloader):
    """Download files that are served below a plain HTTP(S) base URL.

    ``repo`` is the base URL. The ``source`` given to :meth:`copy` holds one
    or more paths relative to that base URL, separated by ``;``.
    """

    _type: ClassVar[str] = "http"

    # Passed to ``requests.get`` so that an unresponsive server cannot block
    # the download forever (requests has no timeout unless one is given).
    _timeout_seconds: ClassVar[float] = 30.0

    def __init__(self, repo: str, errors: Literal["continue", "raise"] = "raise") -> None:
        """Store the base URL and the error policy.

        Args:
            repo: Base URL that every source path is resolved against.
            errors: ``"raise"`` raises on an HTTP status >= 400;
                ``"continue"`` prints a warning and skips that file.
        """
        self.repo = repo
        self.errors = errors

    def copy(self, source: str, destination: Path) -> None:
        """Download every file named in ``source`` to ``destination``.

        Args:
            source: One relative path, or several separated by ``;``.
                Surrounding whitespace and empty entries are ignored.
            destination: Target path. It is treated as a directory when it
                already is one or when several sources are given, and each
                file then keeps its own name inside it. Otherwise the single
                file is written to ``destination`` itself.

        Raises:
            requests.HTTPError: If a response has status >= 400 and
                ``errors`` is ``"raise"``.
        """
        # urljoin() replaces the last path segment of a base URL that lacks a
        # trailing slash, so make sure the base ends with one.
        base_url = self.repo if self.repo.endswith("/") else f"{self.repo}/"

        relative_paths = [part.strip() for part in source.split(";") if part.strip()]

        # Several files cannot share one file path: without this, a not yet
        # existing destination directory would make every download overwrite
        # the previous one at the very same location.
        use_directory = destination.is_dir() or len(relative_paths) > 1

        for relative_path in relative_paths:
            file_url = urljoin(base_url, relative_path)

            response = requests.get(file_url, timeout=self._timeout_seconds)
            if response.status_code >= 400:
                if self.errors == "raise":
                    response.raise_for_status()
                print(HTTPWarning("GET", response.text, response.status_code).get_message())
                continue

            location = destination / Path(relative_path).name if use_directory else destination
            # Create missing folders first, as the GitHub downloader does.
            location.parent.mkdir(parents=True, exist_ok=True)
            location.write_bytes(response.content)

0 comments on commit 041ccdd

Please sign in to comment.