From 283db0b028d13301dec2777a9a0be75b830d130e Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Thu, 28 Mar 2024 13:43:59 +0100 Subject: [PATCH] add support for Rust crates/Cargo package index --- .github/workflows/ci.yml | 5 +- CHANGELOG.md | 4 + README.md | 2 + license_tools/__main__.py | 29 ++++ license_tools/constants.py | 15 ++ license_tools/retrieval.py | 5 +- license_tools/tools/cargo_tools.py | 122 ++++++++++++++ license_tools/tools/pip_tools.py | 23 +-- license_tools/utils/download_utils.py | 97 +++++++++++ license_tools/utils/rendering_utils.py | 41 +++++ requirements.txt | 1 + setup.py | 2 + tests/__init__.py | 6 +- tests/data.py | 28 +++- tests/test_constants.py | 14 ++ tests/test_main.py | 80 +++++++++ tests/test_retrieval.py | 70 +++++++- tests/tools/test_cargo_tools.py | 221 +++++++++++++++++++++++++ tests/utils/test_archive_utils.py | 52 +++++- tests/utils/test_download_utils.py | 179 ++++++++++++++++++++ tests/utils/test_rendering_utils.py | 65 ++++++++ 21 files changed, 1030 insertions(+), 31 deletions(-) create mode 100644 license_tools/constants.py create mode 100644 license_tools/tools/cargo_tools.py create mode 100644 license_tools/utils/download_utils.py create mode 100644 license_tools/utils/rendering_utils.py create mode 100644 tests/test_constants.py create mode 100644 tests/test_main.py create mode 100644 tests/tools/test_cargo_tools.py create mode 100644 tests/utils/test_download_utils.py create mode 100644 tests/utils/test_rendering_utils.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c3068e3..6fe2750 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,7 +40,10 @@ jobs: python -m pip install .[dev,extended_rpm] - name: test run: - python -m unittest discover --verbose --start-directory tests/ + coverage --branch -m unittest discover --verbose --start-directory tests/ + - name: coverage + run: + coverage report - name: lint run: flake8 diff --git a/CHANGELOG.md b/CHANGELOG.md index bd1123d..72fabba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Development version +* Add handling for Rust crates: + * Automatically download the packages referenced in a given `Cargo.lock` file. + * Parse the metadata of `Cargo.toml` files. + # Version 0.9.0 - 2024-03-23 * Add support for `.egg-info` files for retrieving Python metadata. diff --git a/README.md b/README.md index d9c97bd..7bf33e4 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,9 @@ I wrote this tool to simplify the initial scanning steps for third-party package * Look into font files to easily analyze their metadata. * Look into RPM file metadata. * Look into Python package metadata. +* Look into Rust crate metadata. * Recursively look into nested archives, for example by unpacking the actual upstream source code archives inside RPM (source) files. +* Download the package versions declared inside a `Cargo.lock` file. * Make everything available from the terminal. ## Installation diff --git a/license_tools/__main__.py b/license_tools/__main__.py index fc3584d..6bc87ed 100644 --- a/license_tools/__main__.py +++ b/license_tools/__main__.py @@ -102,8 +102,37 @@ def main() -> None: help="Retrieve Python package metadata.", ) + parser.add_argument( + "--cargo-lock-download", + action="store_true", + required=False, + default=False, + help="Instead of analyzing the files, download the packages for a Cargo.lock file.", + ) + parser.add_argument( + "--cargo-lock", + type=str, + required=False, + default=None, + help="Path to the Cargo.lock file to use with `--cargo-lock-download`." + ) + parser.add_argument( + "--target-directory", + type=str, + required=False, + default=None, + help="Path to write the Cargo crate files to when using the `--cargo-lock-download` option." + ) + arguments = parser.parse_args() + if arguments.cargo_lock_download: + from license_tools.tools import cargo_tools + return cargo_tools.download_from_lock_file( + lock_path=arguments.cargo_lock, + target_directory=arguments.target_directory + ) + retrieval.run( directory=arguments.directory, file_path=arguments.file, diff --git a/license_tools/constants.py b/license_tools/constants.py new file mode 100644 index 0000000..a6bf871 --- /dev/null +++ b/license_tools/constants.py @@ -0,0 +1,15 @@ +# Copyright (c) stefan6419846. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. + +from __future__ import annotations + +try: + from importlib.metadata import version as _version +except ImportError: + from importlib_metadata import version as _version + + +VERSION: str = _version("license_tools") + +del _version diff --git a/license_tools/retrieval.py b/license_tools/retrieval.py index fc2d8ae..e7d7abd 100644 --- a/license_tools/retrieval.py +++ b/license_tools/retrieval.py @@ -22,7 +22,7 @@ import scancode_config # type: ignore[import-untyped] from joblib import Parallel, delayed # type: ignore[import-untyped] -from license_tools.tools import font_tools, linking_tools, pip_tools, scancode_tools +from license_tools.tools import cargo_tools, font_tools, linking_tools, pip_tools, scancode_tools from license_tools.tools.scancode_tools import FileResults, Licenses, PackageResults from license_tools.utils import archive_utils from license_tools.utils.path_utils import TemporaryDirectoryWithFixedName @@ -182,6 +182,9 @@ def run_on_file( FileResults, _run_on_archive_file(path=path, short_path=short_path, default_to_none=False) ) + if path.name.startswith("Cargo.toml"): + print(short_path) + print(cargo_tools.check_metadata(path=path) + "\n") retrieval_kwargs = RetrievalFlags.to_kwargs(flags=retrieval_flags) diff --git a/license_tools/tools/cargo_tools.py b/license_tools/tools/cargo_tools.py new file mode 100644 index 0000000..8ecf67a --- /dev/null +++ b/license_tools/tools/cargo_tools.py @@ -0,0 +1,122 @@ +# Copyright (c) stefan6419846. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. + +""" +Tools related to Cargo/Rust. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Generator + +import tomli + +from license_tools.utils import download_utils, rendering_utils +from license_tools.utils.download_utils import Download + + +# https://doc.rust-lang.org/cargo/reference/manifest.html +_VERBOSE_NAMES = { + "name": "Name", + "version": "Version", + "authors": "Authors", + "description": "Description", + "readme": "README", + "homepage": "Homepage", + "repository": "Repository", + "license": "License", + "license-file": "License File", + "keywords": "Keywords", + "categories": "Categories", +} + + +def read_toml(path: Path) -> dict[str, Any]: + """ + Read the given TOML file. + + :param path: The file to read. + :return: The parsed file content. + """ + return tomli.loads(path.read_text()) + + +def analyze_metadata(path: Path | str) -> dict[str, str | list[str]] | None: + """ + Analyze the Rust package metadata for the given directory. + + :param path: The directory/file to analyze. Should either be a directory or `Cargo.toml` file. + :return: The package metadata. + """ + path = Path(path) + if path.name != "Cargo.toml": + if path.joinpath("Cargo.toml").exists(): + path = path / "Cargo.toml" + elif len(list(path.glob("*"))) == 1: + path = next(path.glob("*")) / "Cargo.toml" + else: + raise ValueError(f"No clear Cargo.toml in {path}.") + manifest = read_toml(path) + return manifest.get("package") + + +def check_metadata(path: Path | str) -> str: + """ + Render the relevant details for the given package. + + :param path: The package path. + :return: The rendered dictionary-like representation of the relevant fields. + """ + metadata = analyze_metadata(path) + if not metadata: + return "" + return rendering_utils.render_dictionary( + dictionary=metadata, verbose_names_mapping=_VERBOSE_NAMES, multi_value_keys={"authors", "categories", "keywords"} + ) + + +@dataclass +class PackageVersion: + name: str + version: str + checksum: str + + def to_download(self) -> Download: + return Download( + url=f"https://crates.io/api/v1/crates/{self.name}/{self.version}/download", + filename=f"{self.name}_{self.version}.crate", + sha256=self.checksum + ) + + +def get_package_versions(lock_path: Path | str) -> Generator[PackageVersion, None, None]: + """ + Get the packages from the given lock file. + + :param lock_path: The lock file to read. + :return: The packages retrieved from lock file. + """ + data = read_toml(Path(lock_path)) + for package in data["package"]: + if package.get("source") != "registry+https://github.com/rust-lang/crates.io-index": + print("Skipping", package) + continue + yield PackageVersion(name=package["name"], version=package["version"], checksum=package["checksum"]) + + +def download_from_lock_file(lock_path: Path | str, target_directory: Path | str) -> None: + """ + Download the packages from the given lock file. + + :param lock_path: The lock file to read. + :param target_directory: The directory to write the packages to. + """ + target_directory = Path(target_directory) + if not target_directory.exists(): + target_directory.mkdir() + + downloads = [package.to_download() for package in get_package_versions(lock_path)] + download_utils.download_one_file_per_second(downloads=downloads, directory=target_directory) diff --git a/license_tools/tools/pip_tools.py b/license_tools/tools/pip_tools.py index 404ed84..c6d42ea 100644 --- a/license_tools/tools/pip_tools.py +++ b/license_tools/tools/pip_tools.py @@ -9,6 +9,9 @@ from __future__ import annotations from pathlib import Path + +from license_tools.utils import rendering_utils + try: from importlib.metadata import Distribution, PathDistribution except ImportError: @@ -57,20 +60,6 @@ def check_metadata(path: Path | str) -> str: :return: The rendered dictionary-like representation of the relevant fields. """ metadata = analyze_metadata(path) - maximum_length = max(map(len, _VERBOSE_NAMES.values())) - rendered = [] - for key, value in metadata.items(): - if key not in _VERBOSE_NAMES: - continue - if key in {"licensefile", "license_classifier", "requires"} and isinstance(value, (list, set)): - if len(value) == 1: - value = value.pop() - rendered.append(f"{_VERBOSE_NAMES.get(key):>{maximum_length}}: {value}") - elif not value: - rendered.append(f"{_VERBOSE_NAMES.get(key):>{maximum_length}}:") - else: - value = "\n" + "\n".join(map(lambda x: " " * maximum_length + f" * {x}", sorted(value))) - rendered.append(f"{_VERBOSE_NAMES.get(key):>{maximum_length}}:{value}") - else: - rendered.append(f"{_VERBOSE_NAMES.get(key):>{maximum_length}}: {value}") - return "\n".join(rendered) + return rendering_utils.render_dictionary( + dictionary=metadata, verbose_names_mapping=_VERBOSE_NAMES, multi_value_keys={"licensefile", "license_classifier", "requires"} + ) diff --git a/license_tools/utils/download_utils.py b/license_tools/utils/download_utils.py new file mode 100644 index 0000000..d0e3e13 --- /dev/null +++ b/license_tools/utils/download_utils.py @@ -0,0 +1,97 @@ +# Copyright (c) stefan6419846. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. + +""" +Download handling. +""" + +from __future__ import annotations + +import hashlib +import logging +import time +from dataclasses import dataclass +from pathlib import Path + +import requests + +from license_tools.constants import VERSION + + +logger = logging.getLogger(__name__) +del logging + + +USER_AGENT = f"https://github.com/stefan6419846/license_tools version {VERSION}" + + +class ChecksumError(ValueError): + pass + + +@dataclass +class Download: + url: str + filename: str + sha256: str | None = None + + def verify_checksum(self, data: bytes) -> None: + """ + Check if the checksum of the given data matches the expected one. + """ + if self.sha256 is not None: + digest = hashlib.sha256(data).hexdigest() + expected = self.sha256 + else: + return + if digest != expected: + raise ChecksumError(f'Checksum mismatch: Got {digest}, expected {expected}!') + + +class DownloadError(ValueError): + pass + + +def get_session() -> requests.Session: + """ + Get an identifiable session. + + :return: The session which identifies us against the server. + """ + session = requests.Session() + session.headers.update({"User-Agent": USER_AGENT}) + return session + + +def download_file(download: Download, directory: Path, session: requests.Session | None = None) -> None: + """ + Download the given file. + + :param download: Download to perform. + :param directory: Directory to download to. + :param session: Session to use. + """ + if session is None: + session = get_session() + target_path = directory / download.filename + logger.info("Downloading %s to %s ...", download.url, target_path) + response = session.get(download.url) + if not response.ok: + raise DownloadError(f"Download not okay? {download.url} {response}") + download.verify_checksum(response.content) + target_path.write_bytes(response.content) + + +def download_one_file_per_second(downloads: list[Download], directory: Path) -> None: + """ + Download the given files with not more than one request per second. This conforms to + https://crates.io/data-access#api accordingly. + + :param downloads: List of downloads to perform. + :param directory: Directory to download to. + """ + session = get_session() + for download in downloads: + download_file(download=download, directory=directory, session=session) + time.sleep(1) diff --git a/license_tools/utils/rendering_utils.py b/license_tools/utils/rendering_utils.py new file mode 100644 index 0000000..f285224 --- /dev/null +++ b/license_tools/utils/rendering_utils.py @@ -0,0 +1,41 @@ +# Copyright (c) stefan6419846. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. + +""" +Rendering utilities. +""" + +from __future__ import annotations + +from typing import Any + + +def render_dictionary(dictionary: dict[str, Any], verbose_names_mapping: dict[str, str], multi_value_keys: set[str]) -> str: + """ + Render the given dictionary as string. + + :param dictionary: The dictionary to render. + :param verbose_names_mapping: The mapping dictionary to use for the keys. + Keys not available inside this dictionary will be skipped. + :param multi_value_keys: Dictionary keys which could have multiple values. + """ + maximum_length = max(map(len, verbose_names_mapping.values())) + rendered = [] + for key, value in dictionary.items(): + if key not in verbose_names_mapping: + continue + if key in multi_value_keys and isinstance(value, (list, set, tuple)): + if isinstance(value, tuple): + value = list(value) + if len(value) == 1: + value = value.pop() + rendered.append(f"{verbose_names_mapping.get(key):>{maximum_length}}: {value}") + elif not value: + rendered.append(f"{verbose_names_mapping.get(key):>{maximum_length}}:") + else: + value = "\n" + "\n".join(map(lambda x: " " * maximum_length + f" * {x}", sorted(value))) + rendered.append(f"{verbose_names_mapping.get(key):>{maximum_length}}:{value}") + else: + rendered.append(f"{verbose_names_mapping.get(key):>{maximum_length}}: {value}") + return "\n".join(rendered) diff --git a/requirements.txt b/requirements.txt index 9940ec5..e1922c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ fontTools[woff]==4.50.0 pip-licenses-lib==0.2.1 rpmfile==2.0.0 requests==2.31.0 +tomli==2.0.1 diff --git a/setup.py b/setup.py index 39c9927..b48dcb6 100644 --- a/setup.py +++ b/setup.py @@ -29,10 +29,12 @@ "fontTools[woff]", "pip-licenses-lib", "requests", + "tomli", ], extras_require={ "dev": [ "codespell", + "coverage", "flake8", "flake8-bugbear", "pep8-naming", diff --git a/tests/__init__.py b/tests/__init__.py index bccbcdf..b54486e 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -17,8 +17,6 @@ # Python 3.8 from importlib_resources import files, as_file # type: ignore[unused-ignore,import-not-found,no-redef] -import requests - CACHE_DIRECTORY = Path(mkdtemp()) @@ -36,7 +34,9 @@ def _get_or_download(download: Download) -> Path: path = CACHE_DIRECTORY / download.name if path.is_file(): return path - path.write_bytes(requests.get(url=download.url).content) + from license_tools.utils.download_utils import get_session + session = get_session() + path.write_bytes(session.get(url=download.url).content) return path diff --git a/tests/data.py b/tests/data.py index 4ee116a..fc7bb4e 100644 --- a/tests/data.py +++ b/tests/data.py @@ -61,12 +61,30 @@ suffix=".jar", ) +BASE64__0_22_0__CRATE = Download( + url="https://crates.io/api/v1/crates/base64/0.22.0/download", + name="base64_0.22.0.crate", + suffix=".crate", +) + +BASE64__0_22_0__CARGO_TOML = Download( + url="https://github.com/marshallpierce/rust-base64/raw/v0.22.0/Cargo.toml", + name="Cargo.toml", + suffix=".toml", +) + +CRYPTOGRAPHY__42_0_0__CARGO_LOCK = Download( + url="https://github.com/pyca/cryptography/raw/42.0.0/src/rust/Cargo.lock", + name="Cargo.lock", + suffix=".lock", +) + # Generated by `scancode-toolkit==32.0.8`. SETUP_PY_LICENSES = Licenses( detected_license_expression="apache-2.0 AND (unknown-license-reference AND apache-2.0)", detected_license_expression_spdx="Apache-2.0 AND (LicenseRef-scancode-unknown-license-reference AND Apache-2.0)", - percentage_of_license_text=19.55, + percentage_of_license_text=19.34, license_detections=[ LicenseDetection( license_expression="apache-2.0", @@ -145,8 +163,8 @@ matches=[ LicenseMatch( score=95.0, - start_line=51, - end_line=51, + start_line=53, + end_line=53, matched_length=6, match_coverage=100.0, matcher="2-aho", @@ -163,8 +181,8 @@ license_clues=[ LicenseClue( score=50.0, - start_line=57, - end_line=57, + start_line=59, + end_line=59, matched_length=3, match_coverage=100.0, matcher="2-aho", diff --git a/tests/test_constants.py b/tests/test_constants.py new file mode 100644 index 0000000..dc05864 --- /dev/null +++ b/tests/test_constants.py @@ -0,0 +1,14 @@ +# Copyright (c) stefan6419846. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. + +from __future__ import annotations + +from unittest import TestCase + +from license_tools import constants + + +class ConstantsTestCase(TestCase): + def test_version(self) -> None: + self.assertIsNotNone(constants.VERSION) diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 0000000..0091805 --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,80 @@ +# Copyright (c) stefan6419846. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. + +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest import TestCase + +from license_tools import retrieval +from tests.data import TYPING_EXTENSION_4_8_0__EXPECTED_OUTPUT + + +EXAMPLE_CARGO_LOCK_FILE = """ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +""" + + +class MainTestCase(TestCase): + @property + def custom_env(self) -> dict[str, str]: + custom_env = os.environ.copy() + custom_env["COLUMNS"] = "100" + return custom_env + + def test_retrieval(self) -> None: + result = subprocess.run( + [sys.executable, "-m", "license_tools", "--package", "typing_extensions==4.8.0", "--index-url", "https://pypi.org/simple"], + capture_output=True, env=self.custom_env + ) + self.assertEqual(0, result.returncode, result) + self.assertEqual(b"", result.stderr) + self.assertEqual(TYPING_EXTENSION_4_8_0__EXPECTED_OUTPUT, result.stdout.decode("UTF-8")) + + def test_cargo_lock_download(self) -> None: + with TemporaryDirectory() as target_directory, TemporaryDirectory() as source_directory: + cargo_lock = Path(source_directory) / "Cargo.lock" + cargo_lock.write_text(EXAMPLE_CARGO_LOCK_FILE) + + result = subprocess.run( + [sys.executable, "-m", "license_tools", "--cargo-lock-download", "--cargo-lock", cargo_lock, "--target-directory", target_directory], + capture_output=True, env=self.custom_env + ) + self.assertEqual(0, result.returncode, result) + self.assertEqual(b"", result.stderr) + self.assertEqual(b"", result.stdout) + + actual = [x[1] for x in retrieval.get_files_from_directory(target_directory)] + self.assertEqual( + [ + "autocfg_1.1.0.crate", + "base64_0.21.7.crate", + "bitflags_1.3.2.crate" + ], + actual + ) diff --git a/tests/test_retrieval.py b/tests/test_retrieval.py index 42ebdf6..45a1508 100644 --- a/tests/test_retrieval.py +++ b/tests/test_retrieval.py @@ -19,15 +19,19 @@ from license_tools import retrieval from license_tools.retrieval import RetrievalFlags -from license_tools.tools.scancode_tools import FileResults, Licenses +from license_tools.tools.scancode_tools import FileResults, LicenseDetection, LicenseMatch, Licenses from tests import Download, get_from_url from tests.data import ( - LIBAIO1__0_3_109_1_25__RPM, SETUP_PATH, + BASE64__0_22_0__CARGO_TOML, + LIBAIO1__0_3_109_1_25__RPM, + SETUP_PATH, SETUP_PY_LICENSES, TYPING_EXTENSION_4_8_0__EXPECTED_OUTPUT, TYPING_EXTENSION_4_8_0__LICENSES, TYPING_EXTENSION_4_8_0__SOURCE_FILES, - TYPING_EXTENSION_4_8_0__WHEEL_FILES, TYPING_EXTENSIONS__4_8_0__SDIST, TYPING_EXTENSIONS__4_8_0__WHEEL, + TYPING_EXTENSION_4_8_0__WHEEL_FILES, + TYPING_EXTENSIONS__4_8_0__SDIST, + TYPING_EXTENSIONS__4_8_0__WHEEL, ) @@ -231,6 +235,66 @@ def test_run_on_file__font_handling(self) -> None: ) self.assertEqual("setup.py\n" + font_awesome + "\n\n", stdout) + def test_cargo_toml(self) -> None: + with get_from_url(BASE64__0_22_0__CARGO_TOML) as source_path, TemporaryDirectory() as directory: + cargo_toml_path = Path(directory) / "Cargo.toml" + cargo_toml_path.write_bytes(source_path.read_bytes()) + stdout = StringIO() + with redirect_stdout(stdout): + result = retrieval.run_on_file(path=cargo_toml_path, short_path="/path/to/Cargo.toml") + + self.assertEqual( + """ +/path/to/Cargo.toml + Name: base64 + Version: 0.22.0 + Authors: + * Alice Maz + * Marshall Pierce + Description: encodes and decodes base64 as bytes or utf8 + Repository: https://github.com/marshallpierce/rust-base64 + README: README.md + Keywords: + * base64 + * decode + * encode + * no_std + * utf8 + Categories: encoding + License: MIT OR Apache-2.0 + +"""[1:], + stdout.getvalue() + ) + self.assertEqual( + FileResults( + path=cargo_toml_path, short_path='/path/to/Cargo.toml', retrieve_copyrights=False, retrieve_emails=False, retrieve_urls=False, + retrieve_licenses=True, retrieve_file_info=False, copyrights=None, emails=None, urls=None, + licenses=Licenses( + detected_license_expression='mit OR apache-2.0', detected_license_expression_spdx='MIT OR Apache-2.0', + percentage_of_license_text=2.55, + license_detections=[ + LicenseDetection( + license_expression='mit OR apache-2.0', license_expression_spdx='MIT OR Apache-2.0', + identifier='mit_or_apache_2_0-719f8427-422e-8023-c20e-9f8dd0af13b9', + matches=[ + LicenseMatch( + score=100.0, start_line=11, end_line=11, matched_length=6, match_coverage=100.0, matcher='2-aho', + license_expression='mit OR apache-2.0', spdx_license_expression='MIT OR Apache-2.0', + rule_identifier='mit_or_apache-2.0_14.RULE', rule_relevance=100, + rule_url='https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_or_apache-2.0_14.RULE', + from_file=None, matched_text=None + ) + ] + ) + ], + license_clues=[] + ), + file_info=None + ), + result + ) + class GetFilesFromDirectoryTestCase(TestCase): def test_get_files_from_directory(self) -> None: diff --git a/tests/tools/test_cargo_tools.py b/tests/tools/test_cargo_tools.py new file mode 100644 index 0000000..112cc15 --- /dev/null +++ b/tests/tools/test_cargo_tools.py @@ -0,0 +1,221 @@ +# Copyright (c) stefan6419846. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. + +from __future__ import annotations + +import re +from contextlib import redirect_stdout +from io import StringIO +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest import mock, TestCase + +from license_tools.tools import cargo_tools +from license_tools.tools.cargo_tools import PackageVersion +from license_tools.utils.download_utils import Download +from tests import get_from_url +from tests.data import BASE64__0_22_0__CARGO_TOML, CRYPTOGRAPHY__42_0_0__CARGO_LOCK + +EXPECTED_METADATA = { + "authors": ["Alice Maz ", "Marshall Pierce "], + "categories": ["encoding"], + "description": "encodes and decodes base64 as bytes or utf8", + "documentation": "https://docs.rs/base64", + "edition": "2018", + "keywords": ["base64", "utf8", "encode", "decode", "no_std"], + "license": "MIT OR Apache-2.0", + "metadata": {"docs": {"rs": {"rustdoc-args": ["--generate-link-to-definition"]}}}, + "name": "base64", + "readme": "README.md", + "repository": "https://github.com/marshallpierce/rust-base64", + "rust-version": "1.48.0", + "version": "0.22.0", +} + + +class ReadTomlTestCase(TestCase): + def test_read_toml(self) -> None: + with get_from_url(BASE64__0_22_0__CARGO_TOML) as path: + result = cargo_tools.read_toml(path) + self.assertEqual( + { + "bench": [{"harness": False, "name": "benchmarks", "required-features": ["std"]}], + "dev-dependencies": { + "clap": {"features": ["derive"], "version": "3.2.25"}, + "criterion": "0.4.0", + "once_cell": "1", + "rand": {"features": ["small_rng"], "version": "0.8.5"}, + "rstest": "0.13.0", + "rstest_reuse": "0.6.0", + "strum": {"features": ["derive"], "version": "0.25"}, + }, + "example": [{"name": "base64", "required-features": ["std"]}], + "features": {"alloc": [], "default": ["std"], "std": ["alloc"]}, + "package": EXPECTED_METADATA, + "profile": {"bench": {"debug": True}, "test": {"opt-level": 3}}, + "test": [{"name": "tests", "required-features": ["alloc"]}, {"name": "encode", "required-features": ["alloc"]}], + }, + result, + ) + + +class AnalyzeMetadataTestCase(TestCase): + def test_path_is_cargo_toml(self) -> None: + with get_from_url(BASE64__0_22_0__CARGO_TOML) as path, TemporaryDirectory() as directory: + cargo_toml = Path(directory) / "Cargo.toml" + cargo_toml.write_bytes(path.read_bytes()) + metadata = cargo_tools.analyze_metadata(cargo_toml) + self.assertEqual(EXPECTED_METADATA, metadata) + + def test_path_is_parent_of_cargo_toml(self) -> None: + with get_from_url(BASE64__0_22_0__CARGO_TOML) as path, TemporaryDirectory() as directory: + cargo_toml = Path(directory) / "Cargo.toml" + cargo_toml.write_bytes(path.read_bytes()) + metadata = cargo_tools.analyze_metadata(Path(directory)) + self.assertEqual(EXPECTED_METADATA, metadata) + + def test_path_is_grandparent_of_cargo_toml(self) -> None: + with get_from_url(BASE64__0_22_0__CARGO_TOML) as path, TemporaryDirectory() as directory: + cargo_toml = Path(directory) / "base64-0.22.1" / "Cargo.toml" + cargo_toml.parent.mkdir() + cargo_toml.write_bytes(path.read_bytes()) + metadata = cargo_tools.analyze_metadata(Path(directory)) + self.assertEqual(EXPECTED_METADATA, metadata) + + Path(directory, "another_directory").mkdir() + with self.assertRaisesRegex(expected_exception=ValueError, expected_regex=rf"^No clear Cargo\.toml in {re.escape(directory)}\.$"): + cargo_tools.analyze_metadata(Path(directory)) + + +class CheckMetadataTestCase(TestCase): + def test_check_metadata(self) -> None: + with get_from_url(BASE64__0_22_0__CARGO_TOML) as path, TemporaryDirectory() as directory: + cargo_toml = Path(directory) / "Cargo.toml" + cargo_toml.write_bytes(path.read_bytes()) + metadata = cargo_tools.check_metadata(cargo_toml) + self.assertEqual( + """ + Name: base64 + Version: 0.22.0 + Authors: + * Alice Maz + * Marshall Pierce + Description: encodes and decodes base64 as bytes or utf8 + Repository: https://github.com/marshallpierce/rust-base64 + README: README.md + Keywords: + * base64 + * decode + * encode + * no_std + * utf8 + Categories: encoding + License: MIT OR Apache-2.0 +"""[ + 1:-1 + ], + metadata, + ) + + +class PackageVersionTestCase(TestCase): + def test_to_download(self) -> None: + package_version = PackageVersion(name="autocfg", version="1.1.0", checksum="d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa") + self.assertEqual( + Download( + url="https://crates.io/api/v1/crates/autocfg/1.1.0/download", + filename="autocfg_1.1.0.crate", + sha256="d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa", + ), + package_version.to_download(), + ) + + +class GetPackageVersionsTestCase(TestCase): + def test_get_package_versions(self) -> None: + with get_from_url(CRYPTOGRAPHY__42_0_0__CARGO_LOCK) as path: + stdout = StringIO() + with redirect_stdout(stdout): + package_versions = list(cargo_tools.get_package_versions(path)) + self.assertEqual( + [ + PackageVersion(name="asn1", version="0.15.5", checksum="ae3ecbce89a22627b5e8e6e11d69715617138290289e385cde773b1fe50befdb"), + PackageVersion(name="asn1_derive", version="0.15.5", checksum="861af988fac460ac69a09f41e6217a8fb9178797b76fcc9478444be6a59be19c"), + PackageVersion(name="autocfg", version="1.1.0", checksum="d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"), + PackageVersion(name="base64", version="0.21.7", checksum="9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"), + PackageVersion(name="bitflags", version="1.3.2", checksum="bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"), + PackageVersion(name="bitflags", version="2.4.2", checksum="ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf"), + PackageVersion(name="cc", version="1.0.83", checksum="f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"), + PackageVersion(name="cfg-if", version="1.0.0", checksum="baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"), + PackageVersion(name="foreign-types", version="0.3.2", checksum="f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"), + PackageVersion(name="foreign-types-shared", version="0.1.1", checksum="00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"), + PackageVersion(name="heck", version="0.4.1", checksum="95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"), + PackageVersion(name="indoc", version="2.0.4", checksum="1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8"), + PackageVersion(name="libc", version="0.2.152", checksum="13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"), + PackageVersion(name="lock_api", version="0.4.11", checksum="3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"), + PackageVersion(name="memoffset", version="0.9.0", checksum="5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"), + PackageVersion(name="once_cell", version="1.19.0", checksum="3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"), + PackageVersion(name="openssl", version="0.10.63", checksum="15c9d69dd87a29568d4d017cfe8ec518706046a05184e5aea92d0af890b803c8"), + PackageVersion(name="openssl-macros", version="0.1.1", checksum="a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"), + PackageVersion(name="openssl-sys", version="0.9.99", checksum="22e1bf214306098e4832460f797824c05d25aacdf896f64a985fb0fd992454ae"), + PackageVersion(name="parking_lot", version="0.12.1", checksum="3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"), + PackageVersion(name="parking_lot_core", version="0.9.9", checksum="4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e"), + PackageVersion(name="pem", version="3.0.3", checksum="1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310"), + PackageVersion(name="pkg-config", version="0.3.29", checksum="2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb"), + PackageVersion(name="proc-macro2", version="1.0.78", checksum="e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"), + PackageVersion(name="pyo3", version="0.20.2", checksum="9a89dc7a5850d0e983be1ec2a463a171d20990487c3cfcd68b5363f1ee3d6fe0"), + PackageVersion(name="pyo3-build-config", version="0.20.2", checksum="07426f0d8fe5a601f26293f300afd1a7b1ed5e78b2a705870c5f30893c5163be"), + PackageVersion(name="pyo3-ffi", version="0.20.2", checksum="dbb7dec17e17766b46bca4f1a4215a85006b4c2ecde122076c562dd058da6cf1"), + PackageVersion(name="pyo3-macros", version="0.20.2", checksum="05f738b4e40d50b5711957f142878cfa0f28e054aa0ebdfc3fd137a843f74ed3"), + PackageVersion(name="pyo3-macros-backend", version="0.20.2", checksum="0fc910d4851847827daf9d6cdd4a823fbdaab5b8818325c5e97a86da79e8881f"), + PackageVersion(name="quote", version="1.0.35", checksum="291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"), + PackageVersion(name="redox_syscall", version="0.4.1", checksum="4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"), + PackageVersion(name="scopeguard", version="1.2.0", checksum="94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"), + PackageVersion(name="self_cell", version="1.0.3", checksum="58bf37232d3bb9a2c4e641ca2a11d83b5062066f88df7fed36c28772046d65ba"), + PackageVersion(name="smallvec", version="1.13.1", checksum="e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7"), + PackageVersion(name="syn", version="2.0.48", checksum="0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"), + PackageVersion(name="target-lexicon", version="0.12.13", checksum="69758bda2e78f098e4ccb393021a0963bb3442eac05f135c30f61b7370bbafae"), + PackageVersion(name="unicode-ident", version="1.0.12", checksum="3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"), + PackageVersion(name="unindent", version="0.2.3", checksum="c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"), + PackageVersion(name="vcpkg", version="0.2.15", checksum="accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"), + PackageVersion(name="windows-targets", version="0.48.5", checksum="9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"), + PackageVersion(name="windows_aarch64_gnullvm", version="0.48.5", checksum="2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"), + PackageVersion(name="windows_aarch64_msvc", version="0.48.5", checksum="dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"), + PackageVersion(name="windows_i686_gnu", version="0.48.5", checksum="a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"), + PackageVersion(name="windows_i686_msvc", version="0.48.5", checksum="8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"), + PackageVersion(name="windows_x86_64_gnu", version="0.48.5", checksum="53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"), + PackageVersion(name="windows_x86_64_gnullvm", version="0.48.5", checksum="0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"), + PackageVersion(name="windows_x86_64_msvc", version="0.48.5", checksum="ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"), + ], + package_versions, + ) + self.assertEqual( + """ +Skipping {'name': 'cryptography-cffi', 'version': '0.1.0', 'dependencies': ['cc', 'openssl-sys', 'pyo3']} +Skipping {'name': 'cryptography-key-parsing', 'version': '0.1.0', 'dependencies': ['asn1', 'cfg-if', 'cryptography-x509', 'openssl', 'openssl-sys']} +Skipping {'name': 'cryptography-openssl', 'version': '0.1.0', 'dependencies': ['foreign-types', 'foreign-types-shared', 'openssl', 'openssl-sys']} +Skipping {'name': 'cryptography-rust', 'version': '0.1.0', 'dependencies': ['asn1', 'cc', 'cfg-if', 'cryptography-cffi', 'cryptography-key-parsing', 'cryptography-openssl', 'cryptography-x509', 'cryptography-x509-verification', 'foreign-types-shared', 'once_cell', 'openssl', 'openssl-sys', 'pem', 'pyo3', 'self_cell']} +Skipping {'name': 'cryptography-x509', 'version': '0.1.0', 'dependencies': ['asn1']} +Skipping {'name': 'cryptography-x509-verification', 'version': '0.1.0', 'dependencies': ['asn1', 'cryptography-x509', 'once_cell', 'pem']} +"""[1:], # noqa: E501 + stdout.getvalue() + ) + + +class DownloadFromLockFileTestCase(TestCase): + def test_download_from_lock_file(self) -> None: + package_versions = [ + PackageVersion(name="lock_api", version="0.4.11", checksum="3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"), + PackageVersion(name="memoffset", version="0.9.0", checksum="5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"), + PackageVersion(name="once_cell", version="1.19.0", checksum="3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"), + ] + lock_path = Path("/path/to/Cargo.lock") + with mock.patch.object(cargo_tools, "get_package_versions", return_value=package_versions), \ + mock.patch("license_tools.utils.download_utils.download_one_file_per_second") as download_mock, \ + TemporaryDirectory() as directory: + cargo_tools.download_from_lock_file(lock_path=lock_path, target_directory=directory) + download_mock.assert_called_once_with( + downloads=[package.to_download() for package in package_versions], + directory=Path(directory) + ) diff --git a/tests/utils/test_archive_utils.py b/tests/utils/test_archive_utils.py index 3354574..717c5cd 100644 --- a/tests/utils/test_archive_utils.py +++ b/tests/utils/test_archive_utils.py @@ -13,7 +13,7 @@ from license_tools.utils import archive_utils from tests import get_from_url from tests.data import ( - JSON__20231013__JAR, LIBAIO1__0_3_109_1_25__RPM, LIBAIO1__0_3_109_1_25__SRC_RPM, TYPING_EXTENSION_4_8_0__SOURCE_FILES, + BASE64__0_22_0__CRATE, JSON__20231013__JAR, LIBAIO1__0_3_109_1_25__RPM, LIBAIO1__0_3_109_1_25__SRC_RPM, TYPING_EXTENSION_4_8_0__SOURCE_FILES, TYPING_EXTENSION_4_8_0__WHEEL_FILES, TYPING_EXTENSIONS__4_8_0__SDIST, TYPING_EXTENSIONS__4_8_0__WHEEL, ) @@ -94,6 +94,56 @@ def test_tar_gz(self) -> None: actual = [x[1] for x in retrieval.get_files_from_directory(directory)] self.assertEqual(TYPING_EXTENSION_4_8_0__SOURCE_FILES, actual) + def test_rust_crate(self) -> None: + with get_from_url(BASE64__0_22_0__CRATE) as path, TemporaryDirectory() as tempdir: + directory = Path(tempdir) + self.assertTrue(archive_utils.can_extract(path)) + archive_utils.extract(archive_path=path, target_directory=directory) + actual = [x[1] for x in retrieval.get_files_from_directory(directory)] + self.assertEqual( + [ + "base64-0.22.0/.cargo_vcs_info.json", + "base64-0.22.0/.circleci/config.yml", + "base64-0.22.0/.github/ISSUE_TEMPLATE/general-purpose-issue.md", + "base64-0.22.0/.gitignore", + "base64-0.22.0/Cargo.lock", + "base64-0.22.0/Cargo.toml", + "base64-0.22.0/Cargo.toml.orig", + "base64-0.22.0/LICENSE-APACHE", + "base64-0.22.0/LICENSE-MIT", + "base64-0.22.0/README.md", + "base64-0.22.0/RELEASE-NOTES.md", + "base64-0.22.0/benches/benchmarks.rs", + "base64-0.22.0/clippy.toml", + "base64-0.22.0/examples/base64.rs", + "base64-0.22.0/icon_CLion.svg", + "base64-0.22.0/src/alphabet.rs", + "base64-0.22.0/src/chunked_encoder.rs", + "base64-0.22.0/src/decode.rs", + "base64-0.22.0/src/display.rs", + "base64-0.22.0/src/encode.rs", + "base64-0.22.0/src/engine/general_purpose/decode.rs", + "base64-0.22.0/src/engine/general_purpose/decode_suffix.rs", + "base64-0.22.0/src/engine/general_purpose/mod.rs", + "base64-0.22.0/src/engine/mod.rs", + "base64-0.22.0/src/engine/naive.rs", + "base64-0.22.0/src/engine/tests.rs", + "base64-0.22.0/src/lib.rs", + "base64-0.22.0/src/prelude.rs", + "base64-0.22.0/src/read/decoder.rs", + "base64-0.22.0/src/read/decoder_tests.rs", + "base64-0.22.0/src/read/mod.rs", + "base64-0.22.0/src/tests.rs", + "base64-0.22.0/src/write/encoder.rs", + "base64-0.22.0/src/write/encoder_string_writer.rs", + "base64-0.22.0/src/write/encoder_tests.rs", + "base64-0.22.0/src/write/mod.rs", + "base64-0.22.0/tests/encode.rs", + "base64-0.22.0/tests/tests.rs", + ], + actual + ) + def test_zip(self) -> None: with TemporaryDirectory() as source, TemporaryDirectory() as tempdir, NamedTemporaryFile() as zip_file: zip_path = Path(f"{zip_file.name}.zip") diff --git a/tests/utils/test_download_utils.py b/tests/utils/test_download_utils.py new file mode 100644 index 0000000..3bb7696 --- /dev/null +++ b/tests/utils/test_download_utils.py @@ -0,0 +1,179 @@ +# Copyright (c) stefan6419846. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. + +from __future__ import annotations + +import datetime +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Any +from unittest import mock, TestCase + +import requests + +from license_tools import retrieval +from license_tools.utils import download_utils +from license_tools.utils.download_utils import ChecksumError, Download, DownloadError + + +class DownloadTestCase(TestCase): + def test_verify_checksum(self) -> None: + # No checksum. + Download(url="http://localhost", filename="dummy").verify_checksum(b"Hello World!\n") + + # Correct sha256 checksum. + Download( + url="http://localhost", filename="dummy", sha256="03ba204e50d126e4674c005e04d82e84c21366780af1f43bd54a37816b6ab340" + ).verify_checksum(b"Hello World!\n") + + # Incorrect sha256 checksum. + with self.assertRaisesRegex( + expected_exception=ChecksumError, + expected_regex=r"^Checksum mismatch: Got 03ba204e50d126e4674c005e04d82e84c21366780af1f43bd54a37816b6ab340, expected INVALID!$" + ): + Download(url="http://localhost", filename="dummy", sha256="INVALID").verify_checksum(b"Hello World!\n") + + +class GetSessionTestCase(TestCase): + def test_get_session(self) -> None: + session = download_utils.get_session() + self.assertIsInstance(session, requests.Session) + self.assertIn("https://github.com/stefan6419846/license_tools version ", session.headers["User-Agent"]) + + +class DownloadFile(TestCase): + def test_reuse_session(self) -> None: + session = download_utils.get_session() + with mock.patch.object(download_utils, "get_session") as session_mock: + try: + download_utils.download_file( + download=Download(url="http://localhost", filename="dummy"), + directory=Path("/dummy"), + session=session + ) + except Exception: + pass + session_mock.assert_not_called() + + with mock.patch.object(download_utils, "get_session") as session_mock: + try: + download_utils.download_file( + download=Download(url="http://localhost", filename="dummy"), + directory=Path("/dummy"), + ) + except Exception: + pass + session_mock.assert_called_once_with() + + def test_not_okay(self) -> None: + session = download_utils.get_session() + response = requests.Response() + response.status_code = 404 + with mock.patch.object(session, "get", return_value=response): + with self.assertRaisesRegex( + expected_exception=DownloadError, + expected_regex=r"^Download not okay\? http://localhost $" + ): + download_utils.download_file( + download=Download(url="http://localhost", filename="dummy"), + directory=Path("/dummy"), + session=session, + ) + + def test_hash_mismatch(self) -> None: + session = download_utils.get_session() + response = requests.Response() + response.status_code = 200 + response._content = b"Hello World!\n" + + with mock.patch.object(session, "get", return_value=response): + with self.assertRaisesRegex( + expected_exception=ChecksumError, + expected_regex=r"^Checksum mismatch: Got 03ba204e50d126e4674c005e04d82e84c21366780af1f43bd54a37816b6ab340, expected INVALID!$" + ): + download_utils.download_file( + download=Download(url="http://localhost", filename="dummy", sha256="INVALID"), + directory=Path("/dummy"), + session=session, + ) + + def test_valid(self) -> None: + session = download_utils.get_session() + response = requests.Response() + response.status_code = 200 + response._content = b"Hello World!\n" + + with mock.patch.object(session, "get", return_value=response), TemporaryDirectory() as directory: + download_utils.download_file( + download=Download(url="http://localhost", filename="test.txt", sha256="03ba204e50d126e4674c005e04d82e84c21366780af1f43bd54a37816b6ab340"), + directory=Path(directory), + session=session, + ) + self.assertEqual( + b"Hello World!\n", + Path(directory, "test.txt").read_bytes() + ) + + +class DownloadOneFilePerSecondTestCase(TestCase): + def setUp(self) -> None: + super().setUp() + + self.timestamps: list[datetime.datetime] = [] + self.downloads = [ + Download(url="http://localhost/file1", filename="file1.txt", sha256="03ba204e50d126e4674c005e04d82e84c21366780af1f43bd54a37816b6ab340"), + Download(url="http://localhost/file2", filename="file2.txt", sha256="03ba204e50d126e4674c005e04d82e84c21366780af1f43bd54a37816b6ab340"), + Download(url="http://localhost/file3", filename="file3.txt", sha256="03ba204e50d126e4674c005e04d82e84c21366780af1f43bd54a37816b6ab340"), + ] + + self.session = download_utils.get_session() + self._session_patcher = mock.patch.object(download_utils, "get_session", return_value=self.session) + self._session_patcher.start() + self.addCleanup(self._session_patcher.stop) + + def test_error(self) -> None: + # Set wrong hash. + self.downloads[1].sha256 = "INVALID" + + def get(url: str, *args: Any, **kwargs: Any) -> requests.Response: + response = requests.Response() + response.status_code = 200 + response._content = b"Hello World!\n" + return response + + with mock.patch.object(self.session, "get", side_effect=get), TemporaryDirectory() as directory: + with self.assertRaisesRegex( + expected_exception=ChecksumError, + expected_regex=r"^Checksum mismatch: Got 03ba204e50d126e4674c005e04d82e84c21366780af1f43bd54a37816b6ab340, expected INVALID!$" + ): + download_utils.download_one_file_per_second(downloads=self.downloads, directory=Path(directory)) + actual = [x[1] for x in retrieval.get_files_from_directory(directory)] + self.assertEqual(["file1.txt"], actual) + self.assertEqual( + b"Hello World!\n", + Path(directory, "file1.txt").read_bytes() + ) + + def test_delays(self) -> None: + def get(url: str, *args: Any, **kwargs: Any) -> requests.Response: + self.timestamps.append(datetime.datetime.now()) + response = requests.Response() + response.status_code = 200 + response._content = b"Hello World!\n" + return response + + with mock.patch.object(self.session, "get", side_effect=get), TemporaryDirectory() as directory: + download_utils.download_one_file_per_second(downloads=self.downloads, directory=Path(directory)) + actual = [x[1] for x in retrieval.get_files_from_directory(directory)] + self.assertEqual(["file1.txt", "file2.txt", "file3.txt"], actual) + for name in actual: + self.assertEqual( + b"Hello World!\n", + Path(directory, name).read_bytes(), + name + ) + + deltas: list[datetime.timedelta] = [y - x for x, y in zip(self.timestamps[:-1], self.timestamps[1:])] + for delta in deltas: + self.assertGreaterEqual(delta.total_seconds(), 1) diff --git a/tests/utils/test_rendering_utils.py b/tests/utils/test_rendering_utils.py new file mode 100644 index 0000000..77e0772 --- /dev/null +++ b/tests/utils/test_rendering_utils.py @@ -0,0 +1,65 @@ +# Copyright (c) stefan6419846. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. + +from __future__ import annotations + +from unittest import TestCase + +from license_tools.utils import rendering_utils + + +class RenderDictionaryTestCase(TestCase): + def test_render_dictionary(self) -> None: + dictionary = { + "key": "value", + "pi": 3.141592653589793, + "answer": 42, + "key1": [1, 2, 3], + "key2": (1, 2, 3), + "key3": {3, 2, 1}, + "key4": {"foo": "bar"}, + "ignore": "me", + "multi1": 1337, + "multi2": [42, 43, 44], + } + mapping = { + "key": "Key", + "pi": "π", + "answer": "Answer", + "key1": "Key 1", + "key2": "Key 2", + "key3": "Key 3", + "key4": "Key 4", + "multi1": "Multiple 1", + "multi2": "Multiple 2", + } + + result = rendering_utils.render_dictionary( + dictionary=dictionary, + verbose_names_mapping=mapping, + multi_value_keys={"key1", "key2", "key3", "multi1"}, + ) + self.assertEqual( + """ + Key: value + π: 3.141592653589793 + Answer: 42 + Key 1: + * 1 + * 2 + * 3 + Key 2: + * 1 + * 2 + * 3 + Key 3: + * 1 + * 2 + * 3 + Key 4: {'foo': 'bar'} +Multiple 1: 1337 +Multiple 2: [42, 43, 44] +"""[1:-1], + result + )