diff --git a/pyproject.toml b/pyproject.toml index 22d7ce2..79811ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ namespaces = true # ----------------------------------------- Project Metadata ------------------------------------- # [project] -version = "0.0.0.dev3" +version = "0.0.0.dev4" name = "LicenseMan" requires-python = ">=3.10" dependencies = [ diff --git a/src/licenseman/spdx/__init__.py b/src/licenseman/spdx/__init__.py index db89d79..b39edf7 100644 --- a/src/licenseman/spdx/__init__.py +++ b/src/licenseman/spdx/__init__.py @@ -1,160 +1,304 @@ +from __future__ import annotations as _annotations +from typing import TYPE_CHECKING as _TYPE_CHECKING from pathlib import Path as _Path import json as _json + import platformdirs as _platdir import pylinks as _pl from pylinks.exception.api import WebAPIError as _WebAPIError +from licenseman import data as _data +from licenseman.spdx.exception import SPDXLicenseException from licenseman.spdx.license_db import SPDXLicenseDB +from licenseman.spdx.exception_db import SPDXLicenseExceptionDB from licenseman.spdx.license_list import SPDXLicenseList +from licenseman.spdx.exception_list import SPDXExceptionList from licenseman.spdx.license import SPDXLicense +from licenseman.spdx import expression from licenseman import logger +if _TYPE_CHECKING: + from typing import Literal + URL_TEMPLATE_LICENSE_XML = "https://raw.githubusercontent.com/spdx/license-list-data/refs/heads/main/license-list-XML/{}.xml" URL_TEMPLATE_LICENSE_JSON = "https://raw.githubusercontent.com/spdx/license-list-data/refs/heads/main/json/details/{}.json" + +URL_TEMPLATE_EXCEPTION_XML = "https://raw.githubusercontent.com/spdx/license-list-data/refs/heads/main/license-list-XML/exceptions/{}.xml" +URL_TEMPLATE_EXCEPTION_JSON = "https://raw.githubusercontent.com/spdx/license-list-data/refs/heads/main/json/exceptions/{}.json" + URL_LICENSE_LIST = "https://spdx.org/licenses/licenses.json" +URL_EXCEPTION_LIST = 
"https://spdx.org/licenses/exceptions.json" def license_db( - path: str | _Path | None = _platdir.site_cache_path( + path: str | _Path = _platdir.site_cache_path( appauthor="RepoDynamics", appname="LicenseMan", - ) / "SPDX_DB", + ) / "SPDX_DB" / "licenses", force_update: bool = False, verify: bool = True, in_memory: bool = False, ) -> SPDXLicenseDB: - db_path = _Path(path) - db_license_path = db_path / "licenses" - license_list_ = _get_global_license_list() - license_ids = license_list_.ids - if force_update or not db_path.is_dir(): - missing_ids = license_ids - intro = "Force update is enabled" if force_update else f"SPDX license database not found at {db_path}" + return _db( + typ="license", + path=_Path(path), + force_update=force_update, + verify=verify, + in_memory=in_memory, + ) + + +def exception_db( + path: str | _Path = _platdir.site_cache_path( + appauthor="RepoDynamics", + appname="LicenseMan", + ) / "SPDX_DB" / "exceptions", + force_update: bool = False, + verify: bool = True, + in_memory: bool = False, +) -> SPDXLicenseExceptionDB: + return _db( + typ="exception", + path=_Path(path), + force_update=force_update, + verify=verify, + in_memory=in_memory, + ) + + +def license_list() -> SPDXLicenseList: + """Get the latest version of the [SPDX license list](https://spdx.org/licenses/) from SPDX website.""" + data = _pl.http.request(URL_LICENSE_LIST, response_type="json") + return SPDXLicenseList(data) + + +def exception_list() -> SPDXExceptionList: + """Get the latest version of the [SPDX exception list](https://spdx.org/licenses/exceptions-index.html) from SPDX website.""" + data = _pl.http.request(URL_EXCEPTION_LIST, response_type="json") + return SPDXExceptionList(data) + + +def license(license_id: str, verify: bool = True) -> SPDXLicense: + """Get an SPDX license. + + Parameters + ---------- + license_id + SPDX license ID, e.g., 'MIT', 'GPL-2.0-or-later'. 
+ """ + return _license(license_id, "license", verify) + + +def exception(exception_id: str, verify: bool = True) -> SPDXLicenseException: + """Get an SPDX license exception. + + Parameters + ---------- + exception_id + SPDX license exception ID, e.g., 'Autoconf-exception-2.0'. + """ + return _license(exception_id, "exception", verify) + + +def license_xml(license_id: str) -> str: + """Get an SPDX license definition in XML format from SPDX + [license-list-data](https://github.com/spdx/license-list-data) repository. + + Parameters + ---------- + license_id + SPDX license ID, e.g., 'MIT', 'GPL-2.0-or-later'. + """ + return _download(license_id, format="xml") + + +def license_json(license_id: str) -> dict: + """Get an SPDX license definition in XML format from SPDX + [license-list-data](https://github.com/spdx/license-list-data) repository. + + Parameters + ---------- + license_id + SPDX license ID, e.g., 'MIT', 'GPL-2.0-or-later'. + """ + return _download(license_id, format="json") + + +def exception_xml(exception_id: str) -> str: + """Get an SPDX license exception definition in XML format from SPDX + [license-list-data](https://github.com/spdx/license-list-data) repository. + + Parameters + ---------- + exception_id + SPDX license exception ID, e.g., 'Autoconf-exception-2.0'. + """ + return _download(exception_id, format="xml", exception=True) + + +def exception_json(exception_id: str) -> dict: + """Get an SPDX license exception definition in XML format from SPDX + [license-list-data](https://github.com/spdx/license-list-data) repository. + + Parameters + ---------- + exception_id + SPDX license exception ID, e.g., 'Autoconf-exception-2.0'. + """ + return _download(exception_id, format="json", exception=True) + + +def trove_classifier(license_id: str) -> str | None: + """Get the Trove classifier for an SPDX license. + + Parameters + ---------- + license_id + SPDX license ID, e.g., 'MIT', 'GPL-2.0-or-later'. 
def _db(
    typ: Literal["license", "exception"],
    path: _Path,
    force_update: bool = False,
    verify: bool = True,
    in_memory: bool = False,
) -> SPDXLicenseDB | SPDXLicenseExceptionDB:
    """Load a local SPDX license or license-exception database, downloading missing entries.

    Parameters
    ----------
    typ
        Which database to load: `"license"` or `"exception"`.
    path
        Directory holding one `<SPDX-ID>.json` file per entry.
        It is created (including parents) when entries must be downloaded.
    force_update
        If True, every entry in the current SPDX list is downloaded again,
        regardless of what is already on disk.
    verify
        Passed on to the database class, and to the per-entry loader
        when entries are not kept in memory.
    in_memory
        Passed on to the database class. When True, freshly downloaded
        entries are also handed to the database so they need not be re-read.

    Returns
    -------
    An `SPDXLicenseDB` for `typ="license"`, otherwise an `SPDXLicenseExceptionDB`.
    """
    if typ == "license":
        name = "license"
        class_ = SPDXLicenseDB
        func = license
        list_ = _get_global_license_list()
    else:
        name = "license exception"
        class_ = SPDXLicenseExceptionDB
        func = exception
        list_ = _get_global_exception_list()

    name_title = name.title()
    load_title = f"SPDX {name_title} Database Load"
    ids = list_.ids

    if force_update or not path.is_dir():
        missing_ids = ids
        intro = (
            "Force update is enabled"
            if force_update
            else f"SPDX {name} database not found at {path}"
        )
        logger.log(
            "info" if force_update else "notice",
            load_title,
            f"{intro}; downloading all latest SPDX {name} data.",
        )
    else:
        missing_ids = [
            spdx_id for spdx_id in ids if not (path / f"{spdx_id}.json").is_file()
        ]
        if not missing_ids:
            logger.success(
                load_title,
                f"Loaded database from {path}; all {len(ids)} {name} files found.",
            )
            # Positional arguments on purpose: the two database classes name
            # their first parameter differently (license_list / exception_list).
            return class_(list_, path, in_memory, verify)
        num_missing = len(missing_ids)
        num_available = len(ids) - num_missing
        logger.log(
            "notice",
            load_title,
            f"Loaded database from {path}; "
            f"found {num_missing} missing {name} files (available: {num_available}).",
        )

    path.mkdir(parents=True, exist_ok=True)
    downloaded = {}
    for missing_id in missing_ids:
        output_path = path / f"{missing_id}.json"
        data = func(missing_id, verify=False if in_memory else verify)
        with open(output_path, "w", encoding="utf-8") as f:
            _json.dump(data.raw_data, f)
        logger.success(
            f"SPDX {name_title} Database Update",
            f"Downloaded '{missing_id}' to 'file://{output_path}'.",
        )
        if in_memory:
            downloaded[missing_id] = data
    # Positional for the same reason as above; the last parameter is
    # `licenses` in one class and `exceptions` in the other.
    return class_(list_, path, in_memory, verify, downloaded)
""" - data = license_json(license_id) - data["xml"] = license_xml(license_id) - license_list_ = _get_global_license_list() - for list_entry_key, list_entry_val in license_list_[license_id].items(): + if typ == "license": + name = "license" + func_json = license_json + func_xml = license_xml + class_ = SPDXLicense + list_ = _get_global_license_list() + else: + name = "license exception" + func_json = exception_json + func_xml = exception_xml + class_ = SPDXLicenseException + list_ = _get_global_exception_list() + + data = func_json(spdx_id) + data["xml"] = func_xml(spdx_id) + name_title = name.title() + + for list_entry_key, list_entry_val in list_[spdx_id].items(): # 'detailsUrl', 'reference', 'referenceNumber' are not present in JSON data if list_entry_key not in data: data[list_entry_key] = list_entry_val logger.info( - "SPDX JSON License Load", - f"Added missing '{list_entry_key}' entry to '{license_id}' JSON data from license list." + f"SPDX JSON {name_title} Load", + f"Added missing '{list_entry_key}' entry to '{spdx_id}' JSON data from {name} list." ) elif data[list_entry_key] != list_entry_val: logger.warning( - "SPDX JSON License Load", - f"Mismatched '{list_entry_key}' entry in '{license_id}' JSON data.", + f"SPDX JSON {name_title} Load", + f"Mismatched '{list_entry_key}' entry in '{spdx_id}' JSON data.", "JSON content:", logger.pretty(data[list_entry_key]), - "License list content:", + f"{name.capitalize()} list content:", logger.pretty(list_entry_val), ) - return SPDXLicense(data, verify=verify) + return class_(data, verify=verify) -def license_xml(license_id: str) -> str: - """Get an SPDX license definition in XML format from SPDX - [license-list-data](https://github.com/spdx/license-list-data) repository. - - Parameters - ---------- - license_id - SPDX license ID, e.g., 'MIT', 'GPL-2.0-or-later'. 
def _download(
    spdx_id: str,
    format: Literal["xml", "json"],
    exception: bool = False,
) -> str | dict:
    """Download the XML or JSON definition of an SPDX license or license exception.

    Parameters
    ----------
    spdx_id
        SPDX ID that is substituted into the URL template.
    format
        `"xml"` requests the XML definition and returns it as a string;
        any other value requests the JSON definition and returns parsed data.
    exception
        If True, use the license-exception URL templates
        instead of the license ones.

    Raises
    ------
    Exception
        If the HTTP request raises a `WebAPIError`; the original error is chained.
    """
    is_xml = format == "xml"
    if exception:
        url = URL_TEMPLATE_EXCEPTION_XML if is_xml else URL_TEMPLATE_EXCEPTION_JSON
    else:
        url = URL_TEMPLATE_LICENSE_XML if is_xml else URL_TEMPLATE_LICENSE_JSON
    try:
        return _pl.http.request(
            url.format(spdx_id),
            response_type="str" if is_xml else "json",
        )
    except _WebAPIError as e:
        msg_typ = "license exception" if exception else "license"
        msg_format = "XML" if is_xml else "JSON"
        # The closing quote after the ID was missing in the original message.
        raise Exception(
            f"Error downloading {msg_typ} {msg_format} for ID '{spdx_id}'"
        ) from e
SPDXLicenseList | None = None +_EXCEPTION_LIST: SPDXExceptionList | None = None +_TROVE_MAPPING: dict[str, str] | None = None \ No newline at end of file diff --git a/src/licenseman/spdx/exception.py b/src/licenseman/spdx/exception.py new file mode 100644 index 0000000..552ac03 --- /dev/null +++ b/src/licenseman/spdx/exception.py @@ -0,0 +1,344 @@ +from __future__ import annotations as _annotations + +from typing import TYPE_CHECKING as _TYPE_CHECKING +import datetime as _dt +from xml.etree import ElementTree as _ElementTree +from dataclasses import dataclass as _dataclass + +from licenseman import logger as _logger +from licenseman.spdx.license_text import SPDXLicenseTextPlain + +if _TYPE_CHECKING: + from typing import Literal, Any + + +class SPDXLicenseException: + """SPDX License definition. + + Parameters + ---------- + xml + SPDX license XML content as a string. + + References + ---------- + - [SPDX Docs](https://github.com/spdx/license-list-XML/blob/main/DOCS/README.md) + - [SPDX Docs - XML Fields](https://github.com/spdx/license-list-XML/blob/main/DOCS/xml-fields.md) + - [XML Schema](https://github.com/spdx/license-list-XML/blob/main/schema/ListedLicense.xsd) + - [GitHub Repository](https://github.com/spdx/license-list-XML) + """ + + def __init__(self,data: dict, verify: bool = True): + try: + root = _ElementTree.fromstring(data["xml"]) + except _ElementTree.ParseError as e: + raise Exception(f"Error parsing license XML content.") from e + self._ns: dict = {'': 'http://www.spdx.org/license'} + self._xml: _ElementTree.Element = root.find('exception', self._ns) + self._data: dict = data + if verify: + self.verify() + return + + def verify(self): + + def log(key_json: str, missing_in: Literal["xml", "json"], data: Any, key_xml: str | None = None): + if key_xml is None: + key_xml = key_json + if missing_in == "xml": + missing_source = "XML" + existing_source = "JSON" + missing_key = key_xml + existing_key = key_json + else: + missing_source = "JSON" + 
existing_source = "XML" + missing_key = key_json + existing_key = key_xml + _logger.notice( + log_title, + f"The value of '{missing_key}' is not defined in the {missing_source} data. " + f"Using the {existing_source} data value of '{existing_key}':", + _logger.pretty(data) + ) + return + + def deprecated_version(): + key = "deprecatedVersion" + xml = self._xml.attrib.get(key) + json = self._data.get(key) + if json != xml: + if xml is None: + log(key_json=key, missing_in="xml", data=json) + elif json is None: + log(key_json=key, missing_in="json", data=xml) + self._data[key] = xml + else: + raise Exception( + "Deprecated version mismatch between XML and JSON data. " + f"XML: {xml}, JSON: {json}" + ) + return + + def cross_refs(): + xml_elem = self._xml.find('crossRefs', self._ns) + xml = sorted( + [ref.text.strip() for ref in xml_elem.findall('crossRef', self._ns)] + ) if xml_elem else [] + json_seealso = sorted(self._data.get("seeAlso", [])) + if json_seealso != xml: + if not xml: + log("seeAlso", "xml", data=json_seealso) + return + if not json_seealso: + log("seeAlso", "json", data=xml) + self._data["seeAlso"] = xml + return + raise Exception( + "Cross references mismatch between XML and JSON data. 
" + f"XML: {xml}, JSON: {json_seealso}" + ) + + log_title = f"{self.id} License Exception Verification" + if self._data["licenseExceptionId"] != self._xml.attrib.get('licenseId'): + raise Exception("License exception ID mismatch between XML and JSON data.") + if self._data["name"] != self._xml.attrib.get('name'): + raise Exception("License exception name mismatch between XML and JSON data.") + deprecated_version() + cross_refs() + return + + def generate_text( + self, + title: str | bool = True, + copyright: str | bool = False, + optionals: bool | list[bool] = True, + alts: dict[str, str] | None = None, + line_length: int = 88, + list_indent: int = 3, + list_item_indent: int = 2, + list_item_vertical_spacing: int = 2, + list_bullet_prefer_default: bool = True, + list_bullet_ordered: bool = True, + list_bullet_unordered_char: str = "–", + title_centered: bool = True, + title_separator_full_line: bool = True, + title_separator: Literal["-", "=", "_", "*"] = "=", + subtitle_separator: Literal["-", "=", "_", "*"] = "-", + ) -> tuple[str, str | None]: + """Generate plain-text license. + + Parameters + ---------- + title + Determines how to treat the license title, if any. + Since the title is [optional](https://spdx.github.io/spdx-spec/v3.0.1/annexes/license-matching-guidelines-and-templates/#license-name-or-title) + and not used in matching, it can be omitted or replaced with a custom title. + If True, the title is included as-is. If False, the title is omitted. + If a string, the title is replaced with the custom string, if a title is present. + copyright + Determines how to treat the copyright notice, if any. + Since the copyright notice is [optional](https://spdx.github.io/spdx-spec/v3.0.1/annexes/license-matching-guidelines-and-templates/#copyright-notice) + and not used in matching, it can be omitted or replaced with a custom notice. + If True, the notice is included as-is. If False, the notice is omitted. 
+ If a string, the notice is replaced with the custom string, if a notice is present. + optionals : bool, optional + Whether to include elements in the output, by default True. + alts : dict[str, int] | None, optional + A dictionary specifying choices for elements. Keys are 'name' attributes, + and values are the value to use. + line_length + The maximum line length for the plain-text output. + list_indent + The number of spaces separating list items from the left margin. + list_item_indent + The number of spaces separating list items from the bullet character. + list_item_vertical_spacing + The number of newlines separating list items. + list_bullet_prefer_default + Whether to use the license's default bullet character or number for list items, if available. + list_bullet_ordered + Whether to use numbered (True) or bulleted (False) list items, + if no default bullet is available or `list_bullet_prefer_default` is False. + list_bullet_unordered_char + The character to use for unordered list items if `list_bullet_ordered` is False. + + Returns + ------- + The plain-text version of the license + plus the license header text, if present. 
+ """ + return SPDXLicenseTextPlain(text=self.text_xml).generate( + title=title, + copyright=copyright, + optionals=optionals, + alts=alts, + line_length=line_length, + list_indent=list_indent, + list_item_indent=list_item_indent, + list_item_vertical_spacing=list_item_vertical_spacing, + list_bullet_prefer_default=list_bullet_prefer_default, + list_bullet_ordered=list_bullet_ordered, + list_bullet_unordered_char=list_bullet_unordered_char, + title_centered=title_centered, + title_separator_full_line=title_separator_full_line, + title_separator=title_separator, + subtitle_separator=subtitle_separator, + ) + + @property + def raw_data(self) -> dict: + """Raw license data.""" + return self._data + + @property + def id(self) -> str: + """SPDX license ID.""" + return self._data["licenseExceptionId"] + + @property + def name(self) -> str: + """Full name of the license""" + return self._data["name"] + + @property + def text_plain(self) -> str: + """Original license text in plain text format.""" + return self._data["licenseExceptionText"] + + @property + def text_html(self) -> str | None: + """Original license text in HTML format.""" + return self._data.get("exceptionTextHtml") + + @property + def text_template(self) -> str | None: + """License text template.""" + return self._data.get("licenseExceptionTemplate") + + @property + def text_xml(self) -> _ElementTree.Element: + return self._xml.find('text', self._ns) + + @property + def title_text_xml(self) -> _ElementTree.Element | None: + """Title of the license as defined in the text, if any.""" + return self._xml.find('.//titleText', self._ns) + + @property + def copyright_text_xml(self) -> _ElementTree.Element | None: + """Copyright notice of the license is defined in the text, if any.""" + return self._xml.find('.//copyrightText', self._ns) + + @property + def optionals_xml(self) -> list[_ElementTree.Element]: + """Optional fields in the license text, if any.""" + return self._xml.findall('.//optional', self._ns) + + 
@property + def alts(self) -> dict[str, dict[str, str]]: + """ + + Returns + ------- + A dictionary where keys are the alternative field names, and values are dictionaries with keys: + `text` : str + + Default value. + `match` : str + + Regular expression (RegEx) pattern to validate user input for `text`. + """ + alts = {} + for alt in self._xml.findall('.//alt', self._ns): + alts[alt.attrib['name']] = {'text': alt.text, 'match': alt.attrib['match']} + return alts + + @property + def reference_number(self) -> int: + """Reference number of the license.""" + return self._data["referenceNumber"] + + @property + def url_reference(self) -> str: + """URL to the license reference page at SPDX.org.""" + return self._data["reference"] + + @property + def url_json(self) -> str: + """URL to the license JSON data.""" + return self._data["detailsUrl"] + + @property + def url_others(self) -> list[str]: + """URLs to license resources, if any.""" + return self._data.get("seeAlso", []) + + @property + def deprecated(self) -> bool: + """Whether the license is deprecated. + + Returns + ------- + A boolean, or `None` if the value is not defined in the data. + """ + return self._data["isDeprecatedLicenseId"] + + @property + def version_deprecated(self) -> str | None: + """Version of the SPDX License List in which the license was deprecated, if applicable. + + Returns + ------- + Version number string, or `None` if the value is not defined in the data. + """ + return self._data.get("deprecatedVersion") + + @property + def obsoleted_by(self) -> list[dict[str, str]] | None: + """New licenses that obsolete this license, if any. + + Returns + ------- + A list of dictionaries with keys: + `id` : str + + SPDX license ID of the successor license. 
+ `expression` : str + + [SPDX license expression](https://spdx.github.io/spdx-spec/v3.0.1/annexes/spdx-license-expressions/) + which is obsoleted by the successor license; + in most cases, this is the same as the current license's ID, unless the current license + is a complex expression, and only a part of it is obsoleted by the successor. + """ + return [ + {"id": elem.text, "expression": elem.attrib.get("expression")} + for elem in self._xml.findall('.//obsoletedBy', self._ns) + ] + + @property + def version_added(self) -> str | None: + """Version of the SPDX License List in which the license was first added. + + Returns + ------- + Version number string, or `None` if the value is not defined in the data. + """ + return self._xml.attrib.get('listVersionAdded') + + @property + def comments(self) -> str | None: + """Comments about the license, if any.""" + return self._data.get("licenseComments") + + @property + def notes(self) -> str | None: + """General comments about the license, if any.""" + elem = self._xml.find('notes', self._ns) + return elem.text if elem is not None else None + + def __repr__(self): + return f"" + + def __str__(self): + return self.text_plain \ No newline at end of file diff --git a/src/licenseman/spdx/exception_db.py b/src/licenseman/spdx/exception_db.py new file mode 100644 index 0000000..1bd8e43 --- /dev/null +++ b/src/licenseman/spdx/exception_db.py @@ -0,0 +1,67 @@ +from __future__ import annotations as annotations + +from typing import TYPE_CHECKING as TYPE_CHECKING +import json as _json + +from licenseman.spdx.exception import SPDXLicenseException as _SPDXLicenseException + +if TYPE_CHECKING: + from pathlib import Path + from typing import Sequence, Generator + from licenseman.spdx.exception_list import SPDXExceptionList + + +class SPDXLicenseExceptionDB: + + def __init__( + self, + exception_list: SPDXExceptionList, + db_path: Path, + in_memory: bool = False, + verify: bool = True, + exceptions: dict[str, _SPDXLicenseException] | 
class SPDXLicenseExceptionDB:
    """Local database of SPDX license exceptions stored as one JSON file per ID.

    Parameters
    ----------
    exception_list
        SPDX exception list; its `ids` define the default set of entries.
    db_path
        Directory containing the `<exception-id>.json` files.
    in_memory
        If True, all entries are loaded into memory on instantiation.
    verify
        Default verification setting used when an entry is read from disk.
        If True and `in_memory` is False, every entry is read once
        on instantiation so that it gets verified.
    exceptions
        Already loaded entries, keyed by exception ID.
        These are returned as-is instead of being read from disk.
    """

    def __init__(
        self,
        exception_list: SPDXExceptionList,
        db_path: Path,
        in_memory: bool = False,
        verify: bool = True,
        exceptions: dict[str, _SPDXLicenseException] | None = None,
    ):
        self._exception_list = exception_list
        self._db_path = db_path
        self._in_memory = in_memory
        self._verify = verify
        self._exceptions: dict[str, _SPDXLicenseException] = exceptions or {}
        if in_memory:
            self.load(verify=verify)
        elif verify:
            # Exhaust the generator only for its side effect of verifying each entry.
            for _ in self.get(verify=True):
                pass
        return

    def load(self, exception_ids: Sequence[str] | None = None, verify: bool | None = None):
        """Load entries into memory.

        Parameters
        ----------
        exception_ids
            IDs to load. If None or empty, all IDs in the exception list are loaded.
        verify
            Verification setting for entries read from disk;
            None falls back to the instance default.
        """
        exception_ids = exception_ids or self._exception_list.ids
        for exception_id, exception_data in zip(exception_ids, self.get(exception_ids, verify=verify)):
            self._exceptions[exception_id] = exception_data
        return

    def get(self, exception_ids: Sequence[str] | None = None, verify: bool | None = None) -> Generator[_SPDXLicenseException]:
        """Yield entries, from memory when available and otherwise from disk.

        Parameters
        ----------
        exception_ids
            IDs to yield, in order. If None or empty, all IDs in the exception list are used.
        verify
            Verification setting for entries read from disk;
            None falls back to the instance default.
        """
        for exception_id in exception_ids or self._exception_list.ids:
            if exception_id in self._exceptions:
                yield self._exceptions[exception_id]
            else:
                with open(self._db_path / f"{exception_id}.json", encoding="utf-8") as f:
                    data = _json.load(f)
                yield _SPDXLicenseException(
                    data,
                    verify=verify if verify is not None else self._verify,
                )

    def alts(self, exception_ids: Sequence[str] | None = None):
        """Collect the `alts` of the given entries, grouped by alternative field name.

        Returns
        -------
        A dictionary mapping each alternative field name to a list of dictionaries,
        each holding the `id` of the entry defining it plus that entry's data for the field.
        """
        exception_ids = exception_ids or self._exception_list.ids
        alts: dict[str, list[dict[str, str]]] = {}
        for exception in self.get(exception_ids):
            for alt_name, alt_data in exception.alts.items():
                alts.setdefault(alt_name, []).append({"id": exception.id, **alt_data})
        return alts

    def __getitem__(self, exception_id: str) -> _SPDXLicenseException:
        return next(self.get([exception_id]))

    def __contains__(self, exception_id: str) -> bool:
        return exception_id in self._exception_list
__init__(self, data: dict): + self._data = data + self._map = {exception["licenseExceptionId"]: exception for exception in data["exceptions"]} + return + + @property + def exceptions(self) -> list[dict]: + """List of SPDX exceptions.""" + return self._data["exceptions"] + + @property + def ids(self) -> list[str]: + """List of SPDX exception IDs.""" + return list(self._map.keys()) + + @property + def release_date(self) -> _dt.date: + """Release date of the SPDX license list.""" + return _dt.datetime.fromisoformat(self._data["releaseDate"]).date() + + @property + def version(self) -> str: + """Version of the SPDX license list.""" + return self._data["licenseListVersion"] + + def get(self, key: str) -> dict | None: + """Get a license by its key.""" + + return self._map.get(key) + + def __getitem__(self, key: str) -> dict: + return self._map[key] + + def __contains__(self, key: str) -> bool: + return key in self._map + + def __repr__(self): + return f"" diff --git a/src/licenseman/spdx/expression.py b/src/licenseman/spdx/expression.py new file mode 100644 index 0000000..c949252 --- /dev/null +++ b/src/licenseman/spdx/expression.py @@ -0,0 +1,71 @@ +"""Process SPDX [license expressions](https://spdx.github.io/spdx-spec/v3.0.1/annexes/spdx-license-expressions/). + +See Also +-------- +- https://pypi.org/project/license-expression/ +""" + +import re as _re + +from licenseman import spdx as _spdx + + +def ids(expression: str) -> tuple[list[str], list[str]]: + """Get all SPDX license and exception IDs from an expression. + + Parameters + ---------- + expression + SPDX license expression. + + Returns + ------- + List of registered and custom SPDX license and exception IDs in the expression. 
def ids(expression: str) -> tuple[list[str], list[str]]:
    """Get all SPDX license and exception IDs from an expression.

    Parameters
    ----------
    expression
        SPDX license expression.

    Returns
    -------
    List of registered and custom SPDX license and exception IDs in the expression.
    """
    # The local names must differ from `license_ids` / `exception_ids`:
    # assigning to those names would make them locals of this function and
    # the calls below would raise UnboundLocalError.
    licenses, licenses_custom = license_ids(expression)
    exceptions, exceptions_custom = exception_ids(expression)
    return licenses + exceptions, licenses_custom + exceptions_custom


def license_ids(expression: str) -> tuple[list[str], list[str]]:
    """Get all SPDX license IDs from an expression.

    Parameters
    ----------
    expression
        SPDX license expression.

    Returns
    -------
    List of registered and custom SPDX license IDs in the expression.
    """
    return _get_ids(expression, exception=False)


def exception_ids(expression: str) -> tuple[list[str], list[str]]:
    """Get all SPDX license exception IDs from an expression.

    Parameters
    ----------
    expression
        SPDX license expression.

    Returns
    -------
    List of registered and custom SPDX license exception IDs in the expression.
    """
    return _get_ids(expression, exception=True)


def _get_ids(expression: str, exception: bool):
    """Find registered and custom (`LicenseRef-`/`AdditionRef-`) IDs in an expression.

    Registered IDs are taken from the global exception list if `exception`
    is True, otherwise from the global license list. Matches are returned
    in order of appearance, duplicates included.
    """
    list_ = _spdx._get_global_exception_list() if exception else _spdx._get_global_license_list()
    # Regex alternation takes the first alternative that matches, so longer IDs
    # must come first; otherwise e.g. 'GPL-2.0' could win over 'GPL-2.0-only'.
    alternatives = "|".join(
        _re.escape(spdx_id) for spdx_id in sorted(list_.ids, key=len, reverse=True)
    )
    if alternatives:
        # The lookarounds require the match to be a whole ID token, so that an ID
        # embedded in a longer token (e.g. 'MIT' in 'LicenseRef-MIT') is not reported.
        registered = _re.findall(
            rf"(?<![a-zA-Z0-9.-])(?:{alternatives})(?![a-zA-Z0-9.-])",
            expression,
        )
    else:
        # An empty alternation would match the empty string at every position.
        registered = []
    customs = _re.findall(
        rf"(?:DocumentRef-[a-zA-Z0-9.-]+:)?{'AdditionRef' if exception else 'LicenseRef'}-[a-zA-Z0-9.-]+",
        expression
    )
    return registered, customs
json is None: - log(key_json=key_json, key_xml=key_xml, missing_in="json", data=xml) - self._data[key_json] = xml + log(key_json=key, missing_in="json", data=xml) + self._data[key] = xml else: raise Exception( "Deprecated version mismatch between XML and JSON data. " @@ -283,7 +282,7 @@ def title_text_xml(self) -> _ElementTree.Element | None: return self._xml.find('.//titleText', self._ns) @property - def copyright_notice_xml(self) -> _ElementTree.Element | None: + def copyright_text_xml(self) -> _ElementTree.Element | None: """Copyright notice of the license is defined in the text, if any.""" return self._xml.find('.//copyrightText', self._ns) @@ -312,20 +311,28 @@ def alts(self) -> dict[str, dict[str, str]]: return alts @property - def ref_num(self) -> int: + def reference_number(self) -> int: """Reference number of the license.""" return self._data["referenceNumber"] @property - def ref_url(self) -> str: + def url_reference(self) -> str: """URL to the license reference page at SPDX.org.""" return self._data["reference"] @property - def json_url(self) -> str: + def url_json(self) -> str: """URL to the license JSON data.""" return self._data["detailsUrl"] + @property + def url_others(self) -> list[str]: + """URLs to other resources related to the license. + + This is a list of URLs identical to `cross_refs`. + """ + return self._data.get("seeAlso", []) + @property def cross_refs(self) -> list[SPDXLicenseCrossRef]: """URLs to license resources, if any.""" @@ -411,7 +418,7 @@ def version_added(self) -> str | None: ------- Version number string, or `None` if the value is not defined in the data. 
""" - return self._xml.attrib.get('licenseVersion') + return self._xml.attrib.get('listVersionAdded') @property def comments(self) -> str | None: diff --git a/src/licenseman/spdx/license_db.py b/src/licenseman/spdx/license_db.py index 85eafb0..411ad64 100644 --- a/src/licenseman/spdx/license_db.py +++ b/src/licenseman/spdx/license_db.py @@ -44,7 +44,7 @@ def get(self, license_ids: Sequence[str] | None = None, verify: bool | None = No if license_id in self._licenses: yield self._licenses[license_id] else: - with open(self._db_path / f"licenses/{license_id}.json") as f: + with open(self._db_path / f"{license_id}.json") as f: data = _json.load(f) yield _SPDXLicense( data, diff --git a/src/licenseman/spdx/license_list.py b/src/licenseman/spdx/license_list.py index 96b90b9..9472890 100644 --- a/src/licenseman/spdx/license_list.py +++ b/src/licenseman/spdx/license_list.py @@ -41,3 +41,6 @@ def __getitem__(self, key: str) -> dict: def __contains__(self, key: str) -> bool: return key in self._map + + def __repr__(self): + return f"" diff --git a/src/licenseman/spdx/license_text.py b/src/licenseman/spdx/license_text.py index 2b1e359..3d9bb60 100644 --- a/src/licenseman/spdx/license_text.py +++ b/src/licenseman/spdx/license_text.py @@ -27,6 +27,7 @@ class SPDXLicenseText: ---------- - official matcher: https://github.com/spdx/spdx-license-matcher - third-party matcher: https://github.com/MikeMoore63/spdx_matcher + - Matching Guidelines: https://spdx.github.io/spdx-spec/v3.0.1/annexes/license-matching-guidelines-and-templates/ """ def __init__(self, text: ET.Element):