From 758cf9cd05d14828d05ec77f7e7a3d0ed6e9bf8a Mon Sep 17 00:00:00 2001 From: RepoDynamicsBot <80158628+AAriam@users.noreply.github.com> Date: Sun, 13 Oct 2024 10:15:14 +0200 Subject: [PATCH] Release version 0.0.0.dev6 --- pyproject.toml | 12 +- requirements.txt | 7 + src/licenseman/spdx/entry.py | 395 ++++++++++++++++++++++++++++ src/licenseman/spdx/exception.py | 327 +---------------------- src/licenseman/spdx/license.py | 370 +++----------------------- src/licenseman/spdx/license_text.py | 139 +++++----- 6 files changed, 516 insertions(+), 734 deletions(-) create mode 100644 requirements.txt create mode 100644 src/licenseman/spdx/entry.py diff --git a/pyproject.toml b/pyproject.toml index 79811ca..ffbc272 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,15 +17,15 @@ namespaces = true # ----------------------------------------- Project Metadata ------------------------------------- # [project] -version = "0.0.0.dev4" +version = "0.0.0.dev6" name = "LicenseMan" requires-python = ">=3.10" dependencies = [ - "LoggerMan == 0.0.0.dev49", - "PyLinks", + "LoggerMan == 0.0.0.dev50", + "PyLinks == 0.0.0.dev34", "PkgData", - "PySerials", - "MDit == 0.0.0.dev20", - "ExceptionMan == 0.0.0.dev20", + "PySerials == 0.0.0.dev24", + "MDit == 0.0.0.dev21", + "ExceptionMan == 0.0.0.dev21", "platformdirs >= 4.3, < 5", ] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..731477d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +LoggerMan == 0.0.0.dev50 +PyLinks == 0.0.0.dev34 +PkgData +PySerials == 0.0.0.dev24 +MDit == 0.0.0.dev21 +ExceptionMan == 0.0.0.dev21 +platformdirs >= 4.3, < 5 \ No newline at end of file diff --git a/src/licenseman/spdx/entry.py b/src/licenseman/spdx/entry.py new file mode 100644 index 0000000..5982848 --- /dev/null +++ b/src/licenseman/spdx/entry.py @@ -0,0 +1,395 @@ +from __future__ import annotations as _annotations + +import copy as _copy +from typing import TYPE_CHECKING as _TYPE_CHECKING +import datetime as 
_dt +from xml.etree import ElementTree as _ElementTree + +from licenseman import logger as _logger +from licenseman.spdx.license_text import SPDXLicenseTextPlain + +if _TYPE_CHECKING: + from typing import Literal, Any + + + +class SPDXEntry: + """SPDX License or exception definition. + + References + ---------- + - [SPDX Docs](https://github.com/spdx/license-list-XML/blob/main/DOCS/README.md) + - [SPDX Docs - XML Fields](https://github.com/spdx/license-list-XML/blob/main/DOCS/xml-fields.md) + - [XML Schema](https://github.com/spdx/license-list-XML/blob/main/schema/ListedLicense.xsd) + - [GitHub Repository](https://github.com/spdx/license-list-XML) + """ + + def __init__(self, data: dict, entry_type: Literal["license", "exception"], verify: bool = True): + try: + root = _ElementTree.fromstring(data["xml"]) + except _ElementTree.ParseError as e: + raise Exception(f"Error parsing license XML content.") from e + self._ns_url = 'http://www.spdx.org/license' + self._ns: dict = {'': self._ns_url} + self._xml: _ElementTree.Element = root.find(entry_type, self._ns) + self._entry_type: Literal["license", "exception"] = entry_type + self._data: dict = data + if verify: + self.verify() + return + + def verify(self): + + def log(key_json: str, missing_in: Literal["xml", "json"], data: Any, key_xml: str | None = None): + if key_xml is None: + key_xml = key_json + if missing_in == "xml": + missing_source = "XML" + existing_source = "JSON" + missing_key = key_xml + existing_key = key_json + else: + missing_source = "JSON" + existing_source = "XML" + missing_key = key_json + existing_key = key_xml + _logger.notice( + f"{self.id} License{" Exception" if self._entry_type == "exception" else ""} Verification", + f"The value of '{missing_key}' is not defined in the {missing_source} data. 
" + f"Using the {existing_source} data value of '{existing_key}':", + _logger.pretty(data) + ) + return + + def osi_approved(): + key = "isOsiApproved" + xml_raw = self._xml.attrib.get(key) + if xml_raw == "true": + xml = True + elif xml_raw == "false": + xml = False + else: + if xml_raw is not None: + raise Exception(f"Invalid value for '{key}' in XML data: {xml_raw}") + xml = None + json = self._data[key] + if json != xml: + if xml is None: + log(key, "xml", json) + return + if json is None: + log(key, "json", xml) + self._data[key] = xml + return + raise Exception( + "OSI approved mismatch between XML and JSON data. " + f"XML: {xml}, JSON: {json}" + ) + return + + def deprecated_version(): + key = "deprecatedVersion" + xml = self._xml.attrib.get(key) + json = self._data.get(key) + if json != xml: + if xml is None: + log(key_json=key, missing_in="xml", data=json) + elif json is None: + log(key_json=key, missing_in="json", data=xml) + self._data[key] = xml + else: + raise Exception( + "Deprecated version mismatch between XML and JSON data. " + f"XML: {xml}, JSON: {json}" + ) + return + + def cross_refs(): + xml_elem = self._xml.find('crossRefs', self._ns) + xml = sorted( + [ref.text.strip() for ref in xml_elem.findall('crossRef', self._ns)] + ) if xml_elem else [] + json_seealso = sorted(self._data.get("seeAlso", [])) + + if json_seealso != xml: + if not xml: + log("seeAlso", "xml", data=json_seealso) + return + if not json_seealso: + log("seeAlso", "json", data=xml, key_xml="crossRefs") + self._data["seeAlso"] = xml + return + raise Exception( + "Cross references mismatch between XML and JSON data. " + f"XML: {xml}, JSON: {json_seealso}" + ) + if self._entry_type == "license": + json = sorted([ref["url"] for ref in self._data.get("crossRef", [])]) + if json != json_seealso: + raise Exception( + "Cross references mismatch between 'crossRefs' and 'seeAlso' JSON data. 
", + f"CrossRefs: {json}, SeeAlso: {json_seealso}" + ) + return + + if self.id != self._xml.attrib.get('licenseId'): + raise Exception("License ID mismatch between XML and JSON data.") + if self._data["name"] != self._xml.attrib.get('name'): + raise Exception("License name mismatch between XML and JSON data.") + + deprecated_version() + cross_refs() + if self._entry_type == "license": + osi_approved() + return + + def generate_text_plain( + self, + title: str | bool = True, + copyright_notice: str | bool = False, + optionals: bool | list[bool] = True, + alts: dict[str, str] | None = None, + line_length: int = 88, + list_indent: int = 0, + item_indent: int = 1, + item_spacing: int = 1, + bullet: str | int | None = 1, + title_centered: bool = False, + title_underline: Literal["-", "=", "_", "*"] = "=", + title_underline_full: bool = False, + subtitle_underline: Literal["-", "=", "_", "*"] = "-", + line_breaks: int = 2, + ) -> str: + return SPDXLicenseTextPlain(text=self.text_xml).generate( + title=title, + copyright_notice=copyright_notice, + optionals=optionals, + alts=alts, + line_length=line_length, + list_indent=list_indent, + item_indent=item_indent, + item_spacing=item_spacing, + bullet=bullet, + title_centered=title_centered, + title_underline=title_underline, + title_underline_full=title_underline_full, + subtitle_underline=subtitle_underline, + line_breaks=line_breaks, + ) + + @property + def raw_data(self) -> dict: + """Raw license data.""" + return self._data + + @property + def id(self) -> str: + """SPDX license ID.""" + return self._data["licenseId" if self._entry_type == "license" else "licenseExceptionId"] + + @property + def name(self) -> str: + """Full name of the license""" + return self._data["name"] + + @property + def text_plain(self) -> str: + """Original license text in plain text format.""" + return self._data["licenseText" if self._entry_type == "license" else "licenseExceptionText"] + + @property + def text_template(self) -> str | None: + 
"""License text template.""" + return self._data.get("standardLicenseTemplate" if self._entry_type == "license" else "licenseExceptionTemplate") + + @property + def text_html(self) -> str | None: + """Original license text in HTML format.""" + return self._data.get("licenseTextHtml" if self._entry_type == "license" else "exceptionTextHtml") + + @property + def text_xml(self) -> _ElementTree.Element: + return self._xml.find('text', self._ns) + + @property + def text_xml_str(self) -> str: + return self._xml_str(self.text_xml) + + @property + def title_text_xml(self) -> _ElementTree.Element | None: + """Title of the license as defined in the text, if any.""" + return self._xml.find('.//titleText', self._ns) + + @property + def copyright_text_xml(self) -> _ElementTree.Element | None: + """Copyright notice of the license is defined in the text, if any.""" + return self._xml.find('.//copyrightText', self._ns) + + @property + def optionals_xml(self) -> list[_ElementTree.Element]: + """Optional fields in the license text, if any.""" + return self._xml.findall('.//optional', self._ns) + + @property + def optionals_xml_str(self) -> list[str]: + """Optional fields in the license text, if any.""" + out = [] + for optional in self.optionals_xml: + out.append(self._xml_str(optional)) + return out + + @property + def alts(self) -> dict[str, dict[str, str]]: + """ + + Returns + ------- + A dictionary where keys are the alternative field names, and values are dictionaries with keys: + `text` : str + + Default value. + `match` : str + + Regular expression (RegEx) pattern to validate user input for `text`. 
+ """ + alts = {} + for alt in self._xml.findall('.//alt', self._ns): + alts[alt.attrib['name']] = {'text': alt.text, 'match': alt.attrib['match']} + return alts + + @property + def reference_number(self) -> int: + """Reference number of the license.""" + return self._data["referenceNumber"] + + @property + def url_reference(self) -> str: + """URL to the license reference page at SPDX.org.""" + return self._data["reference"] + + @property + def url_json(self) -> str: + """URL to the license JSON data.""" + return self._data["detailsUrl"] + + @property + def url_cross_refs(self) -> list[str]: + """URLs to license resources, if any.""" + return self._data.get("seeAlso", []) + + @property + def deprecated(self) -> bool: + """Whether the license is deprecated. + + Returns + ------- + A boolean, or `None` if the value is not defined in the data. + """ + return self._data["isDeprecatedLicenseId"] + + @property + def version_deprecated(self) -> str | None: + """Version of the SPDX License List in which the license was deprecated, if applicable. + + Returns + ------- + Version number string, or `None` if the value is not defined in the data. + """ + return self._data.get("deprecatedVersion") + + @property + def obsoleted_by(self) -> list[dict[str, str]] | None: + """New licenses that obsolete this license, if any. + + Returns + ------- + A list of dictionaries with keys: + `id` : str + + SPDX license ID of the successor license. + `expression` : str + + [SPDX license expression](https://spdx.github.io/spdx-spec/v3.0.1/annexes/spdx-license-expressions/) + which is obsoleted by the successor license; + in most cases, this is the same as the current license's ID, unless the current license + is a complex expression, and only a part of it is obsoleted by the successor. 
+ """ + return [ + {"id": elem.text, "expression": elem.attrib.get("expression")} + for elem in self._xml.findall('.//obsoletedBy', self._ns) + ] + + @property + def version_added(self) -> str | None: + """Version of the SPDX License List in which the license was first added. + + Returns + ------- + Version number string, or `None` if the value is not defined in the data. + """ + return self._xml.attrib.get('listVersionAdded') + + @property + def comments(self) -> str | None: + """Comments about the license, if any.""" + return self._data.get("licenseComments") + + @property + def notes(self) -> str | None: + """General comments about the entry, if any.""" + elem = self._xml.find('notes', self._ns) + return elem.text if elem is not None else None + + @property + def xml(self) -> _ElementTree.Element: + return self._xml + + @property + def xml_attributes(self) -> dict[str, str]: + return self._xml.attrib + + @property + def xml_tags(self) -> list[str]: + """Set of all XML tags used in the license.""" + def traverse(elem): + tags.add(elem.tag.removeprefix(f"{{{self._ns_url}}}")) + for child in elem: + traverse(child) + return + + tags = set() + traverse(self._xml) + return list(tags) + + def xml_tag_paths(self, tag: str) -> list[str]: + """Get all paths to XML elements with a specific tag.""" + + def find_paths(current_element, current_path): + # Construct the current element's path + current_tag = current_element.tag.removeprefix(f"{{{self._ns_url}}}") + new_path = f"{current_path}/{current_tag}" if current_path else current_tag + if current_tag == tag: + paths.append(new_path) + for child in current_element: + find_paths(child, new_path) + return + + paths = [] + find_paths(self._xml, "") + return paths + + def __repr__(self): + return f"" + + def __str__(self): + return self.text_plain + + @staticmethod + def _xml_str(element: _ElementTree.Element): + optional_copy = _copy.deepcopy(element) + optional_copy.tail = None + return _ElementTree.tostring( + 
optional_copy, + encoding='unicode', + xml_declaration=False, + ) \ No newline at end of file diff --git a/src/licenseman/spdx/exception.py b/src/licenseman/spdx/exception.py index 552ac03..b8d2fbc 100644 --- a/src/licenseman/spdx/exception.py +++ b/src/licenseman/spdx/exception.py @@ -1,18 +1,7 @@ -from __future__ import annotations as _annotations +from licenseman.spdx.entry import SPDXEntry -from typing import TYPE_CHECKING as _TYPE_CHECKING -import datetime as _dt -from xml.etree import ElementTree as _ElementTree -from dataclasses import dataclass as _dataclass -from licenseman import logger as _logger -from licenseman.spdx.license_text import SPDXLicenseTextPlain - -if _TYPE_CHECKING: - from typing import Literal, Any - - -class SPDXLicenseException: +class SPDXLicenseException(SPDXEntry): """SPDX License definition. Parameters @@ -28,317 +17,9 @@ class SPDXLicenseException: - [GitHub Repository](https://github.com/spdx/license-list-XML) """ - def __init__(self,data: dict, verify: bool = True): - try: - root = _ElementTree.fromstring(data["xml"]) - except _ElementTree.ParseError as e: - raise Exception(f"Error parsing license XML content.") from e - self._ns: dict = {'': 'http://www.spdx.org/license'} - self._xml: _ElementTree.Element = root.find('exception', self._ns) - self._data: dict = data - if verify: - self.verify() + def __init__(self, data: dict, verify: bool = True): + super().__init__(data=data, entry_type="exception", verify=verify) return - def verify(self): - - def log(key_json: str, missing_in: Literal["xml", "json"], data: Any, key_xml: str | None = None): - if key_xml is None: - key_xml = key_json - if missing_in == "xml": - missing_source = "XML" - existing_source = "JSON" - missing_key = key_xml - existing_key = key_json - else: - missing_source = "JSON" - existing_source = "XML" - missing_key = key_json - existing_key = key_xml - _logger.notice( - log_title, - f"The value of '{missing_key}' is not defined in the {missing_source} data. 
" - f"Using the {existing_source} data value of '{existing_key}':", - _logger.pretty(data) - ) - return - - def deprecated_version(): - key = "deprecatedVersion" - xml = self._xml.attrib.get(key) - json = self._data.get(key) - if json != xml: - if xml is None: - log(key_json=key, missing_in="xml", data=json) - elif json is None: - log(key_json=key, missing_in="json", data=xml) - self._data[key] = xml - else: - raise Exception( - "Deprecated version mismatch between XML and JSON data. " - f"XML: {xml}, JSON: {json}" - ) - return - - def cross_refs(): - xml_elem = self._xml.find('crossRefs', self._ns) - xml = sorted( - [ref.text.strip() for ref in xml_elem.findall('crossRef', self._ns)] - ) if xml_elem else [] - json_seealso = sorted(self._data.get("seeAlso", [])) - if json_seealso != xml: - if not xml: - log("seeAlso", "xml", data=json_seealso) - return - if not json_seealso: - log("seeAlso", "json", data=xml) - self._data["seeAlso"] = xml - return - raise Exception( - "Cross references mismatch between XML and JSON data. 
" - f"XML: {xml}, JSON: {json_seealso}" - ) - - log_title = f"{self.id} License Exception Verification" - if self._data["licenseExceptionId"] != self._xml.attrib.get('licenseId'): - raise Exception("License exception ID mismatch between XML and JSON data.") - if self._data["name"] != self._xml.attrib.get('name'): - raise Exception("License exception name mismatch between XML and JSON data.") - deprecated_version() - cross_refs() - return - - def generate_text( - self, - title: str | bool = True, - copyright: str | bool = False, - optionals: bool | list[bool] = True, - alts: dict[str, str] | None = None, - line_length: int = 88, - list_indent: int = 3, - list_item_indent: int = 2, - list_item_vertical_spacing: int = 2, - list_bullet_prefer_default: bool = True, - list_bullet_ordered: bool = True, - list_bullet_unordered_char: str = "–", - title_centered: bool = True, - title_separator_full_line: bool = True, - title_separator: Literal["-", "=", "_", "*"] = "=", - subtitle_separator: Literal["-", "=", "_", "*"] = "-", - ) -> tuple[str, str | None]: - """Generate plain-text license. - - Parameters - ---------- - title - Determines how to treat the license title, if any. - Since the title is [optional](https://spdx.github.io/spdx-spec/v3.0.1/annexes/license-matching-guidelines-and-templates/#license-name-or-title) - and not used in matching, it can be omitted or replaced with a custom title. - If True, the title is included as-is. If False, the title is omitted. - If a string, the title is replaced with the custom string, if a title is present. - copyright - Determines how to treat the copyright notice, if any. - Since the copyright notice is [optional](https://spdx.github.io/spdx-spec/v3.0.1/annexes/license-matching-guidelines-and-templates/#copyright-notice) - and not used in matching, it can be omitted or replaced with a custom notice. - If True, the notice is included as-is. If False, the notice is omitted. 
- If a string, the notice is replaced with the custom string, if a notice is present. - optionals : bool, optional - Whether to include elements in the output, by default True. - alts : dict[str, int] | None, optional - A dictionary specifying choices for elements. Keys are 'name' attributes, - and values are the value to use. - line_length - The maximum line length for the plain-text output. - list_indent - The number of spaces separating list items from the left margin. - list_item_indent - The number of spaces separating list items from the bullet character. - list_item_vertical_spacing - The number of newlines separating list items. - list_bullet_prefer_default - Whether to use the license's default bullet character or number for list items, if available. - list_bullet_ordered - Whether to use numbered (True) or bulleted (False) list items, - if no default bullet is available or `list_bullet_prefer_default` is False. - list_bullet_unordered_char - The character to use for unordered list items if `list_bullet_ordered` is False. - - Returns - ------- - The plain-text version of the license - plus the license header text, if present. 
- """ - return SPDXLicenseTextPlain(text=self.text_xml).generate( - title=title, - copyright=copyright, - optionals=optionals, - alts=alts, - line_length=line_length, - list_indent=list_indent, - list_item_indent=list_item_indent, - list_item_vertical_spacing=list_item_vertical_spacing, - list_bullet_prefer_default=list_bullet_prefer_default, - list_bullet_ordered=list_bullet_ordered, - list_bullet_unordered_char=list_bullet_unordered_char, - title_centered=title_centered, - title_separator_full_line=title_separator_full_line, - title_separator=title_separator, - subtitle_separator=subtitle_separator, - ) - - @property - def raw_data(self) -> dict: - """Raw license data.""" - return self._data - - @property - def id(self) -> str: - """SPDX license ID.""" - return self._data["licenseExceptionId"] - - @property - def name(self) -> str: - """Full name of the license""" - return self._data["name"] - - @property - def text_plain(self) -> str: - """Original license text in plain text format.""" - return self._data["licenseExceptionText"] - - @property - def text_html(self) -> str | None: - """Original license text in HTML format.""" - return self._data.get("exceptionTextHtml") - - @property - def text_template(self) -> str | None: - """License text template.""" - return self._data.get("licenseExceptionTemplate") - - @property - def text_xml(self) -> _ElementTree.Element: - return self._xml.find('text', self._ns) - - @property - def title_text_xml(self) -> _ElementTree.Element | None: - """Title of the license as defined in the text, if any.""" - return self._xml.find('.//titleText', self._ns) - - @property - def copyright_text_xml(self) -> _ElementTree.Element | None: - """Copyright notice of the license is defined in the text, if any.""" - return self._xml.find('.//copyrightText', self._ns) - - @property - def optionals_xml(self) -> list[_ElementTree.Element]: - """Optional fields in the license text, if any.""" - return self._xml.findall('.//optional', self._ns) - - 
@property - def alts(self) -> dict[str, dict[str, str]]: - """ - - Returns - ------- - A dictionary where keys are the alternative field names, and values are dictionaries with keys: - `text` : str - - Default value. - `match` : str - - Regular expression (RegEx) pattern to validate user input for `text`. - """ - alts = {} - for alt in self._xml.findall('.//alt', self._ns): - alts[alt.attrib['name']] = {'text': alt.text, 'match': alt.attrib['match']} - return alts - - @property - def reference_number(self) -> int: - """Reference number of the license.""" - return self._data["referenceNumber"] - - @property - def url_reference(self) -> str: - """URL to the license reference page at SPDX.org.""" - return self._data["reference"] - - @property - def url_json(self) -> str: - """URL to the license JSON data.""" - return self._data["detailsUrl"] - - @property - def url_others(self) -> list[str]: - """URLs to license resources, if any.""" - return self._data.get("seeAlso", []) - - @property - def deprecated(self) -> bool: - """Whether the license is deprecated. - - Returns - ------- - A boolean, or `None` if the value is not defined in the data. - """ - return self._data["isDeprecatedLicenseId"] - - @property - def version_deprecated(self) -> str | None: - """Version of the SPDX License List in which the license was deprecated, if applicable. - - Returns - ------- - Version number string, or `None` if the value is not defined in the data. - """ - return self._data.get("deprecatedVersion") - - @property - def obsoleted_by(self) -> list[dict[str, str]] | None: - """New licenses that obsolete this license, if any. - - Returns - ------- - A list of dictionaries with keys: - `id` : str - - SPDX license ID of the successor license. 
- `expression` : str - - [SPDX license expression](https://spdx.github.io/spdx-spec/v3.0.1/annexes/spdx-license-expressions/) - which is obsoleted by the successor license; - in most cases, this is the same as the current license's ID, unless the current license - is a complex expression, and only a part of it is obsoleted by the successor. - """ - return [ - {"id": elem.text, "expression": elem.attrib.get("expression")} - for elem in self._xml.findall('.//obsoletedBy', self._ns) - ] - - @property - def version_added(self) -> str | None: - """Version of the SPDX License List in which the license was first added. - - Returns - ------- - Version number string, or `None` if the value is not defined in the data. - """ - return self._xml.attrib.get('listVersionAdded') - - @property - def comments(self) -> str | None: - """Comments about the license, if any.""" - return self._data.get("licenseComments") - - @property - def notes(self) -> str | None: - """General comments about the license, if any.""" - elem = self._xml.find('notes', self._ns) - return elem.text if elem is not None else None - def __repr__(self): return f"" - - def __str__(self): - return self.text_plain \ No newline at end of file diff --git a/src/licenseman/spdx/license.py b/src/licenseman/spdx/license.py index 2871720..74cc716 100644 --- a/src/licenseman/spdx/license.py +++ b/src/licenseman/spdx/license.py @@ -5,11 +5,11 @@ from xml.etree import ElementTree as _ElementTree from dataclasses import dataclass as _dataclass -from licenseman import logger as _logger from licenseman.spdx.license_text import SPDXLicenseTextPlain +from licenseman.spdx.entry import SPDXEntry if _TYPE_CHECKING: - from typing import Literal, Any + from typing import Literal @_dataclass @@ -24,7 +24,7 @@ class SPDXLicenseCrossRef: wayback: bool -class SPDXLicense: +class SPDXLicense(SPDXEntry): """SPDX License definition. 
Parameters @@ -40,298 +40,68 @@ class SPDXLicense: - [GitHub Repository](https://github.com/spdx/license-list-XML) """ - def __init__(self,data: dict, verify: bool = True): - try: - root = _ElementTree.fromstring(data["xml"]) - except _ElementTree.ParseError as e: - raise Exception(f"Error parsing license XML content.") from e - self._ns: dict = {'': 'http://www.spdx.org/license'} - self._xml: _ElementTree.Element = root.find('license', self._ns) - self._data: dict = data - if verify: - self.verify() + def __init__(self, data: dict, verify: bool = True): + super().__init__(data=data, entry_type="license", verify=verify) return - def verify(self): - - def log(key_json: str, missing_in: Literal["xml", "json"], data: Any, key_xml: str | None = None): - if key_xml is None: - key_xml = key_json - if missing_in == "xml": - missing_source = "XML" - existing_source = "JSON" - missing_key = key_xml - existing_key = key_json - else: - missing_source = "JSON" - existing_source = "XML" - missing_key = key_json - existing_key = key_xml - _logger.notice( - log_title, - f"The value of '{missing_key}' is not defined in the {missing_source} data. " - f"Using the {existing_source} data value of '{existing_key}':", - _logger.pretty(data) - ) - return - - def osi_approved(): - key = "isOsiApproved" - xml_raw = self._xml.attrib.get(key) - if xml_raw == "true": - xml = True - elif xml_raw == "false": - xml = False - else: - if xml_raw is not None: - raise Exception(f"Invalid value for '{key}' in XML data: {xml_raw}") - xml = None - json = self.osi_approved - if json != xml: - if xml is None: - log(key, "xml", json) - return - if json is None: - log(key, "json", xml) - self._data[key] = xml - return - raise Exception( - "OSI approved mismatch between XML and JSON data. 
" - f"XML: {xml}, JSON: {self.osi_approved}" - ) - return - - def deprecated_version(): - key = "deprecatedVersion" - xml = self._xml.attrib.get(key) - json = self._data.get(key) - if json != xml: - if xml is None: - log(key_json=key, missing_in="xml", data=json) - elif json is None: - log(key_json=key, missing_in="json", data=xml) - self._data[key] = xml - else: - raise Exception( - "Deprecated version mismatch between XML and JSON data. " - f"XML: {xml}, JSON: {json}" - ) - return - - def cross_refs(): - xml_elem = self._xml.find('crossRefs', self._ns) - xml = sorted( - [ref.text.strip() for ref in xml_elem.findall('crossRef', self._ns)] - ) if xml_elem else [] - json = sorted([ref["url"] for ref in self._data.get("crossRef", [])]) - json_seealso = sorted(self._data.get("seeAlso", [])) - if json != json_seealso: - raise Exception( - "Cross references mismatch between 'crossRefs' and 'seeAlso' JSON data. ", - f"CrossRefs: {json}, SeeAlso: {json_seealso}" - ) - if json != xml: - if not xml: - log("crossRef", "xml", data=json) - return - raise Exception( - "Cross references mismatch between XML and JSON data. 
" - f"XML: {xml}, JSON: {json}" - ) - - log_title = f"{self.id} License Verification" - if self._data["licenseId"] != self._xml.attrib.get('licenseId'): - raise Exception("License ID mismatch between XML and JSON data.") - if self._data["name"] != self._xml.attrib.get('name'): - raise Exception("License name mismatch between XML and JSON data.") - osi_approved() - deprecated_version() - cross_refs() - return - - def generate_text( + def generate_header_plain( self, title: str | bool = True, - copyright: str | bool = False, - optionals: bool = True, + copyright_notice: str | bool = False, + optionals: bool | list[bool] = True, alts: dict[str, str] | None = None, line_length: int = 88, - list_indent: int = 2, - list_item_indent: int = 1, - list_item_vertical_spacing: int = 2, - list_bullet_prefer_default: bool = True, - list_bullet_ordered: bool = True, - list_bullet_unordered_char: str = "–", - heading_char: str = "=", - subheading_char: str = "–", - ) -> tuple[str, str | None]: - """Generate plain-text license. - - Parameters - ---------- - title - Determines how to treat the license title, if any. - Since the title is [optional](https://spdx.github.io/spdx-spec/v3.0.1/annexes/license-matching-guidelines-and-templates/#license-name-or-title) - and not used in matching, it can be omitted or replaced with a custom title. - If True, the title is included as-is. If False, the title is omitted. - If a string, the title is replaced with the custom string, if a title is present. - copyright - Determines how to treat the copyright notice, if any. - Since the copyright notice is [optional](https://spdx.github.io/spdx-spec/v3.0.1/annexes/license-matching-guidelines-and-templates/#copyright-notice) - and not used in matching, it can be omitted or replaced with a custom notice. - If True, the notice is included as-is. If False, the notice is omitted. - If a string, the notice is replaced with the custom string, if a notice is present. 
- optionals : bool, optional - Whether to include elements in the output, by default True. - alts : dict[str, int] | None, optional - A dictionary specifying choices for elements. Keys are 'name' attributes, - and values are the value to use. - line_length - The maximum line length for the plain-text output. - list_indent - The number of spaces separating list items from the left margin. - list_item_indent - The number of spaces separating list items from the bullet character. - list_item_vertical_spacing - The number of newlines separating list items. - list_bullet_prefer_default - Whether to use the license's default bullet character or number for list items, if available. - list_bullet_ordered - Whether to use numbered (True) or bulleted (False) list items, - if no default bullet is available or `list_bullet_prefer_default` is False. - list_bullet_unordered_char - The character to use for unordered list items if `list_bullet_ordered` is False. - - Returns - ------- - The plain-text version of the license - plus the license header text, if present. 
- """ - return SPDXLicenseTextPlain(text=self.text_xml).generate( + list_indent: int = 0, + item_indent: int = 1, + item_spacing: int = 1, + bullet: str | int | None = 1, + title_centered: bool = False, + title_underline: Literal["-", "=", "_", "*"] = "=", + title_underline_full: bool = False, + subtitle_underline: Literal["-", "=", "_", "*"] = "-", + line_breaks: int = 2, + ) -> str: + if not self.header_xml: + return "" + return SPDXLicenseTextPlain(text=self.header_xml).generate( title=title, - copyright=copyright, + copyright_notice=copyright_notice, optionals=optionals, alts=alts, line_length=line_length, list_indent=list_indent, - list_item_indent=list_item_indent, - list_item_vertical_spacing=list_item_vertical_spacing, - list_bullet_prefer_default=list_bullet_prefer_default, - list_bullet_ordered=list_bullet_ordered, - list_bullet_unordered_char=list_bullet_unordered_char, - title_separator=heading_char, - subtitle_separator=subheading_char, + item_indent=item_indent, + item_spacing=item_spacing, + bullet=bullet, + title_centered=title_centered, + title_underline=title_underline, + title_underline_full=title_underline_full, + subtitle_underline=subtitle_underline, + line_breaks=line_breaks, ) - @property - def raw_data(self) -> dict: - """Raw license data.""" - return self._data - - @property - def id(self) -> str: - """SPDX license ID.""" - return self._data["licenseId"] - - @property - def name(self) -> str: - """Full name of the license""" - return self._data["name"] - - @property - def text_plain(self) -> str: - """Original license text in plain text format.""" - return self._data["licenseText"] - - @property - def text_html(self) -> str | None: - """Original license text in HTML format.""" - return self._data.get("licenseTextHtml") - - @property - def text_template(self) -> str | None: - """License text template.""" - return self._data.get("standardLicenseTemplate") - - @property - def text_xml(self) -> _ElementTree.Element: - return 
self._xml.find('text', self._ns) - @property def header_plain(self) -> str | None: """Original license header in plain text format.""" return self._data.get("standardLicenseHeader") - @property - def header_html(self) -> str | None: - """Original license header in HTML format.""" - return self._data.get("standardLicenseHeaderHtml") - @property def header_template(self) -> str | None: """License header template.""" return self._data.get("standardLicenseHeaderTemplate") @property - def header_xml(self) -> _ElementTree.Element: - return self._xml.find('.//standardLicenseHeader', self._ns) - - @property - def title_text_xml(self) -> _ElementTree.Element | None: - """Title of the license as defined in the text, if any.""" - return self._xml.find('.//titleText', self._ns) - - @property - def copyright_text_xml(self) -> _ElementTree.Element | None: - """Copyright notice of the license is defined in the text, if any.""" - return self._xml.find('.//copyrightText', self._ns) - - @property - def optionals_xml(self) -> list[_ElementTree.Element]: - """Optional fields in the license text, if any.""" - return self._xml.findall('.//optional', self._ns) - - @property - def alts(self) -> dict[str, dict[str, str]]: - """ - - Returns - ------- - A dictionary where keys are the alternative field names, and values are dictionaries with keys: - `text` : str - - Default value. - `match` : str - - Regular expression (RegEx) pattern to validate user input for `text`. 
- """ - alts = {} - for alt in self._xml.findall('.//alt', self._ns): - alts[alt.attrib['name']] = {'text': alt.text, 'match': alt.attrib['match']} - return alts - - @property - def reference_number(self) -> int: - """Reference number of the license.""" - return self._data["referenceNumber"] - - @property - def url_reference(self) -> str: - """URL to the license reference page at SPDX.org.""" - return self._data["reference"] + def header_html(self) -> str | None: + """Original license header in HTML format.""" + return self._data.get("standardLicenseHeaderHtml") @property - def url_json(self) -> str: - """URL to the license JSON data.""" - return self._data["detailsUrl"] + def header_xml(self) -> _ElementTree.Element: + return self._xml.find('.//standardLicenseHeader', self._ns) @property - def url_others(self) -> list[str]: - """URLs to other resources related to the license. - - This is a list of URLs identical to `cross_refs`. - """ - return self._data.get("seeAlso", []) + def header_xml_str(self) -> str: + return self._xml_str(self.header_xml) @property def cross_refs(self) -> list[SPDXLicenseCrossRef]: @@ -368,71 +138,5 @@ def fsf_libre(self) -> bool | None: """ return self._data.get("isFsfLibre") - @property - def deprecated(self) -> bool: - """Whether the license is deprecated. - - Returns - ------- - A boolean, or `None` if the value is not defined in the data. - """ - return self._data["isDeprecatedLicenseId"] - - @property - def version_deprecated(self) -> str | None: - """Version of the SPDX License List in which the license was deprecated, if applicable. - - Returns - ------- - Version number string, or `None` if the value is not defined in the data. - """ - return self._data.get("deprecatedVersion") - - @property - def obsoleted_by(self) -> list[dict[str, str]] | None: - """New licenses that obsolete this license, if any. - - Returns - ------- - A list of dictionaries with keys: - `id` : str - - SPDX license ID of the successor license. 
- `expression` : str - - [SPDX license expression](https://spdx.github.io/spdx-spec/v3.0.1/annexes/spdx-license-expressions/) - which is obsoleted by the successor license; - in most cases, this is the same as the current license's ID, unless the current license - is a complex expression, and only a part of it is obsoleted by the successor. - """ - return [ - {"id": elem.text, "expression": elem.attrib.get("expression")} - for elem in self._xml.findall('.//obsoletedBy', self._ns) - ] - - @property - def version_added(self) -> str | None: - """Version of the SPDX License List in which the license was first added. - - Returns - ------- - Version number string, or `None` if the value is not defined in the data. - """ - return self._xml.attrib.get('listVersionAdded') - - @property - def comments(self) -> str | None: - """Comments about the license, if any.""" - return self._data.get("licenseComments") - - @property - def notes(self) -> str | None: - """General comments about the license, if any.""" - elem = self._xml.find('notes', self._ns) - return elem.text if elem is not None else None - def __repr__(self): return f"" - - def __str__(self): - return self.text_plain \ No newline at end of file diff --git a/src/licenseman/spdx/license_text.py b/src/licenseman/spdx/license_text.py index 3d9bb60..ea00df0 100644 --- a/src/licenseman/spdx/license_text.py +++ b/src/licenseman/spdx/license_text.py @@ -54,7 +54,7 @@ def generate( self, alts: dict[str, str] | None = None, optionals: bool | list[bool] = True, - ) -> tuple[Any, Any]: + ) -> Any: """Generate license full text and header. 
Parameters @@ -69,10 +69,7 @@ def generate( """ self._alt = alts or {} self._optionals = optionals - fulltext = self.generate_full(self._text) - header = self._text.find('.//standardLicenseHeader', self._ns) - notice = (self.generate_notice(header)) if header else None - return fulltext, notice + return self.generate_full(self._text) def process(self, element: ET.Element) -> str: tag = self.clean_tag(element.tag) @@ -183,40 +180,38 @@ def __init__(self, text: ET.Element): self._copyright: str | bool = False self._optionals: bool | list[bool] = True self._line_len: int = 88 - self._list_item_indent: int = 1 - self._list_item_vertical_spacing: int = 1 + self._item_indent: int = 1 + self._item_spacing: int = 1 self._current_list_nesting: int = 0 self._list_indent: int = 4 - self._list_bullet_prefer_default: bool = True - self._list_bullet_ordered: bool = True - self._list_bullet_unordered_char: str = "–" + self._bullet: bool = True self._text_wrapper: _TextWrapper | None = None self._curr_bullet_len: int = 0 self._title_centered: bool = True - self._title_separator_full_line: bool = True - self._title_separator: str = "=" - self._subtitle_separator: str = "–" + self._title_underline_full: bool = True + self._title_underline: str = "=" + self._subtitle_underline: str = "–" self._count_optional = 0 + self._line_breaks = 2 return def generate( self, title: str | bool = True, - copyright: str | bool = False, + copyright_notice: str | bool = False, optionals: bool | list[bool] = True, alts: dict[str, str] | None = None, line_length: int = 88, - list_indent: int = 3, - list_item_indent: int = 2, - list_item_vertical_spacing: int = 2, - list_bullet_prefer_default: bool = True, - list_bullet_ordered: bool = True, - list_bullet_unordered_char: str = "–", - title_centered: bool = True, - title_separator_full_line: bool = True, - title_separator: Literal["-", "=", "_", "*"] = "=", - subtitle_separator: Literal["-", "=", "_", "*"] = "-", - ) -> tuple[str, str | None]: + 
list_indent: int = 0, + item_indent: int = 1, + item_spacing: int = 1, + bullet: str | int | None = 1, + title_centered: bool = False, + title_underline: Literal["-", "=", "_", "*"] = "=", + title_underline_full: bool = False, + subtitle_underline: Literal["-", "=", "_", "*"] = "-", + line_breaks: int = 2, + ) -> str: """Generate plain-text license. Parameters @@ -227,7 +222,7 @@ def generate( and not used in matching, it can be omitted or replaced with a custom title. If True, the title is included as-is. If False, the title is omitted. If a string, the title is replaced with the custom string, if a title is present. - copyright + copyright_notice Determines how to treat the copyright notice, if any. Since the copyright notice is [optional](https://spdx.github.io/spdx-spec/v3.0.1/annexes/license-matching-guidelines-and-templates/#copyright-notice) and not used in matching, it can be omitted or replaced with a custom notice. @@ -242,17 +237,25 @@ def generate( The maximum line length for the plain-text output. list_indent The number of spaces separating list items from the left margin. - list_item_indent + item_indent The number of spaces separating list items from the bullet character. - list_item_vertical_spacing + item_spacing The number of newlines separating list items. - list_bullet_prefer_default - Whether to use the license's default bullet character or number for list items, if available. - list_bullet_ordered - Whether to use numbered (True) or bulleted (False) list items, - if no default bullet is available or `list_bullet_prefer_default` is False. - list_bullet_unordered_char - The character to use for unordered list items if `list_bullet_ordered` is False. + bullet + If `None`, the license's default bullet characters are used for list items. + If a string, the specified character is used. + If an integer, items are numbered starting from the specified number. + title_centered + Whether to center the title text. 
+ title_underline + The character to use for underlining the title. + Set to `None` to disable underlining. + title_underline_full + Whether to extend the underline to the full line length. + subtitle_underline + The character to use for underlining subtitles. + line_breaks + Number of newlines to add for each `<br>`
element. Returns ------- @@ -260,7 +263,7 @@ def generate( plus the license header text, if present. """ self._title = title - self._copyright = copyright + self._copyright = copyright_notice self._optionals = optionals self._line_len = line_length self._text_wrapper = _TextWrapper( @@ -274,21 +277,18 @@ def generate( self._curr_bullet_len = 0 self._count_optional = 0 self._list_indent = list_indent - self._list_item_indent = list_item_indent - self._list_item_vertical_spacing = list_item_vertical_spacing - self._list_bullet_prefer_default = list_bullet_prefer_default - self._list_bullet_ordered = list_bullet_ordered - self._list_bullet_unordered_char = list_bullet_unordered_char + self._item_indent = item_indent + self._item_spacing = item_spacing + self._bullet = bullet self._title_centered = title_centered - self._title_separator_full_line = title_separator_full_line - self._title_separator = title_separator - self._subtitle_separator = subtitle_separator - fulltext, notice = super().generate(alts=alts, optionals=optionals) - return tuple(self.finalize(text) for text in (fulltext, notice)) - - def finalize(self, text: str | None) -> str | None: - if text is None: - return + self._title_underline = title_underline + self._title_underline_full = title_underline_full + self._subtitle_underline = subtitle_underline + self._line_breaks = line_breaks + fulltext = super().generate(alts=alts, optionals=optionals) + return self.finalize(fulltext) + + def finalize(self, text: str | None) -> str: to_wrap_section_indices = [] cleaned_sections = [[]] section_breaks = [0] @@ -367,9 +367,6 @@ def finalize(self, text: str | None) -> str | None: def generate_full(self, text: ET.Element): return self.generic(text) - def generate_notice(self, standard_license_header: ET.Element): - return self.generic(standard_license_header) - def generic( self, element: ET.Element, @@ -416,18 +413,18 @@ def generic( content_lines = content.strip("\n").splitlines() 
out.append(f"{content_lines[0].strip()}\n") out.extend([f"{bullet_len * " "}{line}\n" for line in content_lines[1:]]) - if self._subtitle_separator and len(content_lines) == 1: + if self._subtitle_underline and len(content_lines) == 1: num_chars = len(content_lines[0].strip()) + bullet_len - out.append(f"{self._subtitle_separator * num_chars}\n\n") + out.append(f"{self._subtitle_underline * num_chars}\n\n") else: out.append("\n\n") if tag_name == "bullet": # There is a bullet element outside of a list item. if self.element_has_tail(child): - if self._subtitle_separator: + if self._subtitle_underline: num_chars = len(content.strip()) leading_spaces = (len(content) - len(content.lstrip(' '))) * " " - out.append(f"{leading_spaces}{self._subtitle_separator * num_chars}\n\n") + out.append(f"{leading_spaces}{self._subtitle_underline * num_chars}\n\n") else: # The bullet has no text after it (example: CPL-1.0); # Add the next element as the list item text. @@ -458,16 +455,16 @@ def title_text(self, element: ET.Element) -> str: line = line.strip() if not line: continue - if self._title_separator and all(char in ("-", "=", "_", "*") for char in line): + if self._title_underline and all(char in ("-", "=", "_", "*") for char in line): continue if self._title_centered: line = line.center(self._line_len) title_lines.append(line) - if self._title_separator: - if self._title_separator_full_line: - title_lines.append(self._title_separator * self._line_len) + if self._title_underline: + if self._title_underline_full: + title_lines.append(self._title_underline * self._line_len) else: - separator_line = self._title_separator * max(len(line) for line in title_lines) + separator_line = self._title_underline * max(len(line) for line in title_lines) if self._title_centered: separator_line = separator_line.center(self._line_len) title_lines.append(separator_line) @@ -508,7 +505,7 @@ def list(self, elem: ET.Element) -> str: self._current_list_nesting += 1 if elem.text and 
elem.text.strip(): raise ValueError("List element should not have text content") - if self._list_bullet_prefer_default: + if self._bullet: bullet_elems = elem.findall("./item/bullet", self._ns) + elem.findall("./item/p/bullet", self._ns) max_bullet_width = max([len(bullet.text.strip()) for bullet in bullet_elems], default=0) else: @@ -523,7 +520,7 @@ def list(self, elem: ET.Element) -> str: [f"{' ' * self._list_indent}{line}" for line in item_str.splitlines()]) items.append(item_str_indented) self._current_list_nesting -= 1 - newlines = max(1, self._list_item_vertical_spacing) * "\n" + newlines = max(1, self._item_spacing + 1) * "\n" list_str = newlines.join(items) return f"{newlines}{list_str}{newlines}" @@ -532,10 +529,10 @@ def item(self, elem: ET.Element, idx: int, max_bullet_width: int) -> str: if len(bullet_elems) > 1: raise ValueError("Item element should contain at most one bullet element") if len(bullet_elems) == 1: - bullet = bullet_elems[0].text.strip() if self._list_bullet_prefer_default else ( - f"{idx + 1}." if self._list_bullet_ordered else self._list_bullet_unordered_char + bullet = bullet_elems[0].text.strip() if not self._bullet else ( + f"{idx + self._bullet}." if isinstance(self._bullet, int) else self._bullet ) - bullet_post_space = max_bullet_width + self._list_item_indent - len(bullet) + bullet_post_space = max_bullet_width + self._item_indent - len(bullet) bullet += bullet_post_space * " " subsequent_indent = len(bullet) * " " else: @@ -605,7 +602,7 @@ def p(self, element: ET.Element) -> str: paragraph_raw = " ".join(paragraph_components) paragraph_normalized = _re.sub(r'\s+', ' ', paragraph_raw).strip() paragraphs.append(self.wrap_text(paragraph_normalized)) - return f"\n\n{"\n".join(paragraphs)}\n\n" + return f"\n\n{("\n" * self._line_breaks).join(paragraphs)}\n\n" def alt(self, element: ET.Element) -> str: """Process an element by selecting the appropriate alternative based on `self._alt`. 
@@ -624,7 +621,7 @@ def bullet(self, element: ET.Element) -> str: raise ValueError("Bullet element should not have children") if not self.element_has_text(element): raise ValueError("Bullet element should have text content") - bullet = f"{element.text.strip()}{" " * self._list_item_indent}" + bullet = f"{element.text.strip()}{" " * self._item_indent}" item = f"{bullet}{element.tail.strip()}" if self.element_has_tail(element) else bullet return f"\n{self.process_text(item)}\n" @@ -637,7 +634,7 @@ def br(self, element: ET.Element) -> str: tail = self.process_text(element.tail) else: tail = "" - return f"\n{tail} " + return f"{"\n" * self._line_breaks}{tail} " def process_text(self, text: str) -> str: space_normalized_text = _re.sub(r'\s+', ' ', text.strip()) @@ -651,8 +648,6 @@ def wrap_text(self, text: str) -> str: ---------- text : str The text to wrap. - current_indent : int - The current indentation level. """ if self._current_list_nesting: extra_width = (self._current_list_nesting * self._list_indent) + self._curr_bullet_len