From ffd6ae73f2fc23780c4bdbaeefa0920bfcc69d3b Mon Sep 17 00:00:00 2001
From: Orestis Ioannou
Date: Fri, 4 Sep 2015 09:42:20 +0200
Subject: [PATCH] Add route, view and helpers for exporting d/copyright to spdx

---
 debsources/app/copyright/routes.py            |  13 +-
 .../templates/copyright/license.html          |   1 +
 debsources/app/copyright/views.py             |  50 ++++++
 debsources/app/views.py                       |   7 +-
 debsources/license_helper.py                  | 163 ++++++++++++++++++
 5 files changed, 231 insertions(+), 3 deletions(-)

diff --git a/debsources/app/copyright/routes.py b/debsources/app/copyright/routes.py
index c48044d1..0ffd722b 100644
--- a/debsources/app/copyright/routes.py
+++ b/debsources/app/copyright/routes.py
@@ -12,13 +12,14 @@
 from __future__ import absolute_import
 
 
-from flask import jsonify
+from flask import jsonify, make_response
 
 from ..helper import bind_render
 from . import bp_copyright
 from ..views import (IndexView, PrefixView, ListPackagesView, ErrorHandler,
                      Ping, PackageVersionsView, DocView, AboutView, SearchView)
-from .views import LicenseView, ChecksumLicenseView, SearchFileView, StatsView
+from .views import (LicenseView, ChecksumLicenseView, SearchFileView,
+                    StatsView, SPDXView)
 
 
 # context vars
@@ -254,3 +255,11 @@ def skeleton_variables():
         render_func=jsonify,
         err_func=ErrorHandler(mode='json'),
         get_objects='stats_suite'))
+
+# SPDX view
+bp_copyright.add_url_rule(
+    '/spdx/<path:path_to>/',
+    view_func=SPDXView.as_view(
+        'spdx',
+        render_func=make_response,
+        err_func=ErrorHandler(mode='json')))
diff --git a/debsources/app/copyright/templates/copyright/license.html b/debsources/app/copyright/templates/copyright/license.html
index baa671c9..9e7a84d8 100644
--- a/debsources/app/copyright/templates/copyright/license.html
+++ b/debsources/app/copyright/templates/copyright/license.html
@@ -27,6 +27,7 @@

{{ self.title() }} / {{ version }}

{% if dump == 'True' %} {% include "source_file_code.inc.html" %} {% else %} + {% include "copyright/license_render.inc.html" %} {% endif %} {% endblock %} diff --git a/debsources/app/copyright/views.py b/debsources/app/copyright/views.py index e72bb5a1..2800a890 100644 --- a/debsources/app/copyright/views.py +++ b/debsources/app/copyright/views.py @@ -339,3 +339,53 @@ def get_stats(self): dual_results=dual_res, dual_licenses=sorted(dual_licenses), suites=all_suites) + + +class SPDXView(GeneralView): + + def _generate_file(self, spdx_values): + output = '' + for value in spdx_values: + output += value.decode('utf-8') + '\n' + return output + + def get_objects(self, path_to): + path_dict = path_to.split('/') + + package = path_dict[0] + version = path_dict[1] + path = '/'.join(path_dict[2:]) + + if version == "latest": # we search the latest available version + return self._handle_latest_version(request.endpoint, + package, path) + + versions = self.handle_versions(version, package, path) + if versions: + redirect_url_parts = [package, versions[-1]] + if path: + redirect_url_parts.append(path) + redirect_url = '/'.join(redirect_url_parts) + return self._redirect_to_url(request.endpoint, + redirect_url, redirect_code=302) + + try: + sources_path = helper.get_sources_path(session, package, version, + current_app.config) + except FileOrFolderNotFound: + raise Http404ErrorSuggestions(package, version, + 'debian/copyright') + except InvalidPackageOrVersionError: + raise Http404ErrorSuggestions(package, version, '') + + try: + c = helper.parse_license(sources_path) + except Exception: + # non machine readable license + return dict(return_code=404) + spdx = helper.export_copyright_to_spdx( + c, session=session, package=package, version=version) + attachment = "attachment;" + "filename=" + \ + path_to.replace('/', '_') + ".spdx" + return dict(spdx=self._generate_file(spdx), + header=attachment) diff --git a/debsources/app/views.py b/debsources/app/views.py index 
99bfe055..9e3573e4 100644
--- a/debsources/app/views.py
+++ b/debsources/app/views.py
@@ -19,7 +19,8 @@ from debian.debian_support import version_compare
 
 
 from flask import (
-    current_app, jsonify, render_template, request, url_for, redirect)
+    current_app, jsonify, render_template, request, url_for, redirect,
+    make_response)
 from flask.views import View
 
 from debsources.excepts import (
@@ -192,6 +193,13 @@ def dispatch_request(self, **kwargs):
         """
         try:
             context = self.get_objects(**kwargs)
+            if self.render_func is make_response:
+                # SPDX export: the document is sent as a plain text
+                # attachment instead of being rendered by a template
+                response = make_response(context['spdx'])
+                response.mimetype = 'text/plain'
+                response.headers["Content-Disposition"] = context['header']
+                return response
             return self.render_func(**context)
         except Http403Error as e:
             return self.err_func(e, http=403)
diff --git a/debsources/license_helper.py b/debsources/license_helper.py
index d6f44d3b..f5bbe4c4 100644
--- a/debsources/license_helper.py
+++ b/debsources/license_helper.py
@@ -12,10 +12,13 @@
 import io
 import logging
 import re
+import hashlib
+from datetime import datetime
 
 from flask import url_for
 from debian import copyright
 
+from debsources.models import Checksum, File, Package, PackageName
 from debsources.navigation import Location, SourceFile
 
 # import debsources.query as qry
@@ -134,6 +137,13 @@ def get_license(session, package, version, path, license_path=None):
         return None
 
 
+def get_paragraph(c, path):
+    """ Returns the Files paragraph of the copyright `c` which applies to
+        `path` (whatever `c.find_files_paragraph` returns for it)
+    """
+    return c.find_files_paragraph(path)
+
+
 def get_copyright_header(copyright):
     """ Return all the header attributs
 
@@ -197,6 +207,8 @@ def create_url(glob="", base=None,):
 def match_license(synopsis):
     """ Matches a `synopsis` with a license and creates a url
     """
+    if any(keyword in synopsis for keyword in ['with', 'exception']):
+        return None
     key = filter(lambda x: re.search(x, synopsis) is not None, Licenses)
     if len(key) is not 0:
         return Licenses[key[0]]
@@ -241,3 +253,171 @@ def anchor_to_license(copyright, synopsis):
         return '#license-' + str(licenses.index(synopsis))
     else:
         return None
+
+
+def export_copyright_to_spdx(c, package, version, session):
+    """ Creates the SPDX tag/value document of a package version
+
+    `c` is the parsed debian/copyright file; `package` and `version`
+    select the files and checksums looked up through `session`.
+    Returns the document as a list of strings, one per SPDX entry (an
+    entry holding a <text></text> value may span several lines).
+
+    """
+
+    def create_package_code(session, package, version):
+        """ Returns the SHA256 of the sorted and concatenated SHA256 of
+            the files of the package
+        """
+        sha = (session.query(Checksum.sha256.label("sha256"))
+               .filter(Checksum.package_id == Package.id)
+               .filter(Checksum.file_id == File.id)
+               .filter(Package.name_id == PackageName.id)
+               .filter(PackageName.name == package)
+               .filter(Package.version == version)
+               .order_by("sha256")
+               ).all()
+        # files without a stored checksum are left out of the code
+        sha_values = "".join(row[0] for row in sha if row[0])
+        return hashlib.sha256(sha_values.encode('ascii')).hexdigest()
+
+    def create_license_ref(license, count, refs, unknown):
+        """ Creates a LicenseRef-<count> reference for `license` when it
+            is not a known license and has no reference yet. The reference
+            is added to `refs` and its SPDX entry to `unknown`. Returns
+            the updated counter.
+        """
+        if not license or license in refs or match_license(license):
+            return count
+        l_id = 'LicenseRef-' + str(count)
+        refs[license] = l_id
+        unknown[license] = ("LicenseID: " + l_id +
+                            "\nLicenseName: " + license)
+        return count + 1
+
+    # find out which are not standard and save SPDX required information
+    # Non standard licenses are referenced as LicenseRef-<number>
+    refs = dict()
+    unknown = dict()
+    count = 0
+    for par in c.all_files_paragraphs():
+        try:
+            synopsis = par.license.synopsis
+        except (AttributeError, ValueError):
+            # paragraph without a usable License field
+            continue
+        # a synopsis may combine several licenses: "A and B", "A or B",
+        # "A, B"; the separators are only matched as whole words
+        for license in re.split(r'\s*,\s*|\s+(?:and|or)\s+', synopsis):
+            count = create_license_ref(license.strip(), count, refs,
+                                       unknown)
+
+    # add the available extracted license text for unknown licenses
+    for par in c.all_license_paragraphs():
+        try:
+            synopsis = par.license.synopsis
+            text = par.license.text
+        except (AttributeError, ValueError):
+            continue
+        if synopsis in refs:
+            unknown[synopsis] = ("LicenseID: " + refs[synopsis] +
+                                 "\nExtractedText: <text>" + (text or '') +
+                                 "</text>" +
+                                 "\nLicenseName: " + synopsis)
+
+    # the 'Z' suffix stands for UTC, so the timestamp has to be taken in UTC
+    now = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
+    name = c.header.upstream_name or package
+    package_code = create_package_code(session, package, version)
+
+    spdx = ["SPDXVersion: SPDX-2.0",
+            "DataLicense: CC0-1.0",
+            "SPDXID: SPDXRef-DOCUMENT",
+            "Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-Package",
+            "DocumentName: " + name,
+            # the namespace has to be unique for every generated document
+            "DocumentNamespace: http://spdx.org/spdxdocs/" +
+            package + "-" + version + "-" + package_code,
+            "LicenseListVersion: 2.0",
+            "Creator: Person: Debsources",
+            "Creator: Organization: Debsources",
+            "Creator: Tool: Debsources",
+            "Created: " + now,
+            "CreatorComment: <text>This document was created by " +
+            "Debsources by parsing the respective debian/copyright " +
+            "file of the package provided by the Debian project. You " +
+            "may follow these links: http://debian.org/ " +
+            "http://sources.debian.net/ to get more information about " +
+            "Debian and Debsources.</text>",
+            "DocumentComment: <text>This document was created using " +
+            "SPDX 2.0, version 2.3 of the SPDX License List.</text>",
+            "PackageName: " + name,
+            "SPDXID: SPDXRef-Package",
+            "PackageDownloadLocation: NOASSERTION",
+            "PackageVerificationCode: " + package_code,
+            "PackageLicenseConcluded: NOASSERTION"]
+    for value in sorted(refs.values()):
+        spdx.append("PackageLicenseInfoFromFiles: " + value)
+
+    spdx.extend(["PackageLicenseDeclared: NOASSERTION",
+                 "PackageCopyrightText: NOASSERTION"])
+    for files in get_files_spdx(refs, package, version, session, c):
+        spdx.extend(files)
+    spdx.extend(unknown.values())
+    return spdx
+
+
+def get_files_spdx(refs, package, version, session, c):
+    """ Get all files from the DB for a specific package and version and
+    then create the list of SPDX entries of every file
+
+    `refs` maps the non standard license names to their LicenseRef-<n>
+    identifier and `c` is the parsed debian/copyright file. Returns a
+    list holding, for every file, the list of its SPDX entries.
+
+    """
+
+    def replace_all(text, dic):
+        """ Replace all occurences of the keys in dic by the corresponding
+            value
+        """
+        if not dic:
+            return text
+        # longest names first: a name contained in another one must not
+        # shadow it; the single pass never rewrites an inserted reference
+        names = sorted(dic, key=len, reverse=True)
+        pattern = '|'.join(re.escape(name) for name in names)
+        return re.sub(pattern, lambda found: dic[found.group(0)], text)
+
+    files = (session.query(Checksum.sha256.label("sha256"),
+                           File.path.label("path"))
+             .filter(Checksum.package_id == Package.id)
+             .filter(Checksum.file_id == File.id)
+             .filter(Package.name_id == PackageName.id)
+             .filter(PackageName.name == package)
+             .filter(Package.version == version)
+             )
+
+    files_info = []
+
+    for i, f in enumerate(files.all()):
+        par = get_paragraph(c, f.path)
+        # NOASSERTION means that the SPDX generator did not calculate that
+        # value; it is also used when no Files paragraph covers the file
+        try:
+            license_concluded = replace_all(par.license.synopsis, refs)
+        except (AttributeError, ValueError):
+            license_concluded = "NOASSERTION"
+        try:
+            copyright_text = "<text>" + \
+                par.copyright.encode('utf-8') + "</text>"
+        except AttributeError:
+            copyright_text = "NOASSERTION"
+        sha = 'NOASSERTION' if not f.sha256 else f.sha256
+        files_info.append(["FileName: " + f.path,
+                           "SPDXID: SPDXRef-File-" + str(i),
+                           "FileChecksum: SHA256: " + sha,
+                           "LicenseConcluded: " + license_concluded,
+                           "LicenseInfoInFile: NOASSERTION",
+                           "FileCopyrightText: " + copyright_text])
+    return files_info