From 3c735b4b402636bfdc100831ba4d964a1a2d0f92 Mon Sep 17 00:00:00 2001 From: Orestis Ioannou Date: Fri, 4 Sep 2015 09:42:20 +0200 Subject: [PATCH 1/3] Add route, view and helpers for exporting d/copyright to spdx --- debsources/app/copyright/routes.py | 15 +- .../templates/copyright/license.html | 1 + .../copyright/missing_copyright.html | 22 +++ debsources/app/copyright/views.py | 34 ++++ debsources/app/views.py | 20 +- debsources/excepts.py | 8 + debsources/license_helper.py | 173 ++++++++++++++++++ 7 files changed, 268 insertions(+), 5 deletions(-) create mode 100644 debsources/app/copyright/templates/copyright/missing_copyright.html diff --git a/debsources/app/copyright/routes.py b/debsources/app/copyright/routes.py index 26f26041..ced3c9a2 100644 --- a/debsources/app/copyright/routes.py +++ b/debsources/app/copyright/routes.py @@ -12,13 +12,14 @@ from __future__ import absolute_import -from flask import jsonify, request, render_template +from flask import jsonify, request, render_template, make_response from ..helper import bind_render, generic_before_request from . 
import bp_copyright from ..views import (IndexView, PrefixView, ListPackagesView, ErrorHandler, Ping, PackageVersionsView, SearchView) -from .views import LicenseView, ChecksumLicenseView, SearchFileView, StatsView +from .views import (LicenseView, ChecksumLicenseView, SearchFileView, + StatsView, SPDXView) from debsources.excepts import Http404Error @@ -39,7 +40,7 @@ def skeleton_variables(): # Before request @bp_copyright.before_request def before_request(): - endpoints = ['license', 'file', 'api_file'] + endpoints = ['license', 'file', 'api_file', 'spdx'] if request.endpoint.replace('copyright.', '', 1) in endpoints: try: return generic_before_request(request, 3) @@ -224,3 +225,11 @@ def before_request(): render_func=jsonify, err_func=ErrorHandler(mode='json'), get_objects='stats_suite')) + +# SPDX view +bp_copyright.add_url_rule( + '/spdx/<packagename>/<version>/', + view_func=SPDXView.as_view( + 'spdx', + render_func=make_response, + err_func=ErrorHandler('copyright'))) diff --git a/debsources/app/copyright/templates/copyright/license.html b/debsources/app/copyright/templates/copyright/license.html index baa671c9..4fe13f9b 100644 --- a/debsources/app/copyright/templates/copyright/license.html +++ b/debsources/app/copyright/templates/copyright/license.html @@ -27,6 +27,7 @@

{{ self.title() }} / {{ version }}

{% if dump == 'True' %} {% include "source_file_code.inc.html" %} {% else %} + {% include "copyright/license_render.inc.html" %} {% endif %} {% endblock %} diff --git a/debsources/app/copyright/templates/copyright/missing_copyright.html b/debsources/app/copyright/templates/copyright/missing_copyright.html new file mode 100644 index 00000000..2101a0fd --- /dev/null +++ b/debsources/app/copyright/templates/copyright/missing_copyright.html @@ -0,0 +1,22 @@ +{# + Copyright (C) 2016 The Debsources developers . + See the AUTHORS file at the top-level directory of this distribution and at + https://anonscm.debian.org/gitweb/?p=qa/debsources.git;a=blob;f=AUTHORS;hb=HEAD + License: GNU Affero General Public License, version 3 or above. +#} +{% extends name+"/base.html" %} + +{% block title %}Error{% endblock %} +{% block content %} +

{{ self.title() }}

+

The debian/copyright file has a Files paragraph without the required Copyright field. The Files paragraph is: +

    +{% for files in paragraph %} +
  • {{ files }}
  • +{% endfor %} +
+View raw copyright file +

+Go back to the license page + +{% endblock %} diff --git a/debsources/app/copyright/views.py b/debsources/app/copyright/views.py index 3163febb..7798c391 100644 --- a/debsources/app/copyright/views.py +++ b/debsources/app/copyright/views.py @@ -299,3 +299,37 @@ def get_stats(self): dual_results=dual_res, dual_licenses=sorted(dual_licenses), suites=all_suites) + + +class SPDXView(GeneralView): + + def _generate_file(self, spdx_values): + output = '' + for value in spdx_values: + output += value.decode('utf-8') + '\n' + return output + + def get_objects(self, packagename, version): + try: + sources_path = helper.get_sources_path(session, packagename, + version, + current_app.config) + except FileOrFolderNotFound: + raise Http404ErrorSuggestions(packagename, version, + 'debian/copyright') + except InvalidPackageOrVersionError: + raise Http404ErrorSuggestions(packagename, version, '') + + try: + c = helper.parse_license(sources_path) + except Exception: + # non machine readable license + return dict(return_code=404) + + spdx = helper.export_copyright_to_spdx( + c, session=session, package=packagename, + version=version) + attachment = "attachment;" + "filename=" + \ + packagename + '_' + version + ".spdx" + return dict(spdx=self._generate_file(spdx), + header=attachment) diff --git a/debsources/app/views.py b/debsources/app/views.py index 0dac6cf1..fccc55e8 100644 --- a/debsources/app/views.py +++ b/debsources/app/views.py @@ -17,12 +17,14 @@ import six from flask import ( - current_app, jsonify, render_template, request, url_for, redirect) + current_app, jsonify, render_template, request, url_for, redirect, + make_response) from flask.views import View from debsources.excepts import ( Http500Error, Http404Error, Http404ErrorSuggestions, Http403Error, - InvalidPackageOrVersionError, Http404MissingCopyright) + InvalidPackageOrVersionError, Http404MissingCopyright, + MissingCopyrightField, CopyrightValueError) from debsources.models import Package import 
debsources.query as qry from debsources.sqla_session import _close_session @@ -126,6 +128,16 @@ def error_404(self, error): else: return render_template('copyright/404_missing.html', suggestions=suggestions), 404 + elif isinstance(error, MissingCopyrightField): + return render_template('copyright/missing_copyright.html', + paragraph=error.par, + package=error.package, + version=error.version) + elif isinstance(error, CopyrightValueError): + return render_template('copyright/value_error.html', + error=error.error, + package=error.package, + version=error.version) else: return render_template('404.html'), 404 @@ -189,6 +201,10 @@ def dispatch_request(self, **kwargs): """ try: context = self.get_objects(**kwargs) + if self.render_func is make_response: + response = make_response(context['spdx']) + response.headers["Content-Disposition"] = context['header'] + return response return self.render_func(**context) except Http403Error as e: return self.err_func(e, http=403) diff --git a/debsources/excepts.py b/debsources/excepts.py index b2b7068a..28c4fc31 100644 --- a/debsources/excepts.py +++ b/debsources/excepts.py @@ -46,3 +46,11 @@ def __init__(self, package, version, path): class Http403Error(Exception): pass + + +class MissingCopyrightField(Http404Error): + def __init__(self, package, version, par): + self.package = package + self.version = version + self.par = par + super(MissingCopyrightField, self).__init__() diff --git a/debsources/license_helper.py b/debsources/license_helper.py index 4d7dbaf0..4ccf6b75 100644 --- a/debsources/license_helper.py +++ b/debsources/license_helper.py @@ -12,11 +12,15 @@ import io import logging import re +import hashlib +from datetime import datetime from flask import url_for from debian import copyright +from debsources.models import Checksum, File, Package, PackageName from debsources.navigation import Location, SourceFile +from debsources.excepts import MissingCopyrightField # import debsources.query as qry @@ -134,6 +138,10 @@ def 
get_license(session, package, version, path, license_path=None): return None +def get_paragraph(c, path): + return c.find_files_paragraph(path) + + def get_copyright_header(copyright): """ Return all the header attributs @@ -197,6 +205,8 @@ def create_url(glob="", base=None,): def match_license(synopsis): """ Matches a `synopsis` with a license and creates a url """ + if any(keyword in synopsis for keyword in ['with', 'exception']): + return None key = filter(lambda x: re.search(x, synopsis) is not None, Licenses) if len(key) is not 0: return Licenses[key[0]] @@ -241,3 +251,166 @@ def anchor_to_license(copyright, synopsis): return '#license-' + str(licenses.index(synopsis)) else: return None + + +def export_copyright_to_spdx(c, package, version, session): + """ Creates the SPDX document and saves the result in fname + + """ + + def create_package_code(session, package, version): + sha = (session.query(Checksum.sha256.label("sha256")) + .filter(Checksum.package_id == Package.id) + .filter(Checksum.file_id == File.id) + .filter(Package.name_id == PackageName.id) + .filter(PackageName.name == package) + .filter(Package.version == version) + .order_by("sha256") + ).all() + sha_values = [sha256[0] for sha256 in sha] + return hashlib.sha256("".join(sha_values)).hexdigest() + + def create_license_ref(license, count, refs, unknown): + """ Creates license references and adds it in the specific + dictionnary. Also adds the non standard licenses in unknown + licenses. 
+ """ + if license not in refs.keys() and license is not u'': + if not match_license(license): + l_id = 'LicenseRef-' + str(count) + refs[license] = l_id + count += 1 + unknown[license] = "LicenseId: " + l_id + \ + "\nLicenseName: " + l + else: + refs[license] = license + return refs, unknown, count + + # set upstream name for native packages + if c.header.upstream_name is not None: + upstream_name = c.header.upstream_name + else: + upstream_name = package + # find out which are not standard and save SPDX required information + # Non standard licenses are referenced as LicenseRef- + refs = dict() + count = 0 + unknown = dict() + for par in c.all_files_paragraphs(): + try: + l = par.license.synopsis + if any(keyword in l for keyword in ['and', 'or']): + licenses = re.split(', |and |or ', l) + for license in licenses: + refs, unknown, count = create_license_ref(license.rstrip(), + count, refs, + unknown) + else: + refs, unknown, count = create_license_ref(l, count, + refs, unknown) + + except (AttributeError, ValueError): + pass + + # add the available extracted license text for unknown licenses + for par in c.all_license_paragraphs(): + try: + l = par.license.synopsis + if l in refs.keys() and not match_license(l): + unknown[l] = "LicenseID: " + refs[l] + \ + "\nExtractedText: " + \ + par.license.text + "" + \ + "\nLicenseName: " + l + except (AttributeError, ValueError): + pass + + time = datetime.now() + now = str(time.date()) + 'T' + str(time.time()).split('.')[0] + 'Z' + + spdx = ["SPDXVersion: SPDX-2.0", "DataLicense:CC0-1.0", + "SPDXID: SPDXRef-DOCUMENT", + "Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-Package", + "DocumentName: " + upstream_name, + "DocumentNamespace: http://spdx.org/spdxdocs/" + + "spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301", + "LicenseListVersion: 2.0", + "Creator: Person: Debsources", + "Creator: Organization: Debsources", + "Creator: Tool: Debsources", + "Created: " + now, + "CreatorComment: This document was created by" + + 
"Debsources by parsing the respective debian/copyright" + + "file of the package provided by the Debian project. You" + + "may follow these links: http://debian.org/ " + + "http://sources.debian.net/ to get more information about " + + "Debian and Debsources. ", + "DocumentComment: This document was created using" + + "SPDX 2.0, version 2.3 of the SPDX License List.", + "PackageName: " + upstream_name, + "SPDXID: SPDXRef-Package", + "PackageDownloadLocation: NOASSERTION", + "PackageVerificationCode: " + create_package_code(session, + package, + version), + "PackageLicenseConcluded: NOASSERTION"] + for value in set(refs.values()): + spdx.append("PackageLicenseInfoFromFiles: " + value) + + spdx.extend(["PackageLicenseDeclared: NOASSERTION", + "PackageCopyrightText: NOASSERTION"]) + for files in get_files_spdx(refs, package, version, session, c): + for item in files: + spdx.append(str(item)) + for u in unknown: + spdx.append(unknown[u]) + return spdx + + +def get_files_spdx(refs, package, version, session, c): + """ Get all files from the DB for a specific package and version and + then create a dictionnary for the SPDX entries + + """ + + def replace_all(text, dic): + """ Replace all occurences of the keys in dic by the corresponding + value + """ + for i, j in dic.iteritems(): + text = text.replace(i, j) + return text + + files = (session.query(Checksum.sha256.label("sha256"), + File.path.label("path")) + .filter(Checksum.package_id == Package.id) + .filter(Checksum.file_id == File.id) + .filter(Package.name_id == PackageName.id) + .filter(PackageName.name == package) + .filter(Package.version == version) + ) + + files_info = [] + + for i, f in enumerate(files.all()): + par = get_paragraph(c, f.path) + try: + if not match_license(par.license.synopsis): + license_concluded = replace_all(par.license.synopsis, refs) + else: + license_concluded = par.license.synopsis + except (AttributeError, ValueError): + license_concluded = "None" + # NOASSERTION means that the SPDX 
generator did not calculate that + # value. + sha = 'NOASSERTION' if not f.sha256 else f.sha256 + try: + files_info.append(["FileName: " + f.path, + "SPDXID: SPDX-FILE-REF-" + str(i), + "FileChecksum: SHA256: " + sha, + "LicenseConcluded: " + license_concluded, + "LicenseInfoInFile: NOASSERTION", + "FileCopyrightText: " + + par.copyright.encode('utf-8') + ""]) + except AttributeError: + raise MissingCopyrightField(package, version, par.files) + return files_info From 29e077c550401aeb9f1f42c7b16d5e35d3037b61 Mon Sep 17 00:00:00 2001 From: Orestis Ioannou Date: Mon, 18 Jan 2016 00:02:25 +0100 Subject: [PATCH 2/3] Copyright: expect value error in spdx export --- .../templates/copyright/value_error.html | 20 +++++++++++++++++++ debsources/app/copyright/views.py | 20 +++++++++++-------- debsources/excepts.py | 8 ++++++++ debsources/license_helper.py | 2 +- 4 files changed, 41 insertions(+), 9 deletions(-) create mode 100644 debsources/app/copyright/templates/copyright/value_error.html diff --git a/debsources/app/copyright/templates/copyright/value_error.html b/debsources/app/copyright/templates/copyright/value_error.html new file mode 100644 index 00000000..3af78595 --- /dev/null +++ b/debsources/app/copyright/templates/copyright/value_error.html @@ -0,0 +1,20 @@ +{# + Copyright (C) 2016 The Debsources developers . + See the AUTHORS file at the top-level directory of this distribution and at + https://anonscm.debian.org/gitweb/?p=qa/debsources.git;a=blob;f=AUTHORS;hb=HEAD + License: GNU Affero General Public License, version 3 or above. +#} +{% extends name+"/base.html" %} + +{% block title %}Error{% endblock %} +{% block content %} +

{{ self.title() }}

+

Parsing the debian/copyright file failed due to one or more of the following causes: +

    +
  • continued line must begin with " "
  • +
  • missing value in one of the required attributes (Files, Copyright, License)
  • +
+ +

View raw copyright file

+

+{% endblock %} diff --git a/debsources/app/copyright/views.py b/debsources/app/copyright/views.py index 7798c391..d647672a 100644 --- a/debsources/app/copyright/views.py +++ b/debsources/app/copyright/views.py @@ -22,7 +22,8 @@ import debsources.statistics as statistics from debsources.excepts import (Http404ErrorSuggestions, FileOrFolderNotFound, InvalidPackageOrVersionError, - Http404MissingCopyright, Http404Error) + Http404MissingCopyright, Http404Error, + CopyrightValueError) from ..views import GeneralView, ChecksumView, session, app from ..sourcecode import SourceCodeIterator from ..pagination import Pagination @@ -52,13 +53,16 @@ def get_objects(self, packagename, version): code=sourcefile, dump='True', nlines=sourcefile.get_number_of_lines(),) - return dict(package=packagename, - version=version, - dump='False', - header=helper.get_copyright_header(c), - files=helper.parse_copyright_paragraphs_for_html_render( - c, "/src/" + packagename + "/" + version + "/"), - licenses=helper.parse_licenses_for_html_render(c)) + try: + return dict(package=packagename, + version=version, + dump='False', + header=helper.get_copyright_header(c), + files=helper.parse_copyright_paragraphs_html_render( + c, "/src/" + packagename + "/" + version + "/"), + licenses=helper.parse_licenses_for_html_render(c)) + except ValueError as e: + raise CopyrightValueError(packagename, version, e.message) class ChecksumLicenseView(ChecksumView): diff --git a/debsources/excepts.py b/debsources/excepts.py index 28c4fc31..315db668 100644 --- a/debsources/excepts.py +++ b/debsources/excepts.py @@ -54,3 +54,11 @@ def __init__(self, package, version, par): self.version = version self.par = par super(MissingCopyrightField, self).__init__() + + +class CopyrightValueError(Http404Error): + def __init__(self, package, version, error): + self.package = package + self.version = version + self.error = error + super(CopyrightValueError, self).__init__() diff --git a/debsources/license_helper.py 
b/debsources/license_helper.py index 4ccf6b75..c61e9369 100644 --- a/debsources/license_helper.py +++ b/debsources/license_helper.py @@ -149,7 +149,7 @@ def get_copyright_header(copyright): return copyright.header._RestrictedWrapper__data -def parse_copyright_paragraphs_for_html_render(copyright, base_url): +def parse_copyright_paragraphs_html_render(copyright, base_url): """ Returns list of File objects. If `base_url` is provided then it creates links to base_url+glob """ From 27a3734f72a2bd4c6444758afa9e1ce5dae3e2fb Mon Sep 17 00:00:00 2001 From: Orestis Ioannou Date: Mon, 18 Jan 2016 10:16:17 +0100 Subject: [PATCH 3/3] Tests: fix test due to better matching standard licenses --- debsources/tests/test_web_cp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debsources/tests/test_web_cp.py b/debsources/tests/test_web_cp.py index 13834c0b..88ca3145 100644 --- a/debsources/tests/test_web_cp.py +++ b/debsources/tests/test_web_cp.py @@ -319,8 +319,8 @@ def test_synopsis_parsing(self): self.assertIn("FSF-configure", rv.data) # Test separating by ',' and, or and create correct links synopsis = "FSF-configure, and " \ - "GPL-2+"\ - " with Libtool exception or GPL-2+ with Libtool exception"\ + " or GPL-3+" self.assertIn(synopsis, rv.data)