From 3c735b4b402636bfdc100831ba4d964a1a2d0f92 Mon Sep 17 00:00:00 2001
From: Orestis Ioannou
Date: Fri, 4 Sep 2015 09:42:20 +0200
Subject: [PATCH 1/3] Add route, view and helpers for exporting d/copyright to
spdx
---
debsources/app/copyright/routes.py | 15 +-
.../templates/copyright/license.html | 1 +
.../copyright/missing_copyright.html | 22 +++
debsources/app/copyright/views.py | 34 ++++
debsources/app/views.py | 20 +-
debsources/excepts.py | 8 +
debsources/license_helper.py | 173 ++++++++++++++++++
7 files changed, 268 insertions(+), 5 deletions(-)
create mode 100644 debsources/app/copyright/templates/copyright/missing_copyright.html
diff --git a/debsources/app/copyright/routes.py b/debsources/app/copyright/routes.py
index 26f26041..ced3c9a2 100644
--- a/debsources/app/copyright/routes.py
+++ b/debsources/app/copyright/routes.py
@@ -12,13 +12,14 @@
from __future__ import absolute_import
-from flask import jsonify, request, render_template
+from flask import jsonify, request, render_template, make_response
from ..helper import bind_render, generic_before_request
from . import bp_copyright
from ..views import (IndexView, PrefixView, ListPackagesView, ErrorHandler,
Ping, PackageVersionsView, SearchView)
-from .views import LicenseView, ChecksumLicenseView, SearchFileView, StatsView
+from .views import (LicenseView, ChecksumLicenseView, SearchFileView,
+ StatsView, SPDXView)
from debsources.excepts import Http404Error
@@ -39,7 +40,7 @@ def skeleton_variables():
# Before request
@bp_copyright.before_request
def before_request():
- endpoints = ['license', 'file', 'api_file']
+ endpoints = ['license', 'file', 'api_file', 'spdx']
if request.endpoint.replace('copyright.', '', 1) in endpoints:
try:
return generic_before_request(request, 3)
@@ -224,3 +225,11 @@ def before_request():
render_func=jsonify,
err_func=ErrorHandler(mode='json'),
get_objects='stats_suite'))
+
+# SPDX view: the two URL variables feed SPDXView.get_objects()
+bp_copyright.add_url_rule(
+    '/spdx/<string:packagename>/<string:version>/',
+    view_func=SPDXView.as_view(
+        'spdx',
+        render_func=make_response,
+        err_func=ErrorHandler('copyright')))
diff --git a/debsources/app/copyright/templates/copyright/license.html b/debsources/app/copyright/templates/copyright/license.html
index baa671c9..4fe13f9b 100644
--- a/debsources/app/copyright/templates/copyright/license.html
+++ b/debsources/app/copyright/templates/copyright/license.html
@@ -27,6 +27,7 @@ {{ self.title() }} / {{ version }}
{% if dump == 'True' %}
{% include "source_file_code.inc.html" %}
{% else %}
+
{% include "copyright/license_render.inc.html" %}
{% endif %}
{% endblock %}
diff --git a/debsources/app/copyright/templates/copyright/missing_copyright.html b/debsources/app/copyright/templates/copyright/missing_copyright.html
new file mode 100644
index 00000000..2101a0fd
--- /dev/null
+++ b/debsources/app/copyright/templates/copyright/missing_copyright.html
@@ -0,0 +1,22 @@
+{#
+ Copyright (C) 2016 The Debsources developers .
+ See the AUTHORS file at the top-level directory of this distribution and at
+ https://anonscm.debian.org/gitweb/?p=qa/debsources.git;a=blob;f=AUTHORS;hb=HEAD
+ License: GNU Affero General Public License, version 3 or above.
+#}
+{% extends name+"/base.html" %}
+
+{% block title %}Error{% endblock %}
+{% block content %}
+{{ self.title() }}
+The debian/copyright file has a file paragraph without the required copyright field. The files paragraph is:
+
+{% for files in paragraph %}
+- {{ files }}
+{% endfor %}
+
+View raw copyright file
+
+Go back to the license page
+
+{% endblock %}
diff --git a/debsources/app/copyright/views.py b/debsources/app/copyright/views.py
index 3163febb..7798c391 100644
--- a/debsources/app/copyright/views.py
+++ b/debsources/app/copyright/views.py
@@ -299,3 +299,37 @@ def get_stats(self):
dual_results=dual_res,
dual_licenses=sorted(dual_licenses),
suites=all_suites)
+
+
+class SPDXView(GeneralView):
+    """ Exports the debian/copyright file of a package as an SPDX file """
+
+    def _generate_file(self, spdx_values):
+        """ Joins the SPDX lines into one newline terminated document """
+        return ''.join(value.decode('utf-8') + '\n'
+                       for value in spdx_values)
+
+    def get_objects(self, packagename, version):
+        """ Returns the SPDX text and its Content-Disposition header """
+        try:
+            sources_path = helper.get_sources_path(
+                session, packagename, version, current_app.config)
+        except FileOrFolderNotFound:
+            raise Http404ErrorSuggestions(packagename, version,
+                                          'debian/copyright')
+        except InvalidPackageOrVersionError:
+            raise Http404ErrorSuggestions(packagename, version, '')
+
+        try:
+            c = helper.parse_license(sources_path)
+        except Exception:
+            # non machine readable license: nothing can be exported, and
+            # dispatch_request requires the 'spdx' and 'header' keys
+            raise Http404Error()
+
+        spdx = helper.export_copyright_to_spdx(
+            c, session=session, package=packagename, version=version)
+        attachment = "attachment;filename=" + \
+            packagename + '_' + version + ".spdx"
+        return dict(spdx=self._generate_file(spdx),
+                    header=attachment)
diff --git a/debsources/app/views.py b/debsources/app/views.py
index 0dac6cf1..fccc55e8 100644
--- a/debsources/app/views.py
+++ b/debsources/app/views.py
@@ -17,12 +17,14 @@
import six
from flask import (
- current_app, jsonify, render_template, request, url_for, redirect)
+ current_app, jsonify, render_template, request, url_for, redirect,
+ make_response)
from flask.views import View
from debsources.excepts import (
Http500Error, Http404Error, Http404ErrorSuggestions, Http403Error,
- InvalidPackageOrVersionError, Http404MissingCopyright)
+ InvalidPackageOrVersionError, Http404MissingCopyright,
+ MissingCopyrightField, CopyrightValueError)
from debsources.models import Package
import debsources.query as qry
from debsources.sqla_session import _close_session
@@ -126,6 +128,16 @@ def error_404(self, error):
else:
return render_template('copyright/404_missing.html',
suggestions=suggestions), 404
+ elif isinstance(error, MissingCopyrightField):
+ return render_template('copyright/missing_copyright.html',
+ paragraph=error.par,
+ package=error.package,
+ version=error.version)
+ elif isinstance(error, CopyrightValueError):
+ return render_template('copyright/value_error.html',
+ error=error.error,
+ package=error.package,
+ version=error.version)
else:
return render_template('404.html'), 404
@@ -189,6 +201,10 @@ def dispatch_request(self, **kwargs):
"""
try:
context = self.get_objects(**kwargs)
+ if self.render_func is make_response:
+ response = make_response(context['spdx'])
+ response.headers["Content-Disposition"] = context['header']
+ return response
return self.render_func(**context)
except Http403Error as e:
return self.err_func(e, http=403)
diff --git a/debsources/excepts.py b/debsources/excepts.py
index b2b7068a..28c4fc31 100644
--- a/debsources/excepts.py
+++ b/debsources/excepts.py
@@ -46,3 +46,11 @@ def __init__(self, package, version, path):
class Http403Error(Exception):
pass
+
+
+class MissingCopyrightField(Http404Error):
+ def __init__(self, package, version, par):
+ self.package = package
+ self.version = version
+ self.par = par
+ super(MissingCopyrightField, self).__init__()
diff --git a/debsources/license_helper.py b/debsources/license_helper.py
index 4d7dbaf0..4ccf6b75 100644
--- a/debsources/license_helper.py
+++ b/debsources/license_helper.py
@@ -12,11 +12,15 @@
import io
import logging
import re
+import hashlib
+from datetime import datetime
from flask import url_for
from debian import copyright
+from debsources.models import Checksum, File, Package, PackageName
from debsources.navigation import Location, SourceFile
+from debsources.excepts import MissingCopyrightField
# import debsources.query as qry
@@ -134,6 +138,10 @@ def get_license(session, package, version, path, license_path=None):
return None
+def get_paragraph(c, path):
+ return c.find_files_paragraph(path)
+
+
def get_copyright_header(copyright):
""" Return all the header attributs
@@ -197,6 +205,8 @@ def create_url(glob="", base=None,):
def match_license(synopsis):
""" Matches a `synopsis` with a license and creates a url
"""
+ if any(keyword in synopsis for keyword in ['with', 'exception']):
+ return None
key = filter(lambda x: re.search(x, synopsis) is not None, Licenses)
if len(key) is not 0:
return Licenses[key[0]]
@@ -241,3 +251,166 @@ def anchor_to_license(copyright, synopsis):
return '#license-' + str(licenses.index(synopsis))
else:
return None
+
+
+def export_copyright_to_spdx(c, package, version, session):
+    """ Creates the SPDX document for the parsed copyright `c` of
+    `package`/`version` and returns it as a list of tag-value strings
+    """
+
+    def create_package_code(session, package, version):
+        sha = (session.query(Checksum.sha256.label("sha256"))
+               .filter(Checksum.package_id == Package.id)
+               .filter(Checksum.file_id == File.id)
+               .filter(Package.name_id == PackageName.id)
+               .filter(PackageName.name == package)
+               .filter(Package.version == version)
+               .order_by("sha256")
+               ).all()
+        sha_values = [sha256[0] for sha256 in sha]
+        return hashlib.sha256("".join(sha_values)).hexdigest()
+
+    def create_license_ref(license, count, refs, unknown):
+        """ Creates license references and adds it in the specific
+        dictionary. Also adds the non standard licenses in unknown
+        licenses. Returns the updated (refs, unknown, count).
+        """
+        if license not in refs and license != u'':
+            if not match_license(license):
+                l_id = 'LicenseRef-' + str(count)
+                refs[license] = l_id
+                count += 1
+                unknown[license] = "LicenseID: " + l_id + \
+                    "\nLicenseName: " + license
+            else:
+                refs[license] = license
+        return refs, unknown, count
+
+    # set upstream name for native packages
+    if c.header.upstream_name is not None:
+        upstream_name = c.header.upstream_name
+    else:
+        upstream_name = package
+    # find out which are not standard and save SPDX required information
+    # Non standard licenses are referenced as LicenseRef-<number>
+    refs = dict()
+    count = 0
+    unknown = dict()
+    for par in c.all_files_paragraphs():
+        try:
+            l = par.license.synopsis
+            if any(keyword in l for keyword in ['and', 'or']):
+                licenses = re.split(', |and |or ', l)
+                for license in licenses:
+                    refs, unknown, count = create_license_ref(license.rstrip(),
+                                                              count, refs,
+                                                              unknown)
+            else:
+                refs, unknown, count = create_license_ref(l, count,
+                                                          refs, unknown)
+
+        except (AttributeError, ValueError):
+            pass
+
+    # add the available extracted license text for unknown licenses
+    for par in c.all_license_paragraphs():
+        try:
+            l = par.license.synopsis
+            if l in refs and not match_license(l):
+                unknown[l] = "LicenseID: " + refs[l] + \
+                    "\nExtractedText: <text>" + \
+                    par.license.text + "</text>" + \
+                    "\nLicenseName: " + l
+        except (AttributeError, ValueError):
+            pass
+
+    time = datetime.utcnow()  # the 'Z' suffix below denotes UTC
+    now = time.strftime('%Y-%m-%dT%H:%M:%SZ')
+
+    spdx = ["SPDXVersion: SPDX-2.0", "DataLicense: CC0-1.0",
+            "SPDXID: SPDXRef-DOCUMENT",
+            "Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-Package",
+            "DocumentName: " + upstream_name,
+            "DocumentNamespace: http://spdx.org/spdxdocs/" +
+            "spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301",
+            "LicenseListVersion: 2.0",
+            "Creator: Person: Debsources",
+            "Creator: Organization: Debsources",
+            "Creator: Tool: Debsources",
+            "Created: " + now,
+            "CreatorComment: This document was created by " +
+            "Debsources by parsing the respective debian/copyright " +
+            "file of the package provided by the Debian project. You " +
+            "may follow these links: http://debian.org/ " +
+            "http://sources.debian.net/ to get more information about " +
+            "Debian and Debsources. ",
+            "DocumentComment: This document was created using " +
+            "SPDX 2.0, version 2.3 of the SPDX License List.",
+            "PackageName: " + upstream_name,
+            "SPDXID: SPDXRef-Package",
+            "PackageDownloadLocation: NOASSERTION",
+            "PackageVerificationCode: " + create_package_code(session,
+                                                              package,
+                                                              version),
+            "PackageLicenseConcluded: NOASSERTION"]
+    for value in set(refs.values()):
+        spdx.append("PackageLicenseInfoFromFiles: " + value)
+
+    spdx.extend(["PackageLicenseDeclared: NOASSERTION",
+                 "PackageCopyrightText: NOASSERTION"])
+    for files in get_files_spdx(refs, package, version, session, c):
+        for item in files:
+            spdx.append(str(item))
+    for u in unknown:
+        spdx.append(unknown[u])
+    return spdx
+
+
+def get_files_spdx(refs, package, version, session, c):
+    """ Get all files from the DB for a specific package and version and
+    then create the SPDX entries: one list of tag-value lines per file.
+    Raises MissingCopyrightField when a file has no usable copyright.
+    """
+
+    def replace_all(text, dic):
+        """ Replace all occurrences of the keys in dic by their value """
+        for i, j in dic.items():
+            text = text.replace(i, j)
+        return text
+
+    files = (session.query(Checksum.sha256.label("sha256"),
+                           File.path.label("path"))
+             .filter(Checksum.package_id == Package.id)
+             .filter(Checksum.file_id == File.id)
+             .filter(Package.name_id == PackageName.id)
+             .filter(PackageName.name == package)
+             .filter(Package.version == version)
+             )
+
+    files_info = []
+
+    for i, f in enumerate(files.all()):
+        par = get_paragraph(c, f.path)
+        try:
+            if not match_license(par.license.synopsis):
+                license_concluded = replace_all(par.license.synopsis, refs)
+            else:
+                license_concluded = par.license.synopsis
+        except (AttributeError, ValueError):
+            license_concluded = "None"
+        # NOASSERTION means that the SPDX generator did not calculate that
+        # value.
+        sha = 'NOASSERTION' if not f.sha256 else f.sha256
+        try:
+            files_info.append(["FileName: " + f.path,
+                               "SPDXID: SPDX-FILE-REF-" + str(i),
+                               "FileChecksum: SHA256: " + sha,
+                               "LicenseConcluded: " + license_concluded,
+                               "LicenseInfoInFile: NOASSERTION",
+                               "FileCopyrightText: <text>" +
+                               par.copyright.encode('utf-8') + "</text>"])
+        except AttributeError:
+            # par may be None (no Files paragraph matched): report the path
+            covered = par.files if par is not None else (f.path,)
+            raise MissingCopyrightField(package, version, covered)
+    return files_info
From 29e077c550401aeb9f1f42c7b16d5e35d3037b61 Mon Sep 17 00:00:00 2001
From: Orestis Ioannou
Date: Mon, 18 Jan 2016 00:02:25 +0100
Subject: [PATCH 2/3] Copyright: expect value error in spdx export
---
.../templates/copyright/value_error.html | 20 +++++++++++++++++++
debsources/app/copyright/views.py | 20 +++++++++++--------
debsources/excepts.py | 8 ++++++++
debsources/license_helper.py | 2 +-
4 files changed, 41 insertions(+), 9 deletions(-)
create mode 100644 debsources/app/copyright/templates/copyright/value_error.html
diff --git a/debsources/app/copyright/templates/copyright/value_error.html b/debsources/app/copyright/templates/copyright/value_error.html
new file mode 100644
index 00000000..3af78595
--- /dev/null
+++ b/debsources/app/copyright/templates/copyright/value_error.html
@@ -0,0 +1,20 @@
+{#
+ Copyright (C) 2016 The Debsources developers .
+ See the AUTHORS file at the top-level directory of this distribution and at
+ https://anonscm.debian.org/gitweb/?p=qa/debsources.git;a=blob;f=AUTHORS;hb=HEAD
+ License: GNU Affero General Public License, version 3 or above.
+#}
+{% extends name+"/base.html" %}
+
+{% block title %}Error{% endblock %}
+{% block content %}
+{{ self.title() }}
+Parsing the debian/copyright file failed due to one or more of the following causes:
+
+ - continued line must begin with " "
+ - missing value in one of the required attributes (Files, Copyright, License)
+
+
+View raw copyright file
+
+{% endblock %}
diff --git a/debsources/app/copyright/views.py b/debsources/app/copyright/views.py
index 7798c391..d647672a 100644
--- a/debsources/app/copyright/views.py
+++ b/debsources/app/copyright/views.py
@@ -22,7 +22,8 @@
import debsources.statistics as statistics
from debsources.excepts import (Http404ErrorSuggestions, FileOrFolderNotFound,
InvalidPackageOrVersionError,
- Http404MissingCopyright, Http404Error)
+ Http404MissingCopyright, Http404Error,
+ CopyrightValueError)
from ..views import GeneralView, ChecksumView, session, app
from ..sourcecode import SourceCodeIterator
from ..pagination import Pagination
@@ -52,13 +53,16 @@ def get_objects(self, packagename, version):
code=sourcefile,
dump='True',
nlines=sourcefile.get_number_of_lines(),)
- return dict(package=packagename,
- version=version,
- dump='False',
- header=helper.get_copyright_header(c),
- files=helper.parse_copyright_paragraphs_for_html_render(
- c, "/src/" + packagename + "/" + version + "/"),
- licenses=helper.parse_licenses_for_html_render(c))
+ try:
+ return dict(package=packagename,
+ version=version,
+ dump='False',
+ header=helper.get_copyright_header(c),
+ files=helper.parse_copyright_paragraphs_html_render(
+ c, "/src/" + packagename + "/" + version + "/"),
+ licenses=helper.parse_licenses_for_html_render(c))
+ except ValueError as e:
+ raise CopyrightValueError(packagename, version, e.message)
class ChecksumLicenseView(ChecksumView):
diff --git a/debsources/excepts.py b/debsources/excepts.py
index 28c4fc31..315db668 100644
--- a/debsources/excepts.py
+++ b/debsources/excepts.py
@@ -54,3 +54,11 @@ def __init__(self, package, version, par):
self.version = version
self.par = par
super(MissingCopyrightField, self).__init__()
+
+
+class CopyrightValueError(Http404Error):
+ def __init__(self, package, version, error):
+ self.package = package
+ self.version = version
+ self.error = error
+ super(CopyrightValueError, self).__init__()
diff --git a/debsources/license_helper.py b/debsources/license_helper.py
index 4ccf6b75..c61e9369 100644
--- a/debsources/license_helper.py
+++ b/debsources/license_helper.py
@@ -149,7 +149,7 @@ def get_copyright_header(copyright):
return copyright.header._RestrictedWrapper__data
-def parse_copyright_paragraphs_for_html_render(copyright, base_url):
+def parse_copyright_paragraphs_html_render(copyright, base_url):
""" Returns list of File objects. If `base_url` is provided
then it creates links to base_url+glob
"""
From 27a3734f72a2bd4c6444758afa9e1ce5dae3e2fb Mon Sep 17 00:00:00 2001
From: Orestis Ioannou
Date: Mon, 18 Jan 2016 10:16:17 +0100
Subject: [PATCH 3/3] Tests: fix test due to better matching standard licenses
---
debsources/tests/test_web_cp.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/debsources/tests/test_web_cp.py b/debsources/tests/test_web_cp.py
index 13834c0b..88ca3145 100644
--- a/debsources/tests/test_web_cp.py
+++ b/debsources/tests/test_web_cp.py
@@ -319,8 +319,8 @@ def test_synopsis_parsing(self):
self.assertIn("FSF-configure", rv.data)
# Test separating by ',' and, or and create correct links
synopsis = "FSF-configure, and " \
- "GPL-2+"\
- " with Libtool exception or GPL-2+ with Libtool exception"\
+ " or GPL-3+"
self.assertIn(synopsis, rv.data)