diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index e7073a480..043ca3723 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -38,6 +38,11 @@ v34.9.4 (unreleased)
- Add a "TODOS" sheet containing on REQUIRES_REVIEW resources in XLSX.
https://github.com/aboutcode-org/scancode.io/issues/1524
+- Improve XLSX output for Vulnerabilities.
+ Replace the ``affected_by_vulnerabilities`` field in the PACKAGES and DEPENDENCIES
+ sheets with a dedicated VULNERABILITIES sheet.
+ https://github.com/aboutcode-org/scancode.io/issues/1519
+
v34.9.3 (2024-12-31)
--------------------
diff --git a/scanpipe/models.py b/scanpipe/models.py
index e445a4b36..e419a79e2 100644
--- a/scanpipe/models.py
+++ b/scanpipe/models.py
@@ -3086,12 +3086,15 @@ def with_resources_count(self):
)
return self.annotate(resources_count=count_subquery)
- def only_package_url_fields(self):
+ def only_package_url_fields(self, extra=None):
"""
Only select and return the UUID and PURL fields.
Minimum requirements to render a Package link in the UI.
+ Field names provided in ``extra`` are selected in addition to those.
"""
- return self.only("uuid", *PACKAGE_URL_FIELDS)
+ if not extra:
+ extra = []
+
+ return self.only("uuid", *PACKAGE_URL_FIELDS, *extra)
def filter(self, *args, **kwargs):
"""Add support for using ``package_url`` as a field lookup."""
@@ -3671,7 +3674,9 @@ def as_cyclonedx(self):
class DiscoveredDependencyQuerySet(
- PackageURLQuerySetMixin, VulnerabilityQuerySetMixin, ProjectRelatedQuerySet
+ PackageURLQuerySetMixin,
+ VulnerabilityQuerySetMixin,
+ ProjectRelatedQuerySet,
):
def prefetch_for_serializer(self):
"""
@@ -3692,6 +3697,16 @@ def prefetch_for_serializer(self):
),
)
+ def only_package_url_fields(self, extra=None):
+ """
+ Only select and return the ``dependency_uid`` and PURL fields.
+ Field names provided in ``extra`` are selected in addition to those.
+ """
+ if not extra:
+ extra = []
+
+ return self.only("dependency_uid", *PACKAGE_URL_FIELDS, *extra)
+
class DiscoveredDependency(
ProjectRelatedModel,
diff --git a/scanpipe/pipes/output.py b/scanpipe/pipes/output.py
index e7b5ad408..786fad734 100644
--- a/scanpipe/pipes/output.py
+++ b/scanpipe/pipes/output.py
@@ -101,7 +101,7 @@ def get_queryset(project, model_name):
queryset = querysets.get(model_name)
if project:
- queryset = queryset.filter(project=project)
+ queryset = queryset.project(project)
return queryset
@@ -303,6 +303,14 @@ def to_json(project):
"projectmessage": "MESSAGES",
}
+# Short object type label for each model name.
+# Used by ``add_vulnerabilities_sheet`` to fill the "object_type" column.
+model_name_to_object_type = {
+ "discoveredpackage": "package",
+ "discovereddependency": "dependency",
+ "codebaseresource": "resource",
+ "codebaserelation": "relation",
+ "projectmessage": "message",
+}
+
def queryset_to_xlsx_worksheet(
queryset,
@@ -333,7 +341,7 @@ def queryset_to_xlsx_worksheet(
if prepend_fields:
fields = prepend_fields + fields
- return _add_xlsx_worksheet(
+ return add_xlsx_worksheet(
workbook=workbook,
worksheet_name=worksheet_name,
rows=queryset,
@@ -341,7 +349,7 @@ def queryset_to_xlsx_worksheet(
)
-def _add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
+def add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
"""
Add a new ``worksheet_name`` worksheet to the ``workbook``
``xlsxwriter.Workbook``. Write the iterable of ``rows`` objects using their
@@ -478,6 +486,7 @@ def to_xlsx(project):
"license_detections",
"other_license_detections",
"license_clues",
+ "affected_by_vulnerabilities",
]
if not project.policies_enabled:
@@ -497,17 +506,79 @@ def to_xlsx(project):
queryset_to_xlsx_worksheet(queryset, workbook, exclude_fields)
if layers_data := docker.get_layers_data(project):
- _add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)
+ add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)
- todos_queryset = get_queryset(project, "todos")
- if todos_queryset:
- queryset_to_xlsx_worksheet(
- todos_queryset, workbook, exclude_fields, worksheet_name="TODOS"
- )
+ add_vulnerabilities_sheet(workbook, project)
+ add_todos_sheet(workbook, project, exclude_fields)
return output_file
+def add_vulnerabilities_sheet(workbook, project):
+ """
+ Add a "VULNERABILITIES" worksheet to the ``workbook`` with one row for each
+ entry listed in the ``affected_by_vulnerabilities`` of the ``project``
+ packages and dependencies returned by ``vulnerable()``.
+ The worksheet is only added when there is at least one row to write.
+ """
+ # Only the PURL fields and the vulnerabilities data are selected.
+ vulnerable_packages_queryset = (
+ DiscoveredPackage.objects.project(project)
+ .vulnerable()
+ .only_package_url_fields(extra=["affected_by_vulnerabilities"])
+ .order_by_package_url()
+ )
+ vulnerable_dependencies_queryset = (
+ DiscoveredDependency.objects.project(project)
+ .vulnerable()
+ .only_package_url_fields(extra=["affected_by_vulnerabilities"])
+ .order_by_package_url()
+ )
+ vulnerable_querysets = [
+ vulnerable_packages_queryset,
+ vulnerable_dependencies_queryset,
+ ]
+
+ vulnerability_fields = [
+ "vulnerability_id",
+ "aliases",
+ "summary",
+ "risk_score",
+ "exploitability",
+ "weighted_severity",
+ "resource_url",
+ ]
+ sheet_fields = ["object_type", "package_url"] + vulnerability_fields
+
+ rows = []
+ for queryset in vulnerable_querysets:
+ model_name = queryset.model._meta.model_name
+ object_type = model_name_to_object_type.get(model_name)
+
+ for package in queryset:
+ package_url = package.package_url
+
+ for vulnerability_data in package.affected_by_vulnerabilities:
+ # Fields missing from a vulnerability entry are set to an empty string.
+ row = {
+ "object_type": object_type,
+ "package_url": package_url,
+ **{
+ field_name: vulnerability_data.get(field_name, "")
+ for field_name in vulnerability_fields
+ },
+ }
+ rows.append(row)
+
+ if rows:
+ add_xlsx_worksheet(
+ workbook=workbook,
+ worksheet_name="VULNERABILITIES",
+ rows=rows,
+ fields=sheet_fields,
+ )
+
+
+def add_todos_sheet(workbook, project, exclude_fields):
+ """
+ Add a "TODOS" worksheet to the ``workbook`` from the "todos" queryset of the
+ ``project``, leaving out the ``exclude_fields``.
+ The worksheet is only added when that queryset is not empty.
+ """
+ todos_queryset = get_queryset(project, "todos")
+ if todos_queryset:
+ queryset_to_xlsx_worksheet(
+ todos_queryset, workbook, exclude_fields, worksheet_name="TODOS"
+ )
+
+
def _get_spdx_extracted_licenses(license_expressions):
"""
Generate and return the SPDX `extracted_licenses` from provided
diff --git a/scanpipe/templates/scanpipe/includes/pagination_header_relations.html b/scanpipe/templates/scanpipe/includes/pagination_header_relations.html
index c2ab655f2..a63fc5ddd 100644
--- a/scanpipe/templates/scanpipe/includes/pagination_header_relations.html
+++ b/scanpipe/templates/scanpipe/includes/pagination_header_relations.html
@@ -5,13 +5,13 @@
{{ paginator.count|intcomma }} relations
-
+
Un-mapped to/ resources
-
+
Un-mapped from/ resources
diff --git a/scanpipe/tests/pipes/test_output.py b/scanpipe/tests/pipes/test_output.py
index f244f769b..88d20982b 100644
--- a/scanpipe/tests/pipes/test_output.py
+++ b/scanpipe/tests/pipes/test_output.py
@@ -217,13 +217,13 @@ def test_scanpipe_pipes_outputs_to_xlsx(self):
project=project, path="path/file1.ext", status=flag.REQUIRES_REVIEW
)
- output_file = output.to_xlsx(project=project)
+ with self.assertNumQueries(12):
+ output_file = output.to_xlsx(project=project)
self.assertIn(output_file.name, project.output_root)
# Make sure the output can be generated even if the work_directory was wiped
shutil.rmtree(project.work_directory)
- with self.assertNumQueries(10):
- output_file = output.to_xlsx(project=project)
+ output_file = output.to_xlsx(project=project)
self.assertIn(output_file.name, project.output_root)
workbook = openpyxl.load_workbook(output_file, read_only=True, data_only=True)
@@ -499,7 +499,7 @@ def test_scanpipe_pipes_outputs_to_attribution(self):
class ScanPipeXLSXOutputPipesTest(TestCase):
- def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
+ def test_add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
# This test verifies that we do not truncate long text silently
test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
@@ -532,7 +532,7 @@ def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
if r != x:
self.assertEqual(r[-50:], x)
- def test__add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
+ def test_add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
# This test verifies that we do not truncate long text silently
test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
@@ -736,7 +736,7 @@ def __init__(self, foo):
output_file = test_dir / workbook_name
with xlsxwriter.Workbook(str(output_file)) as workbook:
- output._add_xlsx_worksheet(
+ output.add_xlsx_worksheet(
workbook=workbook,
worksheet_name="packages",
rows=rows,