Improve XLSX output for Vulnerabilities #1519

Signed-off-by: tdruez <[email protected]>
aboutcode-org · Jan 13, 2025 · 8719906 · 8719906
1 parent 07a0022
commit 8719906
Show file tree

Hide file tree

Showing 4 changed files with 108 additions and 23 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -38,6 +38,11 @@ v34.9.4 (unreleased)
 - Add a "TODOS" sheet containing on REQUIRES_REVIEW resources in XLSX.
   https://github.com/aboutcode-org/scancode.io/issues/1524
 
+- Improve XLSX output for Vulnerabilities.
+  Replace the ``affected_by_vulnerabilities`` field in the PACKAGES and DEPENDENCIES
+  sheets with a dedicated VULNERABILITIES sheet.
+  https://github.com/aboutcode-org/scancode.io/issues/1519
+
 v34.9.3 (2024-12-31)
 --------------------
 

diff --git a/scanpipe/models.py b/scanpipe/models.py
@@ -3071,9 +3071,22 @@ def vulnerable(self):
         return self.filter(~Q(affected_by_vulnerabilities__in=EMPTY_VALUES))
 
 
+class OnlyPackageURLFieldsQuerySetMixin:
+    def only_package_url_fields(self, extra=None):
+        """
+        Only select the primary key, the PURL fields, and any ``extra`` fields.
+        Minimum requirements to render a Package link in the UI.
+        """
+        if not extra:
+            extra = []
+
+        return self.only("pk", *PACKAGE_URL_FIELDS, *extra)
+
+
 class DiscoveredPackageQuerySet(
     VulnerabilityQuerySetMixin,
     PackageURLQuerySetMixin,
+    OnlyPackageURLFieldsQuerySetMixin,
     ComplianceAlertQuerySetMixin,
     ProjectRelatedQuerySet,
 ):
@@ -3086,13 +3099,6 @@ def with_resources_count(self):
         )
         return self.annotate(resources_count=count_subquery)
 
-    def only_package_url_fields(self):
-        """
-        Only select and return the UUID and PURL fields.
-        Minimum requirements to render a Package link in the UI.
-        """
-        return self.only("uuid", *PACKAGE_URL_FIELDS)
-
     def filter(self, *args, **kwargs):
         """Add support for using ``package_url`` as a field lookup."""
         if purl_str := kwargs.pop("package_url", None):
@@ -3671,7 +3677,10 @@ def as_cyclonedx(self):
 
 
 class DiscoveredDependencyQuerySet(
-    PackageURLQuerySetMixin, VulnerabilityQuerySetMixin, ProjectRelatedQuerySet
+    PackageURLQuerySetMixin,
+    OnlyPackageURLFieldsQuerySetMixin,
+    VulnerabilityQuerySetMixin,
+    ProjectRelatedQuerySet,
 ):
     def prefetch_for_serializer(self):
         """

diff --git a/scanpipe/pipes/output.py b/scanpipe/pipes/output.py
@@ -101,7 +101,7 @@ def get_queryset(project, model_name):
 
     queryset = querysets.get(model_name)
     if project:
-        queryset = queryset.filter(project=project)
+        queryset = queryset.project(project)
 
     return queryset
 
@@ -303,6 +303,14 @@ def to_json(project):
     "projectmessage": "MESSAGES",
 }
 
+model_name_to_object_type = {
+    "discoveredpackage": "package",
+    "discovereddependency": "dependency",
+    "codebaseresource": "resource",
+    "codebaserelation": "relation",
+    "projectmessage": "message",
+}
+
 
 def queryset_to_xlsx_worksheet(
     queryset,
@@ -333,15 +341,15 @@ def queryset_to_xlsx_worksheet(
     if prepend_fields:
         fields = prepend_fields + fields
 
-    return _add_xlsx_worksheet(
+    return add_xlsx_worksheet(
         workbook=workbook,
         worksheet_name=worksheet_name,
         rows=queryset,
         fields=fields,
     )
 
 
-def _add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
+def add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
     """
     Add a new ``worksheet_name`` worksheet to the ``workbook``
     ``xlsxwriter.Workbook``. Write the iterable of ``rows`` objects using their
@@ -478,6 +486,7 @@ def to_xlsx(project):
         "license_detections",
         "other_license_detections",
         "license_clues",
+        "affected_by_vulnerabilities",
     ]
 
     if not project.policies_enabled:
@@ -497,17 +506,79 @@ def to_xlsx(project):
             queryset_to_xlsx_worksheet(queryset, workbook, exclude_fields)
 
         if layers_data := docker.get_layers_data(project):
-            _add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)
+            add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)
 
-        todos_queryset = get_queryset(project, "todos")
-        if todos_queryset:
-            queryset_to_xlsx_worksheet(
-                todos_queryset, workbook, exclude_fields, worksheet_name="TODOS"
-            )
+        add_vulnerabilities_sheet(workbook, project)
+        add_todos_sheet(workbook, project, exclude_fields)
 
     return output_file
 
 
+def add_vulnerabilities_sheet(workbook, project):
+    vulnerable_packages_queryset = (
+        DiscoveredPackage.objects.project(project)
+        .vulnerable()
+        .only_package_url_fields(extra=["affected_by_vulnerabilities"])
+        .order_by_package_url()
+    )
+    vulnerable_dependencies_queryset = (
+        DiscoveredDependency.objects.project(project)
+        .vulnerable()
+        .only_package_url_fields(extra=["affected_by_vulnerabilities"])
+        .order_by_package_url()
+    )
+    vulnerable_querysets = [
+        vulnerable_packages_queryset,
+        vulnerable_dependencies_queryset,
+    ]
+
+    vulnerability_fields = [
+        "vulnerability_id",
+        "aliases",
+        "summary",
+        "risk_score",
+        "exploitability",
+        "weighted_severity",
+        "resource_url",
+    ]
+    sheet_fields = ["object_type", "package_url"] + vulnerability_fields
+
+    rows = []
+    for queryset in vulnerable_querysets:
+        model_name = queryset.model._meta.model_name
+        object_type = model_name_to_object_type.get(model_name)
+
+        for package in queryset:
+            package_url = package.package_url
+
+            for vulnerability_data in package.affected_by_vulnerabilities:
+                row = {
+                    "object_type": object_type,
+                    "package_url": package_url,
+                    **{
+                        field_name: vulnerability_data.get(field_name, "")
+                        for field_name in vulnerability_fields
+                    },
+                }
+                rows.append(row)
+
+    if rows:
+        add_xlsx_worksheet(
+            workbook=workbook,
+            worksheet_name="VULNERABILITIES",
+            rows=rows,
+            fields=sheet_fields,
+        )
+
+
+def add_todos_sheet(workbook, project, exclude_fields):
+    todos_queryset = get_queryset(project, "todos")
+    if todos_queryset:
+        queryset_to_xlsx_worksheet(
+            todos_queryset, workbook, exclude_fields, worksheet_name="TODOS"
+        )
+
+
 def _get_spdx_extracted_licenses(license_expressions):
     """
     Generate and return the SPDX `extracted_licenses` from provided

diff --git a/scanpipe/tests/pipes/test_output.py b/scanpipe/tests/pipes/test_output.py
@@ -217,13 +217,13 @@ def test_scanpipe_pipes_outputs_to_xlsx(self):
             project=project, path="path/file1.ext", status=flag.REQUIRES_REVIEW
         )
 
-        output_file = output.to_xlsx(project=project)
+        with self.assertNumQueries(12):
+            output_file = output.to_xlsx(project=project)
         self.assertIn(output_file.name, project.output_root)
 
         # Make sure the output can be generated even if the work_directory was wiped
         shutil.rmtree(project.work_directory)
-        with self.assertNumQueries(10):
-            output_file = output.to_xlsx(project=project)
+        output_file = output.to_xlsx(project=project)
         self.assertIn(output_file.name, project.output_root)
 
         workbook = openpyxl.load_workbook(output_file, read_only=True, data_only=True)
@@ -499,7 +499,7 @@ def test_scanpipe_pipes_outputs_to_attribution(self):
 
 
 class ScanPipeXLSXOutputPipesTest(TestCase):
-    def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
+    def test_add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
         # This test verifies that we do not truncate long text silently
 
         test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
@@ -532,7 +532,7 @@ def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
             if r != x:
                 self.assertEqual(r[-50:], x)
 
-    def test__add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
+    def test_add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
         # This test verifies that we do not truncate long text silently
 
         test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
@@ -736,7 +736,7 @@ def __init__(self, foo):
 
     output_file = test_dir / workbook_name
     with xlsxwriter.Workbook(str(output_file)) as workbook:
-        output._add_xlsx_worksheet(
+        output.add_xlsx_worksheet(
             workbook=workbook,
             worksheet_name="packages",
             rows=rows,