From 8719906907af2ddfb62094f113e623b0cafe363e Mon Sep 17 00:00:00 2001 From: tdruez Date: Mon, 13 Jan 2025 15:23:17 +0100 Subject: [PATCH 1/3] Improve XLSX output for Vulnerabilities #1519 Signed-off-by: tdruez --- CHANGELOG.rst | 5 ++ scanpipe/models.py | 25 +++++--- scanpipe/pipes/output.py | 89 ++++++++++++++++++++++++++--- scanpipe/tests/pipes/test_output.py | 12 ++-- 4 files changed, 108 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e7073a480..043ca3723 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -38,6 +38,11 @@ v34.9.4 (unreleased) - Add a "TODOS" sheet containing on REQUIRES_REVIEW resources in XLSX. https://github.com/aboutcode-org/scancode.io/issues/1524 +- Improve XLSX output for Vulnerabilities. + Replace the ``affected_by_vulnerabilities`` field in the PACKAGES and DEPENDENCIES + sheets with a dedicated VULNERABILITIES sheet. + https://github.com/aboutcode-org/scancode.io/issues/1519 + v34.9.3 (2024-12-31) -------------------- diff --git a/scanpipe/models.py b/scanpipe/models.py index e445a4b36..dc3bd33b6 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -3071,9 +3071,22 @@ def vulnerable(self): return self.filter(~Q(affected_by_vulnerabilities__in=EMPTY_VALUES)) +class OnlyPackageURLFieldsQuerySetMixin: + def only_package_url_fields(self, extra=None): + """ + Only select and return the UUID and PURL fields. + Minimum requirements to render a Package link in the UI. + """ + if not extra: + extra = [] + + return self.only("pk", *PACKAGE_URL_FIELDS, *extra) + + class DiscoveredPackageQuerySet( VulnerabilityQuerySetMixin, PackageURLQuerySetMixin, + OnlyPackageURLFieldsQuerySetMixin, ComplianceAlertQuerySetMixin, ProjectRelatedQuerySet, ): @@ -3086,13 +3099,6 @@ def with_resources_count(self): ) return self.annotate(resources_count=count_subquery) - def only_package_url_fields(self): - """ - Only select and return the UUID and PURL fields. - Minimum requirements to render a Package link in the UI. - """ - return self.only("uuid", *PACKAGE_URL_FIELDS) - def filter(self, *args, **kwargs): """Add support for using ``package_url`` as a field lookup.""" if purl_str := kwargs.pop("package_url", None): @@ -3671,7 +3677,10 @@ def as_cyclonedx(self): class DiscoveredDependencyQuerySet( - PackageURLQuerySetMixin, VulnerabilityQuerySetMixin, ProjectRelatedQuerySet + PackageURLQuerySetMixin, + OnlyPackageURLFieldsQuerySetMixin, + VulnerabilityQuerySetMixin, + ProjectRelatedQuerySet, ): def prefetch_for_serializer(self): """ diff --git a/scanpipe/pipes/output.py b/scanpipe/pipes/output.py index e7b5ad408..786fad734 100644 --- a/scanpipe/pipes/output.py +++ b/scanpipe/pipes/output.py @@ -101,7 +101,7 @@ def get_queryset(project, model_name): queryset = querysets.get(model_name) if project: - queryset = queryset.filter(project=project) + queryset = queryset.project(project) return queryset @@ -303,6 +303,14 @@ def to_json(project): "projectmessage": "MESSAGES", } +model_name_to_object_type = { + "discoveredpackage": "package", + "discovereddependency": "dependency", + "codebaseresource": "resource", + "codebaserelation": "relation", + "projectmessage": "message", +} + def queryset_to_xlsx_worksheet( queryset, @@ -333,7 +341,7 @@ def queryset_to_xlsx_worksheet( if prepend_fields: fields = prepend_fields + fields - return _add_xlsx_worksheet( + return add_xlsx_worksheet( workbook=workbook, worksheet_name=worksheet_name, rows=queryset, @@ -341,7 +349,7 @@ def queryset_to_xlsx_worksheet( ) -def _add_xlsx_worksheet(workbook, worksheet_name, rows, fields): +def add_xlsx_worksheet(workbook, worksheet_name, rows, fields): """ Add a new ``worksheet_name`` worksheet to the ``workbook`` ``xlsxwriter.Workbook``. Write the iterable of ``rows`` objects using their @@ -478,6 +486,7 @@ def to_xlsx(project): "license_detections", "other_license_detections", "license_clues", + "affected_by_vulnerabilities", ] if not project.policies_enabled: @@ -497,17 +506,79 @@ def to_xlsx(project): queryset_to_xlsx_worksheet(queryset, workbook, exclude_fields) if layers_data := docker.get_layers_data(project): - _add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields) + add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields) - todos_queryset = get_queryset(project, "todos") - if todos_queryset: - queryset_to_xlsx_worksheet( - todos_queryset, workbook, exclude_fields, worksheet_name="TODOS" - ) + add_vulnerabilities_sheet(workbook, project) + add_todos_sheet(workbook, project, exclude_fields) return output_file +def add_vulnerabilities_sheet(workbook, project): + vulnerable_packages_queryset = ( + DiscoveredPackage.objects.project(project) + .vulnerable() + .only_package_url_fields(extra=["affected_by_vulnerabilities"]) + .order_by_package_url() + ) + vulnerable_dependencies_queryset = ( + DiscoveredDependency.objects.project(project) + .vulnerable() + .only_package_url_fields(extra=["affected_by_vulnerabilities"]) + .order_by_package_url() + ) + vulnerable_querysets = [ + vulnerable_packages_queryset, + vulnerable_dependencies_queryset, + ] + + vulnerability_fields = [ + "vulnerability_id", + "aliases", + "summary", + "risk_score", + "exploitability", + "weighted_severity", + "resource_url", + ] + sheet_fields = ["object_type", "package_url"] + vulnerability_fields + + rows = [] + for queryset in vulnerable_querysets: + model_name = queryset.model._meta.model_name + object_type = model_name_to_object_type.get(model_name) + + for package in queryset: + package_url = package.package_url + + for vulnerability_data in package.affected_by_vulnerabilities: + row = { + "object_type": object_type, + "package_url": package_url, + **{ + field_name: vulnerability_data.get(field_name, "") + for field_name in vulnerability_fields + }, + } + rows.append(row) + + if rows: + add_xlsx_worksheet( + workbook=workbook, + worksheet_name="VULNERABILITIES", + rows=rows, + fields=sheet_fields, + ) + + +def add_todos_sheet(workbook, project, exclude_fields): + todos_queryset = get_queryset(project, "todos") + if todos_queryset: + queryset_to_xlsx_worksheet( + todos_queryset, workbook, exclude_fields, worksheet_name="TODOS" + ) + + def _get_spdx_extracted_licenses(license_expressions): """ Generate and return the SPDX `extracted_licenses` from provided diff --git a/scanpipe/tests/pipes/test_output.py b/scanpipe/tests/pipes/test_output.py index f244f769b..88d20982b 100644 --- a/scanpipe/tests/pipes/test_output.py +++ b/scanpipe/tests/pipes/test_output.py @@ -217,13 +217,13 @@ def test_scanpipe_pipes_outputs_to_xlsx(self): project=project, path="path/file1.ext", status=flag.REQUIRES_REVIEW ) - output_file = output.to_xlsx(project=project) + with self.assertNumQueries(12): + output_file = output.to_xlsx(project=project) self.assertIn(output_file.name, project.output_root) # Make sure the output can be generated even if the work_directory was wiped shutil.rmtree(project.work_directory) - with self.assertNumQueries(10): - output_file = output.to_xlsx(project=project) + output_file = output.to_xlsx(project=project) self.assertIn(output_file.name, project.output_root) workbook = openpyxl.load_workbook(output_file, read_only=True, data_only=True) @@ -499,7 +499,7 @@ def test_scanpipe_pipes_outputs_to_attribution(self): class ScanPipeXLSXOutputPipesTest(TestCase): - def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self): + def test_add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self): # This test verifies that we do not truncate long text silently test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test")) @@ -532,7 +532,7 @@ def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self): if r != x: self.assertEqual(r[-50:], x) - def test__add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self): + def test_add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self): # This test verifies that we do not truncate long text silently test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test")) @@ -736,7 +736,7 @@ def __init__(self, foo): output_file = test_dir / workbook_name with xlsxwriter.Workbook(str(output_file)) as workbook: - output._add_xlsx_worksheet( + output.add_xlsx_worksheet( workbook=workbook, worksheet_name="packages", rows=rows, From 10137666df1002207d262e02b7cf1cfde9489dab Mon Sep 17 00:00:00 2001 From: tdruez Date: Mon, 13 Jan 2025 15:25:54 +0100 Subject: [PATCH 2/3] Fix failing test #1519 Signed-off-by: tdruez --- scanpipe/models.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index dc3bd33b6..e419a79e2 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -3071,22 +3071,9 @@ def vulnerable(self): return self.filter(~Q(affected_by_vulnerabilities__in=EMPTY_VALUES)) -class OnlyPackageURLFieldsQuerySetMixin: - def only_package_url_fields(self, extra=None): - """ - Only select and return the UUID and PURL fields. - Minimum requirements to render a Package link in the UI. - """ - if not extra: - extra = [] - - return self.only("pk", *PACKAGE_URL_FIELDS, *extra) - - class DiscoveredPackageQuerySet( VulnerabilityQuerySetMixin, PackageURLQuerySetMixin, - OnlyPackageURLFieldsQuerySetMixin, ComplianceAlertQuerySetMixin, ProjectRelatedQuerySet, ): @@ -3099,6 +3086,16 @@ def with_resources_count(self): ) return self.annotate(resources_count=count_subquery) + def only_package_url_fields(self, extra=None): + """ + Only select and return the UUID and PURL fields. + Minimum requirements to render a Package link in the UI. + """ + if not extra: + extra = [] + + return self.only("uuid", *PACKAGE_URL_FIELDS, *extra) + def filter(self, *args, **kwargs): """Add support for using ``package_url`` as a field lookup.""" if purl_str := kwargs.pop("package_url", None): @@ -3678,7 +3675,6 @@ def as_cyclonedx(self): class DiscoveredDependencyQuerySet( PackageURLQuerySetMixin, - OnlyPackageURLFieldsQuerySetMixin, VulnerabilityQuerySetMixin, ProjectRelatedQuerySet, ): @@ -3701,6 +3697,16 @@ def prefetch_for_serializer(self): ), ) + def only_package_url_fields(self, extra=None): + """ + Only select and return the UUID and PURL fields. + Minimum requirements to render a Package link in the UI. + """ + if not extra: + extra = [] + + return self.only("dependency_uid", *PACKAGE_URL_FIELDS, *extra) + class DiscoveredDependency( ProjectRelatedModel, From 74a110776449b634a89da6601e32e34d3899874f Mon Sep 17 00:00:00 2001 From: tdruez Date: Mon, 13 Jan 2025 15:26:24 +0100 Subject: [PATCH 3/3] Fix the "Un-mapped to/from resources" links #1519 Signed-off-by: tdruez --- .../scanpipe/includes/pagination_header_relations.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scanpipe/templates/scanpipe/includes/pagination_header_relations.html b/scanpipe/templates/scanpipe/includes/pagination_header_relations.html index c2ab655f2..a63fc5ddd 100644 --- a/scanpipe/templates/scanpipe/includes/pagination_header_relations.html +++ b/scanpipe/templates/scanpipe/includes/pagination_header_relations.html @@ -5,13 +5,13 @@ {{ paginator.count|intcomma }} relations - + Un-mapped to/ resources - + Un-mapped from/ resources