Skip to content

Commit

Permalink
Improve XLSX output for Vulnerabilities #1519
Browse files Browse the repository at this point in the history
Signed-off-by: tdruez <[email protected]>
  • Loading branch information
tdruez committed Jan 13, 2025
1 parent 07a0022 commit 8719906
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 23 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ v34.9.4 (unreleased)
- Add a "TODOS" sheet containing only REQUIRES_REVIEW resources in XLSX.
https://github.com/aboutcode-org/scancode.io/issues/1524

- Improve XLSX output for Vulnerabilities.
Replace the ``affected_by_vulnerabilities`` field in the PACKAGES and DEPENDENCIES
sheets with a dedicated VULNERABILITIES sheet.
https://github.com/aboutcode-org/scancode.io/issues/1519

v34.9.3 (2024-12-31)
--------------------

Expand Down
25 changes: 17 additions & 8 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3071,9 +3071,22 @@ def vulnerable(self):
return self.filter(~Q(affected_by_vulnerabilities__in=EMPTY_VALUES))


class OnlyPackageURLFieldsQuerySetMixin:
    """QuerySet mixin providing the ``only_package_url_fields`` shortcut."""

    def only_package_url_fields(self, extra=None):
        """
        Restrict the loaded fields to the primary key, the Package URL fields,
        and any additional field names provided in ``extra``.
        Minimum requirements to render a Package link in the UI.
        """
        # ``None`` or an empty value means no additional fields.
        extra_fields = extra or []
        return self.only("pk", *PACKAGE_URL_FIELDS, *extra_fields)


class DiscoveredPackageQuerySet(
VulnerabilityQuerySetMixin,
PackageURLQuerySetMixin,
OnlyPackageURLFieldsQuerySetMixin,
ComplianceAlertQuerySetMixin,
ProjectRelatedQuerySet,
):
Expand All @@ -3086,13 +3099,6 @@ def with_resources_count(self):
)
return self.annotate(resources_count=count_subquery)

def only_package_url_fields(self):
"""
Only select and return the UUID and PURL fields.
Minimum requirements to render a Package link in the UI.
"""
return self.only("uuid", *PACKAGE_URL_FIELDS)

def filter(self, *args, **kwargs):
"""Add support for using ``package_url`` as a field lookup."""
if purl_str := kwargs.pop("package_url", None):
Expand Down Expand Up @@ -3671,7 +3677,10 @@ def as_cyclonedx(self):


class DiscoveredDependencyQuerySet(
PackageURLQuerySetMixin, VulnerabilityQuerySetMixin, ProjectRelatedQuerySet
PackageURLQuerySetMixin,
OnlyPackageURLFieldsQuerySetMixin,
VulnerabilityQuerySetMixin,
ProjectRelatedQuerySet,
):
def prefetch_for_serializer(self):
"""
Expand Down
89 changes: 80 additions & 9 deletions scanpipe/pipes/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def get_queryset(project, model_name):

queryset = querysets.get(model_name)
if project:
queryset = queryset.filter(project=project)
queryset = queryset.project(project)

return queryset

Expand Down Expand Up @@ -303,6 +303,14 @@ def to_json(project):
"projectmessage": "MESSAGES",
}

# Map a lowercase model name (as returned by ``Model._meta.model_name``) to the
# short label written in the ``object_type`` column of the VULNERABILITIES
# sheet by ``add_vulnerabilities_sheet``.
model_name_to_object_type = {
"discoveredpackage": "package",
"discovereddependency": "dependency",
"codebaseresource": "resource",
"codebaserelation": "relation",
"projectmessage": "message",
}


def queryset_to_xlsx_worksheet(
queryset,
Expand Down Expand Up @@ -333,15 +341,15 @@ def queryset_to_xlsx_worksheet(
if prepend_fields:
fields = prepend_fields + fields

return _add_xlsx_worksheet(
return add_xlsx_worksheet(
workbook=workbook,
worksheet_name=worksheet_name,
rows=queryset,
fields=fields,
)


def _add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
def add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
"""
Add a new ``worksheet_name`` worksheet to the ``workbook``
``xlsxwriter.Workbook``. Write the iterable of ``rows`` objects using their
Expand Down Expand Up @@ -478,6 +486,7 @@ def to_xlsx(project):
"license_detections",
"other_license_detections",
"license_clues",
"affected_by_vulnerabilities",
]

if not project.policies_enabled:
Expand All @@ -497,17 +506,79 @@ def to_xlsx(project):
queryset_to_xlsx_worksheet(queryset, workbook, exclude_fields)

if layers_data := docker.get_layers_data(project):
_add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)
add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)

todos_queryset = get_queryset(project, "todos")
if todos_queryset:
queryset_to_xlsx_worksheet(
todos_queryset, workbook, exclude_fields, worksheet_name="TODOS"
)
add_vulnerabilities_sheet(workbook, project)
add_todos_sheet(workbook, project, exclude_fields)

return output_file


def add_vulnerabilities_sheet(workbook, project):
    """
    Add a "VULNERABILITIES" worksheet to the ``workbook`` with one row for each
    entry of ``affected_by_vulnerabilities`` found on the vulnerable packages
    and dependencies of the ``project``.
    The worksheet is not added when no such entry exists.
    """
    vulnerability_fields = [
        "vulnerability_id",
        "aliases",
        "summary",
        "risk_score",
        "exploitability",
        "weighted_severity",
        "resource_url",
    ]
    sheet_fields = ["object_type", "package_url", *vulnerability_fields]

    rows = []
    # Packages first, then dependencies, so the rows keep that order.
    for model_class in (DiscoveredPackage, DiscoveredDependency):
        vulnerable_queryset = (
            model_class.objects.project(project)
            .vulnerable()
            .only_package_url_fields(extra=["affected_by_vulnerabilities"])
            .order_by_package_url()
        )
        object_type = model_name_to_object_type.get(
            vulnerable_queryset.model._meta.model_name
        )

        for vulnerable_object in vulnerable_queryset:
            purl = vulnerable_object.package_url

            for entry in vulnerable_object.affected_by_vulnerabilities:
                # Keys missing from the vulnerability entry become empty cells.
                values = {name: entry.get(name, "") for name in vulnerability_fields}
                rows.append(
                    {
                        "object_type": object_type,
                        "package_url": purl,
                        **values,
                    }
                )

    if not rows:
        return

    add_xlsx_worksheet(
        workbook=workbook,
        worksheet_name="VULNERABILITIES",
        rows=rows,
        fields=sheet_fields,
    )


def add_todos_sheet(workbook, project, exclude_fields):
    """
    Add a "TODOS" worksheet to the ``workbook`` from the "todos" queryset of
    the ``project``, passing ``exclude_fields`` through to the worksheet writer.
    Nothing is added when that queryset is falsy.
    """
    todos_queryset = get_queryset(project, "todos")
    if not todos_queryset:
        return

    queryset_to_xlsx_worksheet(
        todos_queryset,
        workbook,
        exclude_fields,
        worksheet_name="TODOS",
    )


def _get_spdx_extracted_licenses(license_expressions):
"""
Generate and return the SPDX `extracted_licenses` from provided
Expand Down
12 changes: 6 additions & 6 deletions scanpipe/tests/pipes/test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,13 @@ def test_scanpipe_pipes_outputs_to_xlsx(self):
project=project, path="path/file1.ext", status=flag.REQUIRES_REVIEW
)

output_file = output.to_xlsx(project=project)
with self.assertNumQueries(12):
output_file = output.to_xlsx(project=project)
self.assertIn(output_file.name, project.output_root)

# Make sure the output can be generated even if the work_directory was wiped
shutil.rmtree(project.work_directory)
with self.assertNumQueries(10):
output_file = output.to_xlsx(project=project)
output_file = output.to_xlsx(project=project)
self.assertIn(output_file.name, project.output_root)

workbook = openpyxl.load_workbook(output_file, read_only=True, data_only=True)
Expand Down Expand Up @@ -499,7 +499,7 @@ def test_scanpipe_pipes_outputs_to_attribution(self):


class ScanPipeXLSXOutputPipesTest(TestCase):
def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
def test_add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
# This test verifies that we do not truncate long text silently

test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
Expand Down Expand Up @@ -532,7 +532,7 @@ def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
if r != x:
self.assertEqual(r[-50:], x)

def test__add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
def test_add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
# This test verifies that we do not truncate long text silently

test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
Expand Down Expand Up @@ -736,7 +736,7 @@ def __init__(self, foo):

output_file = test_dir / workbook_name
with xlsxwriter.Workbook(str(output_file)) as workbook:
output._add_xlsx_worksheet(
output.add_xlsx_worksheet(
workbook=workbook,
worksheet_name="packages",
rows=rows,
Expand Down

0 comments on commit 8719906

Please sign in to comment.