Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve XLSX output for Vulnerabilities #1519 #1531

Merged
merged 3 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ v34.9.4 (unreleased)
- Add a "TODOS" sheet containing the REQUIRES_REVIEW resources in XLSX.
https://github.com/aboutcode-org/scancode.io/issues/1524

- Improve XLSX output for Vulnerabilities.
Replace the ``affected_by_vulnerabilities`` field in the PACKAGES and DEPENDENCIES
sheets with a dedicated VULNERABILITIES sheet.
https://github.com/aboutcode-org/scancode.io/issues/1519

v34.9.3 (2024-12-31)
--------------------

Expand Down
21 changes: 18 additions & 3 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3086,12 +3086,15 @@ def with_resources_count(self):
)
return self.annotate(resources_count=count_subquery)

def only_package_url_fields(self):
def only_package_url_fields(self, extra=None):
"""
Only select and return the UUID and PURL fields.
Minimum requirements to render a Package link in the UI.
"""
return self.only("uuid", *PACKAGE_URL_FIELDS)
if not extra:
extra = []

return self.only("uuid", *PACKAGE_URL_FIELDS, *extra)

def filter(self, *args, **kwargs):
"""Add support for using ``package_url`` as a field lookup."""
Expand Down Expand Up @@ -3671,7 +3674,9 @@ def as_cyclonedx(self):


class DiscoveredDependencyQuerySet(
PackageURLQuerySetMixin, VulnerabilityQuerySetMixin, ProjectRelatedQuerySet
PackageURLQuerySetMixin,
VulnerabilityQuerySetMixin,
ProjectRelatedQuerySet,
):
def prefetch_for_serializer(self):
"""
Expand All @@ -3692,6 +3697,16 @@ def prefetch_for_serializer(self):
),
)

def only_package_url_fields(self, extra=None):
    """
    Limit the loaded fields to ``dependency_uid`` and the Package URL fields,
    plus any additional field names provided in ``extra``.
    """
    extra_fields = extra or []
    return self.only("dependency_uid", *PACKAGE_URL_FIELDS, *extra_fields)


class DiscoveredDependency(
ProjectRelatedModel,
Expand Down
89 changes: 80 additions & 9 deletions scanpipe/pipes/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def get_queryset(project, model_name):

queryset = querysets.get(model_name)
if project:
queryset = queryset.filter(project=project)
queryset = queryset.project(project)

return queryset

Expand Down Expand Up @@ -303,6 +303,14 @@ def to_json(project):
"projectmessage": "MESSAGES",
}

# Map a model name, as exposed by ``Model._meta.model_name``, to the short
# label written in the "object_type" column of the XLSX VULNERABILITIES sheet.
model_name_to_object_type = {
"discoveredpackage": "package",
"discovereddependency": "dependency",
"codebaseresource": "resource",
"codebaserelation": "relation",
"projectmessage": "message",
}


def queryset_to_xlsx_worksheet(
queryset,
Expand Down Expand Up @@ -333,15 +341,15 @@ def queryset_to_xlsx_worksheet(
if prepend_fields:
fields = prepend_fields + fields

return _add_xlsx_worksheet(
return add_xlsx_worksheet(
workbook=workbook,
worksheet_name=worksheet_name,
rows=queryset,
fields=fields,
)


def _add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
def add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
"""
Add a new ``worksheet_name`` worksheet to the ``workbook``
``xlsxwriter.Workbook``. Write the iterable of ``rows`` objects using their
Expand Down Expand Up @@ -478,6 +486,7 @@ def to_xlsx(project):
"license_detections",
"other_license_detections",
"license_clues",
"affected_by_vulnerabilities",
]

if not project.policies_enabled:
Expand All @@ -497,17 +506,79 @@ def to_xlsx(project):
queryset_to_xlsx_worksheet(queryset, workbook, exclude_fields)

if layers_data := docker.get_layers_data(project):
_add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)
add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)

todos_queryset = get_queryset(project, "todos")
if todos_queryset:
queryset_to_xlsx_worksheet(
todos_queryset, workbook, exclude_fields, worksheet_name="TODOS"
)
add_vulnerabilities_sheet(workbook, project)
add_todos_sheet(workbook, project, exclude_fields)

return output_file


def add_vulnerabilities_sheet(workbook, project):
    """
    Add a "VULNERABILITIES" worksheet to the ``workbook`` for the ``project``.

    One row is written for each entry found in the ``affected_by_vulnerabilities``
    data of the project packages and dependencies returned by ``vulnerable()``.
    The worksheet is not created when there are no rows to write.
    """
    vulnerability_fields = [
        "vulnerability_id",
        "aliases",
        "summary",
        "risk_score",
        "exploitability",
        "weighted_severity",
        "resource_url",
    ]
    sheet_fields = ["object_type", "package_url", *vulnerability_fields]

    # Packages are listed first, then dependencies. The selected fields are
    # restricted through ``only_package_url_fields`` plus the vulnerabilities data.
    vulnerable_querysets = [
        model_class.objects.project(project)
        .vulnerable()
        .only_package_url_fields(extra=["affected_by_vulnerabilities"])
        .order_by_package_url()
        for model_class in (DiscoveredPackage, DiscoveredDependency)
    ]

    rows = []
    for queryset in vulnerable_querysets:
        model_name = queryset.model._meta.model_name
        object_type = model_name_to_object_type.get(model_name)

        for instance in queryset:
            purl = instance.package_url
            # Missing vulnerability keys are written as empty strings.
            rows.extend(
                {
                    "object_type": object_type,
                    "package_url": purl,
                    **{name: entry.get(name, "") for name in vulnerability_fields},
                }
                for entry in instance.affected_by_vulnerabilities
            )

    if not rows:
        return

    add_xlsx_worksheet(
        workbook=workbook,
        worksheet_name="VULNERABILITIES",
        rows=rows,
        fields=sheet_fields,
    )


def add_todos_sheet(workbook, project, exclude_fields):
    """
    Add a "TODOS" worksheet to the ``workbook`` from the ``project`` "todos"
    queryset. Nothing is added when that queryset is empty.
    """
    todos = get_queryset(project, "todos")
    if not todos:
        return

    queryset_to_xlsx_worksheet(
        todos,
        workbook,
        exclude_fields,
        worksheet_name="TODOS",
    )


def _get_spdx_extracted_licenses(license_expressions):
"""
Generate and return the SPDX `extracted_licenses` from provided
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
<span style="vertical-align: middle">
{{ paginator.count|intcomma }} relations
</span>
<a href="{% url 'project_resources' project.slug %}?tag=to&relation_map_type=none&status=_EMPTY_" target="_blank" class="button is-small is-info is-outlined">
<a href="{% url 'project_resources' project.slug %}?tag=to&relation_map_type=none" target="_blank" class="button is-small is-info is-outlined">
<span>Un-mapped <strong>to/</strong> resources</span>
<span class="icon">
<i class="fa-solid fa-external-link-alt"></i>
</span>
</a>
<a href="{% url 'project_resources' project.slug %}?tag=from&relation_map_type=none&status=_EMPTY_" target="_blank" class="button is-small is-info is-outlined">
<a href="{% url 'project_resources' project.slug %}?tag=from&relation_map_type=none" target="_blank" class="button is-small is-info is-outlined">
<span>Un-mapped <strong>from/</strong> resources</span>
<span class="icon">
<i class="fa-solid fa-external-link-alt"></i>
Expand Down
12 changes: 6 additions & 6 deletions scanpipe/tests/pipes/test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,13 @@ def test_scanpipe_pipes_outputs_to_xlsx(self):
project=project, path="path/file1.ext", status=flag.REQUIRES_REVIEW
)

output_file = output.to_xlsx(project=project)
with self.assertNumQueries(12):
output_file = output.to_xlsx(project=project)
self.assertIn(output_file.name, project.output_root)

# Make sure the output can be generated even if the work_directory was wiped
shutil.rmtree(project.work_directory)
with self.assertNumQueries(10):
output_file = output.to_xlsx(project=project)
output_file = output.to_xlsx(project=project)
self.assertIn(output_file.name, project.output_root)

workbook = openpyxl.load_workbook(output_file, read_only=True, data_only=True)
Expand Down Expand Up @@ -499,7 +499,7 @@ def test_scanpipe_pipes_outputs_to_attribution(self):


class ScanPipeXLSXOutputPipesTest(TestCase):
def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
def test_add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
# This test verifies that we do not truncate long text silently

test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
Expand Down Expand Up @@ -532,7 +532,7 @@ def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
if r != x:
self.assertEqual(r[-50:], x)

def test__add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
def test_add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
# This test verifies that we do not truncate long text silently

test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
Expand Down Expand Up @@ -736,7 +736,7 @@ def __init__(self, foo):

output_file = test_dir / workbook_name
with xlsxwriter.Workbook(str(output_file)) as workbook:
output._add_xlsx_worksheet(
output.add_xlsx_worksheet(
workbook=workbook,
worksheet_name="packages",
rows=rows,
Expand Down