diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 54d7dd356..0e930bb49 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -47,6 +47,10 @@ v34.9.4 (unreleased) sheets with a dedicated VULNERABILITIES sheet. https://github.com/aboutcode-org/scancode.io/issues/1519 +- Add a ``report`` management command that allows generating XLSX reports for + multiple projects at once using labels and searching by project name. + https://github.com/aboutcode-org/scancode.io/issues/1524 + v34.9.3 (2024-12-31) -------------------- diff --git a/docs/command-line-interface.rst b/docs/command-line-interface.rst index ef16047a8..28875183b 100644 --- a/docs/command-line-interface.rst +++ b/docs/command-line-interface.rst @@ -68,6 +68,7 @@ ScanPipe's own commands are listed under the ``[scanpipe]`` section:: list-project output purldb-scan-worker + report reset-project run show-pipeline @@ -174,6 +175,10 @@ Required arguments (one of): | project-2 | pkg:deb/debian/curl@7.50.3 | +----------------+---------------------------------+ +.. tip:: + In place of a local path, a download URL to the CSV file is supported for the + ``--input-list`` argument. + Optional arguments: - ``--project-name-suffix`` Optional custom suffix to append to project names. @@ -194,14 +199,15 @@ Optional arguments: Example: Processing Multiple Docker Images ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Assume multiple Docker images are available in a directory named ``local-data/`` on +Suppose you have multiple Docker images stored in a directory named ``local-data/`` on the host machine. 
-To process these images with the ``analyze_docker_image`` pipeline using asynchronous -execution:: +To process these images using the ``analyze_docker_image`` pipeline with asynchronous +execution, you can use this command:: $ docker compose run --rm \ - --volume local-data/:/input-data:ro \ - web scanpipe batch-create input-data/ \ + --volume local-data/:/input-data/:ro \ + web scanpipe batch-create \ + --input-directory /input-data/ \ --pipeline analyze_docker_image \ --label "Docker" \ --execute --async @@ -224,6 +230,19 @@ Each Docker image in the ``local-data/`` directory will result in the creation o project with the specified pipeline (``analyze_docker_image``) executed by worker services. +Example: Processing Multiple Develop to Deploy Mapping +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To process an input list CSV file with the ``map_deploy_to_develop`` pipeline using +asynchronous execution:: + + $ docker compose run --rm \ + web scanpipe batch-create \ + --input-list https://url/input_list.csv \ + --pipeline map_deploy_to_develop \ + --label "d2d_mapping" \ + --execute --async + `$ scanpipe list-pipeline [--verbosity {0,1,2,3}]` -------------------------------------------------- @@ -375,6 +394,46 @@ your outputs on the host machine when running with Docker. .. tip:: To specify a CycloneDX spec version (default to latest), use the syntax ``cyclonedx:VERSION`` as format value. For example: ``--format cyclonedx:1.5``. +.. _cli_report: + +`$ scanpipe report --sheet SHEET` +--------------------------------- + +Generates an XLSX report of selected projects based on the provided criteria. + +Required arguments: + +- ``--sheet {package,dependency,resource,relation,message,todo}`` + Specifies the sheet to include in the XLSX report. Available choices are based on + predefined object types. + +Optional arguments: + +- ``--output-directory OUTPUT_DIRECTORY`` + The path to the directory where the report file will be created. 
If not provided, + the report file will be created in the current working directory. + +- ``--search SEARCH`` + Filter projects by searching for the provided string in their name. + +- ``--label LABELS`` + Filter projects by the provided label(s). Multiple labels can be provided by using + this argument multiple times. + +.. note:: + Either ``--label`` or ``--search`` must be provided to select projects. + +Example usage: + +1. Generate a report for all projects tagged with "d2d" and include the **TODOS** +worksheet:: + + $ scanpipe report --sheet todo --label d2d + +2. Generate a report for projects whose names contain the word "audit" and include the +**PACKAGES** worksheet:: + + $ scanpipe report --sheet package --search audit .. _cli_check_compliance: diff --git a/scanpipe/forms.py b/scanpipe/forms.py index f64d0bf1c..35d638784 100644 --- a/scanpipe/forms.py +++ b/scanpipe/forms.py @@ -295,7 +295,7 @@ class ProjectReportForm(BaseProjectActionForm): ("codebaseresource", "Resources"), ("codebaserelation", "Relations"), ("projectmessage", "Messages"), - ("todos", "TODOs"), + ("todo", "TODOs"), ], required=True, initial="discoveredpackage", diff --git a/scanpipe/management/commands/batch-create.py b/scanpipe/management/commands/batch-create.py index 47b5c36c5..cf56d5aa4 100644 --- a/scanpipe/management/commands/batch-create.py +++ b/scanpipe/management/commands/batch-create.py @@ -27,8 +27,11 @@ from django.core.management import CommandError from django.core.management.base import BaseCommand +import requests + from scanpipe.management.commands import CreateProjectCommandMixin from scanpipe.management.commands import PipelineCommandMixin +from scanpipe.pipes import fetch class Command(CreateProjectCommandMixin, PipelineCommandMixin, BaseCommand): @@ -54,7 +57,8 @@ def add_arguments(self, parser): "Path to a CSV file with project names and input URLs. 
" "The first column must contain project names, and the second column " "should list comma-separated input URLs (e.g., Download URL, PURL, or " - "Docker reference)." + "Docker reference). " + "In place of a local path, a download URL to the CSV file is supported." ), ) parser.add_argument( @@ -110,7 +114,16 @@ def handle_input_directory(self, **options): self.created_project_count += 1 def handle_input_list(self, **options): - input_file = Path(options["input_list"]) + input_file = options["input_list"] + + if input_file.startswith("http"): + try: + download = fetch.fetch_http(input_file) + except requests.exceptions.RequestException as e: + raise CommandError(e) + input_file = download.path + + input_file = Path(input_file) if not input_file.exists(): raise CommandError(f"The {input_file} file does not exist.") diff --git a/scanpipe/management/commands/report.py b/scanpipe/management/commands/report.py new file mode 100644 index 000000000..f2912ae71 --- /dev/null +++ b/scanpipe/management/commands/report.py @@ -0,0 +1,121 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/aboutcode-org/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. 
class Command(BaseCommand):
    """
    Generate a single-worksheet XLSX report covering several projects.

    Projects are selected with ``--label`` and/or ``--search``. The rows of the
    requested ``--sheet`` for every selected project are written to one
    worksheet, with a leading ``project`` column.
    """

    help = "Report of selected projects."

    def add_arguments(self, parser):
        """Register the project selection and output options on ``parser``."""
        super().add_arguments(parser)
        parser.add_argument(
            "--output-directory",
            help=(
                "The path to the directory where the report file will be created. "
                "If not provided, the report file will be created in the current "
                "working directory."
            ),
        )
        parser.add_argument(
            "--sheet",
            required=True,
            choices=list(output.object_type_to_model_name.keys()),
            help="Specifies the sheet to include in the XLSX report.",
        )
        parser.add_argument(
            "--search",
            help="Select projects searching for the provided string in their name.",
        )
        parser.add_argument(
            "--label",
            action="append",
            dest="labels",
            default=[],
            help=(
                "Filter projects by the provided label(s). Multiple labels can be "
                "provided by using this argument multiple times."
            ),
        )

    def handle(self, *args, **options):
        """
        Build the report file for the projects matching the provided criteria.

        Raises ``CommandError`` when neither ``--label`` nor ``--search`` is
        provided, when ``--output-directory`` is not an existing directory, or
        when no project matches the criteria.
        """
        start_time = timer()
        self.verbosity = options["verbosity"]

        output_directory = options["output_directory"]
        labels = options["labels"]
        search = options["search"]
        sheet = options["sheet"]
        model_name = output.object_type_to_model_name.get(sheet)

        if not (labels or search):
            raise CommandError(
                "You must provide either --label or --search to select projects."
            )

        # Resolve the destination before querying so that an unusable directory
        # is reported as a clean command error and not as a failure raised while
        # the workbook is being written.
        filename = f"scancodeio-report-{filename_now()}.xlsx"
        if output_directory:
            output_directory = Path(output_directory)
            if not output_directory.is_dir():
                msg = f"The {output_directory} directory does not exist."
                raise CommandError(msg)
            output_file = output_directory / filename
        else:
            output_file = Path(filename)

        project_qs = Project.objects.all()
        if labels:
            # Filtering through the labels relation yields one row per matching
            # label: distinct() prevents a project carrying several of the
            # requested labels from being counted more than once.
            project_qs = project_qs.filter(labels__name__in=labels).distinct()
        if search:
            project_qs = project_qs.filter(name__icontains=search)
        project_count = project_qs.count()

        if not project_count:
            raise CommandError("No projects found for the provided criteria.")

        if self.verbosity > 0:
            msg = f"{project_count} project(s) will be included in the report."
            self.stdout.write(msg, self.style.SUCCESS)

        # ``project=None`` is passed here and the queryset is then restricted
        # to the selected projects.
        worksheet_queryset = output.get_queryset(project=None, model_name=model_name)
        worksheet_queryset = worksheet_queryset.filter(project__in=project_qs)

        # Only the "todo" sheet gets the explicit "TODOS" name; labelling every
        # sheet "TODOS" would mislabel package, resource, ... reports.
        # NOTE(review): assumes ``queryset_to_xlsx_worksheet`` picks a default
        # name when ``worksheet_name`` is None -- confirm against its signature.
        worksheet_name = "TODOS" if sheet == "todo" else None

        with xlsxwriter.Workbook(output_file) as workbook:
            output.queryset_to_xlsx_worksheet(
                worksheet_queryset,
                workbook,
                exclude_fields=output.XLSX_EXCLUDE_FIELDS,
                prepend_fields=["project"],
                worksheet_name=worksheet_name,
            )

        run_time = timer() - start_time
        if self.verbosity > 0:
            msg = f"Report generated at {output_file} in {humanize_time(run_time)}."
            self.stdout.write(msg, self.style.SUCCESS)
def test_scanpipe_management_command_report(self):
    """
    Exercise the ``report`` command: argument validation errors first, then
    the content of the XLSX file generated for a labeled project.
    """
    project1 = make_project("project1")
    label1 = "label1"
    project1.labels.add(label1)
    make_resource_file(project1, path="file.ext", status=flag.REQUIRES_REVIEW)
    make_project("project2")

    expected = "Error: the following arguments are required: --sheet"
    with self.assertRaisesMessage(CommandError, expected):
        call_command("report")

    options = ["--sheet", "UNKNOWN"]
    expected = "Error: argument --sheet: invalid choice: 'UNKNOWN'"
    with self.assertRaisesMessage(CommandError, expected):
        call_command("report", *options)

    options = ["--sheet", "todo"]
    expected = "You must provide either --label or --search to select projects."
    with self.assertRaisesMessage(CommandError, expected):
        call_command("report", *options)

    expected = "No projects found for the provided criteria."
    with self.assertRaisesMessage(CommandError, expected):
        call_command("report", *options, "--label", "UNKNOWN")

    # The temporary directory is removed at the end of the test instead of
    # being left behind on disk.
    temp_directory = tempfile.TemporaryDirectory()
    self.addCleanup(temp_directory.cleanup)
    output_directory = Path(temp_directory.name)

    options.extend(["--output-directory", str(output_directory), "--label", label1])
    out = StringIO()
    call_command("report", *options, stdout=out)
    self.assertIn("1 project(s) will be included in the report.", out.getvalue())
    output_file = list(output_directory.glob("*.xlsx"))[0]
    self.assertIn(f"Report generated at {output_file}", out.getvalue())

    workbook = openpyxl.load_workbook(output_file, read_only=True, data_only=True)
    # Cleanups run in reverse order: the workbook file handle is released
    # before the temporary directory is deleted.
    self.addCleanup(workbook.close)

    # ``sheetnames`` and item access replace the deprecated
    # ``get_sheet_names()`` and ``get_sheet_by_name()`` accessors.
    self.assertEqual(["TODOS"], workbook.sheetnames)
    rows = list(workbook["TODOS"].values)
    header = rows[0]

    self.assertNotIn("extra_data", header)
    expected = ("project1", "file.ext", "file", "file.ext", "requires-review")
    self.assertEqual(expected, rows[1][0:5])
"model_name": "todo", } response = self.client.post(url, data=data, follow=True) self.assertEqual("report.xlsx", response.filename) diff --git a/scanpipe/views.py b/scanpipe/views.py index c9dbd218b..314f851c7 100644 --- a/scanpipe/views.py +++ b/scanpipe/views.py @@ -468,6 +468,7 @@ def export_xlsx_file_response(self): output.queryset_to_xlsx_worksheet( queryset, workbook, + exclude_fields=output.XLSX_EXCLUDE_FIELDS, prepend_fields=prepend_fields, worksheet_name=worksheet_name, ) @@ -1278,7 +1279,7 @@ def get_export_xlsx_prepend_fields(self): return ["project"] def get_export_xlsx_worksheet_name(self): - if self.action_form.cleaned_data.get("model_name") == "todos": + if self.report_form.cleaned_data.get("model_name") == "todo": return "TODOS" def get_export_xlsx_filename(self):