From ec4c1636331a6e661fd8878283419b1d117179a3 Mon Sep 17 00:00:00 2001
From: tdruez
Date: Fri, 17 Jan 2025 17:14:41 +0400
Subject: [PATCH] Add a report management command to generate XLSX reports #1524

Signed-off-by: tdruez
---
 CHANGELOG.rst                          |   4 +
 docs/command-line-interface.rst        |  41 +++++++
 scanpipe/management/commands/report.py | 141 +++++++++++++++++++++++++
 scanpipe/pipes/output.py               |   5 +
 scanpipe/tests/test_commands.py        |  45 ++++++++
 5 files changed, 236 insertions(+)
 create mode 100644 scanpipe/management/commands/report.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 54d7dd356..0e930bb49 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -47,6 +47,10 @@ v34.9.4 (unreleased)
   sheets with a dedicated VULNERABILITIES sheet.
   https://github.com/aboutcode-org/scancode.io/issues/1519
 
+- Add a ``report`` management command to generate XLSX reports for multiple
+  projects at once, selected by label and/or by searching the project name.
+  https://github.com/aboutcode-org/scancode.io/issues/1524
+
 v34.9.3 (2024-12-31)
 --------------------
 
diff --git a/docs/command-line-interface.rst b/docs/command-line-interface.rst
index 9f0101f73..28875183b 100644
--- a/docs/command-line-interface.rst
+++ b/docs/command-line-interface.rst
@@ -68,6 +68,7 @@ ScanPipe's own commands are listed under the ``[scanpipe]`` section::
     list-project
     output
     purldb-scan-worker
+    report
     reset-project
     run
     show-pipeline
@@ -393,6 +394,46 @@ your outputs on the host machine when running with Docker.
 .. tip:: To specify a CycloneDX spec version (default to latest), use the syntax
   ``cyclonedx:VERSION`` as format value. For example: ``--format cyclonedx:1.5``.
 
+.. _cli_report:
+
+`$ scanpipe report --sheet SHEET`
+---------------------------------
+
+Generates an XLSX report of selected projects based on the provided criteria.
+
+Required arguments:
+
+- ``--sheet {package,dependency,resource,relation,message,todo}``
+  Specifies the sheet to include in the XLSX report. Available choices are based on
+  predefined object types.
+
+Optional arguments:
+
+- ``--output-directory OUTPUT_DIRECTORY``
+  The path to the directory where the report file will be created. If not provided,
+  the report file will be created in the current working directory.
+
+- ``--search SEARCH``
+  Filter projects by searching for the provided string in their name.
+
+- ``--label LABELS``
+  Filter projects by the provided label(s). Multiple labels can be provided by using
+  this argument multiple times.
+
+.. note::
+    At least one of ``--label`` or ``--search`` must be provided to select projects.
+
+Example usage:
+
+1. Generate a report for all projects tagged with "d2d" and include the **TODOS**
+worksheet::
+
+    $ scanpipe report --sheet todo --label d2d
+
+2. Generate a report for projects whose names contain the word "audit" and include the
+**PACKAGES** worksheet::
+
+    $ scanpipe report --sheet package --search audit
 
 .. _cli_check_compliance:
 
diff --git a/scanpipe/management/commands/report.py b/scanpipe/management/commands/report.py
new file mode 100644
index 000000000..86744c5e5
--- /dev/null
+++ b/scanpipe/management/commands/report.py
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/aboutcode-org/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/aboutcode-org/scancode.io for support and download.
+
+from pathlib import Path
+from timeit import default_timer as timer
+
+from django.core.management import CommandError
+from django.core.management.base import BaseCommand
+
+import xlsxwriter
+
+from aboutcode.pipeline import humanize_time
+from scanpipe.models import Project
+from scanpipe.pipes import filename_now
+from scanpipe.pipes import output
+
+
+# Worksheet title for each ``--sheet`` value.
+WORKSHEET_NAMES = {
+    "package": "PACKAGES",
+    "dependency": "DEPENDENCIES",
+    "resource": "RESOURCES",
+    "relation": "RELATIONS",
+    "message": "MESSAGES",
+    "todo": "TODOS",
+}
+
+
+class Command(BaseCommand):
+    help = "Report of selected projects."
+
+    def add_arguments(self, parser):
+        """Declare the report options: sheet, project selection, output location."""
+        super().add_arguments(parser)
+        parser.add_argument(
+            "--output-directory",
+            help=(
+                "The path to the directory where the report file will be created. "
+                "If not provided, the report file will be created in the current "
+                "working directory."
+            ),
+        )
+        parser.add_argument(
+            "--sheet",
+            required=True,
+            choices=list(output.object_type_to_model_name),
+            help="Specifies the sheet to include in the XLSX report.",
+        )
+        parser.add_argument(
+            "--search",
+            help="Select projects searching for the provided string in their name.",
+        )
+        parser.add_argument(
+            "--label",
+            action="append",
+            dest="labels",
+            default=[],
+            help=(
+                "Filter projects by the provided label(s). Multiple labels can be "
+                "provided by using this argument multiple times."
+            ),
+        )
+
+    def handle(self, *args, **options):
+        """
+        Write an XLSX report with the requested sheet for the selected projects.
+
+        Raise a CommandError when neither --label nor --search is provided, or
+        when no project matches the provided criteria.
+        """
+        start_time = timer()
+        self.verbosity = options["verbosity"]
+
+        output_directory = options["output_directory"]
+        labels = options["labels"]
+        search = options["search"]
+        sheet = options["sheet"]
+        model_name = output.object_type_to_model_name.get(sheet)
+
+        if not (labels or search):
+            raise CommandError(
+                "You must provide either --label or --search to select projects."
+            )
+
+        project_qs = Project.objects.all()
+        if labels:
+            # A project matching several labels is joined once per label:
+            # distinct() keeps it from being counted more than once.
+            project_qs = project_qs.filter(labels__name__in=labels).distinct()
+        if search:
+            project_qs = project_qs.filter(name__icontains=search)
+        project_count = project_qs.count()
+
+        if not project_count:
+            raise CommandError("No projects found for the provided criteria.")
+
+        if self.verbosity > 0:
+            msg = f"{project_count} project(s) will be included in the report."
+            self.stdout.write(msg, self.style.SUCCESS)
+
+        worksheet_queryset = output.get_queryset(project=None, model_name=model_name)
+        worksheet_queryset = worksheet_queryset.filter(project__in=project_qs)
+
+        filename = f"scancodeio-report-{filename_now()}.xlsx"
+        if output_directory:
+            output_file = Path(output_directory) / filename
+        else:
+            output_file = Path(filename)
+
+        with xlsxwriter.Workbook(output_file) as workbook:
+            output.queryset_to_xlsx_worksheet(
+                worksheet_queryset,
+                workbook,
+                prepend_fields=["project"],
+                # Name the worksheet after the requested sheet, not always "TODOS".
+                worksheet_name=WORKSHEET_NAMES.get(sheet, sheet.upper()),
+            )
+
+        run_time = timer() - start_time
+        if self.verbosity > 0:
+            msg = f"Report generated at {output_file} in {humanize_time(run_time)}."
+            self.stdout.write(msg, self.style.SUCCESS)
diff --git a/scanpipe/pipes/output.py b/scanpipe/pipes/output.py
index 786fad734..3eb07d721 100644
--- a/scanpipe/pipes/output.py
+++ b/scanpipe/pipes/output.py
@@ -309,6 +309,11 @@ def to_json(project):
     "codebaseresource": "resource",
     "codebaserelation": "relation",
     "projectmessage": "message",
+    "todos": "todo",
+}
+
+object_type_to_model_name = {
+    value: key for key, value in model_name_to_object_type.items()
 }
 
 
diff --git a/scanpipe/tests/test_commands.py b/scanpipe/tests/test_commands.py
index 69c2152ee..937175806 100644
--- a/scanpipe/tests/test_commands.py
+++ b/scanpipe/tests/test_commands.py
@@ -22,6 +22,7 @@
 
 import datetime
 import json
+import tempfile
 import uuid
 from contextlib import redirect_stdout
 from io import StringIO
@@ -37,14 +38,18 @@
 from django.test import override_settings
 from django.utils import timezone
 
+import openpyxl
+
 from scanpipe.management import commands
 from scanpipe.models import CodebaseResource
 from scanpipe.models import DiscoveredPackage
 from scanpipe.models import Project
 from scanpipe.models import Run
 from scanpipe.models import WebhookSubscription
+from scanpipe.pipes import flag
 from scanpipe.pipes import purldb
 from scanpipe.tests import make_package
+from scanpipe.tests import make_project
 from scanpipe.tests import make_resource_file
 
 scanpipe_app = apps.get_app_config("scanpipe")
@@ -1092,6 +1097,46 @@ def test_scanpipe_management_command_check_compliance(self):
         )
         self.assertEqual(expected, out_value)
 
+    def test_scanpipe_management_command_report(self):
+        project1 = make_project("project1")
+        label1 = "label1"
+        project1.labels.add(label1)
+        make_resource_file(project1, path="file.ext", status=flag.REQUIRES_REVIEW)
+        make_project("project2")
+
+        expected = "Error: the following arguments are required: --sheet"
+        with self.assertRaisesMessage(CommandError, expected):
+            call_command("report")
+
+        options = ["--sheet", "UNKNOWN"]
+        expected = "Error: argument --sheet: invalid choice: 'UNKNOWN'"
+        with self.assertRaisesMessage(CommandError, expected):
+            call_command("report", *options)
+
+        options = ["--sheet", "todo"]
+        expected = "You must provide either --label or --search to select projects."
+        with self.assertRaisesMessage(CommandError, expected):
+            call_command("report", *options)
+
+        expected = "No projects found for the provided criteria."
+        with self.assertRaisesMessage(CommandError, expected):
+            call_command("report", *options, *["--label", "UNKNOWN"])
+
+        output_directory = Path(tempfile.mkdtemp())
+        options.extend(["--output-directory", str(output_directory), "--label", label1])
+        out = StringIO()
+        call_command("report", *options, stdout=out)
+        self.assertIn("1 project(s) will be included in the report.", out.getvalue())
+        output_file = list(output_directory.glob("*.xlsx"))[0]
+        self.assertIn(f"Report generated at {output_file}", out.getvalue())
+
+        workbook = openpyxl.load_workbook(output_file, read_only=True, data_only=True)
+        self.assertEqual(["TODOS"], workbook.sheetnames)
+        todos_sheet = workbook["TODOS"]
+        row1 = list(todos_sheet.values)[1]
+        expected = ("project1", "file.ext", "file", "file.ext", "requires-review")
+        self.assertEqual(expected, row1[0:5])
+
 
 class ScanPipeManagementCommandMixinTest(TestCase):
     class CreateProjectCommand(