Merge main and fix conflicts #1524

Signed-off-by: tdruez <[email protected]>
aboutcode-org · Jan 17, 2025 · e458a1a · e458a1a
2 parents 2292804 + ce227cd
commit e458a1a
Show file tree

Hide file tree

Showing 9 changed files with 274 additions and 20 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -47,6 +47,10 @@ v34.9.4 (unreleased)
   sheets with a dedicated VULNERABILITIES sheet.
   https://github.com/aboutcode-org/scancode.io/issues/1519
 
+- Add a ``report`` management command that generates XLSX reports for multiple
+  projects at once, selecting them by label and/or by searching their names.
+  https://github.com/aboutcode-org/scancode.io/issues/1524
+
 v34.9.3 (2024-12-31)
 --------------------
 

diff --git a/docs/command-line-interface.rst b/docs/command-line-interface.rst
@@ -68,6 +68,7 @@ ScanPipe's own commands are listed under the ``[scanpipe]`` section::
       list-project
       output
       purldb-scan-worker
+      report
       reset-project
       run
       show-pipeline
@@ -174,6 +175,10 @@ Required arguments (one of):
   | project-2      | pkg:deb/debian/[email protected]      |
   +----------------+---------------------------------+
 
+.. tip::
+    In place of a local path, a download URL to the CSV file is supported for the
+    ``--input-list`` argument.
+
 Optional arguments:
 
 - ``--project-name-suffix`` Optional custom suffix to append to project names.
@@ -194,14 +199,15 @@ Optional arguments:
 Example: Processing Multiple Docker Images
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Assume multiple Docker images are available in a directory named ``local-data/`` on
+Suppose you have multiple Docker images stored in a directory named ``local-data/`` on
 the host machine.
-To process these images with the ``analyze_docker_image`` pipeline using asynchronous
-execution::
+To process these images using the ``analyze_docker_image`` pipeline with asynchronous
+execution, you can use this command::
 
     $ docker compose run --rm \
-        --volume local-data/:/input-data:ro \
-        web scanpipe batch-create input-data/ \
+        --volume local-data/:/input-data/:ro \
+        web scanpipe batch-create \
+            --input-directory /input-data/ \
             --pipeline analyze_docker_image \
             --label "Docker" \
             --execute --async
@@ -224,6 +230,19 @@ Each Docker image in the ``local-data/`` directory will result in the creation o
 project with the specified pipeline (``analyze_docker_image``) executed by worker
 services.
 
+Example: Processing Multiple Develop to Deploy Mapping
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To process an input list CSV file with the ``map_deploy_to_develop`` pipeline using
+asynchronous execution::
+
+    $ docker compose run --rm \
+        web scanpipe batch-create \
+            --input-list https://url/input_list.csv \
+            --pipeline map_deploy_to_develop \
+            --label "d2d_mapping" \
+            --execute --async
+
 `$ scanpipe list-pipeline [--verbosity {0,1,2,3}]`
 --------------------------------------------------
 
@@ -375,6 +394,46 @@ your outputs on the host machine when running with Docker.
 .. tip:: To specify a CycloneDX spec version (default to latest), use the syntax
   ``cyclonedx:VERSION`` as format value. For example: ``--format cyclonedx:1.5``.
 
+.. _cli_report:
+
+`$ scanpipe report --sheet SHEET`
+---------------------------------
+
+Generates an XLSX report of selected projects based on the provided criteria.
+
+Required arguments:
+
+- ``--sheet {package,dependency,resource,relation,message,todo}``
+  Specifies the sheet to include in the XLSX report. Available choices are based on
+  predefined object types.
+
+Optional arguments:
+
+- ``--output-directory OUTPUT_DIRECTORY``
+  The path to the directory where the report file will be created. If not provided,
+  the report file will be created in the current working directory.
+
+- ``--search SEARCH``
+  Filter projects by searching for the provided string in their name.
+
+- ``--label LABELS``
+  Filter projects by the provided label(s). Multiple labels can be provided by using
+  this argument multiple times.
+
+.. note::
+    Either ``--label`` or ``--search`` must be provided to select projects.
+
+Example usage:
+
+1. Generate a report for all projects tagged with "d2d" and include the **TODOS**
+   worksheet::
+
+       $ scanpipe report --sheet todo --label d2d
+
+2. Generate a report for projects whose names contain the word "audit" and include
+   the **PACKAGES** worksheet::
+
+       $ scanpipe report --sheet package --search audit
 
 .. _cli_check_compliance:
 

diff --git a/scanpipe/forms.py b/scanpipe/forms.py
@@ -295,7 +295,7 @@ class ProjectReportForm(BaseProjectActionForm):
             ("codebaseresource", "Resources"),
             ("codebaserelation", "Relations"),
             ("projectmessage", "Messages"),
-            ("todos", "TODOs"),
+            ("todo", "TODOs"),
         ],
         required=True,
         initial="discoveredpackage",

diff --git a/scanpipe/management/commands/batch-create.py b/scanpipe/management/commands/batch-create.py
@@ -27,8 +27,11 @@
 from django.core.management import CommandError
 from django.core.management.base import BaseCommand
 
+import requests
+
 from scanpipe.management.commands import CreateProjectCommandMixin
 from scanpipe.management.commands import PipelineCommandMixin
+from scanpipe.pipes import fetch
 
 
 class Command(CreateProjectCommandMixin, PipelineCommandMixin, BaseCommand):
@@ -54,7 +57,8 @@ def add_arguments(self, parser):
                 "Path to a CSV file with project names and input URLs. "
                 "The first column must contain project names, and the second column "
                 "should list comma-separated input URLs (e.g., Download URL, PURL, or "
-                "Docker reference)."
+                "Docker reference). "
+                "In place of a local path, a download URL to the CSV file is supported."
             ),
         )
         parser.add_argument(
@@ -110,7 +114,16 @@ def handle_input_directory(self, **options):
                 self.created_project_count += 1
 
     def handle_input_list(self, **options):
-        input_file = Path(options["input_list"])
+        input_file = options["input_list"]
+
+        if input_file.startswith("http"):
+            try:
+                download = fetch.fetch_http(input_file)
+            except requests.exceptions.RequestException as e:
+                raise CommandError(e)
+            input_file = download.path
+
+        input_file = Path(input_file)
         if not input_file.exists():
             raise CommandError(f"The {input_file} file does not exist.")
 

diff --git a/scanpipe/management/commands/report.py b/scanpipe/management/commands/report.py
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/aboutcode-org/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/aboutcode-org/scancode.io for support and download.
+
+from pathlib import Path
+from timeit import default_timer as timer
+
+from django.core.management import CommandError
+from django.core.management.base import BaseCommand
+
+import xlsxwriter
+
+from aboutcode.pipeline import humanize_time
+from scanpipe.models import Project
+from scanpipe.pipes import filename_now
+from scanpipe.pipes import output
+
+
+class Command(BaseCommand):
+    """
+    Generate a single-worksheet XLSX report spanning several projects.
+
+    Projects are selected with ``--label`` and/or ``--search``. The objects of the
+    type requested with ``--sheet`` are collected for all selected projects and
+    written to one worksheet of a timestamped ``.xlsx`` file.
+    """
+
+    help = "Report of selected projects."
+
+    # Worksheet title for each ``--sheet`` choice, so the title reflects the
+    # selected sheet instead of always being "TODOS".
+    worksheet_names = {
+        "package": "PACKAGES",
+        "dependency": "DEPENDENCIES",
+        "resource": "RESOURCES",
+        "relation": "RELATIONS",
+        "message": "MESSAGES",
+        "todo": "TODOS",
+    }
+
+    def add_arguments(self, parser):
+        """Declare the report options on the provided ``parser``."""
+        super().add_arguments(parser)
+        parser.add_argument(
+            "--output-directory",
+            help=(
+                "The path to the directory where the report file will be created. "
+                "If not provided, the report file will be created in the current "
+                "working directory."
+            ),
+        )
+        parser.add_argument(
+            "--sheet",
+            required=True,
+            choices=list(output.object_type_to_model_name.keys()),
+            help="Specifies the sheet to include in the XLSX report.",
+        )
+        parser.add_argument(
+            "--search",
+            help="Select projects searching for the provided string in their name.",
+        )
+        parser.add_argument(
+            "--label",
+            action="append",
+            dest="labels",
+            default=[],
+            help=(
+                "Filter projects by the provided label(s). Multiple labels can be "
+                "provided by using this argument multiple times."
+            ),
+        )
+
+    def handle(self, *args, **options):
+        """
+        Select the projects, then write the requested sheet to an XLSX file.
+
+        Raise a ``CommandError`` when neither ``--label`` nor ``--search`` is
+        provided, when the ``--output-directory`` is not an existing directory,
+        or when no project matches the selection criteria.
+        """
+        start_time = timer()
+        self.verbosity = options["verbosity"]
+
+        output_directory = options["output_directory"]
+        labels = options["labels"]
+        search = options["search"]
+        sheet = options["sheet"]
+        model_name = output.object_type_to_model_name.get(sheet)
+
+        if not (labels or search):
+            raise CommandError(
+                "You must provide either --label or --search to select projects."
+            )
+
+        # Fail early with a clear message rather than when the workbook is saved.
+        if output_directory and not Path(output_directory).is_dir():
+            raise CommandError(f"The {output_directory} directory does not exist.")
+
+        project_qs = Project.objects.all()
+        if labels:
+            # The join on labels returns one row per matching label: ``distinct``
+            # keeps a project matching several labels from being counted twice.
+            project_qs = project_qs.filter(labels__name__in=labels).distinct()
+        if search:
+            project_qs = project_qs.filter(name__icontains=search)
+        project_count = project_qs.count()
+
+        if not project_count:
+            raise CommandError("No projects found for the provided criteria.")
+
+        if self.verbosity > 0:
+            msg = f"{project_count} project(s) will be included in the report."
+            self.stdout.write(msg, self.style.SUCCESS)
+
+        # ``project=None`` is passed here and the project restriction is applied
+        # below across all the selected projects at once.
+        worksheet_queryset = output.get_queryset(project=None, model_name=model_name)
+        worksheet_queryset = worksheet_queryset.filter(project__in=project_qs)
+
+        filename = f"scancodeio-report-{filename_now()}.xlsx"
+        output_file = Path(filename)
+        if output_directory:
+            output_file = Path(output_directory) / filename
+
+        with xlsxwriter.Workbook(output_file) as workbook:
+            output.queryset_to_xlsx_worksheet(
+                worksheet_queryset,
+                workbook,
+                exclude_fields=output.XLSX_EXCLUDE_FIELDS,
+                prepend_fields=["project"],
+                worksheet_name=self.worksheet_names.get(sheet, sheet.upper()),
+            )
+
+        run_time = timer() - start_time
+        if self.verbosity > 0:
+            msg = f"Report generated at {output_file} in {humanize_time(run_time)}."
+            self.stdout.write(msg, self.style.SUCCESS)
diff --git a/scanpipe/pipes/output.py b/scanpipe/pipes/output.py
@@ -96,7 +96,7 @@ def get_queryset(project, model_name):
             CodebaseRelation.objects.select_related("from_resource", "to_resource")
         ),
         "projectmessage": ProjectMessage.objects.all(),
-        "todos": CodebaseResource.objects.files().status(flag.REQUIRES_REVIEW),
+        "todo": CodebaseResource.objects.files().status(flag.REQUIRES_REVIEW),
     }
 
     queryset = querysets.get(model_name)
@@ -309,6 +309,11 @@ def to_json(project):
     "codebaseresource": "resource",
     "codebaserelation": "relation",
     "projectmessage": "message",
+    "todo": "todo",
+}
+
+# Reverse lookup of ``model_name_to_object_type``: maps each object type back to
+# its model name.
+object_type_to_model_name = {
+    value: key for key, value in model_name_to_object_type.items()
 }
 
 
@@ -469,6 +474,16 @@ def _adapt_value_for_xlsx(fieldname, value, maximum_length=32767, _adapt=True):
     return value, error
 
 
+# Field names passed as ``exclude_fields`` when writing XLSX worksheets.
+# Kept as a list: ``to_xlsx`` copies it and may append to the copy.
+XLSX_EXCLUDE_FIELDS = [
+    "extra_data",
+    "package_data",
+    "license_detections",
+    "other_license_detections",
+    "license_clues",
+    "affected_by_vulnerabilities",
+]
+
+
 def to_xlsx(project):
     """
     Generate output for the provided ``project`` in XLSX format.
@@ -479,15 +494,8 @@ def to_xlsx(project):
     with possible error messages for a row when converting the data to XLSX
     exceed the limits of what can be stored in a cell.
     """
+    exclude_fields = XLSX_EXCLUDE_FIELDS.copy()
     output_file = project.get_output_file_path("results", "xlsx")
-    exclude_fields = [
-        "extra_data",
-        "package_data",
-        "license_detections",
-        "other_license_detections",
-        "license_clues",
-        "affected_by_vulnerabilities",
-    ]
 
     if not project.policies_enabled:
         exclude_fields.append("compliance_alert")
@@ -572,7 +580,7 @@ def add_vulnerabilities_sheet(workbook, project):
 
 
 def add_todos_sheet(workbook, project, exclude_fields):
-    todos_queryset = get_queryset(project, "todos")
+    todos_queryset = get_queryset(project, "todo")
     if todos_queryset:
         queryset_to_xlsx_worksheet(
             todos_queryset, workbook, exclude_fields, worksheet_name="TODOS"