Skip to content

Commit

Permalink
Add a report management command to generate XLSX reports #1524
Browse files Browse the repository at this point in the history
Signed-off-by: tdruez <[email protected]>
  • Loading branch information
tdruez committed Jan 17, 2025
1 parent dd0e0bd commit ec4c163
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ v34.9.4 (unreleased)
sheets with a dedicated VULNERABILITIES sheet.
https://github.com/aboutcode-org/scancode.io/issues/1519

- Add a ``report`` management command to generate XLSX reports for
multiple projects at once, selecting them by label and/or by project name search.
https://github.com/aboutcode-org/scancode.io/issues/1524

v34.9.3 (2024-12-31)
--------------------

Expand Down
41 changes: 41 additions & 0 deletions docs/command-line-interface.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ ScanPipe's own commands are listed under the ``[scanpipe]`` section::
list-project
output
purldb-scan-worker
report
reset-project
run
show-pipeline
Expand Down Expand Up @@ -393,6 +394,46 @@ your outputs on the host machine when running with Docker.
.. tip:: To specify a CycloneDX spec version (default to latest), use the syntax
``cyclonedx:VERSION`` as format value. For example: ``--format cyclonedx:1.5``.

.. _cli_report:

`$ scanpipe report --sheet SHEET`
---------------------------------

Generates an XLSX report of selected projects based on the provided criteria.

Required arguments:

- ``--sheet {package,dependency,resource,relation,message,todo}``
Specifies the sheet to include in the XLSX report. Available choices are based on
predefined object types.

Optional arguments:

- ``--output-directory OUTPUT_DIRECTORY``
The path to the directory where the report file will be created. If not provided,
the report file will be created in the current working directory.

- ``--search SEARCH``
Filter projects by searching for the provided string in their name.

- ``--label LABELS``
Filter projects by the provided label(s). Multiple labels can be provided by using
this argument multiple times.

.. note::
Either ``--label`` or ``--search`` must be provided to select projects.

Example usage:

1. Generate a report for all projects tagged with "d2d" and include the **TODOS**
worksheet::

$ scanpipe report --sheet todo --label d2d

2. Generate a report for projects whose names contain the word "audit" and include the
**PACKAGES** worksheet::

$ scanpipe report --sheet package --search audit

.. _cli_check_compliance:

Expand Down
120 changes: 120 additions & 0 deletions scanpipe/management/commands/report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.

from pathlib import Path
from timeit import default_timer as timer

from django.core.management import CommandError
from django.core.management.base import BaseCommand

import xlsxwriter

from aboutcode.pipeline import humanize_time
from scanpipe.models import Project
from scanpipe.pipes import filename_now
from scanpipe.pipes import output


class Command(BaseCommand):
    """
    Generate a single-sheet XLSX report that aggregates data across projects.

    Projects are selected with ``--label`` and/or ``--search``; the rows of the
    object type chosen with ``--sheet`` are written to one worksheet, with a
    leading ``project`` column identifying where each row comes from.
    """

    help = "Report of selected projects."

    # Worksheet title for each ``--sheet`` choice, so the generated sheet is
    # named after the data it actually contains.
    worksheet_names = {
        "package": "PACKAGES",
        "dependency": "DEPENDENCIES",
        "resource": "RESOURCES",
        "relation": "RELATIONS",
        "message": "MESSAGES",
        "todo": "TODOS",
    }

    def add_arguments(self, parser):
        """Register the report options on the command's argument ``parser``."""
        super().add_arguments(parser)
        parser.add_argument(
            "--output-directory",
            help=(
                "The path to the directory where the report file will be created. "
                "If not provided, the report file will be created in the current "
                "working directory."
            ),
        )
        parser.add_argument(
            "--sheet",
            required=True,
            choices=list(output.object_type_to_model_name),
            help="Specifies the sheet to include in the XLSX report.",
        )
        parser.add_argument(
            "--search",
            help="Select projects searching for the provided string in their name.",
        )
        parser.add_argument(
            "--label",
            action="append",
            dest="labels",
            default=[],
            help=(
                "Filter projects by the provided label(s). Multiple labels can be "
                "provided by using this argument multiple times."
            ),
        )

    def handle(self, *args, **options):
        """
        Build the XLSX report for the projects matching the provided criteria.

        The report is written to ``scancodeio-report-<timestamp>.xlsx`` in the
        ``--output-directory`` when provided, or in the current working
        directory otherwise. Progress messages are written to stdout when the
        verbosity is greater than 0.

        Raises ``CommandError`` when neither ``--label`` nor ``--search`` is
        provided, or when no project matches the provided criteria.
        """
        start_time = timer()
        self.verbosity = options["verbosity"]

        output_directory = options["output_directory"]
        labels = options["labels"]
        search = options["search"]
        sheet = options["sheet"]
        model_name = output.object_type_to_model_name.get(sheet)

        if not (labels or search):
            raise CommandError(
                "You must provide either --label or --search to select projects."
            )

        project_qs = Project.objects.all()
        if labels:
            # Filtering across the labels relation yields one row per matching
            # label, so a project carrying several of the requested labels
            # would be returned (and counted) more than once without distinct().
            project_qs = project_qs.filter(labels__name__in=labels).distinct()
        if search:
            project_qs = project_qs.filter(name__icontains=search)
        project_count = project_qs.count()

        if not project_count:
            raise CommandError("No projects found for the provided criteria.")

        if self.verbosity > 0:
            msg = f"{project_count} project(s) will be included in the report."
            self.stdout.write(msg, self.style.SUCCESS)

        # No single project is bound here: the rows are restricted to the
        # selected projects by the filter() that follows.
        worksheet_queryset = output.get_queryset(project=None, model_name=model_name)
        worksheet_queryset = worksheet_queryset.filter(project__in=project_qs)

        filename = f"scancodeio-report-{filename_now()}.xlsx"
        if output_directory:
            output_file = Path(output_directory) / filename
        else:
            output_file = Path(filename)

        # Name the worksheet after the requested sheet type rather than always
        # labelling it "TODOS", whatever data it contains.
        worksheet_name = self.worksheet_names.get(sheet, sheet.upper())

        with xlsxwriter.Workbook(output_file) as workbook:
            output.queryset_to_xlsx_worksheet(
                worksheet_queryset,
                workbook,
                prepend_fields=["project"],
                worksheet_name=worksheet_name,
            )

        run_time = timer() - start_time
        if self.verbosity > 0:
            msg = f"Report generated at {output_file} in {humanize_time(run_time)}."
            self.stdout.write(msg, self.style.SUCCESS)
5 changes: 5 additions & 0 deletions scanpipe/pipes/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,11 @@ def to_json(project):
"codebaseresource": "resource",
"codebaserelation": "relation",
"projectmessage": "message",
"todos": "todo",
}

# Reverse lookup of ``model_name_to_object_type``: maps each object type back
# to its model name. If several model names shared an object type, the last
# one in iteration order would win.
object_type_to_model_name = {
    object_type: model_name
    for model_name, object_type in model_name_to_object_type.items()
}


Expand Down
45 changes: 45 additions & 0 deletions scanpipe/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import datetime
import json
import tempfile
import uuid
from contextlib import redirect_stdout
from io import StringIO
Expand All @@ -37,14 +38,18 @@
from django.test import override_settings
from django.utils import timezone

import openpyxl

from scanpipe.management import commands
from scanpipe.models import CodebaseResource
from scanpipe.models import DiscoveredPackage
from scanpipe.models import Project
from scanpipe.models import Run
from scanpipe.models import WebhookSubscription
from scanpipe.pipes import flag
from scanpipe.pipes import purldb
from scanpipe.tests import make_package
from scanpipe.tests import make_project
from scanpipe.tests import make_resource_file

scanpipe_app = apps.get_app_config("scanpipe")
Expand Down Expand Up @@ -1092,6 +1097,46 @@ def test_scanpipe_management_command_check_compliance(self):
)
self.assertEqual(expected, out_value)

def test_scanpipe_management_command_report(self):
    """
    Exercise the ``report`` command: argument validation errors first, then
    the generation of a TODOS report for the single project carrying the label.
    """
    project1 = make_project("project1")
    label1 = "label1"
    project1.labels.add(label1)
    make_resource_file(project1, path="file.ext", status=flag.REQUIRES_REVIEW)
    # Second project without the label: it must not be part of the report.
    make_project("project2")

    expected = "Error: the following arguments are required: --sheet"
    with self.assertRaisesMessage(CommandError, expected):
        call_command("report")

    options = ["--sheet", "UNKNOWN"]
    expected = "Error: argument --sheet: invalid choice: 'UNKNOWN'"
    with self.assertRaisesMessage(CommandError, expected):
        call_command("report", *options)

    options = ["--sheet", "todo"]
    expected = "You must provide either --label or --search to select projects."
    with self.assertRaisesMessage(CommandError, expected):
        call_command("report", *options)

    expected = "No projects found for the provided criteria."
    with self.assertRaisesMessage(CommandError, expected):
        call_command("report", *options, *["--label", "UNKNOWN"])

    # The temporary directory is removed on exit, so the generated report
    # does not accumulate on disk across test runs.
    with tempfile.TemporaryDirectory() as temp_dir:
        output_directory = Path(temp_dir)
        options.extend(
            ["--output-directory", str(output_directory), "--label", label1]
        )
        out = StringIO()
        call_command("report", *options, stdout=out)
        self.assertIn(
            "1 project(s) will be included in the report.", out.getvalue()
        )
        output_file = next(output_directory.glob("*.xlsx"))
        self.assertIn(f"Report generated at {output_file}", out.getvalue())

        workbook = openpyxl.load_workbook(
            output_file, read_only=True, data_only=True
        )
        try:
            self.assertEqual(["TODOS"], workbook.sheetnames)
            todos_sheet = workbook["TODOS"]
            # Index 0 is the header row; index 1 is the first data row.
            row1 = list(todos_sheet.values)[1]
        finally:
            # Read-only workbooks keep the file open until explicitly closed;
            # release it before the temporary directory is deleted.
            workbook.close()

    expected = ("project1", "file.ext", "file", "file.ext", "requires-review")
    self.assertEqual(expected, row1[0:5])


class ScanPipeManagementCommandMixinTest(TestCase):
class CreateProjectCommand(
Expand Down

0 comments on commit ec4c163

Please sign in to comment.