diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 955d663f6..6cf2115c5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -18,6 +18,10 @@ v34.9.0 (2024-11-14) "policies.yml" files, or global app settings. https://github.com/aboutcode-org/scancode.io/issues/386 +- Refactor the ``group`` decorator for pipeline steps as ``optional_step``. + The steps decorated as optional are not included by default anymore. + https://github.com/aboutcode-org/scancode.io/issues/386 + - Add a new ``PublishToFederatedCode`` pipeline (addon) to push scan result to FederatedCode. https://github.com/nexB/scancode.io/pull/1400 diff --git a/aboutcode/pipeline/README.md b/aboutcode/pipeline/README.md index eb18c6ec2..13ea051d3 100644 --- a/aboutcode/pipeline/README.md +++ b/aboutcode/pipeline/README.md @@ -24,11 +24,12 @@ class PrintMessages(BasePipeline): PrintMessages().execute() ``` -### Groups and steps selection +### Options and steps selection ```python from aboutcode.pipeline import BasePipeline -from aboutcode.pipeline import group +from aboutcode.pipeline import optional_step + class PrintMessages(BasePipeline): @classmethod @@ -38,7 +39,7 @@ class PrintMessages(BasePipeline): def step1(self): print("Message from step1") - @group("foo") + @optional_step("foo") def step2(self): print("Message from step2") diff --git a/aboutcode/pipeline/__init__.py b/aboutcode/pipeline/__init__.py index db301e7b3..38dfa2042 100644 --- a/aboutcode/pipeline/__init__.py +++ b/aboutcode/pipeline/__init__.py @@ -22,6 +22,7 @@ import logging import traceback +import warnings from datetime import datetime from datetime import timezone from pydoc import getdoc @@ -53,25 +54,24 @@ def get_steps(cls, groups=None): """ Return the list of steps defined in the ``steps`` class method. - If the optional ``groups`` parameter is provided, only include steps labeled - with groups that intersect with the provided list. If a step has no groups or - if ``groups`` is not specified, include the step in the result. + By default, all steps decorated with ``optional_step`` are not included. + A list of optional steps can be included using the ``groups`` parameter. """ if not callable(cls.steps): raise TypeError("Use a ``steps(cls)`` classmethod to declare the steps.") steps = cls.steps() + groups = groups or [] if initial_steps := cls.get_initial_steps(): steps = (*initial_steps, *steps) - if groups is not None: - steps = tuple( - step - for step in steps - if not getattr(step, "groups", []) - or set(getattr(step, "groups")).intersection(groups) - ) + steps = tuple( + step + for step in steps + if not getattr(step, "groups", []) + or set(getattr(step, "groups")).intersection(groups) + ) return steps @@ -123,7 +123,7 @@ def get_available_groups(cls): return sorted( set( group_name - for step in cls.get_steps() + for step in cls.steps() for group_name in getattr(step, "groups", []) ) ) @@ -219,8 +219,8 @@ class BasePipeline(PipelineDefinition, PipelineRun): """ -def group(*groups): - """Mark a function as part of a particular group.""" +def optional_step(*groups): + """Mark a step function as optional and part of a group.""" def decorator(obj): if hasattr(obj, "groups"): @@ -232,6 +232,17 @@ def decorator(obj): return decorator +def group(*groups): + """Backward compatibility.""" + warnings.warn( + "The `group` decorator is deprecated and will be " + "removed in a future release. Use `optional_step` instead.", + DeprecationWarning, + stacklevel=2, + ) + return optional_step(*groups) + + def humanize_time(seconds): """Convert the provided ``seconds`` number into human-readable time.""" message = f"{seconds:.0f} seconds" diff --git a/docs/built-in-pipelines.rst b/docs/built-in-pipelines.rst index 0b0971ac7..952e57741 100644 --- a/docs/built-in-pipelines.rst +++ b/docs/built-in-pipelines.rst @@ -6,6 +6,10 @@ Built-in Pipelines Pipelines in ScanCode.io are Python scripts that facilitate code analysis by executing a sequence of steps. The platform provides the following built-in pipelines: +.. note:: + Some pipelines have optional steps which are enabled only when they are + selected explicitly. + .. tip:: If you are unsure which pipeline suits your requirements best, check out the :ref:`faq_which_pipeline` section for guidance. diff --git a/docs/command-line-interface.rst b/docs/command-line-interface.rst index 08914b6aa..dd2bb6756 100644 --- a/docs/command-line-interface.rst +++ b/docs/command-line-interface.rst @@ -103,7 +103,7 @@ Optional arguments: - ``--pipeline PIPELINES`` Pipelines names to add on the project. .. tip:: - Use the "pipeline_name:group1,group2" syntax to select steps groups: + Use the "pipeline_name:option1,option2" syntax to select optional steps: ``--pipeline map_deploy_to_develop:Java,JavaScript`` @@ -230,7 +230,7 @@ add the docker pipeline to your project:: $ scanpipe add-pipeline --project foo analyze_docker_image .. tip:: - Use the "pipeline_name:group1,group2" syntax to select steps groups: + Use the "pipeline_name:option1,option2" syntax to select optional steps: ``--pipeline map_deploy_to_develop:Java,JavaScript`` @@ -417,7 +417,7 @@ For example, running the ``inspect_packages`` pipeline on a manifest file: $ run inspect_packages path/to/package.json > results.json -.. tip:: Use the "pipeline_name:group1,group2" syntax to select steps groups:: +.. tip:: Use the "pipeline_name:option1,option2" syntax to select optional steps:: $ run inspect_packages:StaticResolver package.json > results.json diff --git a/docs/rest-api.rst b/docs/rest-api.rst index 4593dd374..98eacad95 100644 --- a/docs/rest-api.rst +++ b/docs/rest-api.rst @@ -136,7 +136,7 @@ Using cURL: .. tip:: - Use the "pipeline_name:group1,group2" syntax to select steps groups: + Use the "pipeline_name:option1,option2" syntax to select optional steps: ``"pipeline": "map_deploy_to_develop:Java,JavaScript"`` @@ -293,7 +293,7 @@ Data: - ``execute_now``: ``true`` or ``false`` .. tip:: - Use the "pipeline_name:group1,group2" syntax to select steps groups: + Use the "pipeline_name:option1,option2" syntax to select optional steps: ``"pipeline": "map_deploy_to_develop:Java,JavaScript"`` diff --git a/scanpipe/management/commands/__init__.py b/scanpipe/management/commands/__init__.py index d7c9d6457..05dcbbb9a 100644 --- a/scanpipe/management/commands/__init__.py +++ b/scanpipe/management/commands/__init__.py @@ -229,7 +229,7 @@ def validate_copy_from(copy_from): def extract_group_from_pipelines(pipelines): """ - Add support for the ":group1,group2" suffix in pipeline data. + Add support for the ":option1,option2" suffix in pipeline data. For example: "map_deploy_to_develop:Java,JavaScript" """ diff --git a/scanpipe/management/commands/create-project.py b/scanpipe/management/commands/create-project.py index f0db4f84e..2a1e333e6 100644 --- a/scanpipe/management/commands/create-project.py +++ b/scanpipe/management/commands/create-project.py @@ -42,7 +42,8 @@ def add_arguments(self, parser): help=( "Pipelines names to add to the project. " "The pipelines are added and executed based on their given order. " - 'Groups can be provided using the "pipeline_name:group1,group2" syntax.' + 'Groups can be provided using the "pipeline_name:option1,option2" ' + "syntax." ), ) parser.add_argument( diff --git a/scanpipe/management/commands/run.py b/scanpipe/management/commands/run.py index fdcd0543b..83b5b4908 100644 --- a/scanpipe/management/commands/run.py +++ b/scanpipe/management/commands/run.py @@ -42,7 +42,8 @@ def add_arguments(self, parser): help=( "One or more pipeline to run. " "The pipelines executed based on their given order. " - 'Groups can be provided using the "pipeline_name:group1,group2" syntax.' + 'Groups can be provided using the "pipeline_name:option1,option2"' + " syntax." ), ) parser.add_argument( diff --git a/scanpipe/pipelines/deploy_to_develop.py b/scanpipe/pipelines/deploy_to_develop.py index 3b07795a4..ea74f93e8 100644 --- a/scanpipe/pipelines/deploy_to_develop.py +++ b/scanpipe/pipelines/deploy_to_develop.py @@ -20,7 +20,7 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/aboutcode-org/scancode.io for support and download. -from aboutcode.pipeline import group +from aboutcode.pipeline import optional_step from scanpipe import pipes from scanpipe.pipelines import Pipeline from scanpipe.pipes import d2d @@ -168,22 +168,22 @@ def match_archives_to_purldb(self): logger=self.log, ) - @group("Java") + @optional_step("Java") def find_java_packages(self): """Find the java package of the .java source files.""" d2d.find_java_packages(self.project, logger=self.log) - @group("Java") + @optional_step("Java") def map_java_to_class(self): """Map a .class compiled file to its .java source.""" d2d.map_java_to_class(project=self.project, logger=self.log) - @group("Java") + @optional_step("Java") def map_jar_to_source(self): """Map .jar files to their related source directory.""" d2d.map_jar_to_source(project=self.project, logger=self.log) - @group("JavaScript") + @optional_step("JavaScript") def map_javascript(self): """ Map a packed or minified JavaScript, TypeScript, CSS and SCSS @@ -191,12 +191,12 @@ def map_javascript(self): """ d2d.map_javascript(project=self.project, logger=self.log) - @group("Elf") + @optional_step("Elf") def map_elf(self): """Map ELF binaries to their sources.""" d2d.map_elfs(project=self.project, logger=self.log) - @group("Go") + @optional_step("Go") def map_go(self): """Map Go binaries to their sources.""" d2d.map_go_paths(project=self.project, logger=self.log) @@ -225,22 +225,22 @@ def match_resources_to_purldb(self): logger=self.log, ) - @group("JavaScript") + @optional_step("JavaScript") def map_javascript_post_purldb_match(self): """Map minified javascript file based on existing PurlDB match.""" d2d.map_javascript_post_purldb_match(project=self.project, logger=self.log) - @group("JavaScript") + @optional_step("JavaScript") def map_javascript_path(self): """Map javascript file based on path.""" d2d.map_javascript_path(project=self.project, logger=self.log) - @group("JavaScript") + @optional_step("JavaScript") def map_javascript_colocation(self): """Map JavaScript files based on neighborhood file mapping.""" d2d.map_javascript_colocation(project=self.project, logger=self.log) - @group("JavaScript") + @optional_step("JavaScript") def map_thirdparty_npm_packages(self): """Map thirdparty package using package.json metadata.""" d2d.map_thirdparty_npm_packages(project=self.project, logger=self.log) diff --git a/scanpipe/pipelines/inspect_packages.py b/scanpipe/pipelines/inspect_packages.py index 9d28c07cf..7674f7f25 100644 --- a/scanpipe/pipelines/inspect_packages.py +++ b/scanpipe/pipelines/inspect_packages.py @@ -20,7 +20,7 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/aboutcode-org/scancode.io for support and download. -from aboutcode.pipeline import group +from aboutcode.pipeline import optional_step from scanpipe.pipelines.scan_codebase import ScanCodebase from scanpipe.pipes import scancode @@ -65,7 +65,7 @@ def scan_for_application_packages(self): progress_logger=self.log, ) - @group("StaticResolver") + @optional_step("StaticResolver") def resolve_dependencies(self): """ Create packages and dependency relationships from diff --git a/scanpipe/pipelines/resolve_dependencies.py b/scanpipe/pipelines/resolve_dependencies.py index 781d1d639..89522d9c8 100644 --- a/scanpipe/pipelines/resolve_dependencies.py +++ b/scanpipe/pipelines/resolve_dependencies.py @@ -20,7 +20,7 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/aboutcode-org/scancode.io for support and download. -from aboutcode.pipeline import group +from aboutcode.pipeline import optional_step from scanpipe.pipelines.scan_codebase import ScanCodebase from scanpipe.pipes import resolve from scanpipe.pipes import scancode @@ -57,7 +57,7 @@ def get_manifest_inputs(self): """Locate package manifest files with a supported package resolver.""" self.manifest_resources = resolve.get_manifest_resources(self.project) - @group("StaticResolver") + @optional_step("StaticResolver") def scan_for_application_packages(self): """ Scan and assemble application packages from package manifests @@ -70,7 +70,7 @@ def scan_for_application_packages(self): progress_logger=self.log, ) - @group("StaticResolver") + @optional_step("StaticResolver") def create_packages_and_dependencies(self): """ Create the statically resolved packages and their dependencies @@ -78,7 +78,7 @@ def create_packages_and_dependencies(self): """ scancode.process_package_data(self.project, static_resolve=True) - @group("DynamicResolver") + @optional_step("DynamicResolver") def get_packages_from_manifest(self): """ Resolve package data from lockfiles/requirement files with package @@ -91,7 +91,7 @@ def get_packages_from_manifest(self): model="get_packages_from_manifest", ) - @group("DynamicResolver") + @optional_step("DynamicResolver") def create_resolved_packages(self): """ Create the dynamically resolved packages and their dependencies diff --git a/scanpipe/tests/data/d2d/about_files/expected.json b/scanpipe/tests/data/d2d/about_files/expected.json index fd5552c05..71bd269f7 100644 --- a/scanpipe/tests/data/d2d/about_files/expected.json +++ b/scanpipe/tests/data/d2d/about_files/expected.json @@ -21,7 +21,9 @@ { "pipeline_name": "map_deploy_to_develop", "status": "not_started", - "selected_groups": null, + "selected_groups": [ + "Java" + ], "selected_steps": null, "scancodeio_version": "", "task_id": null, diff --git a/scanpipe/tests/data/d2d/flume-ng-node-d2d.json b/scanpipe/tests/data/d2d/flume-ng-node-d2d.json index aa64ffe23..cac80687d 100644 --- a/scanpipe/tests/data/d2d/flume-ng-node-d2d.json +++ b/scanpipe/tests/data/d2d/flume-ng-node-d2d.json @@ -21,7 +21,9 @@ { "pipeline_name": "map_deploy_to_develop", "status": "not_started", - "selected_groups": null, + "selected_groups": [ + "Java" + ], "selected_steps": null, "scancodeio_version": "", "task_id": null, diff --git a/scanpipe/tests/pipelines/with_groups.py b/scanpipe/tests/pipelines/with_groups.py index 5fa8bace6..52719d6b9 100644 --- a/scanpipe/tests/pipelines/with_groups.py +++ b/scanpipe/tests/pipelines/with_groups.py @@ -20,7 +20,7 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. -from aboutcode.pipeline import group +from aboutcode.pipeline import optional_step from scanpipe.pipelines import Pipeline @@ -38,17 +38,17 @@ def steps(cls): cls.no_groups, ) - @group("foo", "bar") + @optional_step("foo", "bar") def grouped_with_foo_and_bar(self): """Step1 doc.""" pass - @group("bar") + @optional_step("bar") def grouped_with_bar(self): """Step2 doc.""" pass - @group("excluded") + @optional_step("excluded") def grouped_with_excluded(self): """Step2 doc.""" pass diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index d07a2c48e..d4cd06ade 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -151,7 +151,9 @@ def test_scanpipe_pipeline_class_execute_with_exception(self): @mock.patch("scanpipe.tests.pipelines.do_nothing.DoNothing.step2") def test_scanpipe_pipeline_class_execute_with_selected_steps(self, step2, step1): step1.__name__ = "step1" + step1.groups = [] step2.__name__ = "step2" + step2.groups = [] project1 = Project.objects.create(name="Analysis") run = project1.add_pipeline("do_nothing") @@ -347,17 +349,10 @@ def test_scanpipe_pipeline_class_get_steps(self): self.assertEqual(expected, str(cm.exception)) def test_scanpipe_pipeline_class_get_steps_with_groups(self): - expected = ( - WithGroups.grouped_with_foo_and_bar, - WithGroups.grouped_with_bar, - WithGroups.grouped_with_excluded, - WithGroups.no_groups, - ) - self.assertEqual(expected, WithGroups.get_steps()) - expected = (WithGroups.no_groups,) + self.assertEqual(expected, WithGroups.get_steps()) self.assertEqual(expected, WithGroups.get_steps(groups=[])) - self.assertEqual(expected, WithGroups.get_steps(groups=["not"])) + self.assertEqual(expected, WithGroups.get_steps(groups=["not_defined"])) expected = ( WithGroups.grouped_with_foo_and_bar, @@ -1242,8 +1237,11 @@ def test_scanpipe_find_vulnerabilities_pipeline_integration( def test_scanpipe_resolve_dependencies_pipeline_integration(self): pipeline_name = "resolve_dependencies" project1 = Project.objects.create(name="Analysis") + selected_groups = ["DynamicResolver"] - run = project1.add_pipeline(pipeline_name) + run = project1.add_pipeline( + pipeline_name=pipeline_name, selected_groups=selected_groups + ) pipeline = run.make_pipeline_instance() project1.move_input_from(tempfile.mkstemp()[1]) @@ -1257,8 +1255,11 @@ def test_scanpipe_resolve_dependencies_pipeline_integration(self): def test_scanpipe_resolve_dependencies_pipeline_integration_empty_manifest(self): pipeline_name = "resolve_dependencies" project1 = Project.objects.create(name="Analysis") + selected_groups = ["DynamicResolver"] - run = project1.add_pipeline(pipeline_name) + run = project1.add_pipeline( + pipeline_name=pipeline_name, selected_groups=selected_groups + ) pipeline = run.make_pipeline_instance() project1.move_input_from(tempfile.mkstemp(suffix="requirements.txt")[1]) @@ -1272,11 +1273,14 @@ def test_scanpipe_resolve_dependencies_pipeline_integration_empty_manifest(self) def test_scanpipe_resolve_dependencies_pipeline_integration_misc(self): pipeline_name = "resolve_dependencies" project1 = Project.objects.create(name="Analysis") + selected_groups = ["DynamicResolver"] input_location = self.data / "manifests" / "requirements.txt" project1.copy_input_from(input_location) - run = project1.add_pipeline(pipeline_name) + run = project1.add_pipeline( + pipeline_name=pipeline_name, selected_groups=selected_groups + ) pipeline = run.make_pipeline_instance() exitcode, out = pipeline.execute() @@ -1289,8 +1293,11 @@ def test_scanpipe_resolve_dependencies_pipeline_pypi_integration( ): pipeline_name = "resolve_dependencies" project1 = Project.objects.create(name="Analysis") + selected_groups = ["DynamicResolver"] - run = project1.add_pipeline(pipeline_name) + run = project1.add_pipeline( + pipeline_name=pipeline_name, selected_groups=selected_groups + ) pipeline = run.make_pipeline_instance() project1.move_input_from(tempfile.mkstemp(suffix="requirements.txt")[1]) @@ -1450,12 +1457,15 @@ def test_scanpipe_deploy_to_develop_pipeline_integration( mock_request.return_value = None pipeline_name = "map_deploy_to_develop" project1 = Project.objects.create(name="Analysis", uuid=forced_uuid) + selected_groups = ["Java"] jar_location = self.data / "d2d" / "jars" project1.copy_input_from(jar_location / "from-flume-ng-node-1.9.0.zip") project1.copy_input_from(jar_location / "to-flume-ng-node-1.9.0.zip") - run = project1.add_pipeline(pipeline_name) + run = project1.add_pipeline( + pipeline_name=pipeline_name, selected_groups=selected_groups + ) pipeline = run.make_pipeline_instance() exitcode, out = pipeline.execute() @@ -1504,12 +1514,15 @@ def test_scanpipe_deploy_to_develop_pipeline_with_about_file( mock_request.return_value = None pipeline_name = "map_deploy_to_develop" project1 = Project.objects.create(name="Analysis", uuid=forced_uuid) + selected_groups = ["Java"] data_dir = self.data / "d2d" / "about_files" project1.copy_input_from(data_dir / "from-with-about-file.zip") project1.copy_input_from(data_dir / "to-with-jar.zip") - run = project1.add_pipeline(pipeline_name) + run = project1.add_pipeline( + pipeline_name=pipeline_name, selected_groups=selected_groups + ) pipeline = run.make_pipeline_instance() exitcode, out = pipeline.execute()