diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5dc583811..bd7a0a20d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,9 @@ v34.6.2 (unreleased) - Add support for fetching Git repository as Project input. https://github.com/nexB/scancode.io/issues/921 +- Enhance the logging and reporting of input fetch exceptions. + https://github.com/nexB/scancode.io/issues/1257 + v34.6.1 (2024-06-07) -------------------- diff --git a/scanpipe/pipelines/__init__.py b/scanpipe/pipelines/__init__.py index 1f84d9d8b..e14fc2988 100644 --- a/scanpipe/pipelines/__init__.py +++ b/scanpipe/pipelines/__init__.py @@ -40,9 +40,19 @@ logger = logging.getLogger(__name__) -class InputFileError(Exception): +class InputFilesError(Exception): """InputFile is missing or cannot be downloaded.""" + def __init__(self, error_tracebacks): + self.error_tracebacks = error_tracebacks + super().__init__(self._generate_message()) + + def _generate_message(self): + message = "InputFilesError encountered with the following issues:\n" + for index, (error, tb) in enumerate(self.error_tracebacks, start=1): + message += f"\nError {index}: {str(error)}\n\n{tb}" + return message + def group(*groups): """Mark a function as part of a particular group.""" @@ -218,9 +228,9 @@ def execute(self): def download_missing_inputs(self): """ Download any InputSource missing on disk. - Raise an error if any of the uploaded files is not available. + Raise an error if any of the uploaded files is not available or not reachable. """ - errors = [] + error_tracebacks = [] for input_source in self.project.inputsources.all(): if input_source.exists(): @@ -229,18 +239,20 @@ def download_missing_inputs(self): if input_source.is_uploaded: msg = f"Uploaded file {input_source} not available." self.log(msg) - errors.append(msg) + error_tracebacks.append((msg, "No traceback available.")) continue self.log(f"Fetching input from {input_source.download_url}") try: input_source.fetch() except Exception as error: + traceback_str = traceback.format_exc() + logger.error(traceback_str) self.log(f"{input_source.download_url} could not be fetched.") - errors.append(error) + error_tracebacks.append((str(error), traceback_str)) - if errors: - raise InputFileError(errors) + if error_tracebacks: + raise InputFilesError(error_tracebacks) def add_error(self, exception, resource=None): """Create a ``ProjectMessage`` ERROR record on the current `project`.""" diff --git a/scanpipe/pipes/fetch.py b/scanpipe/pipes/fetch.py index a96bb4547..07a2a98fb 100644 --- a/scanpipe/pipes/fetch.py +++ b/scanpipe/pipes/fetch.py @@ -63,15 +63,22 @@ def run_command_safely(command_args): sure to sanitize and validate the input to prevent any malicious commands from being executed. - As ``check`` is True, if the exit code is non-zero, it raises a CalledProcessError. + Raise a SubprocessError if the exit code was non-zero. """ - result = subprocess.run( # nosec + completed_process = subprocess.run( # nosec command_args, capture_output=True, text=True, - check=True, ) - return result.stdout + + if completed_process.returncode: + error_msg = ( + f'Error while executing cmd="{completed_process.args}": ' + f'"{completed_process.stderr.strip()}"' + ) + raise subprocess.SubprocessError(error_msg) + + return completed_process.stdout def get_request_session(uri): diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index c551801fc..6570e8a7a 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -39,7 +39,7 @@ from scanpipe.models import CodebaseResource from scanpipe.models import DiscoveredPackage from scanpipe.models import Project -from scanpipe.pipelines import InputFileError +from scanpipe.pipelines import InputFilesError from scanpipe.pipelines import Pipeline from scanpipe.pipelines import is_pipeline from scanpipe.pipelines import root_filesystem @@ -166,11 +166,17 @@ def test_scanpipe_pipeline_class_download_missing_inputs(self, mock_get): filename=file_location.name, is_uploaded=True ) self.assertFalse(input_source.exists()) - with self.assertRaises(InputFileError) as error: + with self.assertRaises(InputFilesError) as error: pipeline.download_missing_inputs() - error_msg = "Uploaded file filename=notice.NOTICE [uploaded] not available." - self.assertEqual(f"['{error_msg}']", str(error.exception)) - self.assertIn(error_msg, run.log) + error_msg = ( + "InputFilesError encountered with the following issues:\n\n" + "Error 1: Uploaded file filename=notice.NOTICE [uploaded] not available." + "\n\nNo traceback available." + ) + self.assertEqual(error_msg, str(error.exception)) + self.assertIn( + "Uploaded file filename=notice.NOTICE [uploaded] not available.", run.log + ) project1.copy_input_from(file_location) self.assertTrue(input_source.exists()) @@ -191,6 +197,29 @@ def test_scanpipe_pipeline_class_download_missing_inputs(self, mock_get): self.assertTrue(input_source2.exists()) mock_get.assert_called_once() + @mock.patch("scanpipe.models.InputSource.fetch") + def test_scanpipe_pipeline_class_download_fetch_exception(self, mock_fetch): + project1 = Project.objects.create(name="Analysis") + run = project1.add_pipeline("do_nothing") + pipeline = run.make_pipeline_instance() + + mock_fetch.side_effect = Exception("File not found") + download_url = "https://download.url/file.zip" + project1.add_input_source(download_url=download_url) + + with self.assertRaises(InputFilesError) as error: + pipeline.download_missing_inputs() + self.assertIn( + "InputFilesError encountered with the following issues:", + str(error.exception), + ) + self.assertIn("Error 1: File not found", str(error.exception)) + self.assertIn("Traceback (most recent call last):", str(error.exception)) + self.assertIn("Exception: File not found", str(error.exception)) + + self.assertIn("Fetching input from https://download.url/file.zip", run.log) + self.assertIn("https://download.url/file.zip could not be fetched.", run.log) + @mock.patch("git.repo.base.Repo.clone_from") def test_scanpipe_pipeline_class_download_missing_inputs_git_repo(self, mock_clone): project1 = Project.objects.create(name="Analysis")