Skip to content

Commit

Permalink
Enhance the logging and reporting of input fetch exceptions #1257 (#1269
Browse files Browse the repository at this point in the history
)

Signed-off-by: tdruez <[email protected]>
  • Loading branch information
tdruez authored Jun 18, 2024
1 parent 98f0f1e commit 2389d3c
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 16 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ v34.6.2 (unreleased)
- Add support for fetching Git repository as Project input.
https://github.com/nexB/scancode.io/issues/921

- Enhance the logging and reporting of input fetch exceptions.
https://github.com/nexB/scancode.io/issues/1257

v34.6.1 (2024-06-07)
--------------------

Expand Down
26 changes: 19 additions & 7 deletions scanpipe/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,19 @@
logger = logging.getLogger(__name__)


class InputFileError(Exception):
class InputFilesError(Exception):
"""InputFile is missing or cannot be downloaded."""

def __init__(self, error_tracebacks):
self.error_tracebacks = error_tracebacks
super().__init__(self._generate_message())

def _generate_message(self):
message = "InputFilesError encountered with the following issues:\n"
for index, (error, tb) in enumerate(self.error_tracebacks, start=1):
message += f"\nError {index}: {str(error)}\n\n{tb}"
return message


def group(*groups):
"""Mark a function as part of a particular group."""
Expand Down Expand Up @@ -218,9 +228,9 @@ def execute(self):
def download_missing_inputs(self):
"""
Download any InputSource missing on disk.
Raise an error if any of the uploaded files is not available.
Raise an error if any of the uploaded files is not available or not reachable.
"""
errors = []
error_tracebacks = []

for input_source in self.project.inputsources.all():
if input_source.exists():
Expand All @@ -229,18 +239,20 @@ def download_missing_inputs(self):
if input_source.is_uploaded:
msg = f"Uploaded file {input_source} not available."
self.log(msg)
errors.append(msg)
error_tracebacks.append((msg, "No traceback available."))
continue

self.log(f"Fetching input from {input_source.download_url}")
try:
input_source.fetch()
except Exception as error:
traceback_str = traceback.format_exc()
logger.error(traceback_str)
self.log(f"{input_source.download_url} could not be fetched.")
errors.append(error)
error_tracebacks.append((str(error), traceback_str))

if errors:
raise InputFileError(errors)
if error_tracebacks:
raise InputFilesError(error_tracebacks)

def add_error(self, exception, resource=None):
"""Create a ``ProjectMessage`` ERROR record on the current `project`."""
Expand Down
15 changes: 11 additions & 4 deletions scanpipe/pipes/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,22 @@ def run_command_safely(command_args):
sure to sanitize and validate the input to prevent any malicious commands from
being executed.
As ``check`` is True, if the exit code is non-zero, it raises a CalledProcessError.
Raise a SubprocessError if the exit code was non-zero.
"""
result = subprocess.run( # nosec
completed_process = subprocess.run( # nosec
command_args,
capture_output=True,
text=True,
check=True,
)
return result.stdout

if completed_process.returncode:
error_msg = (
f'Error while executing cmd="{completed_process.args}": '
f'"{completed_process.stderr.strip()}"'
)
raise subprocess.SubprocessError(error_msg)

return completed_process.stdout


def get_request_session(uri):
Expand Down
39 changes: 34 additions & 5 deletions scanpipe/tests/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from scanpipe.models import CodebaseResource
from scanpipe.models import DiscoveredPackage
from scanpipe.models import Project
from scanpipe.pipelines import InputFileError
from scanpipe.pipelines import InputFilesError
from scanpipe.pipelines import Pipeline
from scanpipe.pipelines import is_pipeline
from scanpipe.pipelines import root_filesystem
Expand Down Expand Up @@ -166,11 +166,17 @@ def test_scanpipe_pipeline_class_download_missing_inputs(self, mock_get):
filename=file_location.name, is_uploaded=True
)
self.assertFalse(input_source.exists())
with self.assertRaises(InputFileError) as error:
with self.assertRaises(InputFilesError) as error:
pipeline.download_missing_inputs()
error_msg = "Uploaded file filename=notice.NOTICE [uploaded] not available."
self.assertEqual(f"['{error_msg}']", str(error.exception))
self.assertIn(error_msg, run.log)
error_msg = (
"InputFilesError encountered with the following issues:\n\n"
"Error 1: Uploaded file filename=notice.NOTICE [uploaded] not available."
"\n\nNo traceback available."
)
self.assertEqual(error_msg, str(error.exception))
self.assertIn(
"Uploaded file filename=notice.NOTICE [uploaded] not available.", run.log
)

project1.copy_input_from(file_location)
self.assertTrue(input_source.exists())
Expand All @@ -191,6 +197,29 @@ def test_scanpipe_pipeline_class_download_missing_inputs(self, mock_get):
self.assertTrue(input_source2.exists())
mock_get.assert_called_once()

@mock.patch("scanpipe.models.InputSource.fetch")
def test_scanpipe_pipeline_class_download_fetch_exception(self, mock_fetch):
project1 = Project.objects.create(name="Analysis")
run = project1.add_pipeline("do_nothing")
pipeline = run.make_pipeline_instance()

mock_fetch.side_effect = Exception("File not found")
download_url = "https://download.url/file.zip"
project1.add_input_source(download_url=download_url)

with self.assertRaises(InputFilesError) as error:
pipeline.download_missing_inputs()
self.assertIn(
"InputFilesError encountered with the following issues:",
str(error.exception),
)
self.assertIn("Error 1: File not found", str(error.exception))
self.assertIn("Traceback (most recent call last):", str(error.exception))
self.assertIn("Exception: File not found", str(error.exception))

self.assertIn("Fetching input from https://download.url/file.zip", run.log)
self.assertIn("https://download.url/file.zip could not be fetched.", run.log)

@mock.patch("git.repo.base.Repo.clone_from")
def test_scanpipe_pipeline_class_download_missing_inputs_git_repo(self, mock_clone):
project1 = Project.objects.create(name="Analysis")
Expand Down

0 comments on commit 2389d3c

Please sign in to comment.