diff --git a/docs/source/pages/developers_guide/apidoc/macaron.rst b/docs/source/pages/developers_guide/apidoc/macaron.rst index fa9eace38..01ce11712 100644 --- a/docs/source/pages/developers_guide/apidoc/macaron.rst +++ b/docs/source/pages/developers_guide/apidoc/macaron.rst @@ -25,6 +25,14 @@ Subpackages Submodules ---------- +macaron.environment\_variables module +------------------------------------- + +.. automodule:: macaron.environment_variables + :members: + :undoc-members: + :show-inheritance: + macaron.errors module --------------------- diff --git a/src/macaron/environment_variables.py b/src/macaron/environment_variables.py new file mode 100644 index 000000000..0bfbf2869 --- /dev/null +++ b/src/macaron/environment_variables.py @@ -0,0 +1,45 @@ +# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Helper functions related to environment variables.""" + +import os +from collections.abc import Mapping + + +def get_patched_env( + patch: Mapping[str, str | None], + _env: dict[str, str] | None = None, +) -> dict[str, str]: + """Return a dictionary whose elements copied from ``os.environ`` and are updated according to ``patch``. + + This function does not modify ``os.environ``. + + Parameters + ---------- + patch : Mapping[str, str | None] + A mapping (immutable) in which: + - each key is an environment variable. + - each value is the value to set to the corresponding environment variable. + If value is ``None``, the environment variable is "unset". + _env : dict[str, str] | None + The environment being updated. + This is ``None`` by default, in which case ``os.environ`` is being updated. + + Returns + ------- + dict[str, str] + The the dictionary contains the patched env variables. + """ + env = os.environ if _env is None else _env + + # Make a copy of the environment. 
+ copied_env = dict(env) + + for var, value in patch.items(): + if value is None: + copied_env.pop(var, None) + else: + copied_env[var] = value + + return copied_env diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py index 33e2e9a41..c44b053fa 100644 --- a/src/macaron/slsa_analyzer/git_url.py +++ b/src/macaron/slsa_analyzer/git_url.py @@ -8,8 +8,10 @@ import os import re import string +import subprocess # nosec B404 import urllib.parse from configparser import ConfigParser +from pathlib import Path from git import GitCommandError from git.objects import Commit @@ -17,6 +19,7 @@ from pydriller.git import Git from macaron.config.defaults import defaults +from macaron.environment_variables import get_patched_env from macaron.errors import CloneError logger: logging.Logger = logging.getLogger(__name__) @@ -235,6 +238,12 @@ def clone_remote_repo(clone_dir: str, url: str) -> Repo | None: This could happen when multiple runs of Macaron use the same ``, leading to Macaron potentially trying to clone a repository multiple times. + We use treeless partial clone to reduce clone time, by retrieving trees and blobs lazily. + For more details, see the following: + - https://git-scm.com/docs/partial-clone + - https://git-scm.com/docs/git-rev-list + - https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone + Parameters ---------- clone_dir : str @@ -268,20 +277,38 @@ def clone_remote_repo(clone_dir: str, url: str) -> Repo | None: ) return None + # Ensure that the parent directory where the repo is cloned into exists. + parent_dir = Path(clone_dir).parent + parent_dir.mkdir(parents=True, exist_ok=True) + try: - # The Repo.clone_from method handles creating intermediate dirs. - return Repo.clone_from( - url=url, - to_path=clone_dir, - env={ - # Setting the GIT_TERMINAL_PROMPT environment variable to ``0`` stops - # ``git clone`` from prompting for login credentials. 
- "GIT_TERMINAL_PROMPT": "0", - }, + git_env_patch = { + # Setting the GIT_TERMINAL_PROMPT environment variable to ``0`` stops + # ``git clone`` from prompting for login credentials. + "GIT_TERMINAL_PROMPT": "0", + } + result = subprocess.run( # nosec B603 + args=["git", "clone", "--filter=tree:0", url], + capture_output=True, + cwd=parent_dir, + # If `check=True` and return status code is not zero, subprocess.CalledProcessError is + # raised, which we don't want. We want to check the return status code of the subprocess + # later on. + check=False, + env=get_patched_env(git_env_patch), ) - except GitCommandError as error: - # stderr here does not contain secrets, so it is safe for logging. - raise CloneError(error.stderr) from None + except (subprocess.CalledProcessError, OSError): + # Here, we raise from ``None`` to be extra-safe that no token is leaked. + # We should never store or print out the captured output from the subprocess + # because they might contain the secret-embedded URL. + raise CloneError("Failed to clone repository.") from None + + if result.returncode != 0: + raise CloneError( + "Failed to clone repository: the `git clone --filter=tree:0` command exited with non-zero return code." + ) + + return Repo(path=clone_dir) def get_repo_name_from_url(url: str) -> str: diff --git a/tests/test_environment_variables.py b/tests/test_environment_variables.py new file mode 100644 index 000000000..0964a50d2 --- /dev/null +++ b/tests/test_environment_variables.py @@ -0,0 +1,51 @@ +# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Tests for helper functions related to environment variables."""

import pytest

from macaron.environment_variables import get_patched_env


@pytest.mark.parametrize(
    ("before", "patch", "expect"),
    [
        # An empty patch leaves the environment as it was.
        pytest.param(
            {"FOO": "some-value"},
            {},
            {"FOO": "some-value"},
            id="patch is empty",
        ),
        # A variable absent from the environment is added.
        pytest.param(
            {"FOO": "some-value"},
            {"GIT_TERMINAL_PROMPT": "0"},
            {
                "FOO": "some-value",
                "GIT_TERMINAL_PROMPT": "0",
            },
            id="patch adding a variable",
        ),
        # A variable already present takes the patched value.
        pytest.param(
            {"GIT_TERMINAL_PROMPT": "1"},
            {"GIT_TERMINAL_PROMPT": "0"},
            {"GIT_TERMINAL_PROMPT": "0"},
            id="patch overriding a variable",
        ),
        # A ``None`` value unsets the variable.
        pytest.param(
            {"GIT_TERMINAL_PROMPT": "0"},
            {"GIT_TERMINAL_PROMPT": None},
            {},
            id="patch removing a variable",
        ),
    ],
)
def test_patched_env(
    before: dict[str, str],
    patch: dict[str, str | None],
    expect: dict[str, str],
) -> None:
    """Check that ``get_patched_env`` applies ``patch`` to a copy of ``before``."""
    # Pass a fresh copy so the parametrized input is never shared with the helper.
    patched = get_patched_env(patch, dict(before))

    assert patched == expect