diff --git a/docs/source/pages/developers_guide/apidoc/macaron.repo_finder.rst b/docs/source/pages/developers_guide/apidoc/macaron.repo_finder.rst
index 1fc7aca3b..724c2614f 100644
--- a/docs/source/pages/developers_guide/apidoc/macaron.repo_finder.rst
+++ b/docs/source/pages/developers_guide/apidoc/macaron.repo_finder.rst
@@ -57,6 +57,14 @@ macaron.repo\_finder.repo\_finder\_deps\_dev module
:undoc-members:
:show-inheritance:
+macaron.repo\_finder.repo\_finder\_enums module
+-----------------------------------------------
+
+.. automodule:: macaron.repo_finder.repo_finder_enums
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
macaron.repo\_finder.repo\_finder\_java module
----------------------------------------------
diff --git a/src/macaron/database/table_definitions.py b/src/macaron/database/table_definitions.py
index 035df8f31..669a650d5 100644
--- a/src/macaron/database/table_definitions.py
+++ b/src/macaron/database/table_definitions.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
"""
@@ -36,6 +36,7 @@
from macaron.database.database_manager import ORMBase
from macaron.database.db_custom_types import RFC3339DateTime
from macaron.errors import InvalidPURLError
+from macaron.repo_finder.repo_finder_enums import CommitFinderOutcome, RepoFinderOutcome
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, ProvenanceSubjectPURLMatcher
from macaron.slsa_analyzer.slsa_req import ReqName
@@ -177,7 +178,12 @@ class Component(PackageURLMixin, ORMBase):
lazy="immediate",
)
- def __init__(self, purl: str, analysis: Analysis, repository: "Repository | None"):
+ #: The one-to-one relationship with Repo Finder metadata.
+ repo_finder_metadata: Mapped["RepoFinderMetadata"] = relationship(back_populates="component", lazy="immediate")
+
+ def __init__(
+ self, purl: str, analysis: Analysis, repository: "Repository | None", repo_finder_metadata: "RepoFinderMetadata"
+ ):
"""
Instantiate the software component using PURL identifier.
@@ -204,7 +210,13 @@ def __init__(self, purl: str, analysis: Analysis, repository: "Repository | None
# TODO: Explore the ``dbm`` or ``shelve`` packages to support dict type, which are part of the Python standard library.
purl_kwargs = purl_parts.to_dict(encode=True)
- super().__init__(purl=purl, analysis=analysis, repository=repository, **purl_kwargs)
+ super().__init__(
+ purl=purl,
+ analysis=analysis,
+ repository=repository,
+ repo_finder_metadata=repo_finder_metadata,
+ **purl_kwargs,
+ )
@property
def report_file_name(self) -> str:
@@ -605,3 +617,34 @@ def from_purl_and_provenance(
return cls(sha256=sha256)
return None
+
+
+class RepoFinderMetadata(ORMBase):
+ """Metadata from the Repo Finder and Commit Finder runs for an associated Component."""
+
+ __tablename__ = "_repo_finder_metadata"
+
+ #: The primary key.
+ id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # noqa: A003
+
+ #: The foreign key to the software component.
+ component_id: Mapped[int] = mapped_column(Integer, ForeignKey(Component.id), nullable=False)
+
+ #: A one-to-one relationship with software components.
+ component: Mapped["Component"] = relationship(back_populates="repo_finder_metadata")
+
+ #: The outcome of the Repo Finder.
+ repo_finder_outcome: Mapped[Enum] = mapped_column(
+ Enum(RepoFinderOutcome), nullable=False # pylint: disable=protected-access,no-member
+ )
+
+ #: The outcome of the Commit Finder.
+ commit_finder_outcome: Mapped[Enum] = mapped_column(
+ Enum(CommitFinderOutcome), nullable=False # pylint: disable=protected-access,no-member
+ )
+
+ #: The URL found by the Repo Finder (if applicable).
+ found_url: Mapped[str] = mapped_column(String)
+
+ #: The commit of the tag matched by the Commit Finder.
+ found_commit: Mapped[str] = mapped_column(String)
diff --git a/src/macaron/dependency_analyzer/cyclonedx.py b/src/macaron/dependency_analyzer/cyclonedx.py
index d1bd93eac..1ebc930d2 100644
--- a/src/macaron/dependency_analyzer/cyclonedx.py
+++ b/src/macaron/dependency_analyzer/cyclonedx.py
@@ -30,6 +30,7 @@
from macaron.errors import CycloneDXParserError, DependencyAnalyzerError
from macaron.output_reporter.scm import SCMStatus
from macaron.repo_finder.repo_finder import find_repo
+from macaron.repo_finder.repo_finder_enums import RepoFinderOutcome
from macaron.repo_finder.repo_validator import find_valid_repository_url
logger: logging.Logger = logging.getLogger(__name__)
@@ -465,12 +466,12 @@ def _resolve_more_dependencies(dependencies: dict[str, DependencyInfo]) -> None:
for item in dependencies.values():
if item["available"] != SCMStatus.MISSING_SCM:
continue
-
- item["url"] = find_repo(item["purl"])
- if item["url"] == "":
+ url, outcome = find_repo(item["purl"])
+ if outcome not in {RepoFinderOutcome.FOUND, RepoFinderOutcome.FOUND_FROM_PARENT}:
logger.debug("Failed to find url for purl: %s", item["purl"])
else:
# TODO decide how to handle possible duplicates here
+ item["url"] = url
item["available"] = SCMStatus.AVAILABLE
item["note"] = ""
diff --git a/src/macaron/repo_finder/commit_finder.py b/src/macaron/repo_finder/commit_finder.py
index a637c2aaf..2850ab998 100644
--- a/src/macaron/repo_finder/commit_finder.py
+++ b/src/macaron/repo_finder/commit_finder.py
@@ -13,6 +13,7 @@
from pydriller import Commit, Git
from macaron.repo_finder import repo_finder_deps_dev, to_domain_from_known_purl_types
+from macaron.repo_finder.repo_finder_enums import CommitFinderOutcome
from macaron.slsa_analyzer.git_service import GIT_SERVICES
logger: logging.Logger = logging.getLogger(__name__)
@@ -121,7 +122,7 @@ class AbstractPurlType(Enum):
UNSUPPORTED = (2,)
-def find_commit(git_obj: Git, purl: PackageURL) -> str | None:
+def find_commit(git_obj: Git, purl: PackageURL) -> tuple[str | None, CommitFinderOutcome]:
"""Try to find the commit matching the passed PURL.
The PURL may be a repository type, e.g. GitHub, in which case the commit might be in its version part.
@@ -137,13 +138,13 @@ def find_commit(git_obj: Git, purl: PackageURL) -> str | None:
Returns
-------
- str | None
- The digest, or None if the commit cannot be correctly retrieved.
+ tuple[str | None, CommitFinderOutcome]
+ The digest, or None if the commit cannot be correctly retrieved, and the outcome to report.
"""
version = purl.version
if not version:
logger.debug("Missing version for analysis target: %s", purl.name)
- return None
+ return None, CommitFinderOutcome.NO_VERSION_PROVIDED
repo_type = determine_abstract_purl_type(purl)
if repo_type == AbstractPurlType.REPOSITORY:
@@ -151,7 +152,7 @@ def find_commit(git_obj: Git, purl: PackageURL) -> str | None:
if repo_type == AbstractPurlType.ARTIFACT:
return find_commit_from_version_and_name(git_obj, purl.name, version)
logger.debug("Type of PURL is not supported for commit finding: %s", purl.type)
- return None
+ return None, CommitFinderOutcome.UNSUPPORTED_PURL_TYPE
def determine_abstract_purl_type(purl: PackageURL) -> AbstractPurlType:
@@ -181,7 +182,7 @@ def determine_abstract_purl_type(purl: PackageURL) -> AbstractPurlType:
return AbstractPurlType.UNSUPPORTED
-def extract_commit_from_version(git_obj: Git, version: str) -> str | None:
+def extract_commit_from_version(git_obj: Git, version: str) -> tuple[str | None, CommitFinderOutcome]:
"""Try to extract the commit from the PURL's version parameter.
E.g.
@@ -197,8 +198,8 @@ def extract_commit_from_version(git_obj: Git, version: str) -> str | None:
Returns
-------
- str | None
- The digest, or None if the commit cannot be correctly retrieved.
+ tuple[str | None, CommitFinderOutcome]
+ The digest, or None if the commit cannot be correctly retrieved, and the outcome to report.
"""
# A commit hash is 40 characters in length, but commits are often referenced using only some of those.
commit: Commit | None = None
@@ -218,12 +219,12 @@ def extract_commit_from_version(git_obj: Git, version: str) -> str | None:
logger.debug("Failed to retrieve commit: %s", error)
if not commit:
- return None
+ return None, CommitFinderOutcome.REPO_PURL_FAILURE
- return commit.hash if commit else None
+ return commit.hash if commit else None, CommitFinderOutcome.MATCHED
-def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) -> str | None:
+def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) -> tuple[str | None, CommitFinderOutcome]:
"""Try to find the matching commit in a repository of a given version (and name) via tags.
The passed version is used to match with the tags in the target repository. The passed name is used in cases where
@@ -240,14 +241,19 @@ def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) ->
Returns
-------
- str | None
- The digest, or None if the commit cannot be correctly retrieved.
+ tuple[str | None, CommitFinderOutcome]
+ The digest, or None if the commit cannot be correctly retrieved, and the outcome to report.
"""
logger.debug("Searching for commit of artifact version using tags: %s@%s", name, version)
# Only consider tags that have a commit.
+ repo_tags = git_obj.repo.tags
+ if not repo_tags:
+ logger.debug("No tags found for %s", name)
+ return None, CommitFinderOutcome.NO_TAGS
+
valid_tags = {}
- for tag in git_obj.repo.tags:
+ for tag in repo_tags:
commit = _get_tag_commit(tag)
if not commit:
logger.debug("No commit found for tag: %s", tag)
@@ -258,14 +264,14 @@ def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) ->
if not valid_tags:
logger.debug("No tags with commits found for %s", name)
- return None
+ return None, CommitFinderOutcome.NO_TAGS_WITH_COMMITS
# Match tags.
- matched_tags = match_tags(list(valid_tags.keys()), name, version)
+ matched_tags, outcome = match_tags(list(valid_tags.keys()), name, version)
if not matched_tags:
logger.debug("No tags matched for %s", name)
- return None
+ return None, outcome
if len(matched_tags) > 1:
logger.debug("Tags found for %s: %s", name, len(matched_tags))
@@ -282,7 +288,7 @@ def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) ->
hexsha = tag.commit.hexsha
except ValueError:
logger.debug("Error trying to retrieve digest of commit: %s", tag.commit)
- return None
+ return None, CommitFinderOutcome.NO_TAG_COMMIT
logger.debug(
"Found tag %s with commit %s for artifact version %s@%s",
@@ -291,7 +297,7 @@ def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) ->
name,
version,
)
- return hexsha if hexsha else None
+ return hexsha if hexsha else None, CommitFinderOutcome.MATCHED
def _split_name(name: str) -> list[str]:
@@ -349,7 +355,7 @@ def _split_separators(version: str) -> list[str]:
return [item for item in split if item]
-def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, list[str]]:
+def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, list[str], CommitFinderOutcome]:
"""Build a version pattern to match the passed version string.
Parameters
@@ -362,12 +368,12 @@ def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, lis
Returns
-------
- tuple[Pattern | None, list[str]]
+ tuple[Pattern | None, list[str], CommitFinderOutcome]
- The tuple of the regex pattern that will match the version, and the list of version parts that were extracted.
- If an exception occurs from any regex operation, the pattern will be returned as None.
+ The tuple of the regex pattern that will match the version, the list of version parts that were extracted, and
+ the outcome to report. If an exception occurs from any regex operation, the pattern will be returned as None.
"""
if not version:
- return None, []
+ return None, [], CommitFinderOutcome.NO_VERSION_PROVIDED
# Escape input to prevent it being treated as regex.
name = re.escape(name)
@@ -376,7 +382,7 @@ def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, lis
if not parts:
logger.debug("Version contained no valid parts: %s", version)
- return None, []
+ return None, [], CommitFinderOutcome.INVALID_PURL
logger.debug("Final version parts: %s", parts)
@@ -470,14 +476,14 @@ def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, lis
# Compile the pattern.
try:
- return re.compile(this_version_pattern, flags=re.IGNORECASE), parts
+ return re.compile(this_version_pattern, flags=re.IGNORECASE), parts, CommitFinderOutcome.MATCHED
except Exception as error: # pylint: disable=broad-exception-caught
# The regex library uses an internal error that cannot be used here to satisfy pylint.
logger.debug("Error while compiling version regex: %s", error)
- return None, []
+ return None, [], CommitFinderOutcome.REGEX_COMPILE_FAILURE
-def match_tags(tag_list: list[str], name: str, version: str) -> list[str]:
+def match_tags(tag_list: list[str], name: str, version: str) -> tuple[list[str], CommitFinderOutcome]:
"""Return items of the passed tag list that match the passed artifact name and version.
Parameters
@@ -491,8 +497,8 @@ def match_tags(tag_list: list[str], name: str, version: str) -> list[str]:
Returns
-------
- list[str]
- The list of tags that matched the pattern.
+ tuple[list[str], CommitFinderOutcome]
+ The list of tags that matched the pattern, if any, and the outcome to report.
"""
logger.debug("Tag Sample: %s", tag_list[:5])
@@ -518,14 +524,14 @@ def match_tags(tag_list: list[str], name: str, version: str) -> list[str]:
if match.group(1):
prefix_match = tag
if prefix_match:
- return [prefix_match]
+ return [prefix_match], CommitFinderOutcome.MATCHED
if last_match:
- return [last_match]
+ return [last_match], CommitFinderOutcome.MATCHED
# Create the more complicated pattern for the passed version.
- pattern, parts = _build_version_pattern(name, version)
+ pattern, parts, outcome = _build_version_pattern(name, version)
if not pattern:
- return []
+ return [], outcome
# Match the tags.
matched_tags = []
@@ -546,8 +552,12 @@ def match_tags(tag_list: list[str], name: str, version: str) -> list[str]:
matched_tags = _fix_misaligned_tag_matches(matched_tags, version)
- if len(matched_tags) <= 1:
- return [_["tag"] for _ in matched_tags]
+ if not matched_tags:
+ logger.debug("Failed to match any tags.")
+ return [], CommitFinderOutcome.NO_TAGS_MATCHED
+
+ if len(matched_tags) == 1:
+ return [_["tag"] for _ in matched_tags], CommitFinderOutcome.MATCHED
# In the case of multiple matches, further work must be done.
@@ -588,7 +598,7 @@ def match_tags(tag_list: list[str], name: str, version: str) -> list[str]:
)
)
- return [_["tag"] for _ in matched_tags]
+ return [_["tag"] for _ in matched_tags], CommitFinderOutcome.MATCHED
def _fix_misaligned_tag_matches(matched_tags: list[dict[str, str]], version: str) -> list[dict[str, str]]:
diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py
index 7b446c00e..84c57ed3b 100644
--- a/src/macaron/repo_finder/provenance_extractor.py
+++ b/src/macaron/repo_finder/provenance_extractor.py
@@ -323,7 +323,7 @@ def check_if_input_purl_provenance_conflict(
# Check the PURL commit against the provenance.
if not digest_input and provenance_commit_digest and purl.version:
- purl_commit = extract_commit_from_version(git_obj, purl.version)
+ purl_commit, _ = extract_commit_from_version(git_obj, purl.version)
if purl_commit and purl_commit != provenance_commit_digest:
logger.debug(
"The commit digest passed via purl input does not match what exists in the "
diff --git a/src/macaron/repo_finder/repo_finder.py b/src/macaron/repo_finder/repo_finder.py
index 29b114a11..8be3d219f 100644
--- a/src/macaron/repo_finder/repo_finder.py
+++ b/src/macaron/repo_finder/repo_finder.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
"""
@@ -47,6 +47,7 @@
from macaron.repo_finder.commit_finder import find_commit, match_tags
from macaron.repo_finder.repo_finder_base import BaseRepoFinder
from macaron.repo_finder.repo_finder_deps_dev import DepsDevRepoFinder
+from macaron.repo_finder.repo_finder_enums import CommitFinderOutcome, RepoFinderOutcome
from macaron.repo_finder.repo_finder_java import JavaRepoFinder
from macaron.repo_finder.repo_utils import (
check_repo_urls_are_equivalent,
@@ -69,7 +70,7 @@
logger: logging.Logger = logging.getLogger(__name__)
-def find_repo(purl: PackageURL, check_latest_version: bool = True) -> str:
+def find_repo(purl: PackageURL, check_latest_version: bool = True) -> tuple[str, RepoFinderOutcome]:
"""Retrieve the repository URL that matches the given PURL.
Parameters
@@ -81,8 +82,8 @@ def find_repo(purl: PackageURL, check_latest_version: bool = True) -> str:
Returns
-------
- str :
- The repository URL found for the passed package.
+ tuple[str, RepoFinderOutcome] :
+ The repository URL for the passed package, if found, and the outcome to report.
"""
repo_finder: BaseRepoFinder
if purl.type == "maven":
@@ -96,26 +97,26 @@ def find_repo(purl: PackageURL, check_latest_version: bool = True) -> str:
repo_finder = DepsDevRepoFinder()
else:
logger.debug("No Repo Finder found for package type: %s of %s", purl.type, purl)
- return ""
+ return "", RepoFinderOutcome.UNSUPPORTED_PACKAGE_TYPE
# Call Repo Finder and return first valid URL
logger.debug("Analyzing %s with Repo Finder: %s", purl, type(repo_finder))
- found_repo = repo_finder.find_repo(purl)
+ found_repo, outcome = repo_finder.find_repo(purl)
if found_repo or not check_latest_version:
- return found_repo
+ return found_repo, outcome
# Try to find the latest version repo.
logger.error("Could not find repo for PURL: %s", purl)
latest_version_purl = get_latest_purl_if_different(purl)
if not latest_version_purl:
logger.debug("Could not find newer PURL than provided: %s", purl)
- return ""
+ return "", RepoFinderOutcome.NO_NEWER_VERSION
- found_repo = DepsDevRepoFinder().find_repo(latest_version_purl)
+ found_repo, outcome = DepsDevRepoFinder().find_repo(latest_version_purl)
if not found_repo:
logger.debug("Could not find repo from latest version of PURL: %s", latest_version_purl)
- return found_repo
+ return found_repo, outcome
def to_repo_path(purl: PackageURL, available_domains: list[str]) -> str | None:
@@ -147,7 +148,7 @@ def to_repo_path(purl: PackageURL, available_domains: list[str]) -> str | None:
"""
domain = to_domain_from_known_purl_types(purl.type) or (purl.type if purl.type in available_domains else None)
if not domain:
- logger.info("The PURL type of %s is not valid as a repository type. Trying to find the repository...", purl)
+ logger.info("The PURL type of %s is not valid as a repository type.", purl)
return None
if not purl.namespace:
@@ -205,7 +206,7 @@ def find_source(purl_string: str, input_repo: str | None, latest_version_fallbac
found_repo = input_repo
if not found_repo:
logger.debug("Searching for repo of PURL: %s", purl)
- found_repo = find_repo(purl)
+ found_repo, _ = find_repo(purl)
if not found_repo:
logger.error("Could not find repo for PURL: %s", purl)
@@ -214,31 +215,29 @@ def find_source(purl_string: str, input_repo: str | None, latest_version_fallbac
# Disable other loggers for cleaner output.
logging.getLogger("macaron.slsa_analyzer.analyzer").disabled = True
- digest = None
if defaults.getboolean("repofinder", "find_source_should_clone"):
# Clone the repo to retrieve the tags.
logger.debug("Preparing repo: %s", found_repo)
repo_dir = os.path.join(global_config.output_path, GIT_REPOS_DIR)
logging.getLogger("macaron.slsa_analyzer.git_url").disabled = True
# The prepare_repo function will also check the latest version of the artifact if required.
- git_obj = prepare_repo(repo_dir, found_repo, purl=purl, latest_version_fallback=not checked_latest_purl)
-
- if git_obj:
- try:
- digest = git_obj.get_head().hash
- except ValueError:
- logger.debug("Could not retrieve commit hash from repository.")
+ _, _, digest = prepare_repo(repo_dir, found_repo, purl=purl, latest_version_fallback=not checked_latest_purl)
if not digest:
return False
else:
# Retrieve the tags using a remote git operation.
tags = get_tags_via_git_remote(found_repo)
- if tags:
- matches = match_tags(list(tags.keys()), purl.name, purl.version)
- if matches:
- matched_tag = matches[0]
- digest = tags[matched_tag]
+ if not tags:
+ return False
+
+ matches, _ = match_tags(list(tags.keys()), purl.name, purl.version)
+
+ if not matches:
+ return False
+
+ matched_tag = matches[0]
+ digest = tags[matched_tag]
if not digest:
logger.error("Could not find commit for purl / repository: %s / %s", purl, found_repo)
@@ -286,7 +285,7 @@ def get_latest_purl_if_different(purl: PackageURL) -> PackageURL | None:
else:
no_version_purl = purl
- latest_version_purl = DepsDevRepoFinder.get_latest_version(no_version_purl)
+ latest_version_purl, _ = DepsDevRepoFinder.get_latest_version(no_version_purl)
if not latest_version_purl:
logger.error("Latest version PURL could not be found.")
return None
@@ -314,7 +313,7 @@ def get_latest_repo_if_different(latest_version_purl: PackageURL, original_repo:
str
The latest repository, or an empty string if not found.
"""
- latest_repo = find_repo(latest_version_purl, False)
+ latest_repo, _ = find_repo(latest_version_purl, False)
if not latest_repo:
logger.error("Could not find repository from latest PURL: %s", latest_version_purl)
return ""
@@ -379,7 +378,7 @@ def prepare_repo(
digest: str = "",
purl: PackageURL | None = None,
latest_version_fallback: bool = True,
-) -> Git | None:
+) -> tuple[Git | None, CommitFinderOutcome, str]:
"""Prepare the target repository for analysis.
If ``repo_path`` is a remote path, the target repo is cloned to ``{target_dir}/{unique_path}``.
@@ -407,8 +406,9 @@ def prepare_repo(
Returns
-------
- Git | None
- The pydriller.Git object of the repository or None if error.
+ tuple[Git | None, CommitFinderOutcome, str]
+ The pydriller.Git object of the repository or None if error; the outcome of the Commit Finder; and the final
+ digest.
"""
# TODO: separate the logic for handling remote and local repos instead of putting them into this method.
logger.info(
@@ -418,15 +418,15 @@ def prepare_repo(
digest,
)
- resolved_local_path = ""
is_remote = is_remote_repo(repo_path)
+ commit_finder_outcome = CommitFinderOutcome.NOT_USED
if is_remote:
logger.info("The path to repo %s is a remote path.", repo_path)
resolved_remote_path = get_remote_vcs_url(repo_path)
if not resolved_remote_path:
logger.error("The provided path to repo %s is not a valid remote path.", repo_path)
- return None
+ return None, commit_finder_outcome, digest
git_service = get_git_service(resolved_remote_path)
repo_unique_path = get_repo_dir_name(resolved_remote_path)
@@ -436,7 +436,7 @@ def prepare_repo(
git_service.clone_repo(resolved_local_path, resolved_remote_path)
except CloneError as error:
logger.error("Cannot clone %s: %s", resolved_remote_path, str(error))
- return None
+ return None, commit_finder_outcome, digest
else:
logger.info("Checking if the path to repo %s is a local path.", repo_path)
resolved_local_path = resolve_local_path(get_local_repos_path(), repo_path)
@@ -446,29 +446,29 @@ def prepare_repo(
git_obj = Git(resolved_local_path)
except InvalidGitRepositoryError:
logger.error("No git repo exists at %s.", resolved_local_path)
- return None
+ return None, commit_finder_outcome, digest
else:
logger.error("Error happened while preparing the repo.")
- return None
+ return None, commit_finder_outcome, digest
if is_empty_repo(git_obj):
logger.error("The target repository does not have any commit.")
- return None
+ return None, commit_finder_outcome, digest
# Find the digest and branch if a version has been specified
if not digest and purl and purl.version:
- found_digest = find_commit(git_obj, purl)
+ found_digest, commit_finder_outcome = find_commit(git_obj, purl)
if not found_digest:
logger.error("Could not map the input purl string to a specific commit in the corresponding repository.")
if not latest_version_fallback:
- return None
+ return None, commit_finder_outcome, digest
# If the commit could not be found, check if the latest version of the artifact has a different repository.
latest_purl = get_latest_purl_if_different(purl)
if not latest_purl:
- return None
+ return None, commit_finder_outcome, digest
latest_repo = get_latest_repo_if_different(latest_purl, repo_path)
if not latest_repo:
- return None
+ return None, commit_finder_outcome, digest
return prepare_repo(latest_repo, latest_repo, target_dir, latest_version_fallback=False)
digest = found_digest
@@ -490,15 +490,15 @@ def prepare_repo(
# ``git_url.check_out_repo_target``.
if not check_out_repo_target(git_obj, branch_name, digest, not is_remote):
logger.error("Cannot checkout the specific branch or commit of the target repo.")
- return None
+ return None, commit_finder_outcome, digest
- return git_obj
+ return git_obj, commit_finder_outcome, digest
try:
git_service.check_out_repo(git_obj, branch_name, digest, not is_remote)
except RepoCheckOutError as error:
logger.error("Failed to check out repository at %s", resolved_local_path)
logger.error(error)
- return None
+ return None, commit_finder_outcome, digest
- return git_obj
+ return git_obj, commit_finder_outcome, digest
diff --git a/src/macaron/repo_finder/repo_finder_base.py b/src/macaron/repo_finder/repo_finder_base.py
index ba177c89f..1e82aa475 100644
--- a/src/macaron/repo_finder/repo_finder_base.py
+++ b/src/macaron/repo_finder/repo_finder_base.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
"""This module contains the base class for the repo finders."""
@@ -7,12 +7,14 @@
from packageurl import PackageURL
+from macaron.repo_finder.repo_finder_enums import RepoFinderOutcome
+
class BaseRepoFinder(ABC):
"""This abstract class is used to represent Repository Finders."""
@abstractmethod
- def find_repo(self, purl: PackageURL) -> str:
+ def find_repo(self, purl: PackageURL) -> tuple[str, RepoFinderOutcome]:
"""
- Generate iterator from _find_repo that attempts to retrieve a repository URL that matches the passed artifact.
+ Attempt to retrieve a repository URL that matches the passed artifact.
@@ -23,6 +25,6 @@ def find_repo(self, purl: PackageURL) -> str:
Returns
-------
- str :
- The URL of the found repository.
+ tuple[str, RepoFinderOutcome] :
+ A tuple of the found URL (or an empty string), and the outcome of the Repo Finder.
"""
diff --git a/src/macaron/repo_finder/repo_finder_deps_dev.py b/src/macaron/repo_finder/repo_finder_deps_dev.py
index d66aaaebf..c2aaf61ef 100644
--- a/src/macaron/repo_finder/repo_finder_deps_dev.py
+++ b/src/macaron/repo_finder/repo_finder_deps_dev.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
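# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
-"""This module contains the PythonRepoFinderDD class to be used for finding repositories using deps.dev."""
+"""This module contains the DepsDevRepoFinder class to be used for finding repositories using deps.dev."""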
@@ -12,6 +12,7 @@
from macaron.json_tools import json_extract
from macaron.repo_finder.repo_finder_base import BaseRepoFinder
+from macaron.repo_finder.repo_finder_enums import RepoFinderOutcome
from macaron.repo_finder.repo_validator import find_valid_repository_url
from macaron.slsa_analyzer.git_url import clean_url
from macaron.util import send_get_http_raw
@@ -39,7 +40,7 @@ class DepsDevRepoFinder(BaseRepoFinder):
# See https://docs.deps.dev/api/v3alpha/
BASE_URL = "https://api.deps.dev/v3alpha/purl/"
- def find_repo(self, purl: PackageURL) -> str:
+ def find_repo(self, purl: PackageURL) -> tuple[str, RepoFinderOutcome]:
"""
Attempt to retrieve a repository URL that matches the passed artifact.
@@ -50,31 +51,31 @@ def find_repo(self, purl: PackageURL) -> str:
Returns
-------
- str :
- The URL of the found repository.
+ tuple[str, RepoFinderOutcome] :
+ A tuple of the found URL (or an empty string), and the outcome of the Repo Finder.
"""
- request_urls = self._create_urls(purl)
+ request_urls, outcome = self._create_urls(purl)
if not request_urls:
logger.debug("No urls found for: %s", purl)
- return ""
+ return "", outcome
json_data = self._retrieve_json(request_urls[0])
if not json_data:
logger.debug("Failed to retrieve json data for: %s", purl)
- return ""
+ return "", RepoFinderOutcome.DDEV_JSON_FETCH_ERROR
- urls = self._read_json(json_data)
+ urls, outcome = self._read_json(json_data)
if not urls:
logger.debug("Failed to extract repository URLs from json data: %s", purl)
- return ""
+ return "", outcome
logger.debug("Found %s urls: %s", len(urls), urls)
url = find_valid_repository_url(urls)
if url:
logger.debug("Found valid url: %s", url)
- return url
+ return url, RepoFinderOutcome.FOUND
- return ""
+ return "", RepoFinderOutcome.DDEV_NO_URLS
@staticmethod
def get_project_info(project_url: str) -> dict[str, Any] | None:
@@ -112,7 +113,7 @@ def get_project_info(project_url: str) -> dict[str, Any] | None:
return response_json
@staticmethod
- def get_latest_version(purl: PackageURL) -> PackageURL | None:
+ def get_latest_version(purl: PackageURL) -> tuple[PackageURL | None, RepoFinderOutcome]:
"""Return a PURL representing the latest version of the passed artifact.
Parameters
@@ -122,8 +123,8 @@ def get_latest_version(purl: PackageURL) -> PackageURL | None:
Returns
-------
- PackageURL | None
- The latest version of the PURL, or None if it could not be found.
+ tuple[PackageURL | None, RepoFinderOutcome]
+ The latest version of the PURL, or None if it could not be found, and the outcome to report.
"""
if purl.version:
namespace = purl.namespace + "/" if purl.namespace else ""
@@ -133,26 +134,29 @@ def get_latest_version(purl: PackageURL) -> PackageURL | None:
response = send_get_http_raw(url)
if not response:
- return None
+ return None, RepoFinderOutcome.DDEV_BAD_RESPONSE
try:
metadata: dict = json.loads(response.text)
except ValueError as error:
logger.debug("Failed to parse response from deps.dev: %s", error)
- return None
+ return None, RepoFinderOutcome.DDEV_JSON_FETCH_ERROR
versions_keys = ["package", "versions"] if "package" in metadata else ["version"]
versions = json_extract(metadata, versions_keys, list)
if not versions:
- return None
+ return None, RepoFinderOutcome.DDEV_JSON_INVALID
latest_version = json_extract(versions[-1], ["versionKey", "version"], str)
if not latest_version:
- return None
+ return None, RepoFinderOutcome.DDEV_JSON_INVALID
namespace = purl.namespace + "/" if purl.namespace else ""
- return PackageURL.from_string(f"pkg:{purl.type}/{namespace}{purl.name}@{latest_version}")
+ return (
+ PackageURL.from_string(f"pkg:{purl.type}/{namespace}{purl.name}@{latest_version}"),
+ RepoFinderOutcome.FOUND_FROM_LATEST,
+ )
- def _create_urls(self, purl: PackageURL) -> list[str]:
+ def _create_urls(self, purl: PackageURL) -> tuple[list[str], RepoFinderOutcome]:
"""
Create the urls to search for the metadata relating to the passed artifact.
@@ -168,13 +172,14 @@ def _create_urls(self, purl: PackageURL) -> list[str]:
- list[str]
- The list of created URLs.
+ tuple[list[str], RepoFinderOutcome]
+ The list of created URLs, and the outcome to report.
"""
+ outcome = None
if not purl.version:
- latest_purl = DepsDevRepoFinder.get_latest_version(purl)
+ latest_purl, outcome = DepsDevRepoFinder.get_latest_version(purl)
if not latest_purl:
- return []
+ return [], outcome
purl = latest_purl
- return [f"{DepsDevRepoFinder.BASE_URL}{encode(str(purl), safe='')}"]
+ return [f"{DepsDevRepoFinder.BASE_URL}{encode(str(purl), safe='')}"], outcome or RepoFinderOutcome.FOUND
def _retrieve_json(self, url: str) -> str:
"""
@@ -197,7 +202,7 @@ def _retrieve_json(self, url: str) -> str:
return response.text
- def _read_json(self, json_data: str) -> list[str]:
+ def _read_json(self, json_data: str) -> tuple[list[str], RepoFinderOutcome]:
"""
Parse the deps.dev json file and extract the repository links.
@@ -208,20 +213,20 @@ def _read_json(self, json_data: str) -> list[str]:
Returns
-------
- list[str] :
- The extracted contents as a list of strings.
+ tuple[list[str], RepoFinderOutcome] :
+ The extracted contents as a list, and the outcome to report.
"""
try:
parsed = json.loads(json_data)
except ValueError as error:
logger.debug("Failed to parse response from deps.dev: %s", error)
- return []
+ return [], RepoFinderOutcome.DDEV_JSON_FETCH_ERROR
links_keys = ["version", "links"] if "version" in parsed else ["links"]
links = json_extract(parsed, links_keys, list)
if not links:
logger.debug("Could not extract 'version' or 'links' from deps.dev response.")
- return []
+ return [], RepoFinderOutcome.DDEV_JSON_INVALID
result = []
for item in links:
@@ -229,4 +234,4 @@ def _read_json(self, json_data: str) -> list[str]:
if url and isinstance(url, str):
result.append(url)
- return result
+ return result, RepoFinderOutcome.FOUND
diff --git a/src/macaron/repo_finder/repo_finder_enums.py b/src/macaron/repo_finder/repo_finder_enums.py
new file mode 100644
index 000000000..24299f097
--- /dev/null
+++ b/src/macaron/repo_finder/repo_finder_enums.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This module contains Enums used to represent the outcome of Repo Finder or Commit Finder executions."""
+from enum import Enum
+
+
+class RepoFinderOutcome(Enum):
+ """An Enum of all outcomes of the Repo Finder being run for a software component."""
+
+ # States that relate to problems with user input.
+ NO_MAVEN_HOST_PROVIDED = "No maven host provided"
+ NO_POM_TAGS_PROVIDED = "No POM tags provided"
+ NO_VERSION_PROVIDED = "No version provided"
+ UNSUPPORTED_PACKAGE_TYPE = "Unsupported package type"
+
+ # States that relate to the target POM (Java).
+ POM_READ_ERROR = "POM read error"
+
+ # States that relate to the SCM in the POM (Java).
+ SCM_NO_URLS = "SCM no URLs"
+ SCM_NO_VALID_URLS = "SCM no valid URLs"
+
+ # States that relate to HTTP requests.
+ HTTP_INVALID = "HTTP invalid"
+ HTTP_NOT_FOUND = "HTTP not found"
+ HTTP_FORBIDDEN = "HTTP forbidden"
+ HTTP_OTHER = "HTTP other"
+
+ # States that relate to deps.dev (Non-Java).
+ DDEV_BAD_RESPONSE = "deps.dev bad response"
+ DDEV_JSON_FETCH_ERROR = "deps.dev fetch error"
+ DDEV_JSON_INVALID = "deps.dev JSON invalid"
+ DDEV_NO_URLS = "deps.dev no URLs"
+
+ # Version related states.
+ NO_NEWER_VERSION = "No newer version than provided which failed"
+
+ # Success states.
+ FOUND = "Found"
+ FOUND_FROM_PARENT = "Found from parent"
+ FOUND_FROM_LATEST = "Found from latest"
+
+ # Default state.
+ NOT_USED = "Not used"
+
+
+class CommitFinderOutcome(Enum):
+ """An Enum of all outcomes of the Commit Finder being run for a software component."""
+
+ # States that relate to problems with user input.
+ NO_VERSION_PROVIDED = "No version provided"
+ UNSUPPORTED_PURL_TYPE = "Unsupported PURL type"
+
+ # States that relate to repository type PURLs.
+ REPO_PURL_FAILURE = "Repository PURL failure"
+
+ # States that relate to artifact type PURLs.
+ NO_TAGS = "No tags"
+ NO_TAGS_WITH_COMMITS = "No tags with commits"
+ NO_TAG_COMMIT = "No tag commit"
+ INVALID_PURL = "No valid parts"
+ REGEX_COMPILE_FAILURE = "Regex compile failure"
+ NO_TAGS_MATCHED = "No tags matched"
+
+ # Success state.
+ MATCHED = "Matched"
+
+ # Default state.
+ NOT_USED = "Not used"
diff --git a/src/macaron/repo_finder/repo_finder_java.py b/src/macaron/repo_finder/repo_finder_java.py
index e6f349d3b..5fd100b68 100644
--- a/src/macaron/repo_finder/repo_finder_java.py
+++ b/src/macaron/repo_finder/repo_finder_java.py
@@ -1,9 +1,10 @@
-# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
"""This module contains the JavaRepoFinder class to be used for finding Java repositories."""
import logging
import re
+import urllib.parse
from xml.etree.ElementTree import Element # nosec
from packageurl import PackageURL
@@ -12,6 +13,7 @@
from macaron.parsers.pomparser import parse_pom_string
from macaron.repo_finder.repo_finder_base import BaseRepoFinder
from macaron.repo_finder.repo_finder_deps_dev import DepsDevRepoFinder
+from macaron.repo_finder.repo_finder_enums import RepoFinderOutcome
from macaron.repo_finder.repo_validator import find_valid_repository_url
from macaron.util import send_get_http_raw
@@ -25,7 +27,7 @@ def __init__(self) -> None:
"""Initialise the Java repository finder instance."""
self.pom_element: Element | None = None
- def find_repo(self, purl: PackageURL) -> str:
+ def find_repo(self, purl: PackageURL) -> tuple[str, RepoFinderOutcome]:
"""
Attempt to retrieve a repository URL that matches the passed artifact.
@@ -36,52 +38,68 @@ def find_repo(self, purl: PackageURL) -> str:
Yields
------
- str :
- The URL of the found repository.
+ tuple[str, RepoFinderOutcome] :
+ A tuple of the found URL (or an empty string), and the outcome of the Repo Finder.
"""
+ # Check POM tags exist.
+ tags = defaults.get_list("repofinder.java", "repo_pom_paths")
+ if not any(tags):
+ logger.debug("No POM tags found for URL discovery.")
+ return "", RepoFinderOutcome.NO_POM_TAGS_PROVIDED
+
+ group = purl.namespace or ""
+ artifact = purl.name
+ version = purl.version or ""
+
+ if not version:
+ logger.debug("Version missing for maven artifact: %s:%s", group, artifact)
+ # TODO add support for Java artifacts without a version
+ return "", RepoFinderOutcome.NO_VERSION_PROVIDED
+
# Perform the following in a loop:
# - Create URLs for the current artifact POM
# - Parse the POM
# - Try to extract SCM metadata and return URLs
# - Try to extract parent information and change current artifact to it
# - Repeat
- group = purl.namespace or ""
- artifact = purl.name
- version = purl.version or ""
limit = defaults.getint("repofinder.java", "parent_limit", fallback=10)
+ initial_limit = limit
+ last_outcome = RepoFinderOutcome.FOUND
+ check_parents = defaults.getboolean("repofinder.java", "find_parents")
if not version:
logger.info("Version missing for maven artifact: %s:%s", group, artifact)
- latest_purl = DepsDevRepoFinder().get_latest_version(purl)
+ latest_purl, outcome = DepsDevRepoFinder().get_latest_version(purl)
if not latest_purl or not latest_purl.version:
logger.debug("Could not find version for artifact: %s:%s", purl.namespace, purl.name)
- return ""
+ return "", outcome
group = latest_purl.namespace or ""
artifact = latest_purl.name
version = latest_purl.version
while group and artifact and version and limit > 0:
- # Create the URLs for retrieving the artifact's POM
+ # Create the URLs for retrieving the artifact's POM.
group = group.replace(".", "/")
request_urls = self._create_urls(group, artifact, version)
if not request_urls:
- # Abort if no URLs were created
+ # Abort if no URLs were created.
logger.debug("Failed to create request URLs for %s:%s:%s", group, artifact, version)
- return ""
+ return "", RepoFinderOutcome.NO_MAVEN_HOST_PROVIDED
- # Try each POM URL in order, terminating early if a match is found
+ # Try each POM URL in order, terminating early if a match is found.
pom = ""
+ pom_outcome = RepoFinderOutcome.FOUND
for request_url in request_urls:
- pom = self._retrieve_pom(request_url)
+ pom, pom_outcome = self._retrieve_pom(request_url)
if pom != "":
break
if pom == "":
- # Abort if no POM was found
+ # Abort if no POM was found.
logger.debug("No POM found for %s:%s:%s", group, artifact, version)
- return ""
+ return "", pom_outcome
- urls = self._read_pom(pom)
+ urls, read_outcome = self._read_pom(pom, tags)
if urls:
# If the found URLs fail to validate, finding can continue on to the next parent POM
@@ -89,18 +107,25 @@ def find_repo(self, purl: PackageURL) -> str:
url = find_valid_repository_url(urls)
if url:
logger.debug("Found valid url: %s", url)
- return url
+ return url, (
+ RepoFinderOutcome.FOUND if initial_limit == limit else RepoFinderOutcome.FOUND_FROM_PARENT
+ )
- if defaults.getboolean("repofinder.java", "find_parents") and self.pom_element is not None:
- # Attempt to extract parent information from POM
+ # No valid URLs were found from this POM.
+ last_outcome = RepoFinderOutcome.SCM_NO_VALID_URLS
+ else:
+ last_outcome = read_outcome
+
+ if check_parents and self.pom_element is not None:
+ # Attempt to extract parent information from POM.
group, artifact, version = self._find_parent(self.pom_element)
else:
break
limit = limit - 1
- # Nothing found
- return ""
+ # Nothing found.
+ return "", last_outcome
def _create_urls(self, group: str, artifact: str, version: str) -> list[str]:
"""
@@ -127,10 +152,22 @@ def _create_urls(self, group: str, artifact: str, version: str) -> list[str]:
)
urls = []
for repo in repositories:
- urls.append(f"{repo}/{group}/{artifact}/{version}/{artifact}-{version}.pom")
+ repo_url = urllib.parse.urlparse(repo)
+ pom_url = urllib.parse.ParseResult(
+ scheme=repo_url.scheme,
+ netloc=repo_url.netloc,
+ path=(
+ ((repo_url.path + "/") if repo_url.path else "")
+ + "/".join([group, artifact, version, f"{artifact}-{version}.pom"])
+ ),
+ params="",
+ query="",
+ fragment="",
+ ).geturl()
+ urls.append(pom_url)
return urls
- def _retrieve_pom(self, url: str) -> str:
+ def _retrieve_pom(self, url: str) -> tuple[str, RepoFinderOutcome]:
"""
Attempt to retrieve the file located at the passed URL.
@@ -141,18 +178,26 @@ def _retrieve_pom(self, url: str) -> str:
Returns
-------
- str :
- The retrieved file data or an empty string.
+ tuple[str, RepoFinderOutcome] :
+ The retrieved file data or an empty string, and the outcome to report.
"""
- response = send_get_http_raw(url, {})
+ response = send_get_http_raw(url, always_return_response=True)
if not response:
- return ""
+ return "", RepoFinderOutcome.HTTP_INVALID
+
+ if response.status_code == 404:
+ return "", RepoFinderOutcome.HTTP_NOT_FOUND
+ if response.status_code == 403:
+ return "", RepoFinderOutcome.HTTP_FORBIDDEN
+ if response.status_code != 200:
+ logger.debug("Failed to retrieve POM: HTTP %s", response.status_code)
+ return "", RepoFinderOutcome.HTTP_OTHER
logger.debug("Found artifact POM at: %s", url)
- return response.text
+ return response.text, RepoFinderOutcome.FOUND
- def _read_pom(self, pom: str) -> list[str]:
+ def _read_pom(self, pom: str, tags: list[str]) -> tuple[list[str], RepoFinderOutcome]:
"""
Parse the passed pom and extract the relevant tags.
@@ -163,23 +208,18 @@ def _read_pom(self, pom: str) -> list[str]:
Returns
-------
- list[str] :
- The extracted contents as a list of strings.
+ tuple[list[str], RepoFinderOutcome] :
+ A tuple of the found URLs, or an empty list, and the outcome to report.
"""
- # Retrieve tags
- tags = defaults.get_list("repofinder.java", "repo_pom_paths")
- if not any(tags):
- logger.debug("No POM tags found for URL discovery.")
- return []
-
- # Parse POM using defusedxml
+ # Parse POM using defusedxml.
pom_element = parse_pom_string(pom)
if pom_element is None:
- return []
+ return [], RepoFinderOutcome.POM_READ_ERROR
self.pom_element = pom_element
- # Attempt to extract SCM data and return URL
- return self._find_scm(pom_element, tags)
+ # Attempt to extract SCM data and return URL.
+ results = self._find_scm(pom_element, tags)
+ return results, RepoFinderOutcome.FOUND if results else RepoFinderOutcome.SCM_NO_URLS
def _find_scm(self, pom: Element, tags: list[str], resolve_properties: bool = True) -> list[str]:
"""
@@ -206,8 +246,8 @@ def _find_scm(self, pom: Element, tags: list[str], resolve_properties: bool = Tr
element: Element | None = pom
if tag.startswith("properties."):
- # Tags under properties are often "." separated
- # These can be safely split into two resulting tags as nested tags are not allowed here
+ # Tags under properties are often "." separated.
+ # These can be safely split into two resulting tags as nested tags are not allowed here.
tag_parts = ["properties", tag[11:]]
else:
# Other tags can be split into distinct elements via "."
@@ -218,10 +258,10 @@ def _find_scm(self, pom: Element, tags: list[str], resolve_properties: bool = Tr
if element is None:
break
if index == len(tag_parts) - 1 and element.text:
- # Add the contents of the final tag
+ # Add the contents of the final tag.
results.append(element.text.strip())
- # Resolve any Maven properties within the results
+ # Resolve any Maven properties within the results.
if resolve_properties:
results = self._resolve_properties(pom, results)
@@ -281,20 +321,20 @@ def _resolve_properties(self, pom: Element, values: list[str]) -> list[str]:
resolved_values = []
for value in values:
replacements: list = []
- # Calculate replacements - matches any number of ${...} entries in the current value
+ # Calculate replacements - matches any number of ${...} entries in the current value.
for match in re.finditer("\\$\\{[^}]+}", value):
text = match.group().replace("$", "").replace("{", "").replace("}", "")
if text.startswith("project."):
text = text.replace("project.", "")
else:
text = f"properties.{text}"
- # Call find_scm with property resolution flag set to False to prevent the possibility of endless looping
+ # Call find_scm with property resolution flag as False to prevent the possibility of endless looping.
result = self._find_scm(pom, [text], False)
if not result:
break
replacements.append([match.start(), result[0], match.end()])
- # Apply replacements in reverse order
+ # Apply replacements in reverse order.
# E.g.
# git@github.com:owner/project${javac.src.version}-${project.inceptionYear}.git
# ->
diff --git a/src/macaron/repo_finder/repo_utils.py b/src/macaron/repo_finder/repo_utils.py
index 467776673..0f9ca2683 100644
--- a/src/macaron/repo_finder/repo_utils.py
+++ b/src/macaron/repo_finder/repo_utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
"""This module contains the utility functions for repo and commit finder operations."""
diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py
index 894c82134..47e073b3f 100644
--- a/src/macaron/slsa_analyzer/analyzer.py
+++ b/src/macaron/slsa_analyzer/analyzer.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
"""This module handles the cloning and analyzing a Git repo."""
@@ -24,7 +24,7 @@
from macaron.config.global_config import global_config
from macaron.config.target_config import Configuration
from macaron.database.database_manager import DatabaseManager, get_db_manager, get_db_session
-from macaron.database.table_definitions import Analysis, Component, ProvenanceSubject, Repository
+from macaron.database.table_definitions import Analysis, Component, ProvenanceSubject, RepoFinderMetadata, Repository
from macaron.dependency_analyzer.cyclonedx import DependencyAnalyzer, DependencyInfo
from macaron.errors import (
DuplicateError,
@@ -44,6 +44,7 @@
)
from macaron.repo_finder.provenance_finder import ProvenanceFinder, find_provenance_from_ci
from macaron.repo_finder.repo_finder import prepare_repo
+from macaron.repo_finder.repo_finder_enums import CommitFinderOutcome, RepoFinderOutcome
from macaron.repo_finder.repo_utils import get_git_service
from macaron.repo_verifier.repo_verifier import verify_repo
from macaron.slsa_analyzer import git_url
@@ -378,8 +379,10 @@ def run_single(
# Prepare the repo.
git_obj = None
+ commit_finder_outcome = CommitFinderOutcome.NOT_USED
+ final_digest = analysis_target.digest
if analysis_target.repo_path:
- git_obj = prepare_repo(
+ git_obj, commit_finder_outcome, final_digest = prepare_repo(
os.path.join(self.output_path, GIT_REPOS_DIR),
analysis_target.repo_path,
analysis_target.branch,
@@ -387,6 +390,13 @@ def run_single(
analysis_target.parsed_purl,
)
+ repo_finder_metadata = RepoFinderMetadata(
+ repo_finder_outcome=analysis_target.repo_finder_outcome,
+ commit_finder_outcome=commit_finder_outcome,
+ found_url=analysis_target.repo_path,
+ found_commit=final_digest,
+ )
+
# Check if only one of the repo or digest came from direct input.
if git_obj and (provenance_repo_url or provenance_commit_digest) and parsed_purl:
if check_if_input_purl_provenance_conflict(
@@ -410,6 +420,7 @@ def run_single(
analysis,
analysis_target,
git_obj,
+ repo_finder_metadata,
existing_records,
provenance_payload,
)
@@ -614,11 +625,15 @@ class AnalysisTarget(NamedTuple):
#: The digest of the commit to analyze.
digest: str
+ #: The outcome of the Repo Finder on this analysis target.
+ repo_finder_outcome: RepoFinderOutcome
+
def add_component(
self,
analysis: Analysis,
analysis_target: AnalysisTarget,
git_obj: Git | None,
+ repo_finder_metadata: RepoFinderMetadata,
existing_records: dict[str, Record] | None = None,
provenance_payload: InTotoPayload | None = None,
) -> Component:
@@ -635,6 +650,8 @@ def add_component(
The target of this analysis.
git_obj: Git | None
The pydriller.Git object of the repository.
+ repo_finder_metadata: RepoFinderMetadata
+ The Repo Finder metadata for this component.
existing_records : dict[str, Record] | None
The mapping of existing records that the analysis has run successfully.
provenance_payload: InTotoVPayload | None
@@ -694,6 +711,7 @@ def add_component(
purl=str(purl),
analysis=analysis,
repository=repository,
+ repo_finder_metadata=repo_finder_metadata,
)
if provenance_payload:
@@ -777,6 +795,7 @@ def to_analysis_target(
repo_path_input: str = config.get_value("path")
input_branch: str = config.get_value("branch")
input_digest: str = config.get_value("digest")
+ repo_finder_outcome = RepoFinderOutcome.NOT_USED
match (parsed_purl, repo_path_input):
case (None, ""):
@@ -797,19 +816,21 @@ def to_analysis_target(
repo_path=provenance_repo_url or "",
branch="",
digest=provenance_commit_digest or "",
+ repo_finder_outcome=repo_finder_outcome,
)
# As there is no repo or commit from provenance, use the Repo Finder to find the repo.
converted_repo_path = repo_finder.to_repo_path(parsed_purl, available_domains)
if converted_repo_path is None:
# Try to find repo from PURL
- repo = repo_finder.find_repo(parsed_purl)
+ repo, repo_finder_outcome = repo_finder.find_repo(parsed_purl)
return Analyzer.AnalysisTarget(
parsed_purl=parsed_purl,
repo_path=converted_repo_path or repo or "",
branch=input_branch,
digest=input_digest,
+ repo_finder_outcome=repo_finder_outcome,
)
case (_, _) | (None, _):
@@ -828,6 +849,7 @@ def to_analysis_target(
repo_path=repo_path_input,
branch=input_branch,
digest=input_digest,
+ repo_finder_outcome=repo_finder_outcome,
)
return Analyzer.AnalysisTarget(
@@ -835,6 +857,7 @@ def to_analysis_target(
repo_path=repo_path_input,
branch=input_branch,
digest=provenance_commit_digest or "",
+ repo_finder_outcome=repo_finder_outcome,
)
case _:
diff --git a/src/macaron/util.py b/src/macaron/util.py
index 047d14125..c90f534e7 100644
--- a/src/macaron/util.py
+++ b/src/macaron/util.py
@@ -126,7 +126,11 @@ def send_head_http_raw(
def send_get_http_raw(
- url: str, headers: dict | None = None, timeout: int | None = None, allow_redirects: bool = True
+ url: str,
+ headers: dict | None = None,
+ timeout: int | None = None,
+ allow_redirects: bool = True,
+ always_return_response: bool = False,
) -> Response | None:
"""Send the GET HTTP request with the given url and headers.
@@ -179,7 +183,7 @@ def send_get_http_raw(
if response.status_code == 403:
check_rate_limit(response)
else:
- return None
+ return None if not always_return_response else response
retry_counter = retry_counter - 1
response = requests.get(
url=url,
diff --git a/tests/conftest.py b/tests/conftest.py
index d6b83bd78..b47aa7269 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,7 @@
import macaron
from macaron.code_analyzer.call_graph import BaseNode, CallGraph
from macaron.config.defaults import create_defaults, defaults, load_defaults
-from macaron.database.table_definitions import Analysis, Component, Repository
+from macaron.database.table_definitions import Analysis, Component, RepoFinderMetadata, Repository
from macaron.parsers.bashparser import BashScriptType, create_bash_node
from macaron.parsers.github_workflow_model import Identified, Job, NormalJob, RunStep, Workflow
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
@@ -413,6 +413,7 @@ def __init__(
# Must match test_provenance_finder.MockGit.MockTag.commit.
commit_sha="dig",
),
+ repo_finder_metadata=RepoFinderMetadata(),
)
super().__init__(component, *args, **kwargs)
diff --git a/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py b/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py
index df1eeacb0..ce421f56c 100644
--- a/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py
+++ b/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py
@@ -9,7 +9,7 @@
from cyclonedx.model.component import Component as CDXComponent
from macaron.config.defaults import defaults, load_defaults
-from macaron.database.table_definitions import Analysis, Component, Repository
+from macaron.database.table_definitions import Analysis, Component, RepoFinderMetadata, Repository
from macaron.dependency_analyzer.cyclonedx import CycloneDXParserError, DependencyInfo, deserialize_bom_json
from macaron.dependency_analyzer.cyclonedx_mvn import CycloneDxMaven
from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool
@@ -65,6 +65,7 @@ def test_get_dep_components_java(
purl="pkg:maven/io.micronaut.aws/aws-parent@4.0.0-SNAPSHOT?type=pom",
analysis=Analysis(),
repository=Repository(complete_name="github.com/micronaut-projects/micronaut-aws", fs_path=""),
+ repo_finder_metadata=RepoFinderMetadata(),
)
# Path to the sub-project bom.json files.
@@ -107,6 +108,7 @@ def test_get_dep_components_python(
purl="pkg:pypi/requests@2.31.0",
analysis=Analysis(),
repository=Repository(complete_name="github.com/psf/requests", fs_path=""),
+ repo_finder_metadata=RepoFinderMetadata(),
)
# Path to the sub-project bom.json files.
@@ -144,6 +146,7 @@ def test_convert_components_to_artifacts_java(
purl="pkg:maven/io.micronaut.aws/aws-parent@4.0.0-SNAPSHOT?type=pom",
analysis=Analysis(),
repository=Repository(complete_name="github.com/micronaut-projects/micronaut-aws", fs_path=""),
+ repo_finder_metadata=RepoFinderMetadata(),
)
# Path to the sub-project bom.json files.
@@ -177,6 +180,7 @@ def test_convert_components_to_artifacts_python(
purl="pkg:pypi/requests@2.31.0",
analysis=Analysis(),
repository=Repository(complete_name="github.com/psf/requests", fs_path=""),
+ repo_finder_metadata=RepoFinderMetadata(),
)
# Pass the root bom.json.
@@ -210,6 +214,7 @@ def test_low_quality_bom(
purl="pkg:maven/com.amazonaws/aws-lambda-java-events@3.11.0?type=jar",
analysis=Analysis(),
repository=Repository(complete_name="github.com/aws/aws-lambda-java-libs", fs_path=""),
+ repo_finder_metadata=RepoFinderMetadata(),
)
result = dep_analyzer.get_deps_from_sbom(bom_path, target_component=component)
assert snapshot == result
@@ -236,6 +241,7 @@ def test_multiple_versions(
purl="pkg:maven/com.amazonaws/aws-lambda-java-events@3.11.0?type=jar",
analysis=Analysis(),
repository=Repository(complete_name="github.com/aws/aws-lambda-java-libs", fs_path=""),
+ repo_finder_metadata=RepoFinderMetadata(),
)
result = dep_analyzer.get_deps_from_sbom(bom_path, target_component=component)
assert snapshot == result
@@ -250,6 +256,7 @@ def test_custom_sbom_name_with_maven() -> None:
purl="pkg:maven/com.example/cyclonedx-test@1.0-SNAPSHOT?type=jar",
analysis=Analysis(),
repository=None,
+ repo_finder_metadata=RepoFinderMetadata(),
)
custom_bom_dir = RESOURCES_DIR.joinpath("sbom_name_tests")
assert cyclonedx.collect_dependencies(str(custom_bom_dir.joinpath("single_named_sbom")), target_component=component)
diff --git a/tests/integration/cases/commit_finder_tag_matching_functionality/commit_finder.py b/tests/integration/cases/commit_finder_tag_matching_functionality/commit_finder.py
index 105af78b4..857acb0d0 100644
--- a/tests/integration/cases/commit_finder_tag_matching_functionality/commit_finder.py
+++ b/tests/integration/cases/commit_finder_tag_matching_functionality/commit_finder.py
@@ -30,7 +30,7 @@ def test_commit_finder() -> int:
artifacts = item["artifacts"]
for artifact in artifacts:
purl = PackageURL.from_string(artifact["purl"])
- matched_tags = commit_finder.match_tags(item["tags"], purl.name, purl.version or "")
+ matched_tags, _ = commit_finder.match_tags(item["tags"], purl.name, purl.version or "")
matched_tag = matched_tags[0] if matched_tags else ""
expected = str(artifact["match"])
if matched_tag != expected:
diff --git a/tests/integration/cases/repo_finder_remote_calls/repo_finder.py b/tests/integration/cases/repo_finder_remote_calls/repo_finder.py
index f529cb771..d6ba2081b 100644
--- a/tests/integration/cases/repo_finder_remote_calls/repo_finder.py
+++ b/tests/integration/cases/repo_finder_remote_calls/repo_finder.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
"""This script tests the functionality of the repo finder's remote API calls."""
@@ -13,6 +13,7 @@
from macaron.repo_finder import repo_validator
from macaron.repo_finder.repo_finder import find_repo
from macaron.repo_finder.repo_finder_deps_dev import DepsDevRepoFinder
+from macaron.repo_finder.repo_finder_enums import RepoFinderOutcome
from macaron.slsa_analyzer.git_url import clean_url
logger: logging.Logger = logging.getLogger(__name__)
@@ -43,27 +44,33 @@ def test_repo_finder() -> int:
defaults.set("git_service.gitlab", "hostname", "gitlab.com")
# Test Java package with SCM metadata in artifact POM.
- if not find_repo(PackageURL.from_string("pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.14.2")):
+ match, outcome = find_repo(PackageURL.from_string("pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.14.2"))
+ if not match or outcome != RepoFinderOutcome.FOUND:
return os.EX_UNAVAILABLE
# Test Java package with SCM metadata in artifact's parent POM.
- if not find_repo(PackageURL.from_string("pkg:maven/commons-cli/commons-cli@1.5.0")):
+ match, outcome = find_repo(PackageURL.from_string("pkg:maven/commons-cli/commons-cli@1.5.0"))
+ if not match or outcome != RepoFinderOutcome.FOUND_FROM_PARENT:
return os.EX_UNAVAILABLE
# Test deps.dev API for a Python package.
- if not find_repo(PackageURL.from_string("pkg:pypi/packageurl-python@0.11.1")):
+ match, outcome = find_repo(PackageURL.from_string("pkg:pypi/packageurl-python@0.11.1"))
+ if not match or outcome != RepoFinderOutcome.FOUND:
return os.EX_UNAVAILABLE
# Test deps.dev API for a Nuget package.
- if not find_repo(PackageURL.from_string("pkg:nuget/azure.core")):
+ match, outcome = find_repo(PackageURL.from_string("pkg:nuget/azure.core"))
+ if not match or outcome != RepoFinderOutcome.FOUND:
return os.EX_UNAVAILABLE
# Test deps.dev API for an NPM package.
- if not find_repo(PackageURL.from_string("pkg:npm/@colors/colors")):
+ match, outcome = find_repo(PackageURL.from_string("pkg:npm/@colors/colors"))
+ if not match or outcome != RepoFinderOutcome.FOUND:
return os.EX_UNAVAILABLE
# Test deps.dev API for Cargo package.
- if not find_repo(PackageURL.from_string("pkg:cargo/rand_core")):
+ match, outcome = find_repo(PackageURL.from_string("pkg:cargo/rand_core"))
+ if not match or outcome != RepoFinderOutcome.FOUND:
return os.EX_UNAVAILABLE
# Test redirecting URL from Apache commons-io package.
@@ -73,17 +80,18 @@ def test_repo_finder() -> int:
# Test Java package whose SCM metadata only points to the repo in later versions than is provided here.
purl = PackageURL.from_string("pkg:maven/io.vertx/vertx-auth-common@3.8.0")
- repo = find_repo(purl)
+ repo, _ = find_repo(purl)
if repo == "https://github.com/eclipse-vertx/vertx-auth":
return os.EX_UNAVAILABLE
- latest_purl = DepsDevRepoFinder().get_latest_version(purl)
+ latest_purl, _ = DepsDevRepoFinder().get_latest_version(purl)
assert latest_purl
- repo = find_repo(latest_purl)
+ repo, _ = find_repo(latest_purl)
if repo != "https://github.com/eclipse-vertx/vertx-auth":
return os.EX_UNAVAILABLE
# Test Java package that has no version.
- if not find_repo(PackageURL.from_string("pkg:maven/io.vertx/vertx-auth-common")):
+ match, outcome = find_repo(PackageURL.from_string("pkg:maven/io.vertx/vertx-auth-common"))
+ if not match or outcome != RepoFinderOutcome.FOUND:
return os.EX_UNAVAILABLE
return os.EX_OK
diff --git a/tests/malware_analyzer/pypi/conftest.py b/tests/malware_analyzer/pypi/conftest.py
index a5f775531..4a583fda3 100644
--- a/tests/malware_analyzer/pypi/conftest.py
+++ b/tests/malware_analyzer/pypi/conftest.py
@@ -7,7 +7,7 @@
import pytest
-from macaron.database.table_definitions import Analysis, Component
+from macaron.database.table_definitions import Analysis, Component, RepoFinderMetadata
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry
@@ -23,5 +23,7 @@ def pypi_package_json() -> MagicMock:
pypi_registry = MagicMock(spec=PyPIRegistry)
pypi_package = MagicMock(spec=PyPIPackageJsonAsset)
pypi_package.pypi_registry = pypi_registry
- pypi_package.component = Component(purl="pkg:pypi/package", analysis=Analysis(), repository=None)
+ pypi_package.component = Component(
+ purl="pkg:pypi/package", analysis=Analysis(), repository=None, repo_finder_metadata=RepoFinderMetadata()
+ )
return pypi_package
diff --git a/tests/repo_finder/test_commit_finder.py b/tests/repo_finder/test_commit_finder.py
index 45fa15aea..47c5ee6a2 100644
--- a/tests/repo_finder/test_commit_finder.py
+++ b/tests/repo_finder/test_commit_finder.py
@@ -6,48 +6,52 @@
import os
import re
import shutil
+from typing import Any
import hypothesis
import pytest
from hypothesis import given, settings
from hypothesis.strategies import DataObject, data, text
from packageurl import PackageURL
+from pydriller.git import Git
from macaron.repo_finder import commit_finder
from macaron.repo_finder.commit_finder import AbstractPurlType
+from macaron.repo_finder.repo_finder_enums import CommitFinderOutcome
from tests.slsa_analyzer.mock_git_utils import commit_files, initiate_repo
logger: logging.Logger = logging.getLogger(__name__)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
REPO_DIR = os.path.join(BASE_DIR, "mock_repos", "commit_finder/sample_repo")
+UNICODE_VERSION = "雪" # The Japanese character for "snow".
+TAG_VERSION = "2.3.4"
+TAG_VERSION_2 = "4.5.2"
-def test_get_commit_from_version() -> None:
+@pytest.fixture(name="tag_list")
+def tag_list_() -> list[str]:
+ """Return a list of tags."""
+ return ["test-name-v1.0.1-A", "v1.0.3+test", "v_1.0.5", "50_0_2", "r78rv109", "1.0.5-JRE"]
+
+
+@pytest.mark.parametrize(
+ ("version", "name", "tag_list_index"),
+ [
+ ("1.0.1-A", "test-name-1", 0),
+ ("1.0.3+test", "test-name-2", 1),
+ ("1.0.5", "test-name-3", 2),
+ ("50.0.2", "test-name-4", 3),
+ ("78.109", "test-name-5", 4),
+ ("1.0.5-JRE", "test-name-6", 5),
+ ],
+)
+def test_get_commit_from_version(version: str, name: str, tag_list_index: int, tag_list: list[str]) -> None:
"""Test resolving commits from version tags."""
- versions = [
- "1.0.1-A", # To match a tag with a named suffix.
- "1.0.3+test", # To match a tag with a '+' suffix.
- "1.0.5", # To match a tag with a 'v_' prefix.
- "50.0.2", # To match a tag separated by '_'.
- "78.109", # To match a tag separated by characters 'r' 'rv'.
- "1.0.5-JRE", # To NOT match the similar tag without the 'JRE' suffix.
- ]
-
- tags = ["test-name-v1.0.1-A", "v1.0.3+test", "v_1.0.5", "50_0_2", "r78rv109", "1.0.5-JRE"]
-
- # Perform tests
- purl_name = "test-name"
- for count, value in enumerate(versions):
- _test_version(tags, purl_name, value, tags[count])
- purl_name = "test-name" + "-" + str(count + 1)
-
-
-def _test_version(tags: list[str], name: str, version: str, target_tag: str) -> None:
- """Retrieve tag matching version and check it is correct."""
- matched_tags = commit_finder.match_tags(tags, name, version)
+ matched_tags, outcome = commit_finder.match_tags(tag_list, name, version)
assert matched_tags
- assert matched_tags[0] == target_tag
+ assert matched_tags[0] == tag_list[tag_list_index]
+ assert outcome == CommitFinderOutcome.MATCHED
@pytest.mark.parametrize(
@@ -87,8 +91,9 @@ def test_abstract_purl_type(purls: list[str], expected: AbstractPurlType) -> Non
assert commit_finder.determine_abstract_purl_type(PackageURL.from_string(purl)) == expected
-def test_commit_finder() -> None:
- """Test commit finder using mocked repository."""
+@pytest.fixture(name="mocked_repo")
+def mocked_repo_() -> Git:
+ """Create a mocked repository."""
if os.path.exists(REPO_DIR):
shutil.rmtree(REPO_DIR)
git_obj = initiate_repo(
@@ -106,82 +111,141 @@ def test_commit_finder() -> None:
file.write("A")
commit_files(git_obj, ["file_1"])
- # Create a commit with no associated branch.
- commit_0 = git_obj.repo.index.commit(message="Commit_0")
+ return git_obj
- # No version in PURL.
- assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven"))
- # Unsupported PURL type.
- assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:gem/ruby-artifact@1"))
+@pytest.fixture(name="mocked_repo_commit")
+def mocked_repo_commit_(mocked_repo: Git) -> Any:
+ """Add a commit to the mocked repository."""
+ return mocked_repo.repo.index.commit(message="Commit_0")
- # Hash not present in repository, tests hash and tag.
- assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:github/apache/maven@ab4ce3e"))
- # Valid PURL but repository has no tags yet.
- assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven@1.0"))
+@pytest.fixture(name="mocked_repo_empty_commit")
+def mocked_repo_empty_commit_(mocked_repo: Git) -> Any:
+ """Add an empty commit to the mocked repository."""
+ return mocked_repo.repo.index.commit(message="Empty_Commit")
- # Additional setup is done here to avoid tainting earlier tests.
+
+@pytest.fixture(name="mocked_repo_expanded")
+def mocked_repo_expanded_(mocked_repo: Git, mocked_repo_commit: Any, mocked_repo_empty_commit: Any) -> Any:
+ """Add tags to the mocked repository."""
+ mocked_repo.repo.create_tag("4.5", mocked_repo_commit.hexsha)
# Create a tag from a tree.
- tag_tree_version = "1.0"
- tree = git_obj.repo.heads.master.commit.tree
- git_obj.repo.create_tag(tag_tree_version, ref=tree)
+ mocked_repo.repo.create_tag("1.0", ref=mocked_repo.repo.heads.master.commit.tree)
- # Add a new tag with an associated commit. This is the Japanese character for 'snow'.
- unicode_version = "雪"
- git_obj.repo.create_tag(unicode_version, commit_0.hexsha)
+ # Add a tag with unicode version.
+ mocked_repo.repo.create_tag(UNICODE_VERSION, mocked_repo_commit.hexsha)
# Create a more typical tag on the same commit.
- tag_version = "2.3.4"
- git_obj.repo.create_tag(tag_version, commit_0.hexsha)
+ mocked_repo.repo.create_tag(TAG_VERSION, mocked_repo_commit.hexsha)
- # Add an empty commit with some tags.
- empty_commit = git_obj.repo.index.commit("Empty commit.")
- tag_version_2 = "4.5.2"
- git_obj.repo.create_tag(f"{tag_version_2}-DEV", ref=empty_commit.hexsha)
- git_obj.repo.create_tag(f"{tag_version_2}_DEV_RC1_RELEASE", ref=empty_commit.hexsha)
- git_obj.repo.create_tag(f"rel/prefix_name-{tag_version}", ref=empty_commit.hexsha)
+ # Add more tags.
+ mocked_repo.repo.create_tag(f"{TAG_VERSION_2}-DEV", ref=mocked_repo_empty_commit.hexsha)
+ mocked_repo.repo.create_tag(f"{TAG_VERSION_2}_DEV_RC1_RELEASE", ref=mocked_repo_empty_commit.hexsha)
+ mocked_repo.repo.create_tag(f"rel/prefix_name-{TAG_VERSION}", ref=mocked_repo_empty_commit.hexsha)
- # Version with a suffix and no matching tag.
- assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven@1-JRE"))
+ return mocked_repo
- # Version with only one digit and no matching tag.
- assert not commit_finder.find_commit(git_obj, PackageURL.from_string("pkg:maven/apache/maven@1"))
- # Unicode version.
- assert commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{unicode_version}"))
+@pytest.mark.parametrize(
+ ("purl_string", "expected_outcome"),
+ [
+ # No version in PURL.
+ ("pkg:maven/apache/maven", CommitFinderOutcome.NO_VERSION_PROVIDED),
+ # Unsupported PURL type.
+ ("pkg:gem/ruby-artifact@1", CommitFinderOutcome.UNSUPPORTED_PURL_TYPE),
+ # Hash not present in repository.
+ ("pkg:github/apache/maven@ab4ce3e", CommitFinderOutcome.REPO_PURL_FAILURE),
+ # Valid PURL but repository has no tags yet.
+ ("pkg:maven/apache/maven@1.0", CommitFinderOutcome.NO_TAGS),
+ ],
+)
+def test_commit_finder_tagless_failure(
+ mocked_repo: Git, purl_string: str, expected_outcome: CommitFinderOutcome
+) -> None:
+ """Test commit finder using mocked repository with no tags."""
+ match, outcome = commit_finder.find_commit(mocked_repo, PackageURL.from_string(purl_string))
+ assert not match
+ assert outcome == expected_outcome
- # Valid repository PURL.
- digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:github/apache/maven@{commit_0.hexsha}"))
- assert digest == commit_0.hexsha
- # Valid artifact PURL.
- digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version}"))
- assert digest == commit_0.hexsha
+@pytest.mark.parametrize(
+ ("purl_string", "expected_outcome"),
+ [
+ # Invalid PURL.
+ ("pkg:maven/[]@()", CommitFinderOutcome.INVALID_PURL),
+ # Version with a suffix and no matching tag.
+ ("pkg:maven/apache/maven@1-JRE", CommitFinderOutcome.NO_TAGS_MATCHED),
+ # Version with only one digit and no matching tag.
+ ("pkg:maven/apache/maven@1", CommitFinderOutcome.NO_TAGS_MATCHED),
+ ],
+)
+def test_commit_finder_tag_failure(
+ mocked_repo_expanded: Git, purl_string: str, expected_outcome: CommitFinderOutcome
+) -> None:
+ """Test commit finder using mocked repository with tags."""
+ match, outcome = commit_finder.find_commit(mocked_repo_expanded, PackageURL.from_string(purl_string))
+ assert not match
+ assert outcome == expected_outcome
- # Valid artifact PURL with an alphanumeric suffix.
- digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version}-RC1"))
- assert digest == commit_0.hexsha
- # Valid artifact PURL that should match a tag with a name prefix.
- digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/prefix_name@{tag_version}"))
- assert digest == empty_commit.hexsha
+@pytest.mark.parametrize(
+ "purl_string",
+ [
+ f"pkg:maven/apache/maven@{UNICODE_VERSION}",
+ f"pkg:maven/apache/maven@{TAG_VERSION}",
+ f"pkg:maven/apache/maven@{TAG_VERSION}-RC1",
+ ],
+)
+def test_commit_finder_success_commit(
+ mocked_repo_expanded: Git,
+ mocked_repo_commit: Any,
+ purl_string: str,
+) -> None:
+ """Test Commit Finder on mocked repository that should match valid PURLs."""
+ match, outcome = commit_finder.find_commit(mocked_repo_expanded, PackageURL.from_string(purl_string))
+ assert match == mocked_repo_commit.hexsha
+ assert outcome == CommitFinderOutcome.MATCHED
- # Valid artifact PURL that matches a version with a suffix, to a tag with the same suffix.
- digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version_2}-DEV"))
- assert digest == empty_commit.hexsha
- # Valid artifact PURL that matches a version with a suffix, to a tag with the same suffix part in a multi-suffix.
- digest = commit_finder.find_commit(
- git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version_2}_RELEASE")
+@pytest.mark.parametrize(
+ "purl_string",
+ [
+ # Match name prefix.
+ f"pkg:maven/apache/prefix_name@{TAG_VERSION}",
+ # Match suffix.
+ f"pkg:maven/apache/maven@{TAG_VERSION_2}-DEV",
+ # Match suffix in multi-suffix.
+ f"pkg:maven/apache/maven@{TAG_VERSION_2}_RELEASE",
+ # Match alphanumeric suffix in multi-suffix.
+ f"pkg:maven/apache/maven@{TAG_VERSION_2}_RC1",
+ ],
+)
+def test_commit_finder_success_empty_commit(
+ mocked_repo_expanded: Git, mocked_repo_empty_commit: Any, purl_string: str
+) -> None:
+ """Test Commit Finder on mocked repository that should match valid PURLs."""
+ match, outcome = commit_finder.find_commit(mocked_repo_expanded, PackageURL.from_string(purl_string))
+ assert match == mocked_repo_empty_commit.hexsha
+ assert outcome == CommitFinderOutcome.MATCHED
+
+
+def test_commit_finder_repo_purl_success(mocked_repo_expanded: Git, mocked_repo_commit: Any) -> None:
+ """Test Commit Finder on mocked repository using a repo type PURL."""
+ match, outcome = commit_finder.find_commit(
+ mocked_repo_expanded, PackageURL.from_string(f"pkg:github/apache/maven@{mocked_repo_commit.hexsha}")
)
- assert digest == empty_commit.hexsha
+ assert match == mocked_repo_commit.hexsha
+ assert outcome == CommitFinderOutcome.MATCHED
+
- # Valid artifact PURL that matches a version with an alphanumeric suffix, to a tag with the same suffix part in a
- # multi-suffix.
- digest = commit_finder.find_commit(git_obj, PackageURL.from_string(f"pkg:maven/apache/maven@{tag_version_2}_RC1"))
- assert digest == empty_commit.hexsha
+def test_commit_finder_tag_no_commit(mocked_repo: Git) -> None:
+ """Test the Commit Finder on a mocked repository that has a tag with no commit."""
+ mocked_repo.repo.create_tag("TEST", ref=mocked_repo.repo.heads.master.commit.tree)
+ match, outcome = commit_finder.find_commit(mocked_repo, PackageURL.from_string("pkg:maven/apache/maven@TEST"))
+ assert not match
+ assert outcome == CommitFinderOutcome.NO_TAGS_WITH_COMMITS
@given(text())
@@ -226,7 +290,7 @@ def test_version_to_tag_matching(_data: DataObject) -> None: # noqa: PT019
if not purl.version:
return
# Build the pattern from the version.
- pattern, parts = commit_finder._build_version_pattern(purl.name, purl.version)
+ pattern, parts, _ = commit_finder._build_version_pattern(purl.name, purl.version)
if not pattern:
return
# Generate the tag from a pattern that is very similar to how version patterns are made.
diff --git a/tests/repo_finder/test_repo_finder.py b/tests/repo_finder/test_repo_finder.py
index ba0bc2b20..de73c8a03 100644
--- a/tests/repo_finder/test_repo_finder.py
+++ b/tests/repo_finder/test_repo_finder.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
"""This module tests the repo finder."""
@@ -6,13 +6,32 @@
from pathlib import Path
import pytest
+from packageurl import PackageURL
+from pytest_httpserver import HTTPServer
from macaron.config.defaults import load_defaults
-from macaron.repo_finder.repo_finder_java import JavaRepoFinder
+from macaron.repo_finder import repo_finder
+from macaron.repo_finder.repo_finder_enums import RepoFinderOutcome
+
+
+@pytest.fixture(name="httpserver_java")
+def httpserver_java_(tmp_path: Path, httpserver: HTTPServer) -> HTTPServer:
+ """Set up the mock HTTP Server for the Repo Finder."""
+ url = httpserver.url_for("")
+ test_config = f"""
+ [repofinder.java]
+ artifact_repositories = {url}
+ """
+ test_config_path = os.path.join(tmp_path, "config.ini")
+ with open(test_config_path, "w", encoding="utf-8") as test_config_file:
+ test_config_file.write(test_config)
+ load_defaults(test_config_path)
+
+ return httpserver
@pytest.mark.parametrize(
- ("user_config_input", "expected"),
+ ("test_config", "expected"),
[
(
"""
@@ -21,7 +40,7 @@
scm.connection
scm.url
""",
- ["scm:git:git@github.com:oracle-samples/macaron.git", "https://github.com/oracle/macaron"],
+ "https://github.com/oracle-samples/macaron",
),
(
"""
@@ -30,12 +49,19 @@
scm.url
scm.connection
""",
- ["https://github.com/oracle/macaron", "scm:git:git@github.com:oracle-samples/macaron.git"],
+ "https://github.com/oracle/macaron",
),
],
)
-def test_pom_extraction_ordering(tmp_path: Path, user_config_input: str, expected: list[str]) -> None:
+def test_pom_extraction_ordering(tmp_path: Path, test_config: str, expected: str, httpserver: HTTPServer) -> None:
"""Test the ordering of elements extracted from the POM is correct and maintained."""
+ url = httpserver.url_for("")
+ test_config = test_config + f"\nartifact_repositories = {url}"
+ test_config_path = os.path.join(tmp_path, "config.ini")
+ with open(test_config_path, "w", encoding="utf-8") as test_config_file:
+ test_config_file.write(test_config)
+ load_defaults(test_config_path)
+
pom_text = """
https://example.org
@@ -48,12 +74,165 @@ def test_pom_extraction_ordering(tmp_path: Path, user_config_input: str, expecte
"""
- user_config_path = os.path.join(tmp_path, "config.ini")
- with open(user_config_path, "w", encoding="utf-8") as user_config_file:
- user_config_file.write(user_config_input)
- load_defaults(user_config_path)
- repo_finder = JavaRepoFinder()
+ group = "com.oracle.tools"
+ artifact = "oracle-tools-macaron"
+ version = "0.4"
+ target_url = "/" + "/".join(["/".join(group.split(".")), artifact, version, f"{artifact}-{version}.pom"])
+ httpserver.expect_request(target_url).respond_with_data(pom_text)
+
+ found_repo, outcome = repo_finder.find_repo(PackageURL.from_string(f"pkg:maven/{group}/{artifact}@{version}"))
+ assert found_repo
+ assert found_repo == expected
+ assert outcome == RepoFinderOutcome.FOUND
+
+
+@pytest.mark.parametrize(
+ ("test_config", "expected"),
+ [
+ (
+ """
+ [repofinder.java]
+ artifact_repositories =
+
+ """,
+ RepoFinderOutcome.NO_MAVEN_HOST_PROVIDED,
+ ),
+ (
+ """
+ [repofinder.java]
+ repo_pom_paths =
+
+ """,
+ RepoFinderOutcome.NO_POM_TAGS_PROVIDED,
+ ),
+ ],
+)
+def test_repo_finder_java_invalid_config(tmp_path: Path, test_config: str, expected: RepoFinderOutcome) -> None:
+ """Test the Repo Finder when inputs are invalid: a non-breaking space."""
+ test_config_path = os.path.join(tmp_path, "config.ini")
+ with open(test_config_path, "w", encoding="utf-8") as test_config_file:
+ test_config_file.write(test_config)
+ load_defaults(test_config_path)
+
+ found_repo, outcome = repo_finder.find_repo(PackageURL.from_string("pkg:maven/test/test@1"), False)
+ assert not found_repo
+ assert outcome == expected
+
+
+@pytest.mark.parametrize(
+ ("purl_string", "expected"),
+ [
+ ("pkg:maven/test/test", RepoFinderOutcome.NO_VERSION_PROVIDED),
+ ("pkg:test/test@test", RepoFinderOutcome.UNSUPPORTED_PACKAGE_TYPE),
+ ],
+)
+def test_repo_finder_java_invalid_input(purl_string: str, expected: RepoFinderOutcome) -> None:
+ """Test the Repo Finder when invalid input is provided."""
+ found_repo, outcome = repo_finder.find_repo(PackageURL.from_string(purl_string), False)
+ assert not found_repo
+ assert outcome == expected
+
+
+@pytest.mark.parametrize(
+ ("test_pom", "expected"),
+ [
+ (
+ """
+ #####
+
+
+
+
+ """,
+ RepoFinderOutcome.SCM_NO_URLS,
+ ),
+ (
+ """
+
+
+ TEST
+
+
+ """,
+ RepoFinderOutcome.SCM_NO_VALID_URLS,
+ ),
+ ],
+)
+def test_repo_finder_java_invalid_pom_or_scm(
+ httpserver_java: HTTPServer, test_pom: str, expected: RepoFinderOutcome
+) -> None:
+ """Test the Repo Finder when the POM or SCM metadata is invalid."""
+ group = "oracle"
+ artifact = "macaron"
+ version = "0.3"
+ target_url = "/" + "/".join([group, artifact, version, f"{artifact}-{version}.pom"])
+ httpserver_java.expect_request(target_url).respond_with_data(test_pom)
+
+ found_repo, outcome = repo_finder.find_repo(
+ PackageURL.from_string(f"pkg:maven/{group}/{artifact}@{version}"), False
+ )
+ assert not found_repo
+ assert outcome == expected
+
+
+def test_repo_finder_java_success(httpserver_java: HTTPServer) -> None:
+ """Test the Repo Finder on a repository with a valid POM."""
+ pom = """
+
+
+ https://github.com/oracle/macaron
+
+
+ """
+
+ group = "oracle"
+ artifact = "macaron"
+ version = "0.3"
+ target_url = "/" + "/".join([group, artifact, version, f"{artifact}-{version}.pom"])
+ httpserver_java.expect_request(target_url).respond_with_data(pom)
+
+ found_repo, outcome = repo_finder.find_repo(PackageURL.from_string(f"pkg:maven/{group}/{artifact}@{version}"))
+ assert found_repo
+ assert outcome == RepoFinderOutcome.FOUND
+
+
+def test_repo_finder_java_success_via_parent(httpserver_java: HTTPServer) -> None:
+ """Test the Repo Finder on a repository with a valid parent POM."""
+ pom = """
+
+
+ oracle
+ macaron
+ 0.4
+
+
+ """
+
+ parent_pom = """
+
+
+ https://github.com/oracle/macaron
+
+
+ """
+
+ group = "oracle"
+ artifact = "macaron"
+ version = "0.3"
+ target_url = "/" + "/".join([group, artifact, version, f"{artifact}-{version}.pom"])
+ httpserver_java.expect_request(target_url).respond_with_data(pom)
+
+ parent_version = "0.4"
+ parent_url = "/" + "/".join([group, artifact, parent_version, f"{artifact}-{parent_version}.pom"])
+ httpserver_java.expect_request(parent_url).respond_with_data(parent_pom)
- # Retrieve SCM from POM.
- assert expected == repo_finder._read_pom(pom_text) # pylint: disable=W0212
+ found_repo, outcome = repo_finder.find_repo(PackageURL.from_string(f"pkg:maven/{group}/{artifact}@{version}"))
+ assert found_repo
+ assert outcome == RepoFinderOutcome.FOUND_FROM_PARENT
diff --git a/tests/slsa_analyzer/checks/test_registry_e2e.py b/tests/slsa_analyzer/checks/test_registry_e2e.py
index 81be21580..63f54e07c 100644
--- a/tests/slsa_analyzer/checks/test_registry_e2e.py
+++ b/tests/slsa_analyzer/checks/test_registry_e2e.py
@@ -3,7 +3,7 @@
"""This module contains an end-to-end test for the check registry."""
-from macaron.database.table_definitions import Analysis, Component, Repository
+from macaron.database.table_definitions import Analysis, Component, RepoFinderMetadata, Repository
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
from macaron.slsa_analyzer.checks.base_check import BaseCheck
from macaron.slsa_analyzer.checks.check_result import CheckResultData, CheckResultType
@@ -118,6 +118,7 @@ def test_registry_e2e(self) -> None:
purl="pkg:github.com/package-url/purl-spec@244fd47e07d1004f0aed9c",
analysis=Analysis(),
repository=Repository(complete_name="github.com/package-url/purl-spec", fs_path=""),
+ repo_finder_metadata=RepoFinderMetadata(),
)
target = AnalyzeContext(component=component)
results = registry.scan(target)
diff --git a/tests/slsa_analyzer/mock_git_utils.py b/tests/slsa_analyzer/mock_git_utils.py
index 680515983..d5a0f918f 100644
--- a/tests/slsa_analyzer/mock_git_utils.py
+++ b/tests/slsa_analyzer/mock_git_utils.py
@@ -11,7 +11,7 @@
from git.exc import GitError
from pydriller.git import Git
-from macaron.database.table_definitions import Analysis, Component, Repository
+from macaron.database.table_definitions import Analysis, Component, RepoFinderMetadata, Repository
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
@@ -111,6 +111,7 @@ def prepare_repo_for_testing(
files=git_repo.files(),
fs_path=str(repo_path),
),
+ repo_finder_metadata=RepoFinderMetadata(),
)
analyze_ctx = AnalyzeContext(component=component, macaron_path=str(macaron_path), output_dir=str(output_dir))
diff --git a/tests/slsa_analyzer/test_analyzer.py b/tests/slsa_analyzer/test_analyzer.py
index d2b754cba..19c971af9 100644
--- a/tests/slsa_analyzer/test_analyzer.py
+++ b/tests/slsa_analyzer/test_analyzer.py
@@ -11,6 +11,7 @@
from macaron.config.target_config import Configuration
from macaron.errors import InvalidAnalysisTargetError, InvalidPURLError
+from macaron.repo_finder.repo_finder_enums import RepoFinderOutcome
from macaron.slsa_analyzer.analyzer import Analyzer
@@ -25,13 +26,18 @@
repo_path="https://github.com/apache/maven",
branch="",
digest="",
+ repo_finder_outcome=RepoFinderOutcome.NOT_USED,
),
),
(
Configuration({"purl": "", "path": "https://github.com/apache/maven"}),
["github.com", "gitlab.com", "bitbucket.org"],
Analyzer.AnalysisTarget(
- parsed_purl=None, repo_path="https://github.com/apache/maven", branch="", digest=""
+ parsed_purl=None,
+ repo_path="https://github.com/apache/maven",
+ branch="",
+ digest="",
+ repo_finder_outcome=RepoFinderOutcome.NOT_USED,
),
),
(
@@ -42,6 +48,7 @@
repo_path="https://github.com/apache/maven",
branch="",
digest="",
+ repo_finder_outcome=RepoFinderOutcome.NOT_USED,
),
),
(
@@ -59,6 +66,7 @@
repo_path="https://github.com/apache/maven",
branch="master",
digest="abcxyz",
+ repo_finder_outcome=RepoFinderOutcome.NOT_USED,
),
),
],