Skip to content

Commit

Permalink
chore: rename comparison function; update commit finder failure messa…
Browse files Browse the repository at this point in the history
…ge; update e2e test file path; extract purl type check into standalone function and add unit test

Signed-off-by: Ben Selwyn-Smith <[email protected]>
  • Loading branch information
benmss committed Nov 29, 2023
1 parent 1948200 commit 79349a6
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 23 deletions.
66 changes: 54 additions & 12 deletions src/macaron/repo_finder/commit_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""This module contains the logic for matching PackageURL versions to repository commits via the tags they contain."""
import logging
import re
from enum import Enum
from re import Pattern

from git import TagReference
Expand Down Expand Up @@ -101,6 +102,17 @@
versioned_string = re.compile("[a-z]+[0-9]+$", flags=re.IGNORECASE) # e.g. RC1, M5, etc.


class PurlType(Enum):
    """The type represented by a PURL in terms of repository versus artifact.

    Unsupported types are allowed as a third type, so that callers can distinguish
    "not handled by the commit finder" from the two supported cases.
    """

    # Values are plain integers; a trailing comma here would silently turn each value into a 1-tuple.
    REPOSITORY = 0
    ARTIFACT = 1
    UNSUPPORTED = 2


def find_commit(git_obj: Git, purl: PackageURL) -> tuple[str, str]:
"""Try to find the commit matching the passed PURL.
Expand All @@ -125,18 +137,40 @@ def find_commit(git_obj: Git, purl: PackageURL) -> tuple[str, str]:
logger.debug("Missing version for analysis target: %s", purl.name)
return "", ""

repo_type = abstract_purl_type(purl)
if repo_type == PurlType.REPOSITORY:
return extract_commit_from_version(git_obj, version)
if repo_type == PurlType.ARTIFACT:
return find_commit_from_version_and_name(git_obj, purl.name, version)
logger.debug("Type of PURL is not supported for commit finding: %s", purl.type)
return "", ""


def abstract_purl_type(purl: PackageURL) -> PurlType:
    """Determine if the passed purl is a repository type, artifact type, or unsupported type.

    Parameters
    ----------
    purl: PackageURL
        A PURL that represents a repository, artifact, or something that is not supported.

    Returns
    -------
    PurlType:
        The identified type of the PURL.
    """
    available_domains = [git_service.hostname for git_service in GIT_SERVICES if git_service.hostname]
    # A PURL type counts as a repository when it maps to a known domain, or is itself the hostname of a
    # configured git service.
    domain = to_domain_from_known_purl_types(purl.type) or (purl.type if purl.type in available_domains else None)
    if domain:
        # PURL is a repository type.
        return PurlType.REPOSITORY

    try:
        # Looking the type up by value raises ValueError when it is not a member of DepsDevType.
        repo_finder_deps_dev.DepsDevType(purl.type)
    except ValueError:
        # PURL is an unsupported type.
        return PurlType.UNSUPPORTED

    # PURL is an artifact type.
    return PurlType.ARTIFACT


def extract_commit_from_version(git_obj: Git, version: str) -> tuple[str, str]:
Expand Down Expand Up @@ -236,19 +270,25 @@ def find_commit_from_version_and_name(git_obj: Git, name: str, version: str) ->
logger.debug("Missing tag name from tag dict: %s not in %s", tag_name, valid_tags.keys())

branch_name = _get_branch_of_commit(git_obj.get_commit_from_tag(tag_name))
try:
hexsha = tag.commit.hexsha
except ValueError:
logger.debug("Error trying to retrieve digest of commit: %s", tag.commit)
return "", ""

if not branch_name:
logger.debug("No valid branch associated with tag (commit): %s (%s)", tag_name, tag.commit.hexsha)
logger.debug("No valid branch associated with tag (commit): %s (%s)", tag_name, hexsha)
return "", ""

logger.debug(
"Found tag %s with commit %s of branch %s for artifact version %s@%s",
tag,
tag.commit.hexsha,
hexsha,
branch_name,
name,
version,
)
return branch_name, tag.commit.hexsha
return branch_name, hexsha


def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, list[str], bool]:
Expand Down Expand Up @@ -349,9 +389,9 @@ def match_tags(tag_list: list[str], artifact_name: str, artifact_version: str) -
tag_list: list[str]
The list of tags to check.
artifact_name: str
The name of the artifact to match.
The name of the analysis target.
artifact_version: str
The version of the artifact to match.
The version of the analysis target.
Returns
-------
Expand Down Expand Up @@ -419,13 +459,15 @@ def match_tags(tag_list: list[str], artifact_name: str, artifact_version: str) -
# If multiple tags still remain, sort them based on the closest match in terms of individual parts.
if len(matched_tags) > 1:
matched_tags.sort(
key=lambda matched_tag: _count_parts_in_tag(matched_tag["version"], matched_tag["suffix"], parts)
key=lambda matched_tag: _compute_tag_version_similarity(
matched_tag["version"], matched_tag["suffix"], parts
)
)

return [_["tag"] for _ in matched_tags]


def _count_parts_in_tag(tag_version: str, tag_suffix: str, version_parts: list[str]) -> int:
def _compute_tag_version_similarity(tag_version: str, tag_suffix: str, version_parts: list[str]) -> int:
"""Return a sort value based on how well the tag version and tag suffix match the parts of the actual version.
Parameters
Expand Down
12 changes: 6 additions & 6 deletions src/macaron/repo_finder/repo_finder_deps_dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""This module contains the PythonRepoFinderDD class to be used for finding repositories using deps.dev."""
import json
import logging
from enum import Enum
from enum import StrEnum
from urllib.parse import quote as encode

from packageurl import PackageURL
Expand All @@ -16,17 +16,17 @@
logger: logging.Logger = logging.getLogger(__name__)


class DepsDevType(Enum):
class DepsDevType(StrEnum):
"""
The package manager types supported by deps.dev.
This enum should be updated based on updates to deps.dev.
"""

MAVEN = ("maven",)
PYPI = ("pypi",)
NUGET = ("nuget",)
CARGO = ("cargo",)
MAVEN = "maven"
PYPI = "pypi"
NUGET = "nuget"
CARGO = "cargo"
NPM = "npm"


Expand Down
4 changes: 3 additions & 1 deletion src/macaron/slsa_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,9 @@ def _prepare_repo(
if not digest and purl and purl.version:
branch_name, digest = find_commit(git_obj, purl)
if not (branch_name and digest):
logger.error("Could not map purl version to specific commit in repository.")
logger.error(
"Could not map the input purl string to a specific commit in the corresponding repository."
)
return None

# Checking out the specific branch or commit. This operation varies depending on the git service that the
Expand Down
8 changes: 4 additions & 4 deletions tests/e2e/repo_finder/commit_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
# Set logging debug level.
logger.setLevel(logging.DEBUG)

path = Path(__file__).parent.joinpath("resources", "java_tags.json")
java_tags_file_path = Path(__file__).parent.joinpath("resources", "java_tags.json")


def test_commit_finder() -> int:
"""Test the commit finder's tag matching functionality."""
with open(path, encoding="utf-8") as tag_file:
with open(java_tags_file_path, encoding="utf-8") as tag_file:
json_data = json.load(tag_file)
fail_count = 0
for item in json_data:
Expand Down Expand Up @@ -52,15 +52,15 @@ def test_commit_finder() -> int:
def update_commit_finder_results() -> None:
    """Run the commit finder with the current results file and update the match values (override the file).

    Every item in the JSON file is expected to provide a ``name`` of the form ``<name>@<version>`` and a
    list of ``tags``; its ``match`` entry is replaced with the best tag returned by
    ``commit_finder.match_tags``, or an empty string when nothing matches.
    """
    # pylint: disable=protected-access
    with open(java_tags_file_path, encoding="utf-8") as tag_file:
        json_data = json.load(tag_file)

    for item in json_data:
        name = str(item["name"])
        name, version = name.split("@")
        matched_tags = commit_finder.match_tags(item["tags"], name, version)
        # match_tags returns its candidates in sorted order; the first one is recorded as the match.
        matched_tag = matched_tags[0] if matched_tags else ""
        item["match"] = matched_tag

    # Write the updated results back over the same file.
    with open(java_tags_file_path, "w", encoding="utf-8") as tag_file:
        json.dump(json_data, tag_file, indent=4)


Expand Down
39 changes: 39 additions & 0 deletions tests/repo_finder/test_commit_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
import re

import hypothesis
import pytest
from hypothesis import given, settings
from hypothesis.strategies import DataObject, data, text
from packageurl import PackageURL

from macaron.repo_finder import commit_finder
from macaron.repo_finder.commit_finder import PurlType

logger: logging.Logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -42,6 +44,43 @@ def _test_version(tags: list[str], name: str, version: str, target_tag: str) ->
assert matched_tags[0] == target_tag


# NOTE(review): the "[email protected]" fragments in the literals below look like e-mail obfuscation residue
# standing in for the original `name@version` text -- confirm the literals against the repository.
@pytest.mark.parametrize(
    ("purls", "expected"),
    [
        pytest.param(
            [
                "pkg:maven/apache/maven",
                "pkg:maven/commons-io/[email protected]",
                "pkg:pypi/[email protected]",
                "pkg:npm/@colors/[email protected]",
                "pkg:nuget/[email protected]",
                "pkg:cargo/[email protected]",
            ],
            PurlType.ARTIFACT,
            id="Artifact PURLs",
        ),
        pytest.param(
            [
                "pkg:github/apache/maven@69bc993b8089a2d3d1ddfd6c7d4f8dc6cc205995",
                "pkg:github/oracle/[email protected]",
                "pkg:bitbucket/owner/project@tag_5",
            ],
            PurlType.REPOSITORY,
            id="Repository PURLs",
        ),
        pytest.param(
            ["pkg:gem/[email protected]", "pkg:unknown-domain/project/owner@tag"],
            PurlType.UNSUPPORTED,
            id="Unsupported PURLs",
        ),
    ],
)
def test_abstract_purl_type(purls: list[str], expected: PurlType) -> None:
    """Check that every PURL string in ``purls`` is classified as ``expected`` by ``abstract_purl_type``."""
    # Each string is parsed into a PackageURL first, since abstract_purl_type takes the parsed object.
    for purl in purls:
        assert commit_finder.abstract_purl_type(PackageURL.from_string(purl)) == expected


@given(text())
@settings(max_examples=1000)
def test_pattern_generation(version: str) -> None:
Expand Down

0 comments on commit 79349a6

Please sign in to comment.