Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add command to run repo and commit finder without analysis #827

Merged
merged 17 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions src/macaron/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from macaron.errors import ConfigurationError
from macaron.output_reporter.reporter import HTMLReporter, JSONReporter, PolicyReporter
from macaron.policy_engine.policy_engine import run_policy_engine, show_prelude
from macaron.repo_finder import repo_finder
from macaron.slsa_analyzer.analyzer import Analyzer
from macaron.slsa_analyzer.git_service import GIT_SERVICES
from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES
Expand Down Expand Up @@ -212,6 +213,14 @@ def verify_policy(verify_policy_args: argparse.Namespace) -> int:
return os.EX_USAGE


def find_source(find_args: argparse.Namespace) -> int:
    """Run the repo and commit finder for the PURL supplied on the command line.

    Parameters
    ----------
    find_args : argparse.Namespace
        The parsed arguments of the ``find-source`` command. The ``package_url`` and ``repo_path`` attributes are read.

    Returns
    -------
    int
        ``os.EX_OK`` if the finder reports success, otherwise ``os.EX_DATAERR``.
    """
    # A missing or empty repo path is normalised to None before it is handed to the finder.
    user_repo = find_args.repo_path if find_args.repo_path else None
    source_found = repo_finder.find_source(find_args.package_url, user_repo)
    return os.EX_OK if source_found else os.EX_DATAERR


def perform_action(action_args: argparse.Namespace) -> None:
"""Perform the indicated action of Macaron."""
match action_args.action:
Expand Down Expand Up @@ -239,6 +248,17 @@ def perform_action(action_args: argparse.Namespace) -> None:
sys.exit(os.EX_USAGE)

analyze_slsa_levels_single(action_args)

case "find-source":
try:
for git_service in GIT_SERVICES:
git_service.load_defaults()
except ConfigurationError as error:
logger.error(error)
sys.exit(os.EX_USAGE)

find_source(action_args)

case _:
logger.error("Macaron does not support command option %s.", action_args.action)
sys.exit(os.EX_USAGE)
Expand Down Expand Up @@ -444,6 +464,28 @@ def main(argv: list[str] | None = None) -> None:
vp_group.add_argument("-f", "--file", type=str, help="Path to the Datalog policy.")
vp_group.add_argument("-s", "--show-prelude", action="store_true", help="Show policy prelude.")

# Find the repo and commit of a passed PURL, or the commit of a passed PURL and repo.
find_parser = sub_parser.add_parser(name="find-source")

find_parser.add_argument(
"-purl",
"--package-url",
required=True,
type=str,
help=("The PURL string to perform repository and commit finding for."),
)

find_parser.add_argument(
"-rp",
"--repo-path",
required=False,
type=str,
help=(
"The path to a repository that matches the provided PURL, can be local or remote. "
"This argument is only required in cases where the repository cannot be discovered automatically."
),
)

args = main_parser.parse_args(argv)

if not args.action:
Expand Down
1 change: 1 addition & 0 deletions src/macaron/config/defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ use_open_source_insights = True
redirect_urls =
gitbox.apache.org
git-wip-us.apache.org
find_source_should_clone = False

[repofinder.java]
# The list of maven-like repositories to attempt to retrieve artifact POMs from.
Expand Down
24 changes: 23 additions & 1 deletion src/macaron/repo_finder/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,26 @@
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This package contains the dependency resolvers for Java projects."""


def to_domain_from_known_purl_types(purl_type: str) -> str | None:
"""Return the git service domain from a known web-based purl type.

This method is used to handle cases where the purl type value is not the git domain but a pre-defined
repo-based type in https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst.

Note that this method will be updated when there are new pre-defined types as per the PURL specification.

Parameters
----------
purl_type : str
The type field of the PURL.

Returns
-------
str | None
The git service domain corresponding to the purl type or None if the purl type is unknown.
"""
known_types = {"github": "github.com", "bitbucket": "bitbucket.org"}
return known_types.get(purl_type, None)
3 changes: 1 addition & 2 deletions src/macaron/repo_finder/commit_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
from packageurl import PackageURL
from pydriller import Commit, Git

from macaron.repo_finder import repo_finder_deps_dev
from macaron.repo_finder.repo_finder import to_domain_from_known_purl_types
from macaron.repo_finder import repo_finder_deps_dev, to_domain_from_known_purl_types
from macaron.slsa_analyzer.git_service import GIT_SERVICES

logger: logging.Logger = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion src/macaron/repo_finder/provenance_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@

from macaron.errors import ProvenanceError
from macaron.json_tools import JsonType, json_extract
from macaron.repo_finder import to_domain_from_known_purl_types
from macaron.repo_finder.commit_finder import (
AbstractPurlType,
determine_abstract_purl_type,
extract_commit_from_version,
)
from macaron.repo_finder.repo_finder import to_domain_from_known_purl_types
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV1Payload, InTotoV01Payload

logger: logging.Logger = logging.getLogger(__name__)
Expand Down
159 changes: 137 additions & 22 deletions src/macaron/repo_finder/repo_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,18 @@
import os
from urllib.parse import ParseResult, urlunparse

import git
from packageurl import PackageURL

from macaron.config.defaults import defaults
from macaron.config.global_config import global_config
from macaron.repo_finder import to_domain_from_known_purl_types
from macaron.repo_finder.commit_finder import match_tags
from macaron.repo_finder.repo_finder_base import BaseRepoFinder
from macaron.repo_finder.repo_finder_deps_dev import DepsDevRepoFinder
from macaron.repo_finder.repo_finder_java import JavaRepoFinder
from macaron.repo_finder.repo_utils import generate_report, prepare_repo
from macaron.slsa_analyzer.git_url import GIT_REPOS_DIR

logger: logging.Logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -78,28 +84,6 @@ def find_repo(purl: PackageURL) -> str:
return repo_finder.find_repo(purl)


def to_domain_from_known_purl_types(purl_type: str) -> str | None:
"""Return the git service domain from a known web-based purl type.

This method is used to handle cases where the purl type value is not the git domain but a pre-defined
repo-based type in https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst.

Note that this method will be updated when there are new pre-defined types as per the PURL specification.

Parameters
----------
purl_type : str
The type field of the PURL.

Returns
-------
str | None
The git service domain corresponding to the purl type or None if the purl type is unknown.
"""
known_types = {"github": "github.com", "bitbucket": "bitbucket.org"}
return known_types.get(purl_type, None)


def to_repo_path(purl: PackageURL, available_domains: list[str]) -> str | None:
"""Return the repository path from the PURL string.

Expand Down Expand Up @@ -148,3 +132,134 @@ def to_repo_path(purl: PackageURL, available_domains: list[str]) -> str | None:
fragment="",
)
)


def find_source(purl_string: str, input_repo: str | None) -> bool:
    """Perform repo and commit finding for a passed PURL, or commit finding for a passed PURL and repo.

    The repository is searched for with ``find_repo`` unless ``input_repo`` is provided. The commit is then
    resolved through ``prepare_repo`` when the ``find_source_should_clone`` option of the ``repofinder`` section
    is enabled, or otherwise by matching the PURL against the tags listed by ``git ls-remote``. When a commit is
    found, a report is generated in the ``reports`` directory of the global output path.

    Parameters
    ----------
    purl_string: str
        The PURL string of the target.
    input_repo: str | None
        The repository path optionally provided by the user.

    Returns
    -------
    bool
        True if the source was found and the report was generated.
    """
    try:
        purl = PackageURL.from_string(purl_string)
    except ValueError as error:
        logger.error("Could not parse PURL: %s", error)
        return False

    if not purl.version:
        # Reported as an error, like the other failures, so the command does not fail without a visible reason.
        logger.error("PURL is missing version.")
        return False

    found_repo = input_repo
    if not input_repo:
        logger.debug("Searching for repo of PURL: %s", purl)
        found_repo = find_repo(purl)

    if not found_repo:
        logger.error("Could not find repo for PURL: %s", purl)
        return False

    # Disable other loggers for cleaner output. Note that they are not re-enabled by this function.
    logging.getLogger("macaron.slsa_analyzer.analyzer").disabled = True
    logging.getLogger("macaron.slsa_analyzer.git_url").disabled = True

    # Fall back to the tag based lookup if the option is absent from the configuration.
    if defaults.getboolean("repofinder", "find_source_should_clone", fallback=False):
        digest = _find_commit_from_prepared_repo(purl, found_repo)
    else:
        digest = _find_commit_from_remote_tags(purl, found_repo)

    if not digest:
        # Single user-visible failure message for every way the commit lookup can fail.
        logger.error("Could not find commit for purl / repository: %s / %s", purl, found_repo)
        return False

    if not input_repo:
        logger.info("Found repository for PURL: %s", found_repo)

    logger.info("Found commit for PURL: %s", digest)

    report_dir = os.path.join(global_config.output_path, "reports")
    return bool(generate_report(purl_string, digest, found_repo, report_dir))


def _find_commit_from_prepared_repo(purl: "PackageURL", repo: str) -> str | None:
    """Return the hash of the HEAD commit of the repository returned by ``prepare_repo``, or None on failure."""
    logger.debug("Preparing repo: %s", repo)
    git_obj = prepare_repo(
        os.path.join(global_config.output_path, GIT_REPOS_DIR),
        repo,
        purl=purl,
    )

    if not git_obj:
        # TODO expand this message to cover cases where the obj was not created due to lack of correct tag.
        logger.error("Could not resolve repository: %s", repo)
        return None

    try:
        return git_obj.get_head().hash
    except ValueError:
        logger.debug("Could not retrieve commit hash from repository.")
        return None


def _find_commit_from_remote_tags(purl: "PackageURL", repo: str) -> str | None:
    """Return the commit of the first remote tag that ``match_tags`` reports for the PURL, or None if there is none."""
    tags = get_tags_via_git_remote(repo)
    if not tags:
        logger.debug("No tags could be retrieved from repository: %s", repo)
        return None

    matches = match_tags(list(tags), purl.name, purl.version)
    if not matches:
        logger.debug("No tag of repository %s matches PURL: %s", repo, purl)
        return None

    return tags[matches[0]]


def get_tags_via_git_remote(repo: str) -> dict[str, str] | None:
    """Retrieve all tags from a given repository using ls-remote.

    Each output line of ``git ls-remote --tags`` is expected to be ``<hash>\\t<ref>``; other lines are ignored.
    A ref ending in ``^{}`` takes precedence over the plain entry of the same tag, regardless of the order in
    which the two lines are received.

    Parameters
    ----------
    repo: str
        The repository to perform the operation on.

    Returns
    -------
    dict[str, str] | None
        A dictionary of tags mapped to their commits, or None if the operation failed.
    """
    try:
        tag_data = git.cmd.Git().ls_remote("--tags", repo)
    except git.exc.GitCommandError as error:
        logger.debug("Failed to retrieve tags: %s", error)
        return None

    tags: dict[str, str] = {}
    for tag_line in tag_data.splitlines():
        fields = tag_line.strip().split("\t")
        if len(fields) != 2:
            # Covers blank lines as well as lines that are not a "<hash>\t<ref>" pair.
            continue
        commit, ref = fields

        is_peeled = ref.endswith("^{}")
        if is_peeled:
            ref = ref[:-3]

        # Only strip the leading namespace, so that a tag name that itself contains "refs/tags/" stays intact.
        tag = ref.removeprefix("refs/tags/")
        if not tag:
            continue

        if not is_peeled and tag in tags:
            # If a tag already exists, it must be the annotated reference of an annotated tag.
            # In that case we skip the tag as it does not point to the proper source commit.
            # Note that this should only happen if the tags are received out of standard order.
            # The comparison is done on the bare tag name because that is what the dictionary is keyed by.
            continue

        tags[tag] = commit

    logger.debug("Found %s tags via ls-remote of %s", len(tags), repo)

    return tags
2 changes: 1 addition & 1 deletion src/macaron/repo_finder/repo_finder_java.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def find_repo(self, purl: PackageURL) -> str:
limit = defaults.getint("repofinder.java", "parent_limit", fallback=10)

if not version:
logger.debug("Version missing for maven artifact: %s:%s", group, artifact)
logger.info("Version missing for maven artifact: %s:%s", group, artifact)
# TODO add support for Java artifacts without a version
return ""

Expand Down
Loading
Loading