Skip to content

Commit

Permalink
fix: report known malware even when not labeled
Browse files Browse the repository at this point in the history
Signed-off-by: behnazh-w <[email protected]>
  • Loading branch information
behnazh-w committed Jan 5, 2025
1 parent 1ea1bd5 commit 6f9d7f4
Show file tree
Hide file tree
Showing 10 changed files with 229 additions and 64 deletions.
7 changes: 6 additions & 1 deletion src/macaron/config/defaults.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

[requests]
Expand Down Expand Up @@ -538,6 +538,11 @@ registry_url_scheme = https
fileserver_url_netloc = files.pythonhosted.org
fileserver_url_scheme = https

[deps_dev]
url_netloc = api.deps.dev
url_scheme = https
v3alpha_purl_endpoint = v3alpha/purl

# Configuration options for selecting the checks to run.
# Both the exclude and include are defined as list of strings:
# - The exclude list is used to specify the checks that will not run.
Expand Down
49 changes: 28 additions & 21 deletions src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This check examines the metadata of pypi packages with seven heuristics."""
Expand All @@ -11,7 +11,7 @@

from macaron.database.db_custom_types import DBJsonDict
from macaron.database.table_definitions import CheckFacts
from macaron.errors import HeuristicAnalyzerValueError
from macaron.errors import HeuristicAnalyzerValueError, InvalidHTTPResponseError
from macaron.json_tools import JsonType, json_extract
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
Expand All @@ -28,6 +28,7 @@
from macaron.slsa_analyzer.build_tool.poetry import Poetry
from macaron.slsa_analyzer.checks.base_check import BaseCheck
from macaron.slsa_analyzer.checks.check_result import CheckResultData, CheckResultType, Confidence, JustificationType
from macaron.slsa_analyzer.package_registry.deps_dev import DepsDevService
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry
from macaron.slsa_analyzer.registry import registry
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
Expand Down Expand Up @@ -177,7 +178,7 @@ def __init__(self) -> None:
"""Initialize a check instance."""
check_id = "mcn_detect_malicious_metadata_1"
description = """This check analyzes the metadata of a package based on reports malicious behavior.
Supported ecosystem: PyPI.
Supported ecosystem for unknown malware: PyPI.
"""
super().__init__(check_id=check_id, description=description, eval_reqs=[])

Expand Down Expand Up @@ -259,21 +260,28 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
The result of the check.
"""
result_tables: list[CheckFacts] = []
# First check if this package is a known malware
package_registry_info_entries = ctx.dynamic_data["package_registries"]

# First check if this package is a known malware
url = "https://api.osv.dev/v1/query"
data = {"package": {"purl": ctx.component.purl}}
response = send_post_http_raw(url, json_data=data, headers=None)
res_obj = None
if response:
try:
res_obj = response.json()
except requests.exceptions.JSONDecodeError as error:
logger.debug("Unable to get a valid response from %s: %s", url, error)
if res_obj:
for vuln in res_obj.get("vulns", {}):
v_id = json_extract(vuln, ["id"], str)
if v_id and v_id.startswith("MAL-"):

try:
package_exists = bool(DepsDevService.get_package_info(ctx.component.purl))
except InvalidHTTPResponseError as error:
logger.debug(error)

if not package_exists:
response = send_post_http_raw(url, json_data=data, headers=None)
res_obj = None
if response:
try:
res_obj = response.json()
except requests.exceptions.JSONDecodeError as error:
logger.debug("Unable to get a valid response from %s: %s", url, error)
if res_obj:
for vuln in res_obj.get("vulns", {}):
v_id = json_extract(vuln, ["id"], str)
result_tables.append(
MaliciousMetadataFacts(
known_malware=f"https://osv.dev/vulnerability/{v_id}",
Expand All @@ -282,13 +290,12 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
confidence=Confidence.HIGH,
)
)
if result_tables:
return CheckResultData(
result_tables=result_tables,
result_type=CheckResultType.FAILED,
)
if result_tables:
return CheckResultData(
result_tables=result_tables,
result_type=CheckResultType.FAILED,
)

package_registry_info_entries = ctx.dynamic_data["package_registries"]
for package_registry_info_entry in package_registry_info_entries:
match package_registry_info_entry:
case PackageRegistryInfo(
Expand Down
83 changes: 83 additions & 0 deletions src/macaron/slsa_analyzer/package_registry/deps_dev.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains implementation of deps.dev service."""

import json
import logging
import urllib.parse
from urllib.parse import quote as encode

import requests

from macaron.config.defaults import defaults
from macaron.errors import ConfigurationError, InvalidHTTPResponseError
from macaron.util import send_get_http_raw

logger: logging.Logger = logging.getLogger(__name__)


class DepsDevService:
    """The deps.dev service class."""

    @staticmethod
    def get_package_info(purl: str) -> dict | None:
        """Check if the package identified by the PackageURL (PURL) exists and return its information.

        The request URL is built from the ``[deps_dev]`` configuration section
        (``url_scheme``, ``url_netloc`` and ``v3alpha_purl_endpoint``) followed by the
        percent-encoded PURL.

        Parameters
        ----------
        purl: str
            The PackageURL (PURL).

        Returns
        -------
        dict | None
            The package metadata, or None if the ``[deps_dev]`` section is not configured
            or no response body is received.

        Raises
        ------
        ConfigurationError
            If ``url_netloc`` or ``v3alpha_purl_endpoint`` is missing in the ``[deps_dev]`` section.
        InvalidHTTPResponseError
            If the API URL cannot be constructed, or the response body is not valid JSON or is empty.
        """
        section_name = "deps_dev"
        if not defaults.has_section(section_name):
            return None
        section = defaults[section_name]

        url_netloc = section.get("url_netloc")
        if not url_netloc:
            raise ConfigurationError(
                f'The "url_netloc" key is missing in section [{section_name}] of the .ini configuration file.'
            )
        url_scheme = section.get("url_scheme", "https")
        v3alpha_purl_endpoint = section.get("v3alpha_purl_endpoint")
        if not v3alpha_purl_endpoint:
            raise ConfigurationError(
                f'The "v3alpha_purl_endpoint" key is missing in section [{section_name}] of the .ini configuration file.'
            )

        # safe="" makes quote() escape "/" as well, so the whole PURL becomes a single path segment.
        path_params = "/".join([v3alpha_purl_endpoint, encode(purl, safe="")])
        try:
            url = urllib.parse.urlunsplit(
                urllib.parse.SplitResult(
                    scheme=url_scheme,
                    netloc=url_netloc,
                    path=path_params,
                    query="",
                    fragment="",
                )
            )
        except ValueError as error:
            raise InvalidHTTPResponseError("Failed to construct the API URL.") from error

        response = send_get_http_raw(url)
        if response and response.text:
            try:
                metadata: dict = json.loads(response.text)
            # json.loads raises json.JSONDecodeError; requests.exceptions.JSONDecodeError is a
            # subclass raised only by Response.json(), so catching it here would never match.
            except json.JSONDecodeError as error:
                raise InvalidHTTPResponseError(f"Failed to process response from deps.dev for {url}.") from error
            if not metadata:
                raise InvalidHTTPResponseError(f"Empty response returned by {url} .")
            return metadata

        return None
47 changes: 10 additions & 37 deletions src/macaron/slsa_analyzer/package_registry/package_registry.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,16 @@
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module defines package registries."""

import json
import logging
import urllib.parse
from abc import ABC, abstractmethod
from datetime import datetime
from urllib.parse import quote as encode

import requests

from macaron.errors import InvalidHTTPResponseError
from macaron.json_tools import json_extract
from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool
from macaron.util import send_get_http_raw
from macaron.slsa_analyzer.package_registry.deps_dev import DepsDevService

logger: logging.Logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -50,7 +45,7 @@ def is_detected(self, build_tool: BaseBuildTool) -> bool:
based on the given build tool.
"""

def find_publish_timestamp(self, purl: str, registry_url: str | None = None) -> datetime:
def find_publish_timestamp(self, purl: str) -> datetime:
"""Retrieve the publication timestamp for a package specified by its purl from the deps.dev repository by default.
This method constructs a request URL based on the provided purl, sends an HTTP GET
Expand All @@ -65,8 +60,6 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
purl: str
The Package URL (purl) of the package whose publication timestamp is to be retrieved.
This should conform to the PURL specification.
registry_url: str | None
The registry URL that can be set for testing.
Returns
-------
Expand All @@ -86,40 +79,20 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
# in the AnalyzeContext object retrieved by the Repo Finder. This step should be
# implemented at the beginning of the analyze command to ensure that the data
# is available for subsequent processing.

base_url_parsed = urllib.parse.urlparse(registry_url or "https://api.deps.dev")
path_params = "/".join(["v3alpha", "purl", encode(purl, safe="")])
try:
url = urllib.parse.urlunsplit(
urllib.parse.SplitResult(
scheme=base_url_parsed.scheme,
netloc=base_url_parsed.netloc,
path=path_params,
query="",
fragment="",
)
)
except ValueError as error:
raise InvalidHTTPResponseError("Failed to construct the API URL.") from error

response = send_get_http_raw(url)
if response and response.text:
try:
metadata: dict = json.loads(response.text)
except requests.exceptions.JSONDecodeError as error:
raise InvalidHTTPResponseError(f"Failed to process response from deps.dev for {url}.") from error
if not metadata:
raise InvalidHTTPResponseError(f"Empty response returned by {url} .")

metadata = DepsDevService.get_package_info(purl)
except InvalidHTTPResponseError as error:
raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {purl}.") from error
if metadata:
timestamp = json_extract(metadata, ["version", "publishedAt"], str)
if not timestamp:
raise InvalidHTTPResponseError(f"The timestamp is missing in the response returned by {url}.")
raise InvalidHTTPResponseError(f"The timestamp is missing in the response returned for {purl}.")

logger.debug("Found timestamp: %s.", timestamp)

try:
return datetime.fromisoformat(timestamp)
except ValueError as error:
raise InvalidHTTPResponseError(f"The timestamp returned by {url} is invalid") from error
raise InvalidHTTPResponseError(f"The timestamp returned for {purl} is invalid") from error

raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {url}.")
raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {purl}.")
10 changes: 10 additions & 0 deletions tests/integration/cases/ultralytics/policy.dl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */

#include "prelude.dl"

Policy("check-malicious-package", component_id, "Check the malicious package.") :-
check_passed(component_id, "mcn_detect_malicious_metadata_1").

apply_policy_to("check-malicious-package", component_id) :-
is_component(component_id, "pkg:pypi/ultralytics").
21 changes: 21 additions & 0 deletions tests/integration/cases/ultralytics/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

description: |
Analyzing a popular package, some versions of which are compromised.
tags:
- macaron-python-package
- macaron-docker-image

steps:
- name: Run macaron analyze
kind: analyze
options:
command_args:
- -purl
- pkg:pypi/ultralytics
- name: Run macaron verify-policy to verify that the malicious metadata check passes.
kind: verify
options:
policy: policy.dl
10 changes: 10 additions & 0 deletions tests/integration/cases/ultralytics_8.3.46/policy.dl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */

#include "prelude.dl"

Policy("check-malicious-package", component_id, "Check the malicious package.") :-
check_failed(component_id, "mcn_detect_malicious_metadata_1").

apply_policy_to("check-malicious-package", component_id) :-
is_component(component_id, "pkg:pypi/ultralytics@8.3.46").
21 changes: 21 additions & 0 deletions tests/integration/cases/ultralytics_8.3.46/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

description: |
Analyzing a known malicious package.
tags:
- macaron-python-package
- macaron-docker-image

steps:
- name: Run macaron analyze
kind: analyze
options:
command_args:
- -purl
- pkg:pypi/ultralytics@8.3.46
- name: Run macaron verify-policy to verify that the malicious metadata check fails.
kind: verify
options:
policy: policy.dl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""Module to test the malicious metadata detection check."""
Expand Down Expand Up @@ -62,6 +62,10 @@ def test_detect_malicious_metadata(
registry_url_scheme = {base_url_parsed.scheme}
fileserver_url_netloc = {base_url_parsed.netloc}
fileserver_url_scheme = {base_url_parsed.scheme}
[deps_dev]
url_netloc = {base_url_parsed.netloc}
url_scheme = {base_url_parsed.scheme}
"""
user_config_path = os.path.join(tmp_path, "config.ini")
with open(user_config_path, "w", encoding="utf-8") as user_config_file:
Expand Down
Loading

0 comments on commit 6f9d7f4

Please sign in to comment.