Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: report known malware for all ecosystems #922

Merged
merged 2 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ Macaron checks that report integrity issues but do not map to SLSA requirements
* - Check ID
- Description
* - ``mcn_detect_malicious_metadata_1``
- This check analyzes the metadata of a package and reports malicious behavior. This check currently supports PyPI packages.
- This check analyzes PyPI package metadata to detect malicious behavior. It also reports known malware for packages in all ecosystems, although the metadata analysis itself is currently limited to PyPI packages.

----------------------
How does Macaron work?
Expand Down
4 changes: 3 additions & 1 deletion docs/source/pages/tutorials/detect_malicious_package.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ In this tutorial we show how to use Macaron to find malicious packages. Imagine
:widths: 25
:header-rows: 1

* - Supported packages
* - Supported packages for analysis
* - Python packages (PyPI)

Note that known malware is reported for packages across all ecosystems.

.. contents:: :local:


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@

import logging

from sqlalchemy import ForeignKey
import requests
from sqlalchemy import ForeignKey, String
from sqlalchemy.orm import Mapped, mapped_column

from macaron.database.db_custom_types import DBJsonDict
from macaron.database.table_definitions import CheckFacts
from macaron.json_tools import JsonType
from macaron.json_tools import JsonType, json_extract
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
from macaron.malware_analyzer.pypi_heuristics.metadata.closer_release_join_date import CloserReleaseJoinDateAnalyzer
Expand All @@ -28,6 +29,7 @@
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry
from macaron.slsa_analyzer.registry import registry
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
from macaron.util import send_post_http_raw

logger: logging.Logger = logging.getLogger(__name__)

Expand All @@ -40,10 +42,15 @@ class MaliciousMetadataFacts(CheckFacts):
#: The primary key.
id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003

#: Known malware.
known_malware: Mapped[str | None] = mapped_column(
String, nullable=True, info={"justification": JustificationType.HREF}
)

#: Detailed information about the analysis.
detail_information: Mapped[dict[str, JsonType]] = mapped_column(DBJsonDict, nullable=False)

#: The result of analysis, which is of dict[Heuristics, HeuristicResult] type.
#: The result of analysis, which can be an empty dictionary.
result: Mapped[dict[Heuristics, HeuristicResult]] = mapped_column(
DBJsonDict, nullable=False, info={"justification": JustificationType.TEXT}
)
Expand Down Expand Up @@ -223,14 +230,43 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
CheckResultData
The result of the check.
"""
result_tables: list[CheckFacts] = []
# First check if this package is a known malware

url = "https://api.osv.dev/v1/query"
data = {"package": {"purl": ctx.component.purl}}
response = send_post_http_raw(url, json_data=data, headers=None)
res_obj = None
if response:
try:
res_obj = response.json()
except requests.exceptions.JSONDecodeError as error:
logger.debug("Unable to get a valid response from %s: %s", url, error)
if res_obj:
for vuln in res_obj.get("vulns", {}):
v_id = json_extract(vuln, ["id"], str)
if v_id and v_id.startswith("MAL-"):
result_tables.append(
MaliciousMetadataFacts(
known_malware=f"https://osv.dev/vulnerability/{v_id}",
result={},
detail_information=vuln,
confidence=Confidence.HIGH,
)
)
if result_tables:
return CheckResultData(
result_tables=result_tables,
result_type=CheckResultType.FAILED,
)

package_registry_info_entries = ctx.dynamic_data["package_registries"]
for package_registry_info_entry in package_registry_info_entries:
match package_registry_info_entry:
case PackageRegistryInfo(
build_tool=Pip() | Poetry(),
package_registry=PyPIRegistry() as pypi_registry,
) as pypi_registry_info:
result_tables: list[CheckFacts] = []

# Create an AssetLocator object for the PyPI package JSON object.
pypi_package_json = PyPIPackageJsonAsset(
Expand Down
74 changes: 74 additions & 0 deletions src/macaron/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,80 @@ def send_get_http_raw(
return response


def send_post_http_raw(
    url: str,
    json_data: dict | None = None,
    headers: dict | None = None,
    timeout: int | None = None,
    allow_redirects: bool = True,
) -> Response | None:
    """Send a POST HTTP request with the given url, data, and headers.

    This method also handles logging when the API server returns an error status code.

    Parameters
    ----------
    url : str
        The url of the request.
    json_data: dict | None
        The request payload, sent as the JSON body.
    headers : dict | None
        The dict that describes the headers of the request.
    timeout: int | None
        The request timeout (optional). When not provided (or zero), the ``timeout`` value from the
        ``requests`` section of the defaults is used, falling back to 10.
    allow_redirects: bool
        Whether to allow redirects. Default: True.

    Returns
    -------
    Response | None
        If a Response object is returned and ``allow_redirects`` is ``True`` (the default) it will have a status code of
        200 (OK). If ``allow_redirects`` is ``False`` the response can instead have a status code of 302. Otherwise, the
        request has failed and ``None`` will be returned. Connection-level failures, including those that happen
        on a retry, are logged and also result in ``None``.
    """
    logger.debug("POST - %s", url)
    if not timeout:
        timeout = defaults.getint("requests", "timeout", fallback=10)
    error_retries = defaults.getint("requests", "error_retries", fallback=5)
    retry_counter = error_retries

    while True:
        # Every attempt (not only the first one) is guarded, so that a network failure
        # during a retry is reported as ``None`` instead of escaping as an exception.
        try:
            response = requests.post(
                url=url,
                json=json_data,
                headers=headers,
                timeout=timeout,
                allow_redirects=allow_redirects,
            )
        except requests.exceptions.RequestException as error:
            logger.debug(error)
            return None

        if response.status_code == 200:
            return response
        if not allow_redirects and response.status_code == 302:
            # Found, most likely because a redirect is about to happen.
            return response

        logger.debug(
            "Receiving error code %s from server.",
            response.status_code,
        )
        if retry_counter <= 0:
            logger.debug("Maximum retries reached: %s", error_retries)
            return None
        if response.status_code != 403:
            # Only 403 responses are retried; any other error status is final.
            return None

        # NOTE(review): check_rate_limit is documented for the GitHub API; confirm it
        # behaves sensibly for responses coming from other servers.
        check_rate_limit(response)
        retry_counter -= 1


def check_rate_limit(response: Response) -> None:
"""Check the remaining calls limit to GitHub API and wait accordingly.

Expand Down
10 changes: 10 additions & 0 deletions tests/integration/cases/tautoak4-hello-world/policy.dl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */

#include "prelude.dl"

/* The policy holds for a component when the mcn_detect_malicious_metadata_1 check has failed for it. */
Policy("check-malicious-package", component_id, "Check the malicious package.") :-
check_failed(component_id, "mcn_detect_malicious_metadata_1").

/* Apply the policy to the component matched by is_component for pkg:npm/tautoak4-hello-world. */
apply_policy_to("check-malicious-package", component_id) :-
is_component(component_id, "pkg:npm/tautoak4-hello-world").
21 changes: 21 additions & 0 deletions tests/integration/cases/tautoak4-hello-world/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

description: |
Analyzing a known malicious package.
tags:
- macaron-python-package
- macaron-docker-image

# Two steps: analyze the npm purl, then verify policy.dl against the analysis result.
steps:
- name: Run macaron analyze
kind: analyze
options:
command_args:
- -purl
- pkg:npm/tautoak4-hello-world
- name: Run macaron verify-policy to verify that the malicious metadata check fails.
kind: verify
options:
policy: policy.dl
10 changes: 10 additions & 0 deletions tests/integration/cases/type-extension/policy.dl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */

#include "prelude.dl"

/* The policy holds for a component when the mcn_detect_malicious_metadata_1 check has failed for it. */
Policy("check-malicious-package", component_id, "Check the malicious package.") :-
check_failed(component_id, "mcn_detect_malicious_metadata_1").

/* Apply the policy to the component matched by is_component for pkg:pypi/type-extension. */
apply_policy_to("check-malicious-package", component_id) :-
is_component(component_id, "pkg:pypi/type-extension").
21 changes: 21 additions & 0 deletions tests/integration/cases/type-extension/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

description: |
Analyzing a known malicious package.

tags:
- macaron-python-package
- macaron-docker-image

# Two steps: analyze the PyPI purl, then verify policy.dl against the analysis result.
steps:
- name: Run macaron analyze
kind: analyze
options:
command_args:
- -purl
- pkg:pypi/type-extension
- name: Run macaron verify-policy to verify that the malicious metadata check fails.
kind: verify
options:
policy: policy.dl
Loading