From 1dca58d1492cea7be77f04f618f368db55122dea Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Fri, 15 Dec 2023 17:28:03 -0800 Subject: [PATCH 1/7] Add validate endpoint to public API Signed-off-by: Jono Yang --- purldb_public_project/urls.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/purldb_public_project/urls.py b/purldb_public_project/urls.py index c1349d98..a2a779e4 100644 --- a/purldb_public_project/urls.py +++ b/purldb_public_project/urls.py @@ -15,12 +15,15 @@ from rest_framework import routers from packagedb.api import PackagePublicViewSet +from packagedb.api import PurlValidateViewSet from packagedb.api import ResourceViewSet api_router = routers.DefaultRouter() api_router.register('packages', PackagePublicViewSet) api_router.register('resources', ResourceViewSet) +api_router.register('validate', PurlValidateViewSet, 'validate') + urlpatterns = [ path( From 9d84835685c3d48565f8bb6a964a7a1cdd899c21 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 18 Dec 2023 18:21:47 +0530 Subject: [PATCH 2/7] Remove unsupported conan API Signed-off-by: Keshav Priyadarshi --- packagedb/package_managers.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/packagedb/package_managers.py b/packagedb/package_managers.py index 181704fb..fe6ce138 100644 --- a/packagedb/package_managers.py +++ b/packagedb/package_managers.py @@ -442,27 +442,12 @@ def fetch(self, pkg: str) -> Iterable[PackageVersion]: url=f"https://hex.pm/api/packages/{pkg}", content_type="json", ) - for release in response["releases"]: - yield PackageVersion( - value=release["version"], - release_date=dateparser.parse(release["inserted_at"]), - ) - - -class ConanVersionAPI(VersionAPI): - """ - Fetch versions of ``conan`` packages from the Conan API - """ - - package_type = "conan" - - def fetch(self, pkg: str) -> Iterable[PackageVersion]: - response = get_response( - url=f"https://conan.io/center/api/ui/details?name={pkg}&user=_&channel=_", - content_type="json", - ) - for release in response["versions"]: - yield PackageVersion(value=release["version"]) + if response: + for release in response["releases"]: + yield PackageVersion( + value=release["version"], + release_date=dateparser.parse(release["inserted_at"]), + ) class GoproxyVersionAPI(VersionAPI): @@ -596,7 +581,6 @@ def fetch(self, pkg: str) -> Iterable[PackageVersion]: LaunchpadVersionAPI, CratesVersionAPI, DebianVersionAPI, - ConanVersionAPI, } From fe1b29ee5ba16284005a5e1393dfd85251749115 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 18 Dec 2023 18:26:18 +0530 Subject: [PATCH 3/7] Improve handling of unsupported ecosystem Signed-off-by: Keshav Priyadarshi --- packagedb/api.py | 105 +++++++++++++++++++----------------- packagedb/serializers.py | 13 +++++ packagedb/tests/test_api.py | 27 ++++++++++ 3 files changed, 96 insertions(+), 49 deletions(-) diff --git a/packagedb/api.py b/packagedb/api.py index 45c61a21..cffe74fc 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -38,7 +38,7 @@ get_version_fetcher) from packagedb.serializers import (DependentPackageSerializer, PackageAPISerializer, - PackageSetAPISerializer, PartySerializer, + PackageSetAPISerializer, PartySerializer, PurlValidateResponseSerializer, PurlValidateSerializer, ResourceAPISerializer) from packagedb.throttling import StaffUserRateThrottle @@ -694,15 +694,13 @@ class PurlValidateViewSet(viewsets.ViewSet): Take a `purl` and check whether it's valid PackageURL or not. Optionally set `check_existence` to true to check whether the package exists in real world. - **Note:** As of now `check_existence` only supports `apache`, `composer`, `deb`, `gem`, - `github`, `golang`, `maven`, `npm`, `nuget`and `pypi` ecosystems. + **Note:** As of now `check_existence` only supports `cargo`, `composer`, `deb`, + `gem`, `golang`, `hex`, `maven`, `npm`, `nuget` and `pypi` ecosystems. - **Input example:** - - { - "purl": "pkg:npm/foobar@12.3.1", - "check_existence": true, - } + **Example request:** + ```doc + GET /api/validate/?purl=pkg:npm/foobar@12.3.1&check_existence=false + ``` Response contains: @@ -711,12 +709,20 @@ class PurlValidateViewSet(viewsets.ViewSet): - exists - True, if input PURL exists in real world and `check_existence` flag is enabled. """ + serializer_class = PurlValidateSerializer + def get_view_name(self): return 'Validate PURL' def list(self, request): - purl = request.query_params.get("purl") - check_existence = request.query_params.get("check_existence") or False + serializer = self.serializer_class(data=request.query_params) + + if not serializer.is_valid(): + return Response({'errors': serializer.errors}, status=status.HTTP_400_BAD_REQUEST) + + validated_data = serializer.validated_data + purl = validated_data.get('purl') + check_existence = validated_data.get('check_existence', False) message_valid = "The provided PackageURL is valid." message_not_valid = "The provided PackageURL is not valid." @@ -724,64 +730,65 @@ def list(self, request): "The provided Package URL is valid, and the package exists in the upstream repo." ) message_valid_but_does_not_exist = ( - "The provided PackageURL is valid but does not exist in the upstream repo." + "The provided PackageURL is valid, but does not exist in the upstream repo." ) - message_error_no_purl = ( - "PackageURL (purl) is required. Please provide a PackageURL in the request." + message_valid_but_package_type_not_supported = ( + "The provided PackageURL is valid, but `check_existence` is not supported for this package type." ) - if not purl: - return Response( - { - "error": "Bad Request", - "message": message_error_no_purl, - }, - status=status.HTTP_400_BAD_REQUEST, - ) + response = {} + response['exists'] = None + response['purl'] = purl + response['valid'] = False + response['message'] = message_not_valid # validate purl try: package_url = PackageURL.from_string(purl) except ValueError: - return Response( - { - "valid": False, - "message": message_not_valid, - "purl": purl, - } - ) + serializer = PurlValidateResponseSerializer(response, context={'request': request}) + return Response(serializer.data) - exists = None - message = message_valid + + response['valid'] = True + response["message"] = message_valid + unsupported_ecosystem = False if check_existence: - exists = False + response['exists'] = False lookups = purl_to_lookups(purl) packages = Package.objects.filter(**lookups) if packages.exists(): - exists = True + response['exists'] = True else: versionless_purl = PackageURL( type=package_url.type, namespace=package_url.namespace, name=package_url.name, ) - all_versions = get_all_versions_plain(versionless_purl) - if (all_versions and not package_url.version) or ( - package_url.version in all_versions + if ( + package_url.type in VERSION_API_CLASSES_BY_PACKAGE_TYPE + and package_url.type in VERSION_CLASS_BY_PACKAGE_TYPE ): - # True, if requested purl has no version and any version of package exists upstream. - # True, if requested purl.version exists upstream. - exists = True - message = message_valid_and_exists if exists else message_valid_but_does_not_exist - - return Response( - { - "valid": True, - "exists": exists, - "message": message, - "purl": purl, - } - ) + all_versions = get_all_versions_plain(versionless_purl) + if all_versions and (not package_url.version or ( + package_url.version in all_versions) + ): + # True, if requested purl has no version and any version of package exists upstream. + # True, if requested purl.version exists upstream. + response['exists'] = True + else: + unsupported_ecosystem = True + + if response['exists']: + response["message"] = message_valid_and_exists + elif unsupported_ecosystem: + response['exists'] = None + response["message"] = message_valid_but_package_type_not_supported + else: + response["message"] =message_valid_but_does_not_exist + + serializer = PurlValidateResponseSerializer(response, context={'request': request}) + return Response(serializer.data) def get_resolved_purls(packages, supported_ecosystems): diff --git a/packagedb/serializers.py b/packagedb/serializers.py index 7dbef5f5..180581d9 100644 --- a/packagedb/serializers.py +++ b/packagedb/serializers.py @@ -10,6 +10,7 @@ from django.http import HttpRequest from django.urls import reverse_lazy +from rest_framework.serializers import BooleanField from rest_framework.serializers import CharField from rest_framework.serializers import HyperlinkedIdentityField from rest_framework.serializers import HyperlinkedModelSerializer @@ -17,6 +18,7 @@ from rest_framework.serializers import JSONField from rest_framework.serializers import ModelSerializer from rest_framework.serializers import SerializerMethodField +from rest_framework.serializers import Serializer from packagedb.models import DependentPackage from packagedb.models import Package @@ -328,3 +330,14 @@ class Meta: 'uuid', 'packages', ] + + +class PurlValidateResponseSerializer(Serializer): + valid = BooleanField() + exists = BooleanField(required=False) + message = CharField() + purl = CharField() + +class PurlValidateSerializer(Serializer): + purl = CharField(required=True) + check_existence = BooleanField(required=False, default=False) \ No newline at end of file diff --git a/packagedb/tests/test_api.py b/packagedb/tests/test_api.py index 6277da42..6507e2c6 100644 --- a/packagedb/tests/test_api.py +++ b/packagedb/tests/test_api.py @@ -1151,4 +1151,31 @@ def test_api_purl_validation(self): self.assertEquals( "The provided PackageURL is not valid.", response2.data["message"] ) + + def test_api_purl_validation_unsupported_package_type(self): + data1 = { + "purl": "pkg:random/foobar@1.1.0", + "check_existence": True, + } + response1 = self.client.get(f"/api/validate/", data=data1) + + + self.assertEquals(True, response1.data["valid"]) + self.assertEquals( + "The provided PackageURL is valid, but `check_existence` is not supported for this package type.", response1.data["message"] + ) + self.assertEquals(None, response1.data["exists"]) + + def test_api_purl_validation_empty_request(self): + data1 = {} + response1 = self.client.get(f"/api/validate/", data=data1) + + expected = { + "errors": { + "purl": [ + "This field is required." + ] + } + } + self.assertAlmostEquals(expected, response1.data) From aa723837dbd0e5823d774a8998ee9a761d0a0422 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Mon, 18 Dec 2023 18:50:07 -0800 Subject: [PATCH 4/7] Update matchcode-toolkit entrypoints #263 * Remove ScanAndFingerprint pipeline from matchcode-toolkit entrypoints * Bump version to 2.0.0 Signed-off-by: Jono Yang --- matchcode-toolkit/CHANGELOG.rst | 5 +++++ matchcode-toolkit/pyproject.toml | 2 +- matchcode-toolkit/setup.cfg | 5 +---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/matchcode-toolkit/CHANGELOG.rst b/matchcode-toolkit/CHANGELOG.rst index c0a0e6b3..b53be960 100644 --- a/matchcode-toolkit/CHANGELOG.rst +++ b/matchcode-toolkit/CHANGELOG.rst @@ -1,6 +1,11 @@ Changelog ========= +v2.0.0 +------ + +*2023-12-18* -- Remove ``ScanAndFingerprintPackage`` pipeline from matchcode-toolkit's entry points. (https://github.com/nexB/purldb/issues/263) + v1.1.3 ------ diff --git a/matchcode-toolkit/pyproject.toml b/matchcode-toolkit/pyproject.toml index d9e6da52..06b7289a 100644 --- a/matchcode-toolkit/pyproject.toml +++ b/matchcode-toolkit/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "matchcode-toolkit" -version = "1.1.3" +version = "2.0.0" [build-system] requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 6"] diff --git a/matchcode-toolkit/setup.cfg b/matchcode-toolkit/setup.cfg index 34045a93..9fc861c0 100644 --- a/matchcode-toolkit/setup.cfg +++ b/matchcode-toolkit/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = matchcode-toolkit -version = 1.1.3 +version = 2.0.0 license = Apache-2.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 @@ -65,6 +65,3 @@ docs = [options.entry_points] scancode_post_scan = fingerprint = matchcode_toolkit.plugin_fingerprint:Fingerprint - -scancodeio_pipelines = - scan_and_fingerprint_package = matchcode_toolkit.pipelines.scan_and_fingerprint_package:ScanAndFingerprintPackage From 2169e505de693803a40c519f36d07fe78e04210a Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Tue, 19 Dec 2023 09:55:59 -0800 Subject: [PATCH 5/7] Update ScanAndFingerprintPackage pipeline #263 * Bump version to 2.0.1 Signed-off-by: Jono Yang --- matchcode-toolkit/pyproject.toml | 2 +- matchcode-toolkit/setup.cfg | 2 +- .../pipelines/scan_and_fingerprint_package.py | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/matchcode-toolkit/pyproject.toml b/matchcode-toolkit/pyproject.toml index 06b7289a..820a8f08 100644 --- a/matchcode-toolkit/pyproject.toml +++ b/matchcode-toolkit/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "matchcode-toolkit" -version = "2.0.0" +version = "2.0.1" [build-system] requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 6"] diff --git a/matchcode-toolkit/setup.cfg b/matchcode-toolkit/setup.cfg index 9fc861c0..23c2c074 100644 --- a/matchcode-toolkit/setup.cfg +++ b/matchcode-toolkit/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = matchcode-toolkit -version = 2.0.0 +version = 2.0.1 license = Apache-2.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git a/matchcode-toolkit/src/matchcode_toolkit/pipelines/scan_and_fingerprint_package.py b/matchcode-toolkit/src/matchcode_toolkit/pipelines/scan_and_fingerprint_package.py index 0f15d13a..badef647 100644 --- a/matchcode-toolkit/src/matchcode_toolkit/pipelines/scan_and_fingerprint_package.py +++ b/matchcode-toolkit/src/matchcode_toolkit/pipelines/scan_and_fingerprint_package.py @@ -26,8 +26,8 @@ class ScanAndFingerprintPackage(ScanPackage): """ - Scan a single package archive with ScanCode-toolkit, then calculate the - directory fingerprints of the codebase. + Scan a single package file or package archive with ScanCode-toolkit, then + calculate the directory fingerprints of the codebase. The output is a summary of the scan results in JSON format. """ @@ -35,10 +35,10 @@ class ScanAndFingerprintPackage(ScanPackage): @classmethod def steps(cls): return ( - cls.get_package_archive_input, - cls.collect_archive_information, - cls.extract_archive_to_codebase_directory, - cls.run_scancode, + cls.get_package_input, + cls.collect_input_information, + cls.extract_input_to_codebase_directory, + cls.run_scan, cls.load_inventory_from_toolkit_scan, cls.fingerprint_codebase, cls.make_summary_from_scan_results, From 073fe2d87104216a2b568f45e2cd61a0250d43ba Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Tue, 19 Dec 2023 15:40:57 -0800 Subject: [PATCH 6/7] Update CHANGELOG.rst #263 Signed-off-by: Jono Yang --- matchcode-toolkit/CHANGELOG.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/matchcode-toolkit/CHANGELOG.rst b/matchcode-toolkit/CHANGELOG.rst index b53be960..1a389893 100644 --- a/matchcode-toolkit/CHANGELOG.rst +++ b/matchcode-toolkit/CHANGELOG.rst @@ -1,6 +1,11 @@ Changelog ========= +v2.0.1 +------ + +*2023-12-19* -- Update ``ScanAndFingerprintPackage`` pipeline with updates from the upstream ``ScanPackage`` pipeline from scancode.io + v2.0.0 ------ From 86f36738d12a3867fef5a12d8362461e28f3268c Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Tue, 19 Dec 2023 16:13:22 -0800 Subject: [PATCH 7/7] Bump matchcode-toolkit version in setup.cfg #263 Signed-off-by: Jono Yang --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 2e989d7a..1a10255e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -55,7 +55,7 @@ install_requires = rubymarshal == 1.0.3 scancode-toolkit[full] == 32.0.8 urlpy == 0.5 - matchcode-toolkit >= 1.1.1 + matchcode-toolkit >= 2.0.1 univers == 30.11.0 setup_requires = setuptools_scm[toml] >= 4