From 9ea1cea1a53e6981ac282ba97bb3b1f224682fc6 Mon Sep 17 00:00:00 2001 From: tdruez <489057+tdruez@users.noreply.github.com> Date: Mon, 17 Oct 2022 11:24:41 +0400 Subject: [PATCH] Refactor the purl2url functions and utilities #42 (#102) * Refactor purl2url into get_repo_url and get_download_url #42 Signed-off-by: Thomas Druez * Add support for nuget in purl2url.get_download_url #42 Signed-off-by: Thomas Druez * Add get_inferred_urls function to return all inferred URLs #42 Signed-off-by: Thomas Druez * Include the `version_prefix` as a qualifier to infer valid URLs #42 Signed-off-by: Thomas Druez * Fix formatting with black #42 Signed-off-by: Thomas Druez * Add support for qualifiers in purl_from_pattern #42 Qualifier `download_url` is now supported in build_github_download_url Signed-off-by: Thomas Druez * Return the download_url qualifier when available in get_download_url #42 Signed-off-by: Thomas Druez * Add changelog entry #42 Signed-off-by: Thomas Druez --- CHANGELOG.rst | 19 ++ README.rst | 55 ++++-- src/packageurl/contrib/purl2url.py | 282 ++++++++++++++++++++++++----- src/packageurl/contrib/url2purl.py | 91 ++++++---- tests/contrib/data/url2purl.json | 22 ++- tests/contrib/test_purl2url.py | 116 ++++++++++-- 6 files changed, 455 insertions(+), 130 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 6e40121..34af093 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,25 @@ Changelog ========= +next () +------- + +- Refactor the purl2url functions and utilities #42 + + - Split purl2url into `get_repo_url()` and `get_download_url()` returning + respectively a "Repository URL" and a "Download URL". + - A new `get_inferred_urls` function is available to return all + inferred URLs (repository and download). + - Add support in purl2url for npm, pypi, hackage, and nuget. + - Package URL qualifiers can now be provided to `purl_from_pattern()`. + - The `download_url` qualifier is returned in `get_download_url()` when available. 
+ +- Usage of `purl2url.purl2url` and `purl2url.get_url` is still available for + backward compatibility but should be migrated to `purl2url.get_repo_url`. + +- Include the `version_prefix` ("v" or "V") as a qualifier in build_github_purl #42 + This allows inferring valid URLs in the context of purl2url. + 0.10.3 (2022-09-15) ------------------- diff --git a/README.rst b/README.rst index e3acc84..f779819 100644 --- a/README.rst +++ b/README.rst @@ -9,7 +9,6 @@ Join the discussion at https://gitter.im/package-url/Lobby or enter a ticket for License: MIT - Tests and build status ====================== @@ -19,46 +18,79 @@ Tests and build status | |ci-tests| | +----------------------+ +Install +======= +:: + + pip install packageurl-python Usage ===== - :: >>> from packageurl import PackageURL + >>> purl = PackageURL.from_string("pkg:maven/org.apache.commons/io@1.3.4") >>> print(purl.to_dict()) {'type': 'maven', 'namespace': 'org.apache.commons', 'name': 'io', 'version': '1.3.4', 'qualifiers': None, 'subpath': None} + >>> print(purl.to_string()) pkg:maven/org.apache.commons/io@1.3.4 + >>> print(str(purl)) pkg:maven/org.apache.commons/io@1.3.4 + >>> print(repr(purl)) PackageURL(type='maven', namespace='org.apache.commons', name='io', version='1.3.4', qualifiers={}, subpath=None) +Utilities +========= -Other utilities: +Django models +^^^^^^^^^^^^^ -- packageurl.contrib.django_models.PackageURLMixin is a Django abstract model mixin to use Package URLs in Django. -- packageurl.contrib.purl2url.get_url(purl) returns the download URL inferred from a Package URL. -- packageurl.contrib.url2purl.get_purl(url) returns a Package URL inferred from URL. +`packageurl.contrib.django_models.PackageURLMixin` is a Django abstract model mixin to use Package URLs in Django. +URL to PURL +^^^^^^^^^^^ + +`packageurl.contrib.url2purl.get_purl(url)` returns a Package URL inferred from a URL. 
-Install -======= :: - pip install packageurl-python + >>> from packageurl.contrib import url2purl + >>> url2purl.get_purl("https://github.com/package-url/packageurl-python") + PackageURL(type='github', namespace='package-url', name='packageurl-python', version=None, qualifiers={}, subpath=None) + +PURL to URL +^^^^^^^^^^^ + +- `packageurl.contrib.purl2url.get_repo_url(purl)` returns a repository URL inferred from a Package URL. +- `packageurl.contrib.purl2url.get_download_url(purl)` returns a download URL inferred from a Package URL. +- `packageurl.contrib.purl2url.get_inferred_urls(purl)` returns all inferred URLs (repository, download) from a Package URL. + +:: + + >>> from packageurl.contrib import purl2url + + >>> purl2url.get_repo_url("pkg:rubygems/bundler@2.3.23") + "https://rubygems.org/gems/bundler/versions/2.3.23" + + >>> purl2url.get_download_url("pkg:rubygems/bundler@2.3.23") + "https://rubygems.org/downloads/bundler-2.3.23.gem" + + >>> purl2url.get_inferred_urls("pkg:rubygems/bundler@2.3.23") + ["https://rubygems.org/gems/bundler/versions/2.3.23", "https://rubygems.org/downloads/bundler-2.3.23.gem"] Run tests ========= -install:: +Install test dependencies:: python3 thirdparty/virtualenv.pyz --never-download --no-periodic-update . bin/pip install -e ."[test]" -run tests:: +Run tests:: bin/py.test tests @@ -91,7 +123,6 @@ Make a new release bin/twine upload dist/* - .. 
|ci-tests| image:: https://github.com/package-url/packageurl-python/actions/workflows/ci.yml/badge.svg?branch=main :target: https://github.com/package-url/packageurl-python/actions/workflows/ci.yml :alt: CI Tests and build status diff --git a/src/packageurl/contrib/purl2url.py b/src/packageurl/contrib/purl2url.py index c416f20..5a9c061 100644 --- a/src/packageurl/contrib/purl2url.py +++ b/src/packageurl/contrib/purl2url.py @@ -28,13 +28,11 @@ from packageurl.contrib.route import NoRouteAvailable from packageurl.contrib.route import Router -router = Router() +repo_router = Router() +download_router = Router() -def purl2url(purl): - """ - Return a URL inferred from the `purl` string - """ +def _get_url_from_router(router, purl): if purl: try: return router.process(purl) @@ -42,113 +40,297 @@ def purl2url(purl): return -get_url = purl2url +def get_repo_url(purl): + """ + Return a repository URL inferred from the `purl` string. + """ + return _get_url_from_router(repo_router, purl) -@router.route("pkg:cargo/.*") -def build_cargo_download_url(purl): +def get_download_url(purl): """ - Return a cargo download URL `url` from a the `purl` string + Return a download URL inferred from the `purl` string. + """ + download_url = _get_url_from_router(download_router, purl) + if download_url: + return download_url + + # Fallback on the `download_url` qualifier when available. + purl_data = PackageURL.from_string(purl) + return purl_data.qualifiers.get("download_url", None) + + +def get_inferred_urls(purl): + """ + Return all inferred URLs (repo, download) from the `purl` string. 
+ """ + url_functions = ( + get_repo_url, + get_download_url, + ) + + inferred_urls = [] + for url_func in url_functions: + url = url_func(purl) + if url: + inferred_urls.append(url) + + return inferred_urls + + +# Backward compatibility +purl2url = get_repo_url +get_url = get_repo_url + + +@repo_router.route("pkg:cargo/.*") +def build_cargo_repo_url(purl): + """ + Return a cargo repo URL from the `purl` string. """ purl_data = PackageURL.from_string(purl) name = purl_data.name version = purl_data.version - if not (name and version): - return + if name and version: + return f"https://crates.io/crates/{name}/{version}" + elif name: + return f"https://crates.io/crates/{name}" + + +@repo_router.route("pkg:bitbucket/.*") +def build_bitbucket_repo_url(purl): + """ + Return a bitbucket repo URL from the `purl` string. + """ + purl_data = PackageURL.from_string(purl) - return f"https://crates.io/api/v1/crates/{name}/{version}/download" + namespace = purl_data.namespace + name = purl_data.name + + if name and namespace: + return f"https://bitbucket.org/{namespace}/{name}" -@router.route("pkg:bitbucket/.*") -def build_bitbucket_homepage_url(purl): +@repo_router.route("pkg:github/.*") +def build_github_repo_url(purl): """ - Return a bitbucket homepage URL `url` from a the `purl` string + Return a github repo URL from the `purl` string. 
""" purl_data = PackageURL.from_string(purl) namespace = purl_data.namespace name = purl_data.name version = purl_data.version - subpath = purl_data.subpath + qualifiers = purl_data.qualifiers if not (name and namespace): return - url = f"https://bitbucket.org/{namespace}/{name}" + repo_url = f"https://github.com/{namespace}/{name}" + if version: - url = f"{url}/src/{version}" + version_prefix = qualifiers.get("version_prefix", "") + repo_url = f"{repo_url}/tree/{version_prefix}{version}" - if subpath: - url = f"{url}/{subpath}" + return repo_url - return url + +@repo_router.route("pkg:gitlab/.*") +def build_gitlab_repo_url(purl): + """ + Return a gitlab repo URL from the `purl` string. + """ + purl_data = PackageURL.from_string(purl) + + namespace = purl_data.namespace + name = purl_data.name + + if name and namespace: + return f"https://gitlab.com/{namespace}/{name}" -@router.route("pkg:github/.*") -def build_github_homepage_url(purl): +@repo_router.route("pkg:rubygems/.*") +def build_rubygems_repo_url(purl): """ - Return a github homepage URL `url` from a the `purl` string + Return a rubygems repo URL from the `purl` string. + """ + purl_data = PackageURL.from_string(purl) + + name = purl_data.name + version = purl_data.version + + if name and version: + return f"https://rubygems.org/gems/{name}/versions/{version}" + elif name: + return f"https://rubygems.org/gems/{name}" + + +@repo_router.route("pkg:npm/.*") +def build_npm_repo_url(purl): + """ + Return a npm repo URL from the `purl` string. 
""" purl_data = PackageURL.from_string(purl) namespace = purl_data.namespace name = purl_data.name version = purl_data.version - subpath = purl_data.subpath - if not (name and namespace): - return + repo_url = "https://www.npmjs.com/package/" + if namespace: + repo_url += f"{namespace}/" - url = f"https://github.com/{namespace}/{name}" + repo_url += f"{name}" if version: - url = f"{url}/tree/{version}" + repo_url += f"/v/{version}" + + return repo_url + + +@repo_router.route("pkg:pypi/.*") +def build_pypi_repo_url(purl): + """ + Return a pypi repo URL from the `purl` string. + """ + purl_data = PackageURL.from_string(purl) - if subpath: - url = f"{url}/{subpath}" + name = (purl_data.name or "").replace("_", "-") + version = purl_data.version - return url + if name and version: + return f"https://pypi.org/project/{name}/{version}/" + elif name: + return f"https://pypi.org/project/{name}/" -@router.route("pkg:gitlab/.*") -def build_gitlab_homepage_url(purl): +@repo_router.route("pkg:nuget/.*") +def build_nuget_repo_url(purl): """ - Return a gitlab homepage URL `url` from a the `purl` string + Return a nuget repo URL from the `purl` string. """ purl_data = PackageURL.from_string(purl) - namespace = purl_data.namespace name = purl_data.name version = purl_data.version - subpath = purl_data.subpath - if not (name and namespace): - return + if name and version: + return f"https://www.nuget.org/packages/{name}/{version}" + elif name: + return f"https://www.nuget.org/packages/{name}" - url = f"https://gitlab.com/{namespace}/{name}" - if version: - url = f"{url}/-/tree/{version}" +@repo_router.route("pkg:hackage/.*") +def build_hackage_repo_url(purl): + """ + Return a hackage repo URL from the `purl` string. 
+ """ + purl_data = PackageURL.from_string(purl) + + name = purl_data.name + version = purl_data.version + + if name and version: + return f"https://hackage.haskell.org/package/{name}-{version}" + elif name: + return f"https://hackage.haskell.org/package/{name}" - if subpath: - url = f"{url}/{subpath}" - return url +# Download URLs: -@router.route("pkg:rubygems/.*") -def build_gem_download_url(purl): +@download_router.route("pkg:cargo/.*") +def build_cargo_download_url(purl): """ - Return a rubygems homepage URL `url` from a the `purl` string + Return a cargo download URL from the `purl` string. """ purl_data = PackageURL.from_string(purl) name = purl_data.name version = purl_data.version - if not (name and version): + if name and version: + return f"https://crates.io/api/v1/crates/{name}/{version}/download" + + +@download_router.route("pkg:rubygems/.*") +def build_rubygems_download_url(purl): + """ + Return a rubygems download URL from the `purl` string. + """ + purl_data = PackageURL.from_string(purl) + + name = purl_data.name + version = purl_data.version + + if name and version: + return f"https://rubygems.org/downloads/{name}-{version}.gem" + + +@download_router.route("pkg:npm/.*") +def build_npm_download_url(purl): + """ + Return a npm download URL from the `purl` string. + """ + purl_data = PackageURL.from_string(purl) + + name = purl_data.name + version = purl_data.version + + if name and version: + return f"http://registry.npmjs.org/{name}/-/{name}-{version}.tgz" + + +@download_router.route("pkg:hackage/.*") +def build_hackage_download_url(purl): + """ + Return a hackage download URL from the `purl` string. 
+ """ + purl_data = PackageURL.from_string(purl) + + name = purl_data.name + version = purl_data.version + + if name and version: + return f"https://hackage.haskell.org/package/{name}-{version}/{name}-{version}.tar.gz" + + +@download_router.route("pkg:nuget/.*") +def build_nuget_download_url(purl): + """ + Return a nuget download URL from the `purl` string. + """ + purl_data = PackageURL.from_string(purl) + + name = purl_data.name + version = purl_data.version + + if name and version: + return f"https://www.nuget.org/api/v2/package/{name}/{version}" + + +@download_router.route("pkg:github/.*") +def build_github_download_url(purl): + """ + Return a github download URL from the `purl` string. + """ + purl_data = PackageURL.from_string(purl) + + namespace = purl_data.namespace + name = purl_data.name + version = purl_data.version + qualifiers = purl_data.qualifiers + + download_url = qualifiers.get("download_url") + if download_url: + return download_url + + if not (namespace and name and version): return - return f"https://rubygems.org/downloads/{name}-{version}.gem" + version_prefix = qualifiers.get("version_prefix", "") + version = f"{version_prefix}{version}" + + return f"https://github.com/{namespace}/{name}/archive/refs/tags/{version}.zip" diff --git a/src/packageurl/contrib/url2purl.py b/src/packageurl/contrib/url2purl.py index ea73e9b..0f51b2e 100644 --- a/src/packageurl/contrib/url2purl.py +++ b/src/packageurl/contrib/url2purl.py @@ -61,18 +61,31 @@ def url2purl(url): get_purl = url2purl -def purl_from_pattern(type_, pattern, url): +def purl_from_pattern(type_, pattern, url, qualifiers=None): url = unquote_plus(url) compiled_pattern = re.compile(pattern, re.VERBOSE) match = compiled_pattern.match(url) - if match: - purl_data = { - field: value - for field, value in match.groupdict().items() - if field in PackageURL._fields - } - return PackageURL(type_, **purl_data) + if not match: + return + + purl_data = { + field: value for field, value in 
match.groupdict().items() if field in PackageURL._fields + } + + qualifiers = qualifiers or {} + # Include the `version_prefix` as a qualifier to infer valid URLs in purl2url + version_prefix = match.groupdict().get("version_prefix") + if version_prefix: + qualifiers.update({"version_prefix": version_prefix}) + + if qualifiers: + if "qualifiers" in purl_data: + purl_data["qualifiers"].update(qualifiers) + else: + purl_data["qualifiers"] = qualifiers + + return PackageURL(type_, **purl_data) def register_pattern(type_, pattern, router=purl_router): @@ -303,7 +316,7 @@ def build_pypi_purl(uri): # http://nuget.org/packages/EntityFramework/4.2.0.0 # https://www.nuget.org/api/v2/package/Newtonsoft.Json/11.0.1 -nuget_www_pattern = r"^https?://.*nuget.org/(api/v2/)?packages?/(?P.+)/" r"(?P.+)$" +nuget_www_pattern = r"^https?://.*nuget.org/(api/v2/)?packages?/(?P.+)/(?P.+)$" register_pattern("nuget", nuget_www_pattern) @@ -342,16 +355,14 @@ def build_sourceforge_purl(uri): if not sourceforge_purl: # Get the project name from `uri` and use that as the Package name # http://master.dl.sourceforge.net/project/aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip - split_uri = uri.split( - "/project/" - ) # http://master.dl.sourceforge.net, aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip + split_uri = uri.split("/project/") + + # http://master.dl.sourceforge.net, aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip if len(split_uri) >= 2: - remaining_uri_path = split_uri[ - 1 - ] # aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip - remaining_uri_path_segments = remaining_uri_path.split( - "/" - ) # aloyscore, aloyscore, 0.1a1%2520stable, 0.1a1_stable_AloysCore.zip + # aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip + remaining_uri_path = split_uri[1] + # aloyscore, aloyscore, 0.1a1%2520stable, 0.1a1_stable_AloysCore.zip + remaining_uri_path_segments = remaining_uri_path.split("/") if 
remaining_uri_path_segments: project_name = remaining_uri_path_segments[0] # aloyscore sourceforge_purl = PackageURL( @@ -361,7 +372,7 @@ def build_sourceforge_purl(uri): # https://crates.io/api/v1/crates/rand/0.7.2/download -cargo_pattern = r"^https?://crates.io/api/v1/crates/(?P.+)/(?P.+)" r"(\/download)$" +cargo_pattern = r"^https?://crates.io/api/v1/crates/(?P.+)/(?P.+)(\/download)$" register_pattern("cargo", cargo_pattern) @@ -408,7 +419,7 @@ def build_github_api_purl(url): github_codeload_pattern = ( r"https?://codeload.github.com/(?P.+)/(?P.+)/" r"(zip|tar.gz|tar.bz2|tgz)/(.*/)*" - r"v?(?P.+)$" + r"(?Pv|V?)(?P.+)$" ) register_pattern("github", github_codeload_pattern) @@ -425,20 +436,20 @@ def build_github_purl(url): r"https?://github.com/(?P.+)/(?P.+)" r"/archive/(.*/)*" r"((?P=name)(-|_|@))?" - r"v?(?P.+).(zip|tar.gz|tar.bz2|.tgz)" + r"(?Pv|V?)(?P.+).(zip|tar.gz|tar.bz2|.tgz)" ) # https://github.com/downloads/mozilla/rhino/rhino1_7R4.zip download_pattern = ( r"https?://github.com/downloads/(?P.+)/(?P.+)/" r"((?P=name)(-|@)?)?" 
- r"v?(?P.+).(zip|tar.gz|tar.bz2|.tgz)" + r"(?Pv|V?)(?P.+).(zip|tar.gz|tar.bz2|.tgz)" ) # https://github.com/pypa/get-virtualenv/raw/20.0.31/public/virtualenv.pyz raw_pattern = ( r"https?://github.com/(?P.+)/(?P.+)" - r"/raw/v?(?P[^/]+)/(?P.*)$" + r"/raw/(?Pv|V?)(?P[^/]+)/(?P.*)$" ) # https://github.com/fanf2/unifdef/blob/master/unifdef.c @@ -449,7 +460,7 @@ def build_github_purl(url): releases_download_pattern = ( r"https?://github.com/(?P.+)/(?P.+)" - r"/releases/download/v?(?P[^/]+)/.*$" + r"/releases/download/(?Pv|V?)(?P[^/]+)/.*$" ) # https://github.com/pombredanne/schematics.git @@ -468,16 +479,10 @@ def build_github_purl(url): matches = re.search(pattern, url) qualifiers = {} if matches: - if pattern != releases_download_pattern: - return purl_from_pattern(type_="github", pattern=pattern, url=url) - qualifiers["download_url"] = url - purl = purl_from_pattern(type_="github", pattern=pattern, url=url) - return PackageURL( - type=purl.type, - name=purl.name, - namespace=purl.namespace, - version=purl.version, - qualifiers=qualifiers, + if pattern == releases_download_pattern: + qualifiers["download_url"] = url + return purl_from_pattern( + type_="github", pattern=pattern, url=url, qualifiers=qualifiers ) segments = get_path_segments(url) @@ -527,7 +532,8 @@ def build_bitbucket_purl(url): bitbucket_download_pattern = ( r"https?://bitbucket.org/" - r"(?P.+)/(?P.+)/downloads/(?P.+).(zip|tar.gz|tar.bz2|.tgz|exe|msi)" + r"(?P.+)/(?P.+)/downloads/" + r"(?P.+).(zip|tar.gz|tar.bz2|.tgz|exe|msi)" ) matches = re.search(bitbucket_download_pattern, url) @@ -596,22 +602,29 @@ def build_gitlab_purl(url): ) -# https://hackage.haskell.org/package/a50-0.5/a50-0.5.tar.gz -hackage_pattern = ( +# https://hackage.haskell.org/package/cli-extras-0.2.0.0/cli-extras-0.2.0.0.tar.gz +hackage_download_pattern = ( r"^https?://hackage.haskell.org/package/" r"(?P.+)-(?P.+)/" r"(?P=name)-(?P=version).*" r"[^/]$" ) -register_pattern("hackage", hackage_pattern) +register_pattern("hackage", 
hackage_download_pattern) + + +# https://hackage.haskell.org/package/cli-extras-0.2.0.0/ +hackage_project_pattern = r"^https?://hackage.haskell.org/package/(?P.+)-(?P[^/]+)/" + +register_pattern("hackage", hackage_project_pattern) @purl_router.route( "https?://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/.*" ) def build_generic_google_code_archive_purl(uri): - # https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm + # https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com + # /android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm _, remaining_uri = uri.split( "https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/" ) diff --git a/tests/contrib/data/url2purl.json b/tests/contrib/data/url2purl.json index c4e6f59..ab92a05 100644 --- a/tests/contrib/data/url2purl.json +++ b/tests/contrib/data/url2purl.json @@ -189,33 +189,33 @@ "https://api.github.com/repos/nexB/scancode-toolkit": "pkg:github/nexb/scancode-toolkit", "https://api.github.com/repos/nexB/scancode-toolkit/commits/40593af0df6c8378d2b180324b97cb439fa11d66": "pkg:github/nexb/scancode-toolkit@40593af0df6c8378d2b180324b97cb439fa11d66", "https://codeload.github.com/nexB/scancode-toolkit/tar.gz/3.1.1": "pkg:github/nexb/scancode-toolkit@3.1.1", - "https://codeload.github.com/nexB/scancode-toolkit/tar.gz/v3.1.1": "pkg:github/nexb/scancode-toolkit@3.1.1", + "https://codeload.github.com/nexB/scancode-toolkit/tar.gz/v3.1.1": "pkg:github/nexb/scancode-toolkit@3.1.1?version_prefix=v", "https://codeload.github.com/nexB/scancode-toolkit/zip/3.1.1": "pkg:github/nexb/scancode-toolkit@3.1.1", - "https://codeload.github.com/nexB/scancode-toolkit/zip/v3.1.1": "pkg:github/nexb/scancode-toolkit@3.1.1", + "https://codeload.github.com/nexB/scancode-toolkit/zip/v3.1.1": "pkg:github/nexb/scancode-toolkit@3.1.1?version_prefix=v", 
"https://codeload.github.com/nexB/scancode.io/tar.gz/1.0": "pkg:github/nexb/scancode.io@1.0", - "https://codeload.github.com/nexB/scancode.io/tar.gz/v1.0": "pkg:github/nexb/scancode.io@1.0", + "https://codeload.github.com/nexB/scancode.io/tar.gz/V1.0": "pkg:github/nexb/scancode.io@1.0?version_prefix=V", "https://codeload.github.com/berngp/grails-rest/zip/release/0.7": "pkg:github/berngp/grails-rest@0.7", "https://codeload.github.com/eclipse/m2e-core/zip/releases/1.2/1.2.0.20120903-1050": "pkg:github/eclipse/m2e-core@1.2.0.20120903-1050", "https://github.com/nexB/scancode-toolkit/archive/3.1.1.zip": "pkg:github/nexb/scancode-toolkit@3.1.1", - "https://github.com/nexB/scancode-toolkit/archive/v3.1.1.zip": "pkg:github/nexb/scancode-toolkit@3.1.1", + "https://github.com/nexB/scancode-toolkit/archive/v3.1.1.zip": "pkg:github/nexb/scancode-toolkit@3.1.1?version_prefix=v", "https://github.com/pypa/get-virtualenv/raw/20.0.31/public/virtualenv.pyz": "pkg:github/pypa/get-virtualenv@20.0.31#public/virtualenv.pyz", - "https://github.com/pypa/get-virtualenv/raw/v20.0.31/public/virtualenv.pyz": "pkg:github/pypa/get-virtualenv@20.0.31#public/virtualenv.pyz", + "https://github.com/pypa/get-virtualenv/raw/v20.0.31/public/virtualenv.pyz": "pkg:github/pypa/get-virtualenv@20.0.31?version_prefix=v#public/virtualenv.pyz", "https://github.com/fanf2/unifdef/blob/master/unifdef.c": "pkg:github/fanf2/unifdef@master#unifdef.c", "https://github.com/joebeeson/amazon/blob/master/vendors/aws-sdk/sdk.class.php": "pkg:github/joebeeson/amazon@master#vendors/aws-sdk/sdk.class.php", "https://github.com/modelfabric/yowl/blob/master/bin/yowl": "pkg:github/modelfabric/yowl@master#bin/yowl", - "https://github.com/syncthing/syncthing/releases/download/v0.14.36/syncthing-source-v0.14.36.tar.gz": "pkg:github/syncthing/syncthing@0.14.36?download_url=https://github.com/syncthing/syncthing/releases/download/v0.14.36/syncthing-source-v0.14.36.tar.gz", - 
"https://github.com/torakiki/pdfsam/releases/download/v3.3.2/pdfsam-3.3.2-bin.zip": "pkg:github/torakiki/pdfsam@3.3.2?download_url=https://github.com/torakiki/pdfsam/releases/download/v3.3.2/pdfsam-3.3.2-bin.zip", - "https://github.com/yarnpkg/yarn/releases/download/v1.3.2/yarn-v1.3.2.tar.gz": "pkg:github/yarnpkg/yarn@1.3.2?download_url=https://github.com/yarnpkg/yarn/releases/download/v1.3.2/yarn-v1.3.2.tar.gz", + "https://github.com/syncthing/syncthing/releases/download/v0.14.36/syncthing-source-v0.14.36.tar.gz": "pkg:github/syncthing/syncthing@0.14.36?download_url=https://github.com/syncthing/syncthing/releases/download/v0.14.36/syncthing-source-v0.14.36.tar.gz&version_prefix=v", + "https://github.com/torakiki/pdfsam/releases/download/v3.3.2/pdfsam-3.3.2-bin.zip": "pkg:github/torakiki/pdfsam@3.3.2?download_url=https://github.com/torakiki/pdfsam/releases/download/v3.3.2/pdfsam-3.3.2-bin.zip&version_prefix=v", + "https://github.com/yarnpkg/yarn/releases/download/v1.3.2/yarn-v1.3.2.tar.gz": "pkg:github/yarnpkg/yarn@1.3.2?download_url=https://github.com/yarnpkg/yarn/releases/download/v1.3.2/yarn-v1.3.2.tar.gz&version_prefix=v", "https://github.com/z3APA3A/3proxy/releases/download/0.8.11/3proxy-0.8.11.zip": "pkg:github/z3apa3a/3proxy@0.8.11?download_url=https://github.com/z3APA3A/3proxy/releases/download/0.8.11/3proxy-0.8.11.zip", "https://github.com/FasterXML/woodstox/archive/woodstox-core-5.0.2.zip": "pkg:github/fasterxml/woodstox@core-5.0.2", "https://github.com/adobe-fonts/source-code-pro/archive/2.030R-ro/1.050R-it.tar.gz": "pkg:github/adobe-fonts/source-code-pro@1.050R-it", "https://github.com/cassandra-rb/simple_uuid/archive/simple_uuid-0.3.0.zip": "pkg:github/cassandra-rb/simple_uuid@0.3.0", "https://github.com/djberg96/sys-filesystem/archive/sys-filesystem-1.1.4.zip": "pkg:github/djberg96/sys-filesystem@1.1.4", "https://github.com/freedesktop/xorg-intel-gpu-tools/archive/igt-gpu-tools-1.23.tar.gz": 
"pkg:github/freedesktop/xorg-intel-gpu-tools@igt-gpu-tools-1.23", - "https://github.com/grnet/synnefo/archive/synnefo/v0.12.3.zip": "pkg:github/grnet/synnefo@0.12.3", + "https://github.com/grnet/synnefo/archive/synnefo/v0.12.3.zip": "pkg:github/grnet/synnefo@0.12.3?version_prefix=v", "https://github.com/n8n-io/n8n/archive/n8n@0.23.0.tar.gz": "pkg:github/n8n-io/n8n@0.23.0", "https://github.com/nginx/nginx/archive/branches/stable-0.7.zip": "pkg:github/nginx/nginx@stable-0.7", - "https://github.com/swagger-api/swagger-codegen/archive/refs/tags/v3.0.25.tar.gz": "pkg:github/swagger-api/swagger-codegen@3.0.25", + "https://github.com/swagger-api/swagger-codegen/archive/refs/tags/v3.0.25.tar.gz": "pkg:github/swagger-api/swagger-codegen@3.0.25?version_prefix=v", "https://github.com/bareos/bareos/archive/Release/16.2.6.zip": "pkg:github/bareos/bareos@16.2.6", "https://github.com/hessu/bchunk/archive/release/1.2.2.tar.gz": "pkg:github/hessu/bchunk@1.2.2", "https://github.com/downloads/mozilla/rhino/rhino1_7R4.zip": "pkg:github/mozilla/rhino@1_7R4", @@ -251,6 +251,8 @@ "https://hackage.haskell.org/package/a50-0.5/a50-0.5.tar.gz": "pkg:hackage/a50@0.5", "https://hackage.haskell.org/package/AC-HalfInteger-1.2.1/AC-HalfInteger-1.2.1.tar.gz": "pkg:hackage/AC-HalfInteger@1.2.1", "https://hackage.haskell.org/package/3d-graphics-examples-0.0.0.2/3d-graphics-examples-0.0.0.2.tar.gz": "pkg:hackage/3d-graphics-examples@0.0.0.2", + "https://hackage.haskell.org/package/cli-extras-0.2.0.0": "pkg:hackage/cli-extras@0.2.0.0", + "https://hackage.haskell.org/package/cli-extras-0.2.0.0/": "pkg:hackage/cli-extras@0.2.0.0", "https://salsa.debian.org/lxc-team/lxc/-/archive/master/lxc-master.tar.gz": "pkg:generic/lxc-master.tar.gz?download_url=https://salsa.debian.org/lxc-team/lxc/-/archive/master/lxc-master.tar.gz", "http://apt-rpm.org/": null, "": null, diff --git a/tests/contrib/test_purl2url.py b/tests/contrib/test_purl2url.py index 875709c..15d52e2 100644 --- a/tests/contrib/test_purl2url.py 
+++ b/tests/contrib/test_purl2url.py @@ -26,39 +26,117 @@ import pytest -from packageurl.contrib.purl2url import purl2url +from packageurl.contrib import purl2url -def test_purl2url_with_valid_purls(): +def test_purl2url_get_repo_url(): purls_url = { "pkg:github/tg1999/fetchcode": "https://github.com/tg1999/fetchcode", "pkg:github/tg1999/fetchcode@master": "https://github.com/tg1999/fetchcode/tree/master", - "pkg:github/tg1999/fetchcode@master#tests": "https://github.com/tg1999/fetchcode/tree/master/tests", + "pkg:github/tg1999/fetchcode@master#tests": "https://github.com/tg1999/fetchcode/tree/master", + "pkg:github/nexb/scancode-toolkit@3.1.1?version_prefix=v": "https://github.com/nexb/scancode-toolkit/tree/v3.1.1", "pkg:github/tg1999": None, - "pkg:cargo/clap@2.3.3": "https://crates.io/api/v1/crates/clap/2.3.3/download", - "pkg:cargo/rand@0.7.2": "https://crates.io/api/v1/crates/rand/0.7.2/download", - "pkg:cargo/structopt@0.3.11": "https://crates.io/api/v1/crates/structopt/0.3.11/download", - "pkg:cargo/abc": None, - "pkg:rubygems/unf@0.1.3": "https://rubygems.org/downloads/unf-0.1.3.gem", - "pkg:rubygems/yajl-ruby@1.2.0": "https://rubygems.org/downloads/yajl-ruby-1.2.0.gem", + "pkg:cargo/rand@0.7.2": "https://crates.io/crates/rand/0.7.2", + "pkg:cargo/abc": "https://crates.io/crates/abc", + "pkg:rubygems/bundler@2.3.23": "https://rubygems.org/gems/bundler/versions/2.3.23", "pkg:gem/package-name": None, "pkg:bitbucket/birkenfeld/pygments-main": "https://bitbucket.org/birkenfeld/pygments-main", - "pkg:bitbucket/birkenfeld/pygments-main@244fd47e07d1014f0aed9c": "https://bitbucket.org/birkenfeld/pygments-main/src/244fd47e07d1014f0aed9c", - "pkg:bitbucket/birkenfeld/pygments-main@master#views": "https://bitbucket.org/birkenfeld/pygments-main/src/master/views", + "pkg:bitbucket/birkenfeld/pygments-main@244fd47e07d1014f0aed9c": "https://bitbucket.org/birkenfeld/pygments-main", + "pkg:bitbucket/birkenfeld/pygments-main@master#views": 
"https://bitbucket.org/birkenfeld/pygments-main", "pkg:bitbucket/birkenfeld": None, - "pkg:gitlab/tg1999/firebase@master": "https://gitlab.com/tg1999/firebase/-/tree/master", - "pkg:gitlab/tg1999/firebase@1a122122#views": "https://gitlab.com/tg1999/firebase/-/tree/1a122122/views", + "pkg:gitlab/tg1999/firebase@master": "https://gitlab.com/tg1999/firebase", + "pkg:gitlab/tg1999/firebase@1a122122#views": "https://gitlab.com/tg1999/firebase", "pkg:gitlab/tg1999/firebase": "https://gitlab.com/tg1999/firebase", "pkg:gitlab/tg1999": None, + "pkg:pypi/sortedcontainers": "https://pypi.org/project/sortedcontainers/", + "pkg:pypi/sortedcontainers@2.4.0": "https://pypi.org/project/sortedcontainers/2.4.0/", + "pkg:pypi/packageurl_python": "https://pypi.org/project/packageurl-python/", + "pkg:npm/is-npm": "https://www.npmjs.com/package/is-npm", + "pkg:npm/is-npm@1.0.0": "https://www.npmjs.com/package/is-npm/v/1.0.0", + "pkg:nuget/System.Text.Json": "https://www.nuget.org/packages/System.Text.Json", + "pkg:nuget/System.Text.Json@6.0.6": "https://www.nuget.org/packages/System.Text.Json/6.0.6", + "pkg:hackage/cli-extras": "https://hackage.haskell.org/package/cli-extras", + "pkg:hackage/cli-extras@0.2.0.0": "https://hackage.haskell.org/package/cli-extras-0.2.0.0", } for purl, url in purls_url.items(): - assert url == purl2url(purl) + assert url == purl2url.get_repo_url(purl) + + +def test_purl2url_get_download_url(): + purls_url = { + # Generated + "pkg:cargo/rand@0.7.2": "https://crates.io/api/v1/crates/rand/0.7.2/download", + "pkg:rubygems/bundler@2.3.23": "https://rubygems.org/downloads/bundler-2.3.23.gem", + "pkg:npm/is-npm@1.0.0": "http://registry.npmjs.org/is-npm/-/is-npm-1.0.0.tgz", + "pkg:hackage/cli-extras@0.2.0.0": "https://hackage.haskell.org/package/cli-extras-0.2.0.0/cli-extras-0.2.0.0.tar.gz", + "pkg:nuget/System.Text.Json@6.0.6": "https://www.nuget.org/api/v2/package/System.Text.Json/6.0.6", + "pkg:github/nexb/scancode-toolkit@3.1.1?version_prefix=v": 
"https://github.com/nexb/scancode-toolkit/archive/refs/tags/v3.1.1.zip", + # From `download_url` qualifier + "pkg:github/yarnpkg/yarn@1.3.2?download_url=https://github.com/yarnpkg/yarn/releases/download/v1.3.2/yarn-v1.3.2.tar.gz&version_prefix=v": "https://github.com/yarnpkg/yarn/releases/download/v1.3.2/yarn-v1.3.2.tar.gz", + "pkg:generic/lxc-master.tar.gz?download_url=https://salsa.debian.org/lxc-team/lxc/-/archive/master/lxc-master.tar.gz": "https://salsa.debian.org/lxc-team/lxc/-/archive/master/lxc-master.tar.gz", + "pkg:generic/code.google.com/android-notifier?download_url=https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm": "https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm", + "pkg:bitbucket/robeden/trove?download_url=https://bitbucket.org/robeden/trove/downloads/trove-3.0.3.zip": "https://bitbucket.org/robeden/trove/downloads/trove-3.0.3.zip", + "pkg:sourceforge/zclasspath?download_url=http://master.dl.sourceforge.net/project/zclasspath/maven2/org/zclasspath/zclasspath/1.5/zclasspath-1.5.jar": "http://master.dl.sourceforge.net/project/zclasspath/maven2/org/zclasspath/zclasspath/1.5/zclasspath-1.5.jar", + "pkg:pypi/aboutcode-toolkit@3.4.0rc1?download_url=https://files.pythonhosted.org/packages/87/44/0fa8e9d0cccb8eb86fc1b5170208229dc6d6e9fd6e57ea1fe19cbeea68f5/aboutcode_toolkit-3.4.0rc1-py2.py3-none-any.whl": "https://files.pythonhosted.org/packages/87/44/0fa8e9d0cccb8eb86fc1b5170208229dc6d6e9fd6e57ea1fe19cbeea68f5/aboutcode_toolkit-3.4.0rc1-py2.py3-none-any.whl", + # Not-supported + "pkg:github/tg1999/fetchcode": None, + "pkg:cargo/abc": None, + "pkg:gem/package-name": None, + "pkg:bitbucket/birkenfeld": None, + "pkg:gitlab/tg1999/firebase@1a122122": None, + "pkg:pypi/sortedcontainers@2.4.0": None, + } + + for purl, url in purls_url.items(): + assert url == 
purl2url.get_download_url(purl) + + +def test_purl2url_get_inferred_urls(): + purls_url = { + "pkg:cargo/rand@0.7.2": [ + "https://crates.io/crates/rand/0.7.2", + "https://crates.io/api/v1/crates/rand/0.7.2/download", + ], + "pkg:rubygems/bundler@2.3.23": [ + "https://rubygems.org/gems/bundler/versions/2.3.23", + "https://rubygems.org/downloads/bundler-2.3.23.gem", + ], + "pkg:npm/is-npm@1.0.0": [ + "https://www.npmjs.com/package/is-npm/v/1.0.0", + "http://registry.npmjs.org/is-npm/-/is-npm-1.0.0.tgz", + ], + "pkg:hackage/cli-extras@0.2.0.0": [ + "https://hackage.haskell.org/package/cli-extras-0.2.0.0", + "https://hackage.haskell.org/package/cli-extras-0.2.0.0/cli-extras-0.2.0.0.tar.gz", + ], + "pkg:nuget/System.Text.Json@6.0.6": [ + "https://www.nuget.org/packages/System.Text.Json/6.0.6", + "https://www.nuget.org/api/v2/package/System.Text.Json/6.0.6", + ], + "pkg:cargo/abc": ["https://crates.io/crates/abc"], + "pkg:github/tg1999/fetchcode": ["https://github.com/tg1999/fetchcode"], + "pkg:gitlab/tg1999/firebase@1a122122": ["https://gitlab.com/tg1999/firebase"], + "pkg:pypi/sortedcontainers@2.4.0": ["https://pypi.org/project/sortedcontainers/2.4.0/"], + "pkg:gem/package-name": [], + "pkg:bitbucket/birkenfeld": [], + } + + for purl, url in purls_url.items(): + assert url == purl2url.get_inferred_urls(purl) + +def test_purl2url_get_repo_url_with_invalid_purls(): + purls = [ + "pkg:github", + "pkg:cargo", + "pkg:gem", + "pkg:bitbucket", + "pkg:gitlab", + None, + ] -def test_convert_with_invalid_purls(): - purls = ["pkg:github", "pkg:cargo", "pkg:gem", "pkg:bitbucket", "pkg:gitlab", None] - with pytest.raises(Exception) as e_info: - for purl in purls: - url = purl2url(purl) + for purl in purls: + with pytest.raises(Exception) as e_info: + purl2url.get_repo_url(purl) assert "Invalid PURL" == e_info