Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update purl2url support for pypi including download URL(s) #151

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ package_dir = =src
include_package_data = true
zip_safe = false
install_requires =
requests == 2.31.0

[options.packages.find]
where = src
Expand Down
37 changes: 37 additions & 0 deletions src/packageurl/contrib/purl2url.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
# Visit https://github.com/package-url/packageurl-python for support and
# download.

import requests

from packageurl import PackageURL
from packageurl.contrib.route import NoRouteAvailable
from packageurl.contrib.route import Router
Expand Down Expand Up @@ -412,3 +414,38 @@ def get_repo_download_url(purl):
return get_repo_download_url_by_package_type(
type=type, namespace=namespace, name=name, version=version
)


@download_router.route("pkg:pypi/.*")
def build_pypi_download_url(purl):
    """
    Return a pypi source distribution download URL from the `purl` string.

    The PyPI JSON metadata for the package name is fetched with
    `get_response()` and the files listed under `releases[<version>]` are
    scanned. A source (e.g., .tar.gz) file is tagged `"packagetype": "sdist"`
    and a .whl file is tagged `"packagetype": "bdist_wheel"`; only the sdist
    URL is returned here.

    Return an empty string when the metadata lists no sdist for the version
    (including when the version is missing from the metadata), and None when
    the purl has no name or no version. Any exception raised by
    `get_response()` is propagated unchanged.

    TODO: Consider whether we want the download_url value type to be a list so
    we can return multiple download file types like .tar.gz and .whl. Or might
    we want to modify get_inferred_urls() somehow to include (in this example)
    both .tar.gz and .whl?
    """
    purl_data = PackageURL.from_string(purl)
    name = purl_data.name
    version = purl_data.version

    if not (name and version):
        # Both are needed to select a release; nothing can be inferred.
        return None

    base_path = "https://pypi.org/pypi"
    api_url = f"{base_path}/{name}/json"
    pypi_metadata = get_response(api_url)

    # Either lookup can come back empty (missing "releases" key or a version
    # that is not listed); fall back to empty containers so that case yields
    # "" instead of raising on None.
    releases = pypi_metadata.get("releases") or {}
    release_files = releases.get(version) or []

    download_url = ""
    for rel in release_files:
        # If several sdist files are listed, the last one wins.
        if rel.get("packagetype") == "sdist":
            download_url = rel.get("url")
    return download_url


def get_response(url):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure we want to make network calls in the package-url library. @tdruez, what are your thoughts on this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TG1999 @tdruez I am also doing a requests.get for the cocoapods download function I'm currently working on.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In addition, I think I need to download and vet the cocoapods JSON to identify the homepage URL (or similar info) so that I can locate the cocoapods download repo, if one exists. If I recall correctly, one of the PURL spec examples for cocoapods has no download available, so the generated URL returns a 404.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TG1999 I think this belongs to the inspectors and not to this library.
@pombredanne Thoughts?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. This should be moved to a library that does network calls. We have never done that here, and we should keep this library simple and local-only.

resp = requests.get(url)
if resp.status_code == 200:
return resp.json()

raise Exception(f"Failed to fetch: {url}")
22 changes: 19 additions & 3 deletions tests/contrib/test_purl2url.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,19 +84,20 @@ def test_purl2url_get_download_url():
"pkg:gitlab/tg1999/firebase@1a122122": "https://gitlab.com/tg1999/firebase/-/archive/1a122122/firebase-1a122122.tar.gz",
"pkg:gitlab/tg1999/firebase@1a122122?version_prefix=v": "https://gitlab.com/tg1999/firebase/-/archive/v1a122122/firebase-v1a122122.tar.gz",
"pkg:gitlab/hoppr/[email protected]": "https://gitlab.com/hoppr/hoppr/-/archive/v1.11.1-dev.2/hoppr-v1.11.1-dev.2.tar.gz",
"pkg:pypi/[email protected]": "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz",
# From `download_url` qualifier
"pkg:github/yarnpkg/[email protected]?download_url=https://github.com/yarnpkg/yarn/releases/download/v1.3.2/yarn-v1.3.2.tar.gz&version_prefix=v": "https://github.com/yarnpkg/yarn/releases/download/v1.3.2/yarn-v1.3.2.tar.gz",
"pkg:generic/lxc-master.tar.gz?download_url=https://salsa.debian.org/lxc-team/lxc/-/archive/master/lxc-master.tar.gz": "https://salsa.debian.org/lxc-team/lxc/-/archive/master/lxc-master.tar.gz",
"pkg:generic/code.google.com/android-notifier?download_url=https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm": "https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm",
"pkg:bitbucket/robeden/trove?download_url=https://bitbucket.org/robeden/trove/downloads/trove-3.0.3.zip": "https://bitbucket.org/robeden/trove/downloads/trove-3.0.3.zip",
"pkg:sourceforge/zclasspath?download_url=http://master.dl.sourceforge.net/project/zclasspath/maven2/org/zclasspath/zclasspath/1.5/zclasspath-1.5.jar": "http://master.dl.sourceforge.net/project/zclasspath/maven2/org/zclasspath/zclasspath/1.5/zclasspath-1.5.jar",
"pkg:pypi/[email protected]?download_url=https://files.pythonhosted.org/packages/87/44/0fa8e9d0cccb8eb86fc1b5170208229dc6d6e9fd6e57ea1fe19cbeea68f5/aboutcode_toolkit-3.4.0rc1-py2.py3-none-any.whl": "https://files.pythonhosted.org/packages/87/44/0fa8e9d0cccb8eb86fc1b5170208229dc6d6e9fd6e57ea1fe19cbeea68f5/aboutcode_toolkit-3.4.0rc1-py2.py3-none-any.whl",
"pkg:pypi/[email protected]?download_url=https://files.pythonhosted.org/packages/87/44/0fa8e9d0cccb8eb86fc1b5170208229dc6d6e9fd6e57ea1fe19cbeea68f5/aboutcode_toolkit-3.4.0rc1-py2.py3-none-any.whl": "https://files.pythonhosted.org/packages/6a/16/9191e46344d6a5e98afa74730340bc5f82f2c9ac7922ac4a16e58885a652/aboutcode-toolkit-3.4.0rc1.tar.gz",
"pkg:pypi/[email protected]?download_url=https://files.pythonhosted.org/packages/6a/16/9191e46344d6a5e98afa74730340bc5f82f2c9ac7922ac4a16e58885a652/aboutcode-toolkit-3.4.0rc1.tar.gz": "https://files.pythonhosted.org/packages/6a/16/9191e46344d6a5e98afa74730340bc5f82f2c9ac7922ac4a16e58885a652/aboutcode-toolkit-3.4.0rc1.tar.gz",
# Not-supported
"pkg:github/tg1999/fetchcode": None,
"pkg:cargo/abc": None,
"pkg:rubygems/package-name": None,
"pkg:bitbucket/birkenfeld": None,
"pkg:pypi/[email protected]": None,
"pkg:composer/psr/[email protected]": None,
"pkg:golang/xorm.io/[email protected]": None,
"pkg:golang/gopkg.in/[email protected]": None,
Expand Down Expand Up @@ -134,7 +135,10 @@ def test_purl2url_get_inferred_urls():
"https://gitlab.com/tg1999/firebase",
"https://gitlab.com/tg1999/firebase/-/archive/1a122122/firebase-1a122122.tar.gz",
],
"pkg:pypi/[email protected]": ["https://pypi.org/project/sortedcontainers/2.4.0/"],
"pkg:pypi/[email protected]": [
"https://pypi.org/project/sortedcontainers/2.4.0/",
"https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz",
],
"pkg:composer/psr/[email protected]": ["https://packagist.org/packages/psr/log#1.1.3"],
"pkg:rubygems/package-name": ["https://rubygems.org/gems/package-name"],
"pkg:bitbucket/birkenfeld": [],
Expand All @@ -144,6 +148,18 @@ def test_purl2url_get_inferred_urls():
assert url == purl2url.get_inferred_urls(purl)


def test_purl2url_get_repo_download_url():
    """
    Check that `purl2url.get_repo_download_url()` returns the expected URL for
    each pypi purl below. Every purl carries a `download_url` qualifier and
    the expected value is that same URL.
    """
    # NOTE: the last two keys are identical as written, so the dict ends up
    # holding three entries rather than four.
    expected_url_by_purl = {
        "pkg:pypi/[email protected]?download_url=https://files.pythonhosted.org/packages/87/44/0fa8e9d0cccb8eb86fc1b5170208229dc6d6e9fd6e57ea1fe19cbeea68f5/aboutcode_toolkit-3.4.0rc1-py2.py3-none-any.whl": "https://files.pythonhosted.org/packages/87/44/0fa8e9d0cccb8eb86fc1b5170208229dc6d6e9fd6e57ea1fe19cbeea68f5/aboutcode_toolkit-3.4.0rc1-py2.py3-none-any.whl",
        "pkg:pypi/[email protected]?download_url=https://files.pythonhosted.org/packages/6a/16/9191e46344d6a5e98afa74730340bc5f82f2c9ac7922ac4a16e58885a652/aboutcode-toolkit-3.4.0rc1.tar.gz": "https://files.pythonhosted.org/packages/6a/16/9191e46344d6a5e98afa74730340bc5f82f2c9ac7922ac4a16e58885a652/aboutcode-toolkit-3.4.0rc1.tar.gz",
        "pkg:pypi/[email protected]?download_url=https://pypi.python.org/packages/any/s/setuptools/setuptools-0.6c11-1.src.rpm": "https://pypi.python.org/packages/any/s/setuptools/setuptools-0.6c11-1.src.rpm",
        "pkg:pypi/[email protected]?download_url=https://pypi.python.org/packages/any/s/setuptools/setuptools-0.6c11-1.src.rpm": "https://pypi.python.org/packages/any/s/setuptools/setuptools-0.6c11-1.src.rpm",
    }

    for package_url, expected_url in expected_url_by_purl.items():
        assert expected_url == purl2url.get_repo_download_url(package_url)


def test_purl2url_get_repo_url_with_invalid_purls():
purls = [
"pkg:github",
Expand Down
Loading