From 513092c30d74b021c6caff760f9916e45230e0d3 Mon Sep 17 00:00:00 2001 From: ds-cbo <82801887+ds-cbo@users.noreply.github.com> Date: Fri, 5 Jan 2024 14:53:06 +0100 Subject: [PATCH 1/5] escape domain regexes --- src/packageurl/contrib/url2purl.py | 104 ++++++++++++++++++----------- 1 file changed, 66 insertions(+), 38 deletions(-) diff --git a/src/packageurl/contrib/url2purl.py b/src/packageurl/contrib/url2purl.py index d1d2034..98b6fc1 100644 --- a/src/packageurl/contrib/url2purl.py +++ b/src/packageurl/contrib/url2purl.py @@ -70,7 +70,9 @@ def purl_from_pattern(type_, pattern, url, qualifiers=None): return purl_data = { - field: value for field, value in match.groupdict().items() if field in PackageURL._fields + field: value + for field, value in match.groupdict().items() + if field in PackageURL._fields } qualifiers = qualifiers or {} @@ -120,14 +122,16 @@ def build_generic_purl(uri): uri_path_segments = get_path_segments(uri) if uri_path_segments: file_name = uri_path_segments[-1] - return PackageURL(type="generic", name=file_name, qualifiers={"download_url": uri}) + return PackageURL( + type="generic", name=file_name, qualifiers={"download_url": uri} + ) @purl_router.route( - "https?://registry.npmjs.*/.*", - "https?://registry.yarnpkg.com/.*", - "https?://(www\\.)?npmjs.*/package.*", - "https?://(www\\.)?yarnpkg.com/package.*", + r"https?://registry\.npmjs\.(com|org)/.*", + r"https?://registry\.yarnpkg\.com/.*", + r"https?://(www\.)?npmjs\.(com|org)/package.*", + r"https?://(www\.)?yarnpkg\.com/package.*", ) def build_npm_purl(uri): # npm URLs are difficult to disambiguate with regex @@ -221,9 +225,9 @@ def build_npm_web_purl(uri): @purl_router.route( - "https?://repo1.maven.org/maven2/.*", - "https?://central.maven.org/maven2/.*", - "maven-index://repo1.maven.org/.*", + r"https?://repo1\.maven\.org/maven2/.*", + r"https?://central\.maven\.org/maven2/.*", + r"maven-index://repo1\.maven\.org/.*", ) def build_maven_purl(uri): path = unquote_plus(urlparse(uri).path) @@ -253,7 +257,18 @@ def build_maven_purl(uri): qualifiers["classifier"] = classifier.strip("-") - valid_types = ("aar", "ear", "mar", "pom", "rar", "rpm", "sar", "tar.gz", "war", "zip") + valid_types = ( + "aar", + "ear", + "mar", + "pom", + "rar", + "rpm", + "sar", + "tar.gz", + "war", + "zip", + ) if extension in valid_types: qualifiers["type"] = extension @@ -261,7 +276,7 @@ def build_maven_purl(uri): # https://rubygems.org/gems/i18n-js-3.0.11.gem -@purl_router.route("https?://rubygems.org/(downloads|gems)/.*") +@purl_router.route(r"https?://rubygems\.org/(downloads|gems)/.*") def build_rubygems_purl(uri): # We use a more general route pattern instead of using `rubygems_pattern` # below by itself because we want to capture all rubygems download URLs, @@ -272,7 +287,7 @@ def build_rubygems_purl(uri): # https://rubygems.org/downloads/jwt-0.1.8.gem # https://rubygems.org/gems/i18n-js-3.0.11.gem rubygems_pattern = ( - r"^https?://rubygems.org/(downloads|gems)/(?P.+)-(?P.+)(\.gem)$" + r"^https?://rubygems\.org/(downloads|gems)/(?P.+)-(?P.+)(\.gem)$" ) return purl_from_pattern("gem", rubygems_pattern, uri) @@ -282,7 +297,9 @@ def build_rubygems_purl(uri): # https://pypi.python.org/packages/2.6/t/threadpool/threadpool-1.2.7-py2.6.egg # https://pypi.python.org/packages/any/s/setuptools/setuptools-0.6c11-1.src.rpm # https://files.pythonhosted.org/packages/84/d8/451842a5496844bb5c7634b231a2e4caf0d867d2e25f09b840d3b07f3d4b/multi_key_dict-2.0.win32.exe -pypi_pattern = r"(?P(\w\.?)+(-\w+)*)-(?P.+)\.(zip|tar.gz|tar.bz2|tgz|egg|rpm|exe)$" +pypi_pattern = ( + r"(?P(\w\.?)+(-\w+)*)-(?P.+)\.(zip|tar.gz|tar.bz2|tgz|egg|rpm|exe)$" +) # This pattern can be found in the following locations: # - wheel.wheelfile.WHEEL_INFO_RE @@ -298,8 +315,9 @@ def build_rubygems_purl(uri): @purl_router.route( - "https?://pypi.org/(packages|project)/.+", - "https?://.+python.+org/(packages|project)/.*", + r"https?://pypi\.org/(packages|project)/.+", + r"https?://pypi\.python\.org/(packages|project)/.*", + r"https?://files\.pythonhosted\.org/(packages|project)/.*", ) def build_pypi_purl(uri): path = unquote_plus(urlparse(uri).path) @@ -328,14 +346,16 @@ def build_pypi_purl(uri): # http://nuget.org/packages/EntityFramework/4.2.0.0 # https://www.nuget.org/api/v2/package/Newtonsoft.Json/11.0.1 -nuget_www_pattern = r"^https?://.*nuget.org/(api/v2/)?packages?/(?P.+)/(?P.+)$" +nuget_www_pattern = ( + r"^https?://(www\.)?nuget\.org/(api/v2/)?packages?/(?P.+)/(?P.+)$" +) register_pattern("nuget", nuget_www_pattern) # https://api.nuget.org/v3-flatcontainer/newtonsoft.json/10.0.1/newtonsoft.json.10.0.1.nupkg nuget_api_pattern = ( - r"^https?://api.nuget.org/v3-flatcontainer/" + r"^https?://api\.nuget\.org/v3-flatcontainer/" r"(?P.+)/" r"(?P.+)/" r".*(nupkg)$" # ends with "nupkg" @@ -344,7 +364,7 @@ def build_pypi_purl(uri): register_pattern("nuget", nuget_api_pattern) -@purl_router.route("https?://.*sourceforge.net/projects?/.*") +@purl_router.route(r"https?://((master|iweb)\.dl\.)?sourceforge\.net/projects?/.*") def build_sourceforge_purl(uri): # We use a more general route pattern instead of using `sourceforge_pattern` # below by itself because we want to capture all sourceforge download URLs, @@ -355,7 +375,7 @@ def build_sourceforge_purl(uri): # http://master.dl.sourceforge.net/project/libpng/zlib/1.2.3/zlib-1.2.3.tar.bz2 # https://sourceforge.net/projects/scribus/files/scribus/1.6.0/scribus-1.6.0.tar.gz/download sourceforge_pattern = ( - r"^https?://.*sourceforge.net/projects?/" + r"^https?://((master|iweb)\.dl\.)?sourceforge\.net/projects?/" r"(?P([^/]+))/" # do not allow more "/" segments r"(files/)?" # optional segment for "*/download" type URLs r"(?P.+)/" @@ -380,27 +400,31 @@ def build_sourceforge_purl(uri): if remaining_uri_path_segments: project_name = remaining_uri_path_segments[0] # aloyscore sourceforge_purl = PackageURL( - type="sourceforge", name=project_name, qualifiers={"download_url": uri} + type="sourceforge", + name=project_name, + qualifiers={"download_url": uri}, ) return sourceforge_purl # https://crates.io/api/v1/crates/rand/0.7.2/download -cargo_pattern = r"^https?://crates.io/api/v1/crates/(?P.+)/(?P.+)(\/download)$" +cargo_pattern = ( + r"^https?://crates\.io/api/v1/crates/(?P.+)/(?P.+)(\/download)$" +) register_pattern("cargo", cargo_pattern) # https://raw.githubusercontent.com/volatilityfoundation/dwarf2json/master/LICENSE.txt github_raw_content_pattern = ( - r"https?://raw.githubusercontent.com/(?P[^/]+)/(?P[^/]+)/" + r"https?://raw\.githubusercontent\.com/(?P[^/]+)/(?P[^/]+)/" r"(?P[^/]+)/(?P.*)$" ) register_pattern("github", github_raw_content_pattern) -@purl_router.route("https?://api.github\\.com/repos/.*") +@purl_router.route(r"https?://api\.github\.com/repos/.*") def build_github_api_purl(url): """ Return a PackageURL object from GitHub API `url`. @@ -431,7 +455,7 @@ def build_github_api_purl(url): # https://codeload.github.com/nexB/scancode-toolkit/tar.gz/v3.1.1 # https://codeload.github.com/berngp/grails-rest/zip/release/0.7 github_codeload_pattern = ( - r"https?://codeload.github.com/(?P.+)/(?P.+)/" + r"https?://codeload\.github\.com/(?P.+)/(?P.+)/" r"(zip|tar.gz|tar.bz2|tgz)/(.*/)*" r"(?Pv|V?)(?P.+)$" ) @@ -439,7 +463,7 @@ def build_github_api_purl(url): register_pattern("github", github_codeload_pattern) -@purl_router.route("https?://github\\.com/.*") +@purl_router.route(r"https?://github\.com/.*") def build_github_purl(url): """ Return a PackageURL object from GitHub `url`. @@ -447,7 +471,7 @@ def build_github_purl(url): # https://github.com/nexB/scancode-toolkit/archive/v3.1.1.zip archive_pattern = ( - r"https?://github.com/(?P.+)/(?P.+)" + r"https?://github\.com/(?P.+)/(?P.+)" r"/archive/(.*/)*" r"((?P=name)(-|_|@))?" r"(?Pv|V?)(?P.+).(zip|tar.gz|tar.bz2|.tgz)" @@ -455,30 +479,30 @@ def build_github_purl(url): # https://github.com/downloads/mozilla/rhino/rhino1_7R4.zip download_pattern = ( - r"https?://github.com/downloads/(?P.+)/(?P.+)/" + r"https?://github\.com/downloads/(?P.+)/(?P.+)/" r"((?P=name)(-|@)?)?" r"(?Pv|V?)(?P.+).(zip|tar.gz|tar.bz2|.tgz)" ) # https://github.com/pypa/get-virtualenv/raw/20.0.31/public/virtualenv.pyz raw_pattern = ( - r"https?://github.com/(?P.+)/(?P.+)" + r"https?://github\.com/(?P.+)/(?P.+)" r"/raw/(?Pv|V?)(?P[^/]+)/(?P.*)$" ) # https://github.com/fanf2/unifdef/blob/master/unifdef.c blob_pattern = ( - r"https?://github.com/(?P.+)/(?P.+)" + r"https?://github\.com/(?P.+)/(?P.+)" r"/blob/(?P[^/]+)/(?P.*)$" ) releases_download_pattern = ( - r"https?://github.com/(?P.+)/(?P.+)" + r"https?://github\.com/(?P.+)/(?P.+)" r"/releases/download/(?Pv|V?)(?P[^/]+)/.*$" ) # https://github.com/pombredanne/schematics.git - git_pattern = r"https?://github.com/(?P.+)/(?P.+).(git)" + git_pattern = r"https?://github\.com/(?P.+)/(?P.+).(git)" patterns = ( archive_pattern, @@ -527,7 +551,7 @@ def build_github_purl(url): ) -@purl_router.route("https?://bitbucket\\.org/.*") +@purl_router.route(r"https?://bitbucket\.org/.*") def build_bitbucket_purl(url): """ Return a PackageURL object from BitBucket `url`. @@ -554,7 +578,9 @@ def build_bitbucket_purl(url): qualifiers = {} if matches: qualifiers["download_url"] = url - return PackageURL(type="bitbucket", namespace=namespace, name=name, qualifiers=qualifiers) + return PackageURL( + type="bitbucket", namespace=namespace, name=name, qualifiers=qualifiers + ) version = None subpath = None @@ -578,7 +604,7 @@ def build_bitbucket_purl(url): ) -@purl_router.route("https?://gitlab\\.com/(?!.*/archive/).*") +@purl_router.route(r"https?://gitlab\.com/(?!.*/archive/).*") def build_gitlab_purl(url): """ Return a PackageURL object from Gitlab `url`. @@ -618,7 +644,7 @@ def build_gitlab_purl(url): # https://gitlab.com/hoppr/hoppr/-/archive/v1.11.1-dev.2/hoppr-v1.11.1-dev.2.tar.gz gitlab_archive_pattern = ( - r"^https?://gitlab.com/" + r"^https?://gitlab\.com/" r"(?P.+)/(?P.+)/-/archive/(?P.+)/" r"(?P=name)-(?P=version).*" r"[^/]$" @@ -629,7 +655,7 @@ def build_gitlab_purl(url): # https://hackage.haskell.org/package/cli-extras-0.2.0.0/cli-extras-0.2.0.0.tar.gz hackage_download_pattern = ( - r"^https?://hackage.haskell.org/package/" + r"^https?://hackage\.haskell\.org/package/" r"(?P.+)-(?P.+)/" r"(?P=name)-(?P=version).*" r"[^/]$" @@ -639,13 +665,15 @@ def build_gitlab_purl(url): # https://hackage.haskell.org/package/cli-extras-0.2.0.0/ -hackage_project_pattern = r"^https?://hackage.haskell.org/package/(?P.+)-(?P[^/]+)/" +hackage_project_pattern = ( + r"^https?://hackage\.haskell\.org/package/(?P.+)-(?P[^/]+)/" +) register_pattern("hackage", hackage_project_pattern) @purl_router.route( - "https?://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/.*" + r"https?://storage\.googleapis\.com/google-code-archive-downloads/v2/code.google.com/.*" ) def build_generic_google_code_archive_purl(uri): # https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com From 2628b2a8013b5ad9d215bc213154c422b1059497 Mon Sep 17 00:00:00 2001 From: ds-cbo <82801887+ds-cbo@users.noreply.github.com> Date: Fri, 5 Jan 2024 15:06:11 +0100 Subject: [PATCH 2/5] more regex patches --- src/packageurl/contrib/url2purl.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/packageurl/contrib/url2purl.py b/src/packageurl/contrib/url2purl.py index 98b6fc1..97fd0db 100644 --- a/src/packageurl/contrib/url2purl.py +++ b/src/packageurl/contrib/url2purl.py @@ -298,7 +298,7 @@ def build_rubygems_purl(uri): # https://pypi.python.org/packages/any/s/setuptools/setuptools-0.6c11-1.src.rpm # https://files.pythonhosted.org/packages/84/d8/451842a5496844bb5c7634b231a2e4caf0d867d2e25f09b840d3b07f3d4b/multi_key_dict-2.0.win32.exe pypi_pattern = ( - r"(?P(\w\.?)+(-\w+)*)-(?P.+)\.(zip|tar.gz|tar.bz2|tgz|egg|rpm|exe)$" + r"(?P(\w\.?)+(-\w+)*)-(?P.+)\.(zip|tar\.gz|tar\.bz2|tgz|egg|rpm|exe)$" ) # This pattern can be found in the following locations: @@ -456,7 +456,7 @@ def build_github_api_purl(url): # https://codeload.github.com/berngp/grails-rest/zip/release/0.7 github_codeload_pattern = ( r"https?://codeload\.github\.com/(?P.+)/(?P.+)/" - r"(zip|tar.gz|tar.bz2|tgz)/(.*/)*" + r"(zip|tar\.gz|tar\.bz2|tgz)/([^/]*/)*" r"(?Pv|V?)(?P.+)$" ) @@ -472,16 +472,16 @@ def build_github_purl(url): # https://github.com/nexB/scancode-toolkit/archive/v3.1.1.zip archive_pattern = ( r"https?://github\.com/(?P.+)/(?P.+)" - r"/archive/(.*/)*" + r"/archive/([^/]*/)*" r"((?P=name)(-|_|@))?" - r"(?Pv|V?)(?P.+).(zip|tar.gz|tar.bz2|.tgz)" + r"(?Pv|V?)(?P.+)\.(zip|tar\.gz|tar\.bz2|tgz)" ) # https://github.com/downloads/mozilla/rhino/rhino1_7R4.zip download_pattern = ( r"https?://github\.com/downloads/(?P.+)/(?P.+)/" r"((?P=name)(-|@)?)?" - r"(?Pv|V?)(?P.+).(zip|tar.gz|tar.bz2|.tgz)" + r"(?Pv|V?)(?P.+)\.(zip|tar\.gz|tar\.bz2|tgz)" ) # https://github.com/pypa/get-virtualenv/raw/20.0.31/public/virtualenv.pyz @@ -502,7 +502,7 @@ def build_github_purl(url): ) # https://github.com/pombredanne/schematics.git - git_pattern = r"https?://github\.com/(?P.+)/(?P.+).(git)" + git_pattern = r"https?://github\.com/(?P.+)/(?P.+)\.(git)" patterns = ( archive_pattern, @@ -571,7 +571,7 @@ def build_bitbucket_purl(url): bitbucket_download_pattern = ( r"https?://bitbucket.org/" r"(?P.+)/(?P.+)/downloads/" - r"(?P.+).(zip|tar.gz|tar.bz2|.tgz|exe|msi)" + r"(?P.+)\.(zip|tar\.gz|tar\.bz2|tgz|exe|msi)" ) matches = re.search(bitbucket_download_pattern, url) From 79ccb8bbaf4e72586e9778b8881b867e5a312f75 Mon Sep 17 00:00:00 2001 From: ds-cbo <82801887+ds-cbo@users.noreply.github.com> Date: Tue, 9 Jan 2024 11:27:27 +0100 Subject: [PATCH 3/5] add isort version and diff in output --- .github/workflows/ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 943c592..3d3a639 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,8 +23,11 @@ jobs: - name: Validate run: | - isort --check-only src/ tests/ + isort --version + isort --check-only --diff src/ tests/ + black --version black --check --line-length 100 . + mypy --version mypy build-and-test: From a450d06139053aceac7fa355caac7c0d5227b512 Mon Sep 17 00:00:00 2001 From: ds-cbo <82801887+ds-cbo@users.noreply.github.com> Date: Tue, 19 Mar 2024 12:12:52 +0100 Subject: [PATCH 4/5] black --- src/packageurl/__init__.py | 64 +++++++++++++++++++++++------- src/packageurl/contrib/url2purl.py | 5 +-- 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 170038b..e8d63fb 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -77,7 +77,9 @@ def unquote(s: AnyStr) -> str: Return a percent-decoded unicode string, given an `s` byte or unicode string. """ - unquoted = _percent_unquote(s) # type:ignore[arg-type] # typeshed is incorrect here + unquoted = _percent_unquote( + s + ) # type:ignore[arg-type] # typeshed is incorrect here if not isinstance(unquoted, str): unquoted = unquoted.decode("utf-8") return unquoted @@ -105,7 +107,9 @@ def get_quoter( return lambda x: x -def normalize_type(type: Optional[AnyStr], encode: Optional[bool] = True) -> Optional[str]: # NOQA +def normalize_type( + type: Optional[AnyStr], encode: Optional[bool] = True +) -> Optional[str]: # NOQA if not type: return None if not isinstance(type, str): @@ -213,11 +217,15 @@ def normalize_qualifiers( f"Invalid qualifier. Must be a string of key=value pairs:{repr(qualifiers_list)}" ) qualifiers_parts = [kv.partition("=") for kv in qualifiers_list] - qualifiers_pairs: "Iterable[Tuple[str, str]]" = [(k, v) for k, _, v in qualifiers_parts] + qualifiers_pairs: "Iterable[Tuple[str, str]]" = [ + (k, v) for k, _, v in qualifiers_parts + ] elif isinstance(qualifiers, dict): qualifiers_pairs = qualifiers.items() else: - raise ValueError(f"Invalid qualifier. Must be a string or dict:{repr(qualifiers)}") + raise ValueError( + f"Invalid qualifier. Must be a string or dict:{repr(qualifiers)}" + ) quoter = get_quoter(encode) qualifiers_map = { @@ -293,7 +301,9 @@ def normalize( qualifiers: Union[AnyStr, Dict[str, str], None], subpath: Optional[AnyStr], encode: "Optional[Literal[False]]", -) -> Tuple[str, Optional[str], str, Optional[str], Optional[Dict[str, str]], Optional[str]]: ... +) -> Tuple[ + str, Optional[str], str, Optional[str], Optional[Dict[str, str]], Optional[str] +]: ... @overload @@ -306,7 +316,12 @@ def normalize( subpath: Optional[AnyStr], encode: Optional[bool] = ..., ) -> Tuple[ - str, Optional[str], str, Optional[str], Union[str, Dict[str, str], None], Optional[str] + str, + Optional[str], + str, + Optional[str], + Union[str, Dict[str, str], None], + Optional[str], ]: ... @@ -335,11 +350,20 @@ def normalize( version_norm = normalize_version(version, encode) qualifiers_norm = normalize_qualifiers(qualifiers, encode) subpath_norm = normalize_subpath(subpath, encode) - return type_norm, namespace_norm, name_norm, version_norm, qualifiers_norm, subpath_norm + return ( + type_norm, + namespace_norm, + name_norm, + version_norm, + qualifiers_norm, + subpath_norm, + ) class PackageURL( - namedtuple("PackageURL", ("type", "namespace", "name", "version", "qualifiers", "subpath")) + namedtuple( + "PackageURL", ("type", "namespace", "name", "version", "qualifiers", "subpath") + ) ): """ A purl is a package URL as defined at @@ -361,7 +385,9 @@ def __new__( version: Optional[AnyStr] = None, qualifiers: Union[AnyStr, Dict[str, str], None] = None, subpath: Optional[AnyStr] = None, - ) -> "PackageURL": # this should be 'Self' https://github.com/python/mypy/pull/13133 + ) -> ( + "PackageURL" + ): # this should be 'Self' https://github.com/python/mypy/pull/13133 required = dict(type=type, name=name) for key, value in required.items(): if value: @@ -379,7 +405,9 @@ def __new__( for key, value in strings.items(): if value and isinstance(value, basestring) or not value: continue - raise ValueError(f"Invalid purl: {key} argument must be a string: {repr(value)}.") + raise ValueError( + f"Invalid purl: {key} argument must be a string: {repr(value)}." + ) if qualifiers and not isinstance( qualifiers, @@ -419,7 +447,9 @@ def __str__(self, *args: Any, **kwargs: Any) -> str: def __hash__(self) -> int: return hash(self.to_string()) - def to_dict(self, encode: Optional[bool] = False, empty: Any = None) -> Dict[str, Any]: + def to_dict( + self, encode: Optional[bool] = False, empty: Any = None + ) -> Dict[str, Any]: """ Return an ordered dict of purl components as {key: value}. If `encode` is True, then "qualifiers" are encoded as a normalized @@ -482,7 +512,9 @@ def from_string(cls, purl: str) -> "PackageURL": scheme, sep, remainder = purl.partition(":") if not sep or scheme != "pkg": - raise ValueError(f'purl is missing the required "pkg" scheme component: {repr(purl)}.') + raise ValueError( + f'purl is missing the required "pkg" scheme component: {repr(purl)}.' + ) # this strip '/, // and /// as possible in :// or :/// remainder = remainder.strip().lstrip("/") @@ -492,7 +524,9 @@ def from_string(cls, purl: str) -> "PackageURL": type, sep, remainder = remainder.partition("/") # NOQA if not type or not sep: - raise ValueError(f"purl is missing the required type component: {repr(purl)}.") + raise ValueError( + f"purl is missing the required type component: {repr(purl)}." + ) type = type.lower() @@ -534,7 +568,9 @@ def from_string(cls, purl: str) -> "PackageURL": name = ns_name_parts[0] if not name: - raise ValueError(f"purl is missing the required name component: {repr(purl)}") + raise ValueError( + f"purl is missing the required name component: {repr(purl)}" + ) type, namespace, name, version, qualifiers, subpath = normalize( # NOQA type, diff --git a/src/packageurl/contrib/url2purl.py b/src/packageurl/contrib/url2purl.py index f3e69a6..0ea2271 100644 --- a/src/packageurl/contrib/url2purl.py +++ b/src/packageurl/contrib/url2purl.py @@ -311,9 +311,7 @@ def build_cran_purl(uri): # https://pypi.python.org/packages/2.6/t/threadpool/threadpool-1.2.7-py2.6.egg # https://pypi.python.org/packages/any/s/setuptools/setuptools-0.6c11-1.src.rpm # https://files.pythonhosted.org/packages/84/d8/451842a5496844bb5c7634b231a2e4caf0d867d2e25f09b840d3b07f3d4b/multi_key_dict-2.0.win32.exe -pypi_pattern = ( - r"(?P(\w\.?)+(-\w+)*)-(?P.+)\.(zip|tar\.gz|tar\.bz2|tgz|egg|rpm|exe)$" -) +pypi_pattern = r"(?P(\w\.?)+(-\w+)*)-(?P.+)\.(zip|tar\.gz|tar\.bz2|tgz|egg|rpm|exe)$" # This pattern can be found in the following locations: # - wheel.wheelfile.WHEEL_INFO_RE @@ -421,6 +419,7 @@ def build_composer_purl(uri): register_pattern("sourceforge", sourceforge_download_pattern_bis) + @purl_router.route(r"https?://((master|iweb)\.dl\.)?sourceforge\.net/projects?/.*") def build_sourceforge_purl(uri): # We use a more general route pattern instead of using `sourceforge_pattern` From 7cf56c5b4ee24b1bbc3bac76772186b2399c9c13 Mon Sep 17 00:00:00 2001 From: ds-cbo <82801887+ds-cbo@users.noreply.github.com> Date: Tue, 19 Mar 2024 12:15:02 +0100 Subject: [PATCH 5/5] black length 100 --- src/packageurl/__init__.py | 48 ++++++++---------------------- src/packageurl/contrib/url2purl.py | 16 +++------- 2 files changed, 16 insertions(+), 48 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index e8d63fb..cd97875 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -77,9 +77,7 @@ def unquote(s: AnyStr) -> str: Return a percent-decoded unicode string, given an `s` byte or unicode string. """ - unquoted = _percent_unquote( - s - ) # type:ignore[arg-type] # typeshed is incorrect here + unquoted = _percent_unquote(s) # type:ignore[arg-type] # typeshed is incorrect here if not isinstance(unquoted, str): unquoted = unquoted.decode("utf-8") return unquoted @@ -107,9 +105,7 @@ def get_quoter( return lambda x: x -def normalize_type( - type: Optional[AnyStr], encode: Optional[bool] = True -) -> Optional[str]: # NOQA +def normalize_type(type: Optional[AnyStr], encode: Optional[bool] = True) -> Optional[str]: # NOQA if not type: return None if not isinstance(type, str): @@ -217,15 +213,11 @@ def normalize_qualifiers( f"Invalid qualifier. Must be a string of key=value pairs:{repr(qualifiers_list)}" ) qualifiers_parts = [kv.partition("=") for kv in qualifiers_list] - qualifiers_pairs: "Iterable[Tuple[str, str]]" = [ - (k, v) for k, _, v in qualifiers_parts - ] + qualifiers_pairs: "Iterable[Tuple[str, str]]" = [(k, v) for k, _, v in qualifiers_parts] elif isinstance(qualifiers, dict): qualifiers_pairs = qualifiers.items() else: - raise ValueError( - f"Invalid qualifier. Must be a string or dict:{repr(qualifiers)}" - ) + raise ValueError(f"Invalid qualifier. Must be a string or dict:{repr(qualifiers)}") quoter = get_quoter(encode) qualifiers_map = { @@ -301,9 +293,7 @@ def normalize( qualifiers: Union[AnyStr, Dict[str, str], None], subpath: Optional[AnyStr], encode: "Optional[Literal[False]]", -) -> Tuple[ - str, Optional[str], str, Optional[str], Optional[Dict[str, str]], Optional[str] -]: ... +) -> Tuple[str, Optional[str], str, Optional[str], Optional[Dict[str, str]], Optional[str]]: ... @overload @@ -361,9 +351,7 @@ def normalize( class PackageURL( - namedtuple( - "PackageURL", ("type", "namespace", "name", "version", "qualifiers", "subpath") - ) + namedtuple("PackageURL", ("type", "namespace", "name", "version", "qualifiers", "subpath")) ): """ A purl is a package URL as defined at @@ -385,9 +373,7 @@ def __new__( version: Optional[AnyStr] = None, qualifiers: Union[AnyStr, Dict[str, str], None] = None, subpath: Optional[AnyStr] = None, - ) -> ( - "PackageURL" - ): # this should be 'Self' https://github.com/python/mypy/pull/13133 + ) -> "PackageURL": # this should be 'Self' https://github.com/python/mypy/pull/13133 required = dict(type=type, name=name) for key, value in required.items(): if value: @@ -405,9 +391,7 @@ def __new__( for key, value in strings.items(): if value and isinstance(value, basestring) or not value: continue - raise ValueError( - f"Invalid purl: {key} argument must be a string: {repr(value)}." - ) + raise ValueError(f"Invalid purl: {key} argument must be a string: {repr(value)}.") if qualifiers and not isinstance( qualifiers, @@ -447,9 +431,7 @@ def __str__(self, *args: Any, **kwargs: Any) -> str: def __hash__(self) -> int: return hash(self.to_string()) - def to_dict( - self, encode: Optional[bool] = False, empty: Any = None - ) -> Dict[str, Any]: + def to_dict(self, encode: Optional[bool] = False, empty: Any = None) -> Dict[str, Any]: """ Return an ordered dict of purl components as {key: value}. If `encode` is True, then "qualifiers" are encoded as a normalized @@ -512,9 +494,7 @@ def from_string(cls, purl: str) -> "PackageURL": scheme, sep, remainder = purl.partition(":") if not sep or scheme != "pkg": - raise ValueError( - f'purl is missing the required "pkg" scheme component: {repr(purl)}.' - ) + raise ValueError(f'purl is missing the required "pkg" scheme component: {repr(purl)}.') # this strip '/, // and /// as possible in :// or :/// remainder = remainder.strip().lstrip("/") @@ -524,9 +504,7 @@ def from_string(cls, purl: str) -> "PackageURL": type, sep, remainder = remainder.partition("/") # NOQA if not type or not sep: - raise ValueError( - f"purl is missing the required type component: {repr(purl)}." - ) + raise ValueError(f"purl is missing the required type component: {repr(purl)}.") type = type.lower() @@ -568,9 +546,7 @@ def from_string(cls, purl: str) -> "PackageURL": name = ns_name_parts[0] if not name: - raise ValueError( - f"purl is missing the required name component: {repr(purl)}" - ) + raise ValueError(f"purl is missing the required name component: {repr(purl)}") type, namespace, name, version, qualifiers, subpath = normalize( # NOQA type, diff --git a/src/packageurl/contrib/url2purl.py b/src/packageurl/contrib/url2purl.py index 0ea2271..aa3c256 100644 --- a/src/packageurl/contrib/url2purl.py +++ b/src/packageurl/contrib/url2purl.py @@ -70,9 +70,7 @@ def purl_from_pattern(type_, pattern, url, qualifiers=None): return purl_data = { - field: value - for field, value in match.groupdict().items() - if field in PackageURL._fields + field: value for field, value in match.groupdict().items() if field in PackageURL._fields } qualifiers = qualifiers or {} @@ -122,9 +120,7 @@ def build_generic_purl(uri): uri_path_segments = get_path_segments(uri) if uri_path_segments: file_name = uri_path_segments[-1] - return PackageURL( - type="generic", name=file_name, qualifiers={"download_url": uri} - ) + return PackageURL(type="generic", name=file_name, qualifiers={"download_url": uri}) @purl_router.route( @@ -463,9 +459,7 @@ def build_sourceforge_purl(uri): # https://crates.io/api/v1/crates/rand/0.7.2/download -cargo_pattern = ( - r"^https?://crates\.io/api/v1/crates/(?P.+)/(?P.+)(\/download)$" -) +cargo_pattern = r"^https?://crates\.io/api/v1/crates/(?P.+)/(?P.+)(\/download)$" register_pattern("cargo", cargo_pattern) @@ -633,9 +627,7 @@ def build_bitbucket_purl(url): qualifiers = {} if matches: qualifiers["download_url"] = url - return PackageURL( - type="bitbucket", namespace=namespace, name=name, qualifiers=qualifiers - ) + return PackageURL(type="bitbucket", namespace=namespace, name=name, qualifiers=qualifiers) version = None subpath = None