Skip to content

Commit

Permalink
Update purlcli.py #247
Browse files Browse the repository at this point in the history
Reference: #247

Signed-off-by: John M. Horan <[email protected]>
  • Loading branch information
johnmhoran committed Jan 9, 2024
1 parent da8d816 commit bc869d2
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 63 deletions.
67 changes: 44 additions & 23 deletions packagedb/tests/test_purlcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,22 +99,21 @@ def test_validate_purl_strip(self):

self.assertEqual(validated_purls, expected_results)

def test_versions(self):
purls = ["pkg:pypi/fetchcode"]
abc = purlcli.list_versions(purls)
# def test_versions(self):
# purls = ["pkg:pypi/fetchcode"]
# abc = purlcli.list_versions(purls)
# print(f"\nabc = {abc}")

print(f"\nabc = {abc}")
# for purl_list in abc:
# for p in purl_list:
# # print(PackageVersion.to_dict(p))
# p_dict = PackageVersion.to_dict(p)
# print(f"\np_dict = {p_dict}")
# print(f"\ntype(p_dict) = {type(p_dict)}")

for purl_list in abc:
for p in purl_list:
# print(PackageVersion.to_dict(p))
p_dict = PackageVersion.to_dict(p)
# print(f"\np_dict = {p_dict}")
# print(f"\ntype(p_dict) = {type(p_dict)}")

json_p_dict = json.dumps(p_dict)
# print(f"\njson_p_dict = {json_p_dict}")
# print(f"\ntype(json_p_dict) = {type(json_p_dict)}")
# json_p_dict = json.dumps(p_dict)
# print(f"\njson_p_dict = {json_p_dict}")
# print(f"\ntype(json_p_dict) = {type(json_p_dict)}")


# 2024-01-08 Monday 17:55:15. Based on test_api.py's class PurlValidateApiTestCase(TestCase).
Expand All @@ -138,7 +137,8 @@ def setUp(self):

def test_api_purl_validation(self):
data1 = {
"purl": "pkg:npm/[email protected]",
# "purl": "pkg:npm/[email protected]",
"purl": "pkg:pypi/[email protected]",
"check_existence": True,
}
response1 = self.client.get(f"/api/validate/", data=data1)
Expand All @@ -148,7 +148,8 @@ def test_api_purl_validation(self):
print(f"")

data2 = {
"purl": "pkg:npm/[email protected]",
# "purl": "pkg:npm/[email protected]",
"purl": "pkg:pypi/[email protected]",
"check_existence": True,
}
response2 = self.client.get(f"/api/validate/", data=data2)
Expand All @@ -157,22 +158,24 @@ def test_api_purl_validation(self):
print(f"\nresponse2.data = {response2.data}")
print(f"")

self.assertEquals(True, response1.data["valid"])
self.assertEquals(True, response1.data["exists"])
self.assertEquals(
self.assertEqual(True, response1.data["valid"])
self.assertEqual(True, response1.data["exists"])
self.assertEqual(
"The provided Package URL is valid, and the package exists in the upstream repo.",
response1.data["message"],
)

self.assertEquals(False, response2.data["valid"])
self.assertEquals(
self.assertEqual(False, response2.data["valid"])
self.assertEqual(
"The provided PackageURL is not valid.", response2.data["message"]
)

# ZZZ: 2024-01-08 Monday 18:54:51. Some exploring:

data3 = {
"purl": "pkg:npm/ogdendunes",
# "purl": "pkg:npm/ogdendunes",
# "purl": "pkg:pypi/ogdendunes",
"purl": "pkg:pypi/[email protected]",
"check_existence": True,
}
response3 = self.client.get(f"/api/validate/", data=data3)
Expand All @@ -184,6 +187,24 @@ def test_api_purl_validation(self):
self.assertEqual(True, response3.data["valid"])
self.assertEqual(False, response3.data["exists"])
self.assertEqual(
"The provided Package URL is valid but does not exist in the upstream repo.",
"The provided PackageURL is valid but does not exist in the upstream repo.",
response3.data["message"],
)

data4 = {
# "purl": "pkg:nginx/[email protected]?os=windows",
"purl": "pkg:nginx/[email protected]",
"check_existence": True,
}
response4 = self.client.get(f"/api/validate/", data=data4)

print(f"\nresponse4 = {response4}")
print(f"\nresponse4.data = {response4.data}")
print(f"")

self.assertEqual(True, response4.data["valid"])
self.assertEqual(False, response4.data["exists"])
self.assertEqual(
"The provided PackageURL is valid but does not exist in the upstream repo.",
response4.data["message"],
)
161 changes: 121 additions & 40 deletions purlcli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
2024-01-09 Tuesday 11:59:11. This began the day as a cleaned-up version of purlcli-03.py.
"""

import json

import click
Expand Down Expand Up @@ -43,17 +47,13 @@ def validate(purls, output, file):
if file:
purls = file.read().splitlines(False)

api_query = "https://public.purldb.io/api/validate/"

# validated_purls = validate_purls(purls)
validated_purls = validate_purls(purls, api_query)
validated_purls = validate_purls(purls)

json.dump(validated_purls, output, indent=4)


# def validate_purls(purls):
def validate_purls(purls, api_query):
# api_query = "https://public.purldb.io/api/validate/"
def validate_purls(purls):
api_query = "https://public.purldb.io/api/validate/"
validated_purls = []
for purl in purls:
purl = purl.strip()
Expand Down Expand Up @@ -92,9 +92,14 @@ def get_versions(purls, output, file):
"""
Check the syntax of one or more PURLs.
"""
if (purls and file) or not (purls or file):
# if (purls and file) or not (purls or file):
# raise click.UsageError("Use either purls or file but not both.")
if purls and file:
raise click.UsageError("Use either purls or file but not both.")

if not (purls or file):
raise click.UsageError("Use either purls or file.")

if file:
purls = file.read().splitlines(False)

Expand All @@ -105,62 +110,129 @@ def get_versions(purls, output, file):


def list_versions(purls):
print(f"\nlen(purls) = {len(purls)}")
print(f"\ntype(purls) = {type(purls)}")
print(f"\npurls = {purls}")
# print(f"\nlen(purls) = {len(purls)}")
# print(f"\ntype(purls) = {type(purls)}")
# print(f"\npurls = {purls}")
purl_versions = []

list_of_dict_outputs = []
# and this will hold the dict_outputs converted with json.dumps():
list_of_dict_output_json_dumps = []
for purl in purls:
dict_output = {}
print(f"\n==> purl = {purl}")
# print(f"purl = {purl}")
dict_output["purl"] = purl
dict_output["versions"] = []
# YO: Do we want to check for version data and if found remove it? Or allow?
purl = purl.strip()
if not purl:
continue

# YO: try/except -- 2024-01-07 Sunday 21:14:37. I tried this but the error thrown by fetchcode does not trigger the try/except.
# https://github.com/nexB/fetchcode/blob/d0a3fa9bb56dc3a77f7d3d7bd5b8d0e40c7a8612/src/fetchcode/package_versions.py#L512-L524
# try:
# some_variable = list(router.process(purl))

# 2024-01-07 Sunday 21:58:44. TEST to detect fetchcode error.
# result = os.system("python other_script.py")
# if 0 == result:
# YO: 2024-01-07 Sunday 22:25:38. This DOES catch an error -- empty list means that, for some unidentified reason, there is no responsive data.
# results = list(router.process(purl))
# if results != []:
# print(" Command executed successfully")
# print(f"results = {results}")
# else:
# print(" Command didn't execute successfully")
# print(f"results = {results}")

# 2024-01-08 Monday 15:28:11. Can this alert us to a problem w/o that pseudo error message?
# test_variable01 = list(router.process(purl))
# print(f"\ntest_variable01 = {test_variable01}")

# YO: 2024-01-08 Monday 15:25:28. Each of these throws a pseudo error for pkg:pypi/ogdendunes but not for pkg:pypi/foobar -- Error while fetching 'https://pypi.org/pypi/ogdendunes/json': 404 -- but each produces an empty [].
# results = list(versions(purl))
# results = list(versions(purl))
# results = list(router.process(purl))

# test01 = versions(purl)
# test02 = router.process(purl)
# print(f"results = {results}")
# print(f"test01 = {test01}")
# print(f"test02 = {test02}")
# ZAP: If we have multiple inputs and some are valid, I assume we DO want to return data for those.
# if results == []:
# print(
# f"\nThere was an error with your '{purl}' query. Make sure that '{purl}' actually exists in the relevant repository."
# )
# continue

# ZAP: 2024-01-08 Monday 17:07:34. Rather than test results as above, we'll use 'validate'.

api_query = "https://public.purldb.io/api/validate/"
purl = purl.strip()
request_body = {"purl": purl, "check_existence": True}
response = requests.get(api_query, params=request_body)
results = response.json()

# print(f"\n\nresults = {results}")

if results["exists"] != True:
print(
f"\nThere was an error with your '{purl}' query. Make sure that '{purl}' actually exists in the relevant repository."
)
continue

# This works: this is a list of PackageVersion objects
# print(f"\n\n* ABOUT TO PROCESS A PURL")
results = list(router.process(purl))
# print(f"* JUST PROCESSED A PURL")

print(f"\n\nrouter.process(purl) = {router.process(purl)}")
# print(f"\n\nrouter.process(purl) = {router.process(purl)}")

print(f"\nlist(router.process(purl)) = {list(router.process(purl))}")
# print(f"\n\n* ABOUT TO PROCESS A PURL")
# print(f"\nlist(router.process(purl)) = {list(router.process(purl))}")
# print(f"* JUST PROCESSED A PURL")

# versions(purl) is a generator object
print(f"\nversions(purl) = {versions(purl)}")
# print(f"\nversions(purl) = {versions(purl)}")

# Another test -- this is a list of PackageVersion objects
# print(f"\n\n* ABOUT TO PROCESS A PURL")
results_versions = list(versions(purl))
print(f"\nresults_versions = {results_versions}")
# print(f"* JUST PROCESSED A PURL")
# print(f"\nresults_versions = {results_versions}")

purl_versions.append(results)

# Test: list of strings
# print(f"\n\n* ABOUT TO PROCESS A PURL")
results_values = [v.value for v in router.process(purl)]
print(f"\nresults_values = {results_values}")
# print(f"* JUST PROCESSED A PURL")
# print(f"\nresults_values = {results_values}")

# 2024-01-05 Friday 17:25:41. Iterate through PackageVersion() objects
# YO: rename the variable!
# print(f"\n\n* ABOUT TO PROCESS A PURL")
for package_version_object in list(versions(purl)):
print(f"\n*** package_version_object = {package_version_object}")
# YO This print statement is not reached when the PURL query returns a 404.
# print(f"* JUST PROCESSED A PURL")
# print(f"\n*** package_version_object = {package_version_object}")

print(
f"\n*** package_version_object.to_dict() = {package_version_object.to_dict()}"
)
# print(
# f"\n*** package_version_object.to_dict() = {package_version_object.to_dict()}"
# )

print(
f"\n*** package_version_object.to_dict()['value'] = {package_version_object.to_dict()['value']}"
)
# print(
# f"\n*** package_version_object.to_dict()['value'] = {package_version_object.to_dict()['value']}"
# )

print(
f"\n*** package_version_object.to_dict()['release_date'] = {package_version_object.to_dict()['release_date']}"
)
# print(
# f"\n*** package_version_object.to_dict()['release_date'] = {package_version_object.to_dict()['release_date']}"
# )

# Here, too, create dict which we'll convert to JSON with json.dumps().
nested_dict = {}
print(f"type(nested_dict) = {type(nested_dict)}")
# print(f"type(nested_dict) = {type(nested_dict)}")

nested_purl = purl + "@" + f'{package_version_object.to_dict()["value"]}'
# nested_purl = "TEST"
Expand All @@ -172,10 +244,14 @@ def list_versions(purls):
"release_date"
] = f'{package_version_object.to_dict()["release_date"]}'
dict_output["versions"].append(nested_dict)
# # print(f"\nnested_dict = {nested_dict}")
# print(
# f"\nnested_dict = {json.dumps(nested_dict, indent=4, sort_keys=False)}"
# )

# dict
print(f"\n==> dict_output = {dict_output}")
print(f"\n==> type(dict_output) = {type(dict_output)}")
# print(f"\n==> dict_output = {dict_output}")
# print(f"\n==> type(dict_output) = {type(dict_output)}")
# add to list
list_of_dict_outputs.append(dict_output)
# 2024-01-05 Friday 20:53:18. Does this format?
Expand All @@ -191,31 +267,30 @@ def list_versions(purls):
# dict_output_json_loads, indent=2, sort_keys=False
# )

print(f"\n==> dict_output_json_dumps = {dict_output_json_dumps}")
# YO: 2024-01-08 Monday 15:13:07. This is a list of the versions -- each nested_dict -- above.
# print(f"\n==> dict_output_json_dumps = {dict_output_json_dumps}")
# and add to the separate list
list_of_dict_output_json_dumps.append(dict_output_json_dumps)

print(f"\npurl_versions = {purl_versions}")

print(f"\n==> list_of_dict_outputs = {list_of_dict_outputs}")
print(f"\n==> list_of_dict_output_json_dumps = {list_of_dict_output_json_dumps}")
# ZAP: 2024-01-08 Monday 13:37:29. We don't want these to print for a PURL that threw an error, so we need to revise.
# print(f"\npurl_versions = {purl_versions}")
# print(f"\n==> list_of_dict_outputs = {list_of_dict_outputs}")
# print(f"\n==> list_of_dict_output_json_dumps = {list_of_dict_output_json_dumps}")

with open(
"/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-list_of_dict_outputs-2024-01-08.json",
"/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-03-list_of_dict_outputs-2024-01-08.json",
"w",
) as f:
json.dump(list_of_dict_outputs, f)

with open(
"/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-list_of_dict_output_json_dumps-2024-01-08.json",
"/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-03-list_of_dict_output_json_dumps-2024-01-08.json",
"w",
) as f:
json.dump(list_of_dict_output_json_dumps, f)

# try just one json.dumps() object -- dict_output_json_dumps -- NO: this also looks like this:
# "{\n \"purl\": \"pkg:pypi/minecode\",\n \"versions\": [\n {\n \"purl\":
with open(
"/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-single_dict_output_json_dumps-2024-01-08.json",
"/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-03-formatted_list_of_dict_output_json_dumps-2024-01-08.json",
"w",
) as f:
# json.dump(dict_output_json_dumps, f)
Expand All @@ -232,6 +307,12 @@ def list_versions(purls):
# 2024-01-05 Friday 21:11:21. THIS NOW WORKS! OUTPUT IS A LIST OF FORMATTED JSON OBJECTS!
json.dump([obj for obj in list_of_dict_outputs], f, indent=4)

# print(json.dumps(dict_output, indent=4, sort_keys=False))
# print("===")
print(
f"\n\nlist_of_dict_outputs = {json.dumps(list_of_dict_outputs, indent=4, sort_keys=False)}"
)

return purl_versions


Expand Down

0 comments on commit bc869d2

Please sign in to comment.