From bc869d2b9ccd48ce7e2d651582efc7ab976e2e58 Mon Sep 17 00:00:00 2001 From: "John M. Horan" Date: Tue, 9 Jan 2024 15:50:46 -0800 Subject: [PATCH] Update purlcli.py #247 Reference: https://github.com/nexB/purldb/issues/247 Signed-off-by: John M. Horan --- packagedb/tests/test_purlcli.py | 67 ++++++++----- purlcli.py | 161 ++++++++++++++++++++++++-------- 2 files changed, 165 insertions(+), 63 deletions(-) diff --git a/packagedb/tests/test_purlcli.py b/packagedb/tests/test_purlcli.py index 85bc678f..f8f04845 100644 --- a/packagedb/tests/test_purlcli.py +++ b/packagedb/tests/test_purlcli.py @@ -99,22 +99,21 @@ def test_validate_purl_strip(self): self.assertEqual(validated_purls, expected_results) - def test_versions(self): - purls = ["pkg:pypi/fetchcode"] - abc = purlcli.list_versions(purls) + # def test_versions(self): + # purls = ["pkg:pypi/fetchcode"] + # abc = purlcli.list_versions(purls) + # print(f"\nabc = {abc}") - print(f"\nabc = {abc}") + # for purl_list in abc: + # for p in purl_list: + # # print(PackageVersion.to_dict(p)) + # p_dict = PackageVersion.to_dict(p) + # print(f"\np_dict = {p_dict}") + # print(f"\ntype(p_dict) = {type(p_dict)}") - for purl_list in abc: - for p in purl_list: - # print(PackageVersion.to_dict(p)) - p_dict = PackageVersion.to_dict(p) - # print(f"\np_dict = {p_dict}") - # print(f"\ntype(p_dict) = {type(p_dict)}") - - json_p_dict = json.dumps(p_dict) - # print(f"\njson_p_dict = {json_p_dict}") - # print(f"\ntype(json_p_dict) = {type(json_p_dict)}") + # json_p_dict = json.dumps(p_dict) + # print(f"\njson_p_dict = {json_p_dict}") + # print(f"\ntype(json_p_dict) = {type(json_p_dict)}") # 2024-01-08 Monday 17:55:15. Based on test_api.py's class PurlValidateApiTestCase(TestCase). 
@@ -138,7 +137,8 @@ def setUp(self): def test_api_purl_validation(self): data1 = { - "purl": "pkg:npm/foobar@1.1.0", + # "purl": "pkg:npm/foobar@1.1.0", + "purl": "pkg:pypi/packagedb@2.0.0", "check_existence": True, } response1 = self.client.get(f"/api/validate/", data=data1) @@ -148,7 +148,8 @@ def test_api_purl_validation(self): print(f"") data2 = { - "purl": "pkg:npm/?foobar@1.1.0", + # "purl": "pkg:npm/?foobar@1.1.0", + "purl": "pkg:pypi/?packagedb@2.0.0", "check_existence": True, } response2 = self.client.get(f"/api/validate/", data=data2) @@ -157,22 +158,24 @@ def test_api_purl_validation(self): print(f"\nresponse2.data = {response2.data}") print(f"") - self.assertEquals(True, response1.data["valid"]) - self.assertEquals(True, response1.data["exists"]) - self.assertEquals( + self.assertEqual(True, response1.data["valid"]) + self.assertEqual(True, response1.data["exists"]) + self.assertEqual( "The provided Package URL is valid, and the package exists in the upstream repo.", response1.data["message"], ) - self.assertEquals(False, response2.data["valid"]) - self.assertEquals( + self.assertEqual(False, response2.data["valid"]) + self.assertEqual( "The provided PackageURL is not valid.", response2.data["message"] ) # ZZZ: 2024-01-08 Monday 18:54:51. 
Some exploring: data3 = { - "purl": "pkg:npm/ogdendunes", + # "purl": "pkg:npm/ogdendunes", + # "purl": "pkg:pypi/ogdendunes", + "purl": "pkg:pypi/zzzzz@2.0.0", "check_existence": True, } response3 = self.client.get(f"/api/validate/", data=data3) @@ -184,6 +187,24 @@ def test_api_purl_validation(self): self.assertEqual(True, response3.data["valid"]) self.assertEqual(False, response3.data["exists"]) self.assertEqual( - "The provided Package URL is valid but does not exist in the upstream repo.", + "The provided PackageURL is valid but does not exist in the upstream repo.", response3.data["message"], ) + + data4 = { + # "purl": "pkg:nginx/nginx@0.8.9?os=windows", + "purl": "pkg:nginx/nginx@0.8.9", + "check_existence": True, + } + response4 = self.client.get(f"/api/validate/", data=data4) + + print(f"\nresponse4 = {response4}") + print(f"\nresponse4.data = {response4.data}") + print(f"") + + self.assertEqual(True, response4.data["valid"]) + self.assertEqual(False, response4.data["exists"]) + self.assertEqual( + "The provided PackageURL is valid but does not exist in the upstream repo.", + response4.data["message"], + ) diff --git a/purlcli.py b/purlcli.py index 44be1d19..407cf1b1 100644 --- a/purlcli.py +++ b/purlcli.py @@ -1,3 +1,7 @@ +""" +2024-01-09 Tuesday 11:59:11. This began the day as a cleaned-up version of purlcli-03.py. 
+""" + import json import click @@ -43,17 +47,13 @@ def validate(purls, output, file): if file: purls = file.read().splitlines(False) - api_query = "https://public.purldb.io/api/validate/" - - # validated_purls = validate_purls(purls) - validated_purls = validate_purls(purls, api_query) + validated_purls = validate_purls(purls) json.dump(validated_purls, output, indent=4) -# def validate_purls(purls): -def validate_purls(purls, api_query): - # api_query = "https://public.purldb.io/api/validate/" +def validate_purls(purls): + api_query = "https://public.purldb.io/api/validate/" validated_purls = [] for purl in purls: purl = purl.strip() @@ -92,9 +92,14 @@ def get_versions(purls, output, file): """ Check the syntax of one or more PURLs. """ - if (purls and file) or not (purls or file): + # if (purls and file) or not (purls or file): + # raise click.UsageError("Use either purls or file but not both.") + if purls and file: raise click.UsageError("Use either purls or file but not both.") + if not (purls or file): + raise click.UsageError("Use either purls or file.") + if file: purls = file.read().splitlines(False) @@ -105,9 +110,9 @@ def get_versions(purls, output, file): def list_versions(purls): - print(f"\nlen(purls) = {len(purls)}") - print(f"\ntype(purls) = {type(purls)}") - print(f"\npurls = {purls}") + # print(f"\nlen(purls) = {len(purls)}") + # print(f"\ntype(purls) = {type(purls)}") + # print(f"\npurls = {purls}") purl_versions = [] list_of_dict_outputs = [] @@ -115,52 +120,119 @@ def list_versions(purls): list_of_dict_output_json_dumps = [] for purl in purls: dict_output = {} - print(f"\n==> purl = {purl}") + # print(f"purl = {purl}") dict_output["purl"] = purl dict_output["versions"] = [] + # YO: Do we want to check for version data and if found remove it? Or allow? purl = purl.strip() if not purl: continue + # YO: try/except -- 2024-01-07 Sunday 21:14:37. I tried this but the error thrown by fetchcode does not trigger the try/except. 
+ # https://github.com/nexB/fetchcode/blob/d0a3fa9bb56dc3a77f7d3d7bd5b8d0e40c7a8612/src/fetchcode/package_versions.py#L512-L524 + # try: + # some_variable = list(router.process(purl)) + + # 2024-01-07 Sunday 21:58:44. TEST to detect fetchcode error. + # result = os.system("python other_script.py") + # if 0 == result: + # YO: 2024-01-07 Sunday 22:25:38. This DOES catch an error -- empty list means that, for some unidentified reason, there is no responsive data. + # results = list(router.process(purl)) + # if results != []: + # print(" Command executed successfully") + # print(f"results = {results}") + # else: + # print(" Command didn't execute successfully") + # print(f"results = {results}") + + # 2024-01-08 Monday 15:28:11. Can this alert us to a problem w/o that pseudo error message? + # test_variable01 = list(router.process(purl)) + # print(f"\ntest_variable01 = {test_variable01}") + + # YO: 2024-01-08 Monday 15:25:28. Each of these throws a pseudo error for pkg:pypi/ogdendunes but not for pkg:pypi/foobar -- Error while fetching 'https://pypi.org/pypi/ogdendunes/json': 404 -- but each produces an empty []. + # results = list(versions(purl)) + # results = list(versions(purl)) + # results = list(router.process(purl)) + + # test01 = versions(purl) + # test02 = router.process(purl) + # print(f"results = {results}") + # print(f"test01 = {test01}") + # print(f"test02 = {test02}") + # ZAP: If we have multiple inputs and some are valid, I assume we DO want to return data for those. + # if results == []: + # print( + # f"\nThere was an error with your '{purl}' query. Make sure that '{purl}' actually exists in the relevant repository." + # ) + # continue + + # ZAP: 2024-01-08 Monday 17:07:34. Rather than test results as above, we'll use 'validate'. 
+ + api_query = "https://public.purldb.io/api/validate/" + purl = purl.strip() + request_body = {"purl": purl, "check_existence": True} + response = requests.get(api_query, params=request_body) + results = response.json() + + # print(f"\n\nresults = {results}") + + if results["exists"] != True: + print( + f"\nThere was an error with your '{purl}' query. Make sure that '{purl}' actually exists in the relevant repository." + ) + continue + # This works: this is a list of PackageVersion objects + # print(f"\n\n* ABOUT TO PROCESS A PURL") results = list(router.process(purl)) + # print(f"* JUST PROCESSED A PURL") - print(f"\n\nrouter.process(purl) = {router.process(purl)}") + # print(f"\n\nrouter.process(purl) = {router.process(purl)}") - print(f"\nlist(router.process(purl)) = {list(router.process(purl))}") + # print(f"\n\n* ABOUT TO PROCESS A PURL") + # print(f"\nlist(router.process(purl)) = {list(router.process(purl))}") + # print(f"* JUST PROCESSED A PURL") # versions(purl) is a generator object - print(f"\nversions(purl) = {versions(purl)}") + # print(f"\nversions(purl) = {versions(purl)}") # Another test -- this is a list of PackageVersion objects + # print(f"\n\n* ABOUT TO PROCESS A PURL") results_versions = list(versions(purl)) - print(f"\nresults_versions = {results_versions}") + # print(f"* JUST PROCESSED A PURL") + # print(f"\nresults_versions = {results_versions}") purl_versions.append(results) # Test: list of strings + # print(f"\n\n* ABOUT TO PROCESS A PURL") results_values = [v.value for v in router.process(purl)] - print(f"\nresults_values = {results_values}") + # print(f"* JUST PROCESSED A PURL") + # print(f"\nresults_values = {results_values}") # 2024-01-05 Friday 17:25:41. Iterate through PackageVersion() objects + # YO: rename the variable! 
+ # print(f"\n\n* ABOUT TO PROCESS A PURL") for package_version_object in list(versions(purl)): - print(f"\n*** package_version_object = {package_version_object}") + # YO This print statement is not reached when the PURL query returns a 404. + # print(f"* JUST PROCESSED A PURL") + # print(f"\n*** package_version_object = {package_version_object}") - print( - f"\n*** package_version_object.to_dict() = {package_version_object.to_dict()}" - ) + # print( + # f"\n*** package_version_object.to_dict() = {package_version_object.to_dict()}" + # ) - print( - f"\n*** package_version_object.to_dict()['value'] = {package_version_object.to_dict()['value']}" - ) + # print( + # f"\n*** package_version_object.to_dict()['value'] = {package_version_object.to_dict()['value']}" + # ) - print( - f"\n*** package_version_object.to_dict()['release_date'] = {package_version_object.to_dict()['release_date']}" - ) + # print( + # f"\n*** package_version_object.to_dict()['release_date'] = {package_version_object.to_dict()['release_date']}" + # ) # Here, too, create dict which we'll convert to JSON with json.dumps(). nested_dict = {} - print(f"type(nested_dict) = {type(nested_dict)}") + # print(f"type(nested_dict) = {type(nested_dict)}") nested_purl = purl + "@" + f'{package_version_object.to_dict()["value"]}' # nested_purl = "TEST" @@ -172,10 +244,14 @@ def list_versions(purls): "release_date" ] = f'{package_version_object.to_dict()["release_date"]}' dict_output["versions"].append(nested_dict) + # # print(f"\nnested_dict = {nested_dict}") + # print( + # f"\nnested_dict = {json.dumps(nested_dict, indent=4, sort_keys=False)}" + # ) # dict - print(f"\n==> dict_output = {dict_output}") - print(f"\n==> type(dict_output) = {type(dict_output)}") + # print(f"\n==> dict_output = {dict_output}") + # print(f"\n==> type(dict_output) = {type(dict_output)}") # add to list list_of_dict_outputs.append(dict_output) # 2024-01-05 Friday 20:53:18. Does this format? 
@@ -191,31 +267,30 @@ def list_versions(purls): # dict_output_json_loads, indent=2, sort_keys=False # ) - print(f"\n==> dict_output_json_dumps = {dict_output_json_dumps}") + # YO: 2024-01-08 Monday 15:13:07. This is a list of the versions -- each nested_dict -- above. + # print(f"\n==> dict_output_json_dumps = {dict_output_json_dumps}") # and add to the separate list list_of_dict_output_json_dumps.append(dict_output_json_dumps) - print(f"\npurl_versions = {purl_versions}") - - print(f"\n==> list_of_dict_outputs = {list_of_dict_outputs}") - print(f"\n==> list_of_dict_output_json_dumps = {list_of_dict_output_json_dumps}") + # ZAP: 2024-01-08 Monday 13:37:29. We don't want these to print for a PURL that threw an error, so need to revise. + # print(f"\npurl_versions = {purl_versions}") + # print(f"\n==> list_of_dict_outputs = {list_of_dict_outputs}") + # print(f"\n==> list_of_dict_output_json_dumps = {list_of_dict_output_json_dumps}") with open( - "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-list_of_dict_outputs-2024-01-08.json", + "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-03-list_of_dict_outputs-2024-01-08.json", "w", ) as f: json.dump(list_of_dict_outputs, f) with open( - "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-list_of_dict_output_json_dumps-2024-01-08.json", + "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-03-list_of_dict_output_json_dumps-2024-01-08.json", "w", ) as f: json.dump(list_of_dict_output_json_dumps, f) - # try just one json.dumps() object -- dict_output_json_dumps -- NO: this also looks like this: - # "{\n \"purl\": \"pkg:pypi/minecode\",\n \"versions\": [\n {\n \"purl\": with open( - "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-single_dict_output_json_dumps-2024-01-08.json", + "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-03-formatted_list_of_dict_output_json_dumps-2024-01-08.json", "w", ) as f: # 
json.dump(dict_output_json_dumps, f) @@ -232,6 +307,12 @@ def list_versions(purls): # 2024-01-05 Friday 21:11:21. THIS NOW WORKS! OUTPUT IS A LIST OF FORMATTED JSON OBJECTS! json.dump([obj for obj in list_of_dict_outputs], f, indent=4) + # print(json.dumps(dict_output, indent=4, sort_keys=False)) + # print("===") + print( + f"\n\nlist_of_dict_outputs = {json.dumps(list_of_dict_outputs, indent=4, sort_keys=False)}" + ) + return purl_versions