From bc869d2b9ccd48ce7e2d651582efc7ab976e2e58 Mon Sep 17 00:00:00 2001
From: "John M. Horan" <johnmhoran@gmail.com>
Date: Tue, 9 Jan 2024 15:50:46 -0800
Subject: [PATCH] Update purlcli.py #247

Reference: https://github.com/nexB/purldb/issues/247

Signed-off-by: John M. Horan <johnmhoran@gmail.com>
---
 packagedb/tests/test_purlcli.py |  67 ++++++++-----
 purlcli.py                      | 161 ++++++++++++++++++++++++--------
 2 files changed, 165 insertions(+), 63 deletions(-)

diff --git a/packagedb/tests/test_purlcli.py b/packagedb/tests/test_purlcli.py
index 85bc678f..f8f04845 100644
--- a/packagedb/tests/test_purlcli.py
+++ b/packagedb/tests/test_purlcli.py
@@ -99,22 +99,21 @@ def test_validate_purl_strip(self):
 
         self.assertEqual(validated_purls, expected_results)
 
-    def test_versions(self):
-        purls = ["pkg:pypi/fetchcode"]
-        abc = purlcli.list_versions(purls)
+    # def test_versions(self):
+    #     purls = ["pkg:pypi/fetchcode"]
+    #     abc = purlcli.list_versions(purls)
+    #     print(f"\nabc = {abc}")
 
-        print(f"\nabc = {abc}")
+    #     for purl_list in abc:
+    #         for p in purl_list:
+    #             # print(PackageVersion.to_dict(p))
+    #             p_dict = PackageVersion.to_dict(p)
+    #             print(f"\np_dict = {p_dict}")
+    #             print(f"\ntype(p_dict) = {type(p_dict)}")
 
-        for purl_list in abc:
-            for p in purl_list:
-                # print(PackageVersion.to_dict(p))
-                p_dict = PackageVersion.to_dict(p)
-                # print(f"\np_dict = {p_dict}")
-                # print(f"\ntype(p_dict) = {type(p_dict)}")
-
-                json_p_dict = json.dumps(p_dict)
-                # print(f"\njson_p_dict = {json_p_dict}")
-                # print(f"\ntype(json_p_dict) = {type(json_p_dict)}")
+    #             json_p_dict = json.dumps(p_dict)
+    #             print(f"\njson_p_dict = {json_p_dict}")
+    #             print(f"\ntype(json_p_dict) = {type(json_p_dict)}")
 
 
 # 2024-01-08 Monday 17:55:15.  Based on test_api.py's class PurlValidateApiTestCase(TestCase).
@@ -138,7 +137,8 @@ def setUp(self):
 
     def test_api_purl_validation(self):
         data1 = {
-            "purl": "pkg:npm/foobar@1.1.0",
+            # "purl": "pkg:npm/foobar@1.1.0",
+            "purl": "pkg:pypi/packagedb@2.0.0",
             "check_existence": True,
         }
         response1 = self.client.get(f"/api/validate/", data=data1)
@@ -148,7 +148,8 @@ def test_api_purl_validation(self):
         print(f"")
 
         data2 = {
-            "purl": "pkg:npm/?foobar@1.1.0",
+            # "purl": "pkg:npm/?foobar@1.1.0",
+            "purl": "pkg:pypi/?packagedb@2.0.0",
             "check_existence": True,
         }
         response2 = self.client.get(f"/api/validate/", data=data2)
@@ -157,22 +158,24 @@ def test_api_purl_validation(self):
         print(f"\nresponse2.data = {response2.data}")
         print(f"")
 
-        self.assertEquals(True, response1.data["valid"])
-        self.assertEquals(True, response1.data["exists"])
-        self.assertEquals(
+        self.assertEqual(True, response1.data["valid"])
+        self.assertEqual(True, response1.data["exists"])
+        self.assertEqual(
             "The provided Package URL is valid, and the package exists in the upstream repo.",
             response1.data["message"],
         )
 
-        self.assertEquals(False, response2.data["valid"])
-        self.assertEquals(
+        self.assertEqual(False, response2.data["valid"])
+        self.assertEqual(
             "The provided PackageURL is not valid.", response2.data["message"]
         )
 
         # ZZZ: 2024-01-08 Monday 18:54:51.  Some exploring:
 
         data3 = {
-            "purl": "pkg:npm/ogdendunes",
+            # "purl": "pkg:npm/ogdendunes",
+            # "purl": "pkg:pypi/ogdendunes",
+            "purl": "pkg:pypi/zzzzz@2.0.0",
             "check_existence": True,
         }
         response3 = self.client.get(f"/api/validate/", data=data3)
@@ -184,6 +187,24 @@ def test_api_purl_validation(self):
         self.assertEqual(True, response3.data["valid"])
         self.assertEqual(False, response3.data["exists"])
         self.assertEqual(
-            "The provided Package URL is valid but does not exist in the upstream repo.",
+            "The provided PackageURL is valid but does not exist in the upstream repo.",
             response3.data["message"],
         )
+
+        data4 = {
+            # "purl": "pkg:nginx/nginx@0.8.9?os=windows",
+            "purl": "pkg:nginx/nginx@0.8.9",
+            "check_existence": True,
+        }
+        response4 = self.client.get(f"/api/validate/", data=data4)
+
+        print(f"\nresponse4 = {response4}")
+        print(f"\nresponse4.data = {response4.data}")
+        print(f"")
+
+        self.assertEqual(True, response4.data["valid"])
+        self.assertEqual(False, response4.data["exists"])
+        self.assertEqual(
+            "The provided PackageURL is valid but does not exist in the upstream repo.",
+            response4.data["message"],
+        )
diff --git a/purlcli.py b/purlcli.py
index 44be1d19..407cf1b1 100644
--- a/purlcli.py
+++ b/purlcli.py
@@ -1,3 +1,7 @@
+"""
+2024-01-09 Tuesday 11:59:11.  This began the day as a cleaned-up version of purlcli-03.py.
+"""
+
 import json
 
 import click
@@ -43,17 +47,13 @@ def validate(purls, output, file):
     if file:
         purls = file.read().splitlines(False)
 
-    api_query = "https://public.purldb.io/api/validate/"
-
-    # validated_purls = validate_purls(purls)
-    validated_purls = validate_purls(purls, api_query)
+    validated_purls = validate_purls(purls)
 
     json.dump(validated_purls, output, indent=4)
 
 
-# def validate_purls(purls):
-def validate_purls(purls, api_query):
-    # api_query = "https://public.purldb.io/api/validate/"
+def validate_purls(purls):
+    api_query = "https://public.purldb.io/api/validate/"
     validated_purls = []
     for purl in purls:
         purl = purl.strip()
@@ -92,9 +92,14 @@ def get_versions(purls, output, file):
     """
     Check the syntax of one or more PURLs.
     """
-    if (purls and file) or not (purls or file):
+    # if (purls and file) or not (purls or file):
+    #     raise click.UsageError("Use either purls or file but not both.")
+    if purls and file:
         raise click.UsageError("Use either purls or file but not both.")
 
+    if not (purls or file):
+        raise click.UsageError("Use either purls or file.")
+
     if file:
         purls = file.read().splitlines(False)
 
@@ -105,9 +110,9 @@ def get_versions(purls, output, file):
 
 
 def list_versions(purls):
-    print(f"\nlen(purls) = {len(purls)}")
-    print(f"\ntype(purls) = {type(purls)}")
-    print(f"\npurls = {purls}")
+    # print(f"\nlen(purls) = {len(purls)}")
+    # print(f"\ntype(purls) = {type(purls)}")
+    # print(f"\npurls = {purls}")
     purl_versions = []
 
     list_of_dict_outputs = []
@@ -115,52 +120,119 @@ def list_versions(purls):
     list_of_dict_output_json_dumps = []
     for purl in purls:
         dict_output = {}
-        print(f"\n==> purl = {purl}")
+        # print(f"purl = {purl}")
         dict_output["purl"] = purl
         dict_output["versions"] = []
+        # YO: Do we want to check for version data and if found remove it?  Or allow?
         purl = purl.strip()
         if not purl:
             continue
 
+        # YO: try/except -- 2024-01-07 Sunday 21:14:37.  I tried this but the error thrown by fetchcode does not trigger the try/except.
+        # https://github.com/nexB/fetchcode/blob/d0a3fa9bb56dc3a77f7d3d7bd5b8d0e40c7a8612/src/fetchcode/package_versions.py#L512-L524
+        # try:
+        #     some_variable = list(router.process(purl))
+
+        # 2024-01-07 Sunday 21:58:44.  TEST to detect fetchcode error.
+        # result = os.system("python other_script.py")
+        # if 0 == result:
+        # YO: 2024-01-07 Sunday 22:25:38.  This DOES catch an error -- empty list means that, for some unidentified reason, there is no responsive data.
+        # results = list(router.process(purl))
+        # if results != []:
+        #     print(" Command executed successfully")
+        #     print(f"results = {results}")
+        # else:
+        #     print(" Command didn't execute successfully")
+        #     print(f"results = {results}")
+
+        # 2024-01-08 Monday 15:28:11.  Can this alert us to a problem w/o that pseudo error message?
+        # test_variable01 = list(router.process(purl))
+        # print(f"\ntest_variable01 = {test_variable01}")
+
+        # YO: 2024-01-08 Monday 15:25:28.  Each of these throws a pseudo error for pkg:pypi/ogdendunes but not for pkg:pypi/foobar -- Error while fetching 'https://pypi.org/pypi/ogdendunes/json': 404 -- but each produces an empty [].
+        # results = list(versions(purl))
+        # results = list(versions(purl))
+        # results = list(router.process(purl))
+
+        # test01 = versions(purl)
+        # test02 = router.process(purl)
+        # print(f"results = {results}")
+        # print(f"test01 = {test01}")
+        # print(f"test02 = {test02}")
+        # ZAP: If we have multiple inputs and some are valid, I assume we DO want to return data for those.
+        # if results == []:
+        #     print(
+        #         f"\nThere was an error with your '{purl}' query.  Make sure that '{purl}' actually exists in the relevant repository."
+        #     )
+        #     continue
+
+        # ZAP: 2024-01-08 Monday 17:07:34.  Rather than test results as above, we'll use 'validate'.
+
+        api_query = "https://public.purldb.io/api/validate/"
+        purl = purl.strip()
+        request_body = {"purl": purl, "check_existence": True}
+        response = requests.get(api_query, params=request_body)
+        results = response.json()
+
+        # print(f"\n\nresults = {results}")
+
+        if results["exists"] != True:
+            print(
+                f"\nThere was an error with your '{purl}' query.  Make sure that '{purl}' actually exists in the relevant repository."
+            )
+            continue
+
         # This works: this is a list of PackageVersion objects
+        # print(f"\n\n* ABOUT TO PROCESS A PURL")
         results = list(router.process(purl))
+        # print(f"* JUST PROCESSED A PURL")
 
-        print(f"\n\nrouter.process(purl) = {router.process(purl)}")
+        # print(f"\n\nrouter.process(purl) = {router.process(purl)}")
 
-        print(f"\nlist(router.process(purl)) = {list(router.process(purl))}")
+        # print(f"\n\n* ABOUT TO PROCESS A PURL")
+        # print(f"\nlist(router.process(purl)) = {list(router.process(purl))}")
+        # print(f"* JUST PROCESSED A PURL")
 
         # versions(purl) is a generator object
-        print(f"\nversions(purl) = {versions(purl)}")
+        # print(f"\nversions(purl) = {versions(purl)}")
 
         # Another test -- this is a list of PackageVersion objects
+        # print(f"\n\n* ABOUT TO PROCESS A PURL")
         results_versions = list(versions(purl))
-        print(f"\nresults_versions = {results_versions}")
+        # print(f"* JUST PROCESSED A PURL")
+        # print(f"\nresults_versions = {results_versions}")
 
         purl_versions.append(results)
 
         # Test: list of strings
+        # print(f"\n\n* ABOUT TO PROCESS A PURL")
         results_values = [v.value for v in router.process(purl)]
-        print(f"\nresults_values = {results_values}")
+        # print(f"* JUST PROCESSED A PURL")
+        # print(f"\nresults_values = {results_values}")
 
         # 2024-01-05 Friday 17:25:41.  Iterate through PackageVersion() objects
+        # YO: rename the variable!
+        # print(f"\n\n* ABOUT TO PROCESS A PURL")
         for package_version_object in list(versions(purl)):
-            print(f"\n*** package_version_object = {package_version_object}")
+            # YO: This print statement is not reached when the PURL query returns a 404.
+            # print(f"* JUST PROCESSED A PURL")
+            # print(f"\n*** package_version_object = {package_version_object}")
 
-            print(
-                f"\n*** package_version_object.to_dict() = {package_version_object.to_dict()}"
-            )
+            # print(
+            #     f"\n*** package_version_object.to_dict() = {package_version_object.to_dict()}"
+            # )
 
-            print(
-                f"\n*** package_version_object.to_dict()['value'] = {package_version_object.to_dict()['value']}"
-            )
+            # print(
+            #     f"\n*** package_version_object.to_dict()['value'] = {package_version_object.to_dict()['value']}"
+            # )
 
-            print(
-                f"\n*** package_version_object.to_dict()['release_date'] = {package_version_object.to_dict()['release_date']}"
-            )
+            # print(
+            #     f"\n*** package_version_object.to_dict()['release_date'] = {package_version_object.to_dict()['release_date']}"
+            # )
 
             # Here, too, create dict which we'll convert to JSON with json.dumps().
             nested_dict = {}
-            print(f"type(nested_dict) = {type(nested_dict)}")
+            # print(f"type(nested_dict) = {type(nested_dict)}")
 
             nested_purl = purl + "@" + f'{package_version_object.to_dict()["value"]}'
             # nested_purl = "TEST"
@@ -172,10 +244,14 @@ def list_versions(purls):
                 "release_date"
             ] = f'{package_version_object.to_dict()["release_date"]}'
             dict_output["versions"].append(nested_dict)
+            # # print(f"\nnested_dict = {nested_dict}")
+            # print(
+            #     f"\nnested_dict = {json.dumps(nested_dict, indent=4, sort_keys=False)}"
+            # )
 
         # dict
-        print(f"\n==> dict_output = {dict_output}")
-        print(f"\n==> type(dict_output) = {type(dict_output)}")
+        # print(f"\n==> dict_output = {dict_output}")
+        # print(f"\n==> type(dict_output) = {type(dict_output)}")
         # add to list
         list_of_dict_outputs.append(dict_output)
         # 2024-01-05 Friday 20:53:18.  Does this format?
@@ -191,31 +267,30 @@ def list_versions(purls):
         #     dict_output_json_loads, indent=2, sort_keys=False
         # )
 
-        print(f"\n==> dict_output_json_dumps = {dict_output_json_dumps}")
+        # YO: 2024-01-08 Monday 15:13:07.  This is a list of the versions -- each nested_dict -- above.
+        # print(f"\n==> dict_output_json_dumps = {dict_output_json_dumps}")
         # and add to the separate list
         list_of_dict_output_json_dumps.append(dict_output_json_dumps)
 
-    print(f"\npurl_versions = {purl_versions}")
-
-    print(f"\n==> list_of_dict_outputs = {list_of_dict_outputs}")
-    print(f"\n==> list_of_dict_output_json_dumps = {list_of_dict_output_json_dumps}")
+    # ZAP: 2024-01-08 Monday 13:37:29.  We don't want these to print for a PURL that threw an error, so we need to revise.
+    # print(f"\npurl_versions = {purl_versions}")
+    # print(f"\n==> list_of_dict_outputs = {list_of_dict_outputs}")
+    # print(f"\n==> list_of_dict_output_json_dumps = {list_of_dict_output_json_dumps}")
 
     with open(
-        "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-list_of_dict_outputs-2024-01-08.json",
+        "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-03-list_of_dict_outputs-2024-01-08.json",
         "w",
     ) as f:
         json.dump(list_of_dict_outputs, f)
 
     with open(
-        "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-list_of_dict_output_json_dumps-2024-01-08.json",
+        "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-03-list_of_dict_output_json_dumps-2024-01-08.json",
         "w",
     ) as f:
         json.dump(list_of_dict_output_json_dumps, f)
 
-    # try just one json.dumps() object -- dict_output_json_dumps -- NO: this also looks like this:
-    # "{\n  \"purl\": \"pkg:pypi/minecode\",\n  \"versions\": [\n    {\n      \"purl\":
     with open(
-        "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-single_dict_output_json_dumps-2024-01-08.json",
+        "/mnt/c/nexb/purldb-testing/2024-01-08-testing/json-output/purlcli-03-formatted_list_of_dict_output_json_dumps-2024-01-08.json",
         "w",
     ) as f:
         # json.dump(dict_output_json_dumps, f)
@@ -232,6 +307,12 @@ def list_versions(purls):
         # 2024-01-05 Friday 21:11:21.  THIS NOW WORKS!  OUTPUT IS A LIST OF FORMATTED JSON OBJECTS!
         json.dump([obj for obj in list_of_dict_outputs], f, indent=4)
 
+    # print(json.dumps(dict_output, indent=4, sort_keys=False))
+    # print("===")
+    print(
+        f"\n\nlist_of_dict_outputs = {json.dumps(list_of_dict_outputs, indent=4, sort_keys=False)}"
+    )
+
     return purl_versions