Skip to content

Commit

Permalink
Generate a package_uid in create_from_data when not provided #1256 (#…
Browse files Browse the repository at this point in the history
…1258)

Signed-off-by: tdruez <[email protected]>
  • Loading branch information
tdruez authored Jun 7, 2024
1 parent 7fabcb2 commit 224a00d
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 30 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ v34.6.0 (unreleased)
- Add a new ``run`` entry point for executing pipeline as a single command.
https://github.com/nexB/scancode.io/pull/1256

- Generate a DiscoveredPackage.package_uid in create_from_data when not provided.
https://github.com/nexB/scancode.io/issues/1256

v34.5.0 (2024-05-22)
--------------------

Expand Down
12 changes: 12 additions & 0 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
from licensedcode.cache import build_spdx_license_expression
from licensedcode.cache import get_licensing
from matchcode_toolkit.fingerprinting import IGNORED_DIRECTORY_FINGERPRINTS
from packagedcode.models import build_package_uid
from packageurl import PackageURL
from packageurl import normalize_qualifiers
from packageurl.contrib.django.models import PackageURLMixin
Expand Down Expand Up @@ -3143,6 +3144,17 @@ def create_from_data(cls, project, package_data):
}

discovered_package = cls(project=project, **cleaned_data)

# The ``package_uid`` field is not defined as required on the model,
# but it is essential for retrieving the Package object from the database
# in various places, such as in the ``update_or_create_resource`` function.
# If ``package_uid`` is not provided in the ``package_data``, a value is
# generated using the ``build_package_uid`` function from the ``packagedcode``
# module.
if not package_data.get("package_uid"):
package_uid = build_package_uid(discovered_package.package_url)
discovered_package.package_uid = package_uid

# Using save_error=False to not capture potential errors at this level but
# rather in the CodebaseResource.create_and_add_package method so resource data
# can be injected in the ProjectMessage record.
Expand Down
6 changes: 3 additions & 3 deletions scanpipe/tests/data/d2d/about_files/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
"notice_text": "",
"source_packages": [],
"extra_data": {},
"package_uid": "",
"package_uid": "pkg:local-files/analysis-90cb6382/90cb6382-431c-4187-be76-d4f1a2199a2f?uuid=fixed-uid-done-for-testing-5642512d1758",
"datasource_ids": [],
"datafile_paths": [],
"file_references": [],
Expand Down Expand Up @@ -122,7 +122,7 @@
"*flume-ng-node-*.jar-extract/org/apache/flume/node/ConfigurationProvider.class"
]
},
"package_uid": "",
"package_uid": "pkg:maven/log4j/[email protected]?uuid=fixed-uid-done-for-testing-5642512d1758",
"datasource_ids": [],
"datafile_paths": [],
"file_references": [],
Expand Down Expand Up @@ -555,7 +555,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=90cb6382-431c-4187-be76-d4f1a2199a2f"
],
"emails": [],
"urls": [
Expand Down
26 changes: 13 additions & 13 deletions scanpipe/tests/data/flume-ng-node-d2d.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
"notice_text": "",
"source_packages": [],
"extra_data": {},
"package_uid": "",
"package_uid": "pkg:local-files/analysis-b74fe5df/b74fe5df-e965-415e-ba65-f38421a0695d?uuid=fixed-uid-done-for-testing-5642512d1758",
"datasource_ids": [],
"datafile_paths": [],
"file_references": [],
Expand Down Expand Up @@ -411,7 +411,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down Expand Up @@ -602,7 +602,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down Expand Up @@ -666,7 +666,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down Expand Up @@ -730,7 +730,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down Expand Up @@ -794,7 +794,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down Expand Up @@ -858,7 +858,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down Expand Up @@ -922,7 +922,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down Expand Up @@ -986,7 +986,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down Expand Up @@ -1050,7 +1050,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down Expand Up @@ -1114,7 +1114,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down Expand Up @@ -1178,7 +1178,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down Expand Up @@ -1242,7 +1242,7 @@
"authors": [],
"package_data": [],
"for_packages": [
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
],
"emails": [],
"urls": [
Expand Down
34 changes: 26 additions & 8 deletions scanpipe/tests/pipes/test_pipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def test_scanpipe_pipes_update_or_create_package(self):

# Make sure we can assign a package to multiple Resources calling
# update_or_create_package() several times.
package_data2["package_uid"] = package2.package_uid
resource2 = make_resource_file(project=p1, path="filename2.ext")
package2 = pipes.update_or_create_package(p1, package_data2, [resource2])
self.assertIn(package2, resource1.discovered_packages.all())
Expand Down Expand Up @@ -157,25 +158,42 @@ def test_scanpipe_pipes_create_local_files_package(self, mock_uuid4):
self.assertEqual(expected_purl, local_package.purl)
self.assertEqual("mit", local_package.declared_license_expression)
self.assertEqual("Copyright", local_package.copyright)
self.assertEqual([expected_purl], resource1.for_packages)
self.assertEqual(
[f"{expected_purl}?uuid={forced_uuid}"], resource1.for_packages
)

def test_scanpipe_pipes_update_or_create_package_package_uid(self):
p1 = Project.objects.create(name="Analysis")
package_data = dict(package_data1)

package_data["package_uid"] = None
pipes.update_or_create_package(p1, package_data)
pipes.update_or_create_package(p1, package_data)
package1 = pipes.update_or_create_package(p1, package_data)
self.assertTrue(package1.package_uid)

package_data["package_uid"] = ""
pipes.update_or_create_package(p1, package_data)
package2 = pipes.update_or_create_package(p1, package_data)
self.assertTrue(package2.package_uid)

del package_data["package_uid"]
pipes.update_or_create_package(p1, package_data)
package3 = pipes.update_or_create_package(p1, package_data)
self.assertTrue(package3.package_uid)

self.assertNotEqual(package1.package_uid, package2.package_uid)
self.assertNotEqual(package2.package_uid, package3.package_uid)

# Make sure only 1 package was created, then properly found in the db regardless
# of the empty/none package_uid.
self.assertEqual(1, DiscoveredPackage.objects.count())
# A `package_uid` value is generated when not provided, making each
# package instance unique.
self.assertEqual(3, DiscoveredPackage.objects.count())

# In that case, there is a match in the db, the object is updated
package_data["package_uid"] = package1.package_uid
package_data["sha1"] = "sha1"
# We need to use an empty field since override=False in update_from_data
self.assertEqual("", package1.sha1)
pipes.update_or_create_package(p1, package_data)
package1.refresh_from_db()
self.assertEqual("sha1", package1.sha1)
self.assertEqual(3, DiscoveredPackage.objects.count())

def test_scanpipe_pipes_update_or_create_dependency(self):
p1 = Project.objects.create(name="Analysis")
Expand Down
6 changes: 4 additions & 2 deletions scanpipe/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2569,9 +2569,11 @@ def test_scanpipe_discovered_package_model_unique_package_uid_in_project(self):
package_data_no_uid = package_data1.copy()
package_data_no_uid.pop("package_uid")
package2 = DiscoveredPackage.create_from_data(project1, package_data_no_uid)
self.assertFalse(package2.package_uid)
self.assertTrue(package2.package_uid)
self.assertNotEqual(package.package_uid, package2.package_uid)
package3 = DiscoveredPackage.create_from_data(project1, package_data_no_uid)
self.assertFalse(package3.package_uid)
self.assertTrue(package3.package_uid)
self.assertNotEqual(package.package_uid, package3.package_uid)

@skipIf(connection.vendor == "sqlite", "No max_length constraints on SQLite.")
def test_scanpipe_codebase_resource_create_and_add_package_warnings(self):
Expand Down
18 changes: 14 additions & 4 deletions scanpipe/tests/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -1107,10 +1107,15 @@ def test_scanpipe_load_sbom_pipeline_cyclonedx_integration(self):
self.assertEqual(expected["filename"], package.filename)

@mock.patch("scanpipe.pipes.purldb.request_post")
def test_scanpipe_deploy_to_develop_pipeline_integration(self, mock_request):
@mock.patch("uuid.uuid4")
def test_scanpipe_deploy_to_develop_pipeline_integration(
self, mock_uuid4, mock_request
):
forced_uuid = "b74fe5df-e965-415e-ba65-f38421a0695d"
mock_uuid4.return_value = forced_uuid
mock_request.return_value = None
pipeline_name = "map_deploy_to_develop"
project1 = Project.objects.create(name="Analysis")
project1 = Project.objects.create(name="Analysis", uuid=forced_uuid)

jar_location = self.data_location / "d2d" / "jars"
project1.copy_input_from(jar_location / "from-flume-ng-node-1.9.0.zip")
Expand All @@ -1132,10 +1137,15 @@ def test_scanpipe_deploy_to_develop_pipeline_integration(self, mock_request):
self.assertPipelineResultEqual(expected_file, result_file)

@mock.patch("scanpipe.pipes.purldb.request_post")
def test_scanpipe_deploy_to_develop_pipeline_with_about_file(self, mock_request):
@mock.patch("uuid.uuid4")
def test_scanpipe_deploy_to_develop_pipeline_with_about_file(
self, mock_uuid4, mock_request
):
forced_uuid = "90cb6382-431c-4187-be76-d4f1a2199a2f"
mock_uuid4.return_value = forced_uuid
mock_request.return_value = None
pipeline_name = "map_deploy_to_develop"
project1 = Project.objects.create(name="Analysis")
project1 = Project.objects.create(name="Analysis", uuid=forced_uuid)

data_dir = self.data_location / "d2d" / "about_files"
project1.copy_input_from(data_dir / "from-with-about-file.zip")
Expand Down

0 comments on commit 224a00d

Please sign in to comment.