Skip to content

Commit

Permalink
Manifest is now artifactless
Browse files Browse the repository at this point in the history
Objects of the Manifest class no longer store the manifest data (raw binary json data) in a
linked artifact. Instead, a new TextField called 'data' was added to the Manifest class itself.

closes #1288
  • Loading branch information
MichalPysik committed Apr 22, 2024
1 parent 742acc5 commit 5178773
Show file tree
Hide file tree
Showing 13 changed files with 279 additions and 228 deletions.
3 changes: 3 additions & 0 deletions CHANGES/1288.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Updated the Manifest model to no longer rely on artifacts, storing all manifest data internally
within the database. This change dissociates the manifest from external files on the storage
backend.
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

from json.decoder import JSONDecodeError

from gettext import gettext as _
Expand All @@ -6,9 +8,12 @@

from django.core.exceptions import ObjectDoesNotExist
from django.core.management import BaseCommand
from django.db.models import Q

from pulp_container.app.models import Manifest

from pulp_container.app.utils import get_content_data

from pulp_container.constants import MEDIA_TYPE


Expand All @@ -31,41 +36,60 @@ class Command(BaseCommand):
def handle(self, *args, **options):
manifests_updated_count = 0

manifests = Manifest.objects.filter(labels={}, annotations={})
manifests = Manifest.objects.filter(Q(data="") | Q(annotations={}, labels={}))
manifests = manifests.exclude(
media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI, MEDIA_TYPE.MANIFEST_V1]
)
manifests_updated_count += self.update_manifests(manifests)

manifest_lists = Manifest.objects.filter(
media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI], annotations={}
Q(media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI]),
Q(data="") | Q(annotations={}),
)
manifests_updated_count += self.update_manifests(manifest_lists)

self.stdout.write(
self.style.SUCCESS("Successfully updated %d manifests." % manifests_updated_count)
)

def init_manifest(self, manifest):
has_initialized_data = manifest.data != ""
if has_initialized_data:
manifest_data = json.loads(manifest.data)
else:
manifest_artifact = manifest._artifacts.get()
manifest_data, raw_bytes_data = get_content_data(manifest_artifact)
manifest.data = raw_bytes_data.decode("utf-8")
manifest._artifacts.clear()

manifest.annotations = manifest_data.get("annotations", {})

has_annotations = bool(manifest.annotations)
has_labels = manifest.init_labels()
has_image_nature = manifest.init_image_nature()

return has_annotations or has_labels or has_image_nature or (not has_initialized_data)

def update_manifests(self, manifests_qs):
manifests_updated_count = 0
manifests_to_update = []
for manifest in manifests_qs.iterator():
# suppress non-existing/already migrated artifacts and corrupted JSON files
with suppress(ObjectDoesNotExist, JSONDecodeError):
has_metadata = manifest.init_metadata()
if has_metadata:
needs_update = self.init_manifest(manifest)
if needs_update:
manifests_to_update.append(manifest)

if len(manifests_to_update) > 1000:
fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak"]
fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak", "data"]
manifests_qs.model.objects.bulk_update(
manifests_to_update,
fields_to_update,
)
manifests_updated_count += len(manifests_to_update)
manifests_to_update.clear()
if manifests_to_update:
fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak"]
fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak", "data"]
manifests_qs.model.objects.bulk_update(
manifests_to_update,
fields_to_update,
Expand Down
30 changes: 30 additions & 0 deletions pulp_container/app/migrations/0039_manifest_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Generated by Django 4.2.10 on 2024-03-05 11:22
import warnings

from django.db import migrations, models


def print_warning_for_initializing_manifest_data(apps, schema_editor):
warnings.warn(
"Run 'pulpcore-manager container-handle-image-data' to move the manifests' data from artifacts to the new 'data' database field."
)


class Migration(migrations.Migration):

dependencies = [
("container", "0038_add_manifest_metadata_fields"),
]

operations = [
migrations.AddField(
model_name="manifest",
name="data",
field=models.TextField(default=""),
),
migrations.RunPython(
print_warning_for_initializing_manifest_data,
reverse_code=migrations.RunPython.noop,
elidable=True,
),
]
2 changes: 2 additions & 0 deletions pulp_container/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class Manifest(Content):
digest (models.TextField): The manifest digest.
schema_version (models.IntegerField): The manifest schema version.
media_type (models.TextField): The manifest media type.
data (models.TextField): The manifest's data in text format.
annotations (models.JSONField): Metadata stored inside the image manifest.
labels (models.JSONField): Metadata stored inside the image configuration.
is_bootable (models.BooleanField): Indicates whether the image is bootable or not.
Expand All @@ -98,6 +99,7 @@ class Manifest(Content):
digest = models.TextField(db_index=True)
schema_version = models.IntegerField()
media_type = models.TextField(choices=MANIFEST_CHOICES)
data = models.TextField(default="")

annotations = models.JSONField(default=dict)
labels = models.JSONField(default=dict)
Expand Down
95 changes: 52 additions & 43 deletions pulp_container/app/redirects.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.http import Http404
from django.shortcuts import redirect
from django.http import Http404

from pulp_container.app.exceptions import ManifestNotFound
from pulp_container.app.utils import get_accepted_media_types
Expand Down Expand Up @@ -29,11 +29,11 @@ def redirect_to_content_app(self, content_type, content_id):
f"{settings.CONTENT_ORIGIN}/pulp/container/{self.path}/{content_type}/{content_id}"
)


class FileStorageRedirects(CommonRedirects):
"""
A class which contains methods used for redirecting to the default django's file storage.
"""
def issue_manifest_redirect(self, manifest):
"""
Issue a redirect for the passed manifest.
"""
return self.redirect_to_content_app("manifests", manifest.digest)

def issue_tag_redirect(self, tag):
"""
Expand All @@ -48,11 +48,11 @@ def issue_tag_redirect(self, tag):

return self.redirect_to_content_app("manifests", tag.name)

def issue_manifest_redirect(self, manifest):
"""
Issue a redirect for the passed manifest.
"""
return self.redirect_to_content_app("manifests", manifest.digest)

class FileStorageRedirects(CommonRedirects):
"""
A class which contains methods used for redirecting to the default django's file storage.
"""

def issue_blob_redirect(self, blob):
"""
Expand All @@ -66,38 +66,6 @@ class S3StorageRedirects(CommonRedirects):
A class that implements methods for the direct retrieval of manifest objects.
"""

def issue_tag_redirect(self, tag):
"""
Issue a redirect if an accepted media type requires it or return not found if manifest
version is not supported.
"""
manifest_media_type = tag.tagged_manifest.media_type
if manifest_media_type == MEDIA_TYPE.MANIFEST_V1:
return self.redirect_to_artifact(
tag.name, tag.tagged_manifest, MEDIA_TYPE.MANIFEST_V1_SIGNED
)
elif manifest_media_type in get_accepted_media_types(self.request.headers):
return self.redirect_to_artifact(tag.name, tag.tagged_manifest, manifest_media_type)
else:
raise ManifestNotFound(reference=tag.name)

def issue_manifest_redirect(self, manifest):
"""
Directly redirect to an associated manifest's artifact.
"""
return self.redirect_to_artifact(manifest.digest, manifest, manifest.media_type)

def redirect_to_artifact(self, content_name, manifest, manifest_media_type):
"""
Search for the passed manifest's artifact and issue a redirect.
"""
try:
artifact = manifest._artifacts.get()
except ObjectDoesNotExist:
raise Http404(f"An artifact for '{content_name}' was not found")

return self.redirect_to_object_storage(artifact, manifest_media_type)

def issue_blob_redirect(self, blob):
"""
Redirect to the passed blob or stream content when an associated artifact is not present.
Expand All @@ -123,6 +91,47 @@ def redirect_to_object_storage(self, artifact, return_media_type):
)
return redirect(content_url)

# TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifests
def redirect_to_artifact(self, content_name, manifest, manifest_media_type):
"""
Search for the passed manifest's artifact and issue a redirect.
"""
try:
artifact = manifest._artifacts.get()
except ObjectDoesNotExist:
raise Http404(f"An artifact for '{content_name}' was not found")

return self.redirect_to_object_storage(artifact, manifest_media_type)

def issue_tag_redirect(self, tag):
"""
Issue a redirect if an accepted media type requires it or return not found if manifest
version is not supported.
"""
if tag.tagged_manifest.data:
return super().issue_tag_redirect(tag)

manifest_media_type = tag.tagged_manifest.media_type
if manifest_media_type == MEDIA_TYPE.MANIFEST_V1:
return self.redirect_to_artifact(
tag.name, tag.tagged_manifest, MEDIA_TYPE.MANIFEST_V1_SIGNED
)
elif manifest_media_type in get_accepted_media_types(self.request.headers):
return self.redirect_to_artifact(tag.name, tag.tagged_manifest, manifest_media_type)
else:
raise ManifestNotFound(reference=tag.name)

def issue_manifest_redirect(self, manifest):
"""
Directly redirect to an associated manifest's artifact.
"""
if manifest.data:
return super().issue_manifest_redirect(manifest)

return self.redirect_to_artifact(manifest.digest, manifest, manifest.media_type)

# END OF BACKWARD COMPATIBILITY


class AzureStorageRedirects(S3StorageRedirects):
"""
Expand Down
Loading

0 comments on commit 5178773

Please sign in to comment.