Skip to content

Commit

Permalink
Add pull-through caching
Browse files Browse the repository at this point in the history
closes #507
  • Loading branch information
lubosmj committed Oct 29, 2023
1 parent 4df9e2e commit 826e8c3
Show file tree
Hide file tree
Showing 17 changed files with 1,034 additions and 104 deletions.
3 changes: 3 additions & 0 deletions CHANGES/507.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added support for pull-through caching. Users can now configure a dedicated distribution and remote
linked to an external registry without specifying a repository name (upstream name). Pulp downloads
missing content automatically if requested and acts as a caching proxy.
35 changes: 35 additions & 0 deletions docs/workflows/host.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,38 @@ Docker Output::
In general, the automatic conversion cannot be performed when the content is not available
in the storage. Therefore, it may be successful only if the content was previously synced
with the ``immediate`` policy.


Pull-Through Caching
--------------------

The Pull-Through Caching feature offers an alternative way to host content by leveraging a **remote
registry** as the source of truth. This eliminates the need for repository synchronization, reduces
storage overhead, and ensures up-to-date images. Pulp acts as a **caching proxy** and stores images
in a local repository.

Administering the caching::

# initialize a pull-through remote (the concept of upstream-name is not applicable here)
REMOTE_HREF=$(http ${BASE_ADDR}/pulp/api/v3/remotes/container/pull-through/ name=docker-cache url=https://registry-1.docker.io | jq -r ".pulp_href")

# create a specialized distribution linked to the initialized remote
http ${BASE_ADDR}/pulp/api/v3/distributions/container/pull-through/ remote=${REMOTE_HREF} name=docker-cache base_path=docker-cache

Downloading content::

podman pull localhost:24817/docker-cache/library/busybox

In the example above, the image "busybox" is pulled from the "docker-cache" distribution, acting as
a transparent caching layer.

By incorporating the Pull-Through Caching feature, administrators can **reduce external network
dependencies** and ensure a more reliable and responsive container deployment system in production
environments.

.. note::
Pulp creates repositories that maintain a single repository version for user-pulled images.
Thus, only the latest repository version is retained. For instance, when pulling "debian:10",
a "debian" repository with the "10" tag is established. Subsequent pulls such as "debian:11"
result in a new repository version that incorporates both tags while removing the previous
version. Repositories and their content remain manageable through standard API endpoints.
17 changes: 12 additions & 5 deletions pulp_container/app/cache.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from django.core.exceptions import ObjectDoesNotExist
from django.db.models import F, Value

from pulpcore.plugin.cache import CacheKeys, AsyncContentCache, SyncContentCache

from pulp_container.app.models import ContainerDistribution
from pulp_container.app.models import ContainerDistribution, ContainerPullThroughDistribution
from pulp_container.app.exceptions import RepositoryNotFound

ACCEPT_HEADER_KEY = "accept_header"
Expand Down Expand Up @@ -67,11 +68,17 @@ def find_base_path_cached(request, cached):
return path
else:
try:
distro = ContainerDistribution.objects.select_related(
"repository", "repository_version"
).get(base_path=path)
distro = ContainerDistribution.objects.get(base_path=path)
except ObjectDoesNotExist:
raise RepositoryNotFound(name=path)
distro = (
ContainerPullThroughDistribution.objects.annotate(path=Value(path))
.filter(path__startswith=F("base_path"))
.order_by("-base_path")
.first()
)
if not distro:
raise RepositoryNotFound(name=path)

return distro.base_path


Expand Down
16 changes: 15 additions & 1 deletion pulp_container/app/downloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re

from aiohttp.client_exceptions import ClientResponseError
from collections import namedtuple
from logging import getLogger
from multidict import MultiDict
from urllib import parse
Expand All @@ -15,6 +16,8 @@

log = getLogger(__name__)

InMemoryDownloadResult = namedtuple("InMemoryDownloadResult", ["data", "headers", "status_code"])


class RegistryAuthHttpDownloader(HttpDownloader):
"""
Expand All @@ -24,13 +27,14 @@ class RegistryAuthHttpDownloader(HttpDownloader):
"""

registry_auth = {"bearer": None, "basic": None}
token_lock = asyncio.Lock()

def __init__(self, *args, **kwargs):
"""
Initialize the downloader.
"""
self.remote = kwargs.pop("remote")
self.token_lock = asyncio.Lock()

super().__init__(*args, **kwargs)

async def _run(self, handle_401=True, extra_data=None):
Expand Down Expand Up @@ -174,6 +178,16 @@ def auth_header(token, basic_auth):
return {}


class InMemoryDownloader(RegistryAuthHttpDownloader):
    """
    A downloader, built on RegistryAuthHttpDownloader, that keeps downloaded data in memory.

    The handled response is returned as an ``InMemoryDownloadResult``.
    """

    async def _handle_response(self, response):
        """
        Read the response body as text and bundle it with the response metadata.

        Args:
            response: The HTTP response to consume. It must offer an awaitable ``text()`` and
                the ``headers`` and ``status`` attributes.

        Returns:
            InMemoryDownloadResult: The body text, the response headers and the status code.
        """
        body = await response.text()
        # Field order of the namedtuple is (data, headers, status_code).
        return InMemoryDownloadResult(body, response.headers, response.status)


class NoAuthSignatureDownloader(HttpDownloader):
"""A downloader class suited for signature downloads."""

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Generated by Django 4.2.6 on 2023-10-25 20:04

from django.db import migrations, models
import django.db.models.deletion
import pulpcore.app.models.access_policy


class Migration(migrations.Migration):
# Schema migration adding the two pull-through models and linking ContainerDistribution to
# the new pull-through distribution model.

# Migrations of the "core" and "container" apps that must be applied before this one.
dependencies = [
('core', '0108_task_versions'),
('container', '0036_containerpushrepository_pending_blobs_manifests'),
]

operations = [
# Multi-table child of core.Distribution; its only column is the parent link, which also
# serves as the primary key.
migrations.CreateModel(
name='ContainerPullThroughDistribution',
fields=[
('distribution_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='core.distribution')),
],
options={
'permissions': [('manage_roles_containerpullthroughdistribution', 'Can manage role assignments on pull-through cache distribution')],
'default_related_name': '%(app_label)s_%(model_name)s',
},
bases=('core.distribution', pulpcore.app.models.access_policy.AutoAddObjPermsMixin),
),
# Multi-table child of core.Remote; again only the parent link is stored.
migrations.CreateModel(
name='ContainerPullThroughRemote',
fields=[
('remote_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='core.remote')),
],
options={
'permissions': [('manage_roles_containerpullthroughremote', 'Can manage role assignments on pull-through container remote')],
'default_related_name': '%(app_label)s_%(model_name)s',
},
bases=('core.remote', pulpcore.app.models.access_policy.AutoAddObjPermsMixin),
),
# Nullable foreign key from ContainerDistribution to the pull-through distribution created
# above. Deleting the pull-through distribution cascades to the referencing rows, and the
# reverse accessor on the pull-through distribution is "distributions".
migrations.AddField(
model_name='containerdistribution',
name='pull_through_distribution',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='distributions', to='container.containerpullthroughdistribution'),
),
]
147 changes: 147 additions & 0 deletions pulp_container/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,33 @@ def noauth_download_factory(self):
)
return self._noauth_download_factory

@property
def in_memory_download_factory(self):
    """
    Return the lazily created factory that produces in-memory downloaders.

    Use it in workflows where the downloaded content is reasonably small, for instance when
    fetching manifests or manifest lists.

    The DownloaderFactory is instantiated on first access and stored on the instance, so later
    accesses return the same object.

    Returns:
        DownloaderFactory: The factory used by get_in_memory_downloader(); it maps both the
            "http" and "https" schemes to downloaders.InMemoryDownloader.
    """
    if not hasattr(self, "_in_memory_download_factory"):
        in_memory_downloader = downloaders.InMemoryDownloader
        self._in_memory_download_factory = DownloaderFactory(
            self,
            downloader_overrides=dict.fromkeys(("http", "https"), in_memory_downloader),
        )
    return self._in_memory_download_factory

def get_downloader(self, remote_artifact=None, url=None, **kwargs):
"""
Get a downloader from either a RemoteArtifact or URL that is configured with this Remote.
Expand Down Expand Up @@ -388,6 +415,36 @@ def get_noauth_downloader(self, remote_artifact=None, url=None, **kwargs):
**kwargs,
)

def get_in_memory_downloader(self, remote_artifact=None, url=None, **kwargs):
    """
    Build an in-memory downloader for either a RemoteArtifact or a URL.

    Exactly one of `remote_artifact` and `url` has to be provided. If neither or both are
    passed, a ValueError is raised.

    Args:
        remote_artifact (:class:`~pulpcore.app.models.RemoteArtifact`): The RemoteArtifact to
            download.
        url (str): The URL to download.
        kwargs (dict): This accepts the parameters of
            :class:`~pulpcore.plugin.download.BaseDownloader`.

    Raises:
        ValueError: If neither remote_artifact and url are passed, or if both are passed.

    Returns:
        subclass of :class:`~pulpcore.plugin.download.BaseDownloader`: A downloader that
            is configured with the remote settings.
    """
    # The downloader always receives this remote, overriding any caller-supplied "remote".
    options = {**kwargs, "remote": self}
    return super().get_downloader(
        remote_artifact=remote_artifact,
        url=url,
        download_factory=self.in_memory_download_factory,
        **options,
    )

@property
def namespaced_upstream_name(self):
"""
Expand All @@ -413,6 +470,72 @@ class Meta:
]


class ContainerPullThroughRemote(Remote, AutoAddObjPermsMixin):
    """
    A remote dedicated to pull-through caching.

    It omits the requirement for an upstream name.
    """

    TYPE = "pull-through"

    @property
    def download_factory(self):
        """
        Return the lazily created factory of downloaders that support registry auth.

        The DownloaderFactory is instantiated on first access and stored on the instance, so
        later accesses return the same object.

        Returns:
            DownloaderFactory: The factory used by get_downloader(); it maps both the "http"
                and "https" schemes to downloaders.RegistryAuthHttpDownloader.
        """
        if not hasattr(self, "_download_factory"):
            registry_downloader = downloaders.RegistryAuthHttpDownloader
            self._download_factory = DownloaderFactory(
                self,
                downloader_overrides=dict.fromkeys(("http", "https"), registry_downloader),
            )
        return self._download_factory

    def get_downloader(self, remote_artifact=None, url=None, **kwargs):
        """
        Build a downloader for either a RemoteArtifact or a URL, configured with this remote.

        Exactly one of `remote_artifact` and `url` has to be provided. If neither or both are
        passed, a ValueError is raised.

        Args:
            remote_artifact (:class:`~pulpcore.app.models.RemoteArtifact`): The RemoteArtifact
                to download.
            url (str): The URL to download.
            kwargs (dict): This accepts the parameters of
                :class:`~pulpcore.plugin.download.BaseDownloader`.

        Raises:
            ValueError: If neither remote_artifact and url are passed, or if both are passed.

        Returns:
            subclass of :class:`~pulpcore.plugin.download.BaseDownloader`: A downloader that
                is configured with the remote settings.
        """
        # The downloader always receives this remote, overriding any caller-supplied "remote".
        options = {**kwargs, "remote": self}
        return super().get_downloader(remote_artifact=remote_artifact, url=url, **options)

    class Meta:
        permissions = [
            (
                "manage_roles_containerpullthroughremote",
                "Can manage role assignments on pull-through container remote",
            ),
        ]
        default_related_name = "%(app_label)s_%(model_name)s"


class ManifestSigningService(SigningService):
"""
Signing service used for creating container signatures.
Expand Down Expand Up @@ -565,6 +688,23 @@ def remove_pending_content(self, repository_version):
self.pending_manifests.remove(*Manifest.objects.filter(pk__in=added_content))


class ContainerPullThroughDistribution(Distribution, AutoAddObjPermsMixin):
    """
    A distribution dedicated to pull-through caching.

    Regular ContainerDistribution objects refer to it through their
    ``pull_through_distribution`` foreign key; the reverse accessor is ``distributions``.
    """

    TYPE = "pull-through"

    class Meta:
        permissions = [
            (
                "manage_roles_containerpullthroughdistribution",
                "Can manage role assignments on pull-through cache distribution",
            ),
        ]
        default_related_name = "%(app_label)s_%(model_name)s"


class ContainerDistribution(Distribution, AutoAddObjPermsMixin):
"""
A container distribution defines how a repository version is distributed by Pulp's webserver.
Expand Down Expand Up @@ -595,6 +735,13 @@ class ContainerDistribution(Distribution, AutoAddObjPermsMixin):
)
description = models.TextField(null=True)

pull_through_distribution = models.ForeignKey(
ContainerPullThroughDistribution,
related_name="distributions",
on_delete=models.CASCADE,
null=True,
)

def get_repository_version(self):
"""
Returns the repository version that is supposed to be served by this ContainerDistribution.
Expand Down
Loading

0 comments on commit 826e8c3

Please sign in to comment.