Skip to content

Commit

Permalink
Add includes/excludes fields to filter remotes
Browse files Browse the repository at this point in the history
closes: #459
  • Loading branch information
git-hyagi committed Jul 2, 2024
1 parent a832b9f commit 993fe47
Show file tree
Hide file tree
Showing 11 changed files with 268 additions and 67 deletions.
2 changes: 2 additions & 0 deletions CHANGES/459.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added support for filtering remote repositories in pull-through caching using `includes` and
`excludes` fields. These fields can be set on pull-through caching remote objects.
24 changes: 24 additions & 0 deletions pulp_container/app/migrations/0040_add_remote_repo_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 4.2.13 on 2024-06-28 10:34

import django.contrib.postgres.fields
from django.db import migrations, models


class Migration(migrations.Migration):
    """
    Add the nullable `excludes` and `includes` array-of-text columns to the
    `containerpullthroughremote` model.
    """

    dependencies = [("container", "0039_manifest_data")]

    # Both columns share the same definition; a fresh field instance is built
    # for each AddField operation, in the same order as before.
    operations = [
        migrations.AddField(
            model_name="containerpullthroughremote",
            name=column_name,
            field=django.contrib.postgres.fields.ArrayField(
                base_field=models.TextField(null=True),
                null=True,
                size=None,
            ),
        )
        for column_name in ("excludes", "includes")
    ]
34 changes: 34 additions & 0 deletions pulp_container/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,11 @@ class ContainerPullThroughRemote(Remote, AutoAddObjPermsMixin):
from within a single instance of this remote.
"""

TYPE = "pull-through"

includes = fields.ArrayField(models.TextField(null=True), null=True)
excludes = fields.ArrayField(models.TextField(null=True), null=True)

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
permissions = [
Expand All @@ -512,6 +517,35 @@ class Meta:
),
]

def model_to_dict(self):
    """
    Return a dictionary holding a fixed subset of this remote's attributes.

    Only the attributes named below are exported, followed by the primary key
    of the remote's domain under the "pulp_domain_id" key. Any attribute not
    listed here is deliberately left out of the result.
    """
    exported_attributes = (
        "pulp_labels",
        "url",
        "ca_cert",
        "client_cert",
        "client_key",
        "tls_validation",
        "username",
        "password",
        "proxy_url",
        "proxy_username",
        "proxy_password",
        "download_concurrency",
        "max_retries",
        "policy",
        "total_timeout",
        "connect_timeout",
        "sock_connect_timeout",
        "sock_read_timeout",
        "headers",
        "rate_limit",
    )
    remote_data = {attribute: getattr(self, attribute) for attribute in exported_attributes}
    remote_data["pulp_domain_id"] = self.pulp_domain.pk
    return remote_data


class ManifestSigningService(SigningService):
"""
Expand Down
37 changes: 14 additions & 23 deletions pulp_container/app/registry_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
from pulp_container.app.utils import (
determine_media_type,
extract_data_from_signature,
filter_resource,
has_task_completed,
validate_manifest,
)
Expand All @@ -96,16 +97,6 @@

log = logging.getLogger(__name__)

# Keys that _get_pull_through_remote_data() pops from a
# ContainerPullThroughRemote values() row before returning the remaining data.
IGNORED_PULL_THROUGH_REMOTE_ATTRIBUTES = [
"remote_ptr_id",
"pulp_type",
"pulp_last_updated",
"pulp_created",
"pulp_id",
"url",
"name",
]


class ContentRenderer(BaseRenderer):
"""
Expand Down Expand Up @@ -309,18 +300,27 @@ def get_pull_through_drv(self, path):
if not pull_through_cache_distribution:
raise RepositoryNotFound(name=path)

upstream_name = path.split(pull_through_cache_distribution.base_path, maxsplit=1)[1].strip(
"/"
)
pull_through_remote = models.ContainerPullThroughRemote.objects.get(
pk=pull_through_cache_distribution.remote_id
)
if not filter_resource(
upstream_name, pull_through_remote.includes, pull_through_remote.excludes
):
raise RepositoryNotFound(name=path)

try:
with transaction.atomic():
repository, _ = models.ContainerRepository.objects.get_or_create(
name=path, retain_repo_versions=1
)

remote_data = _get_pull_through_remote_data(pull_through_cache_distribution)
upstream_name = path.split(pull_through_cache_distribution.base_path, maxsplit=1)[1]
remote_data = pull_through_remote.model_to_dict()
remote, _ = models.ContainerRemote.objects.get_or_create(
name=path,
upstream_name=upstream_name.strip("/"),
url=pull_through_cache_distribution.remote.url,
upstream_name=upstream_name,
**remote_data,
)

Expand Down Expand Up @@ -389,15 +389,6 @@ def create_dr(self, path, request):
return distribution, repository


def _get_pull_through_remote_data(root_cache_distribution):
    """
    Fetch the values of the pull-through remote referenced by the given
    distribution, without the keys listed in
    IGNORED_PULL_THROUGH_REMOTE_ATTRIBUTES.
    """
    matching_remotes = models.ContainerPullThroughRemote.objects.filter(
        pk=root_cache_distribution.remote_id
    )
    row = matching_remotes.values()[0]
    return {
        key: value
        for key, value in row.items()
        if key not in IGNORED_PULL_THROUGH_REMOTE_ATTRIBUTES
    }


class BearerTokenView(APIView):
"""
Hand out anonymous or authenticated bearer tokens.
Expand Down
26 changes: 25 additions & 1 deletion pulp_container/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,9 +307,33 @@ class ContainerPullThroughRemoteSerializer(RemoteSerializer):
"""

policy = serializers.ChoiceField(choices=[Remote.ON_DEMAND], default=Remote.ON_DEMAND)
includes = serializers.ListField(
child=serializers.CharField(max_length=255),
allow_null=True,
required=False,
help_text=_(
"""
A list of remotes to include during pull-through caching.
Wildcards *, ? are recognized.
'includes' is evaluated before 'excludes'.
"""
),
)
excludes = serializers.ListField(
child=serializers.CharField(max_length=255),
allow_null=True,
required=False,
help_text=_(
"""
A list of remotes to exclude during pull-through caching.
Wildcards *, ? are recognized.
'excludes' is evaluated after 'includes'.
"""
),
)

class Meta:
fields = RemoteSerializer.Meta.fields
fields = RemoteSerializer.Meta.fields + ("includes", "excludes")
model = models.ContainerPullThroughRemote


Expand Down
28 changes: 4 additions & 24 deletions pulp_container/app/tasks/sync_stages.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import aiohttp
import asyncio
import base64
import fnmatch
import hashlib
import json
import logging
Expand Down Expand Up @@ -34,6 +33,7 @@
determine_media_type,
validate_manifest,
calculate_digest,
filter_resources,
get_content_data,
)

Expand Down Expand Up @@ -118,7 +118,9 @@ async def run(self):
repo_name = self.remote.namespaced_upstream_name
tag_list_url = "/v2/{name}/tags/list".format(name=repo_name)
tag_list = await self.get_paginated_tag_list(tag_list_url, repo_name)
tag_list = self.filter_tags(tag_list)
tag_list = filter_resources(
tag_list, self.remote.include_tags, self.remote.exclude_tags
)
await pb.aincrement()

for tag_name in tag_list:
Expand Down Expand Up @@ -303,28 +305,6 @@ async def resolve_flush(self):
await self.put(signature_dc)
self.signature_dcs.clear()

def filter_tags(self, tag_list):
    """
    Narrow `tag_list` down using the remote's include and exclude patterns.

    The include patterns (if any) are applied first, then the exclude
    patterns (if any). A missing or empty pattern list leaves the tags as is.
    """

    def matches_any(tag, patterns):
        return any(fnmatch.fnmatch(tag, pattern) for pattern in patterns)

    selected = tag_list

    wanted = self.remote.include_tags
    if wanted:
        selected = [tag for tag in selected if matches_any(tag, wanted)]

    unwanted = self.remote.exclude_tags
    if unwanted:
        selected = [tag for tag in selected if not matches_any(tag, unwanted)]

    return selected

async def get_paginated_tag_list(self, rel_link, repo_name):
"""
Handle registries that have pagination enabled.
Expand Down
43 changes: 42 additions & 1 deletion pulp_container/app/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import base64
import hashlib
import fnmatch
import re
import subprocess
import gnupg
Expand All @@ -11,11 +12,15 @@
from jsonschema import Draft7Validator, validate, ValidationError
from django.core.files.storage import default_storage as storage
from django.db import IntegrityError
from functools import partial
from rest_framework.exceptions import Throttled

from pulpcore.plugin.models import Artifact, Task

from pulp_container.constants import MANIFEST_MEDIA_TYPES, MEDIA_TYPE
from pulp_container.constants import (
MANIFEST_MEDIA_TYPES,
MEDIA_TYPE,
)
from pulp_container.app.exceptions import ManifestInvalid
from pulp_container.app.json_schemas import (
OCI_INDEX_SCHEMA,
Expand Down Expand Up @@ -309,3 +314,39 @@ def get_content_data(saved_artifact):
raw_data = file.read()
content_data = json.loads(raw_data)
return content_data, raw_data


def include(x, patterns):
    """
    Tell whether `x` matches at least one of the shell-style wildcard `patterns`.

    Returns False when `patterns` is empty.
    """
    for pattern in patterns:
        if fnmatch.fnmatch(x, pattern):
            return True
    return False


def exclude(x, patterns):
    """
    Tell whether `x` matches none of the `patterns`, i.e. it survives an
    exclusion filter built from them.
    """
    if include(x, patterns):
        return False
    return True


def filter_resource(element, include_patterns, exclude_patterns):
    """
    Decide whether a single element (e.g. an upstream repository name) passes
    the include/exclude filters.

    A missing or empty pattern list places no restriction on its side of the
    filter, which mirrors how `filter_resources` treats its arguments.
    Previously an element was rejected whenever only `exclude_patterns` was
    set, because an empty include list never matches anything.

    Args:
        element: The name to check.
        include_patterns: Wildcard patterns (fnmatch syntax) or None. When
            non-empty, the element must match at least one of them.
        exclude_patterns: Wildcard patterns (fnmatch syntax) or None. When
            non-empty, the element must match none of them.

    Returns:
        True if the element is allowed, False otherwise.
    """

    def matches_any(patterns):
        return any(fnmatch.fnmatch(element, pattern) for pattern in patterns)

    # Includes are evaluated first, then excludes.
    if include_patterns and not matches_any(include_patterns):
        return False
    if exclude_patterns and matches_any(exclude_patterns):
        return False
    return True


def filter_resources(element_list, include_patterns, exclude_patterns):
    """
    Build the list of elements (tags or repositories) that may be pulled or
    synced, given the include_patterns and exclude_patterns filters.

    Include patterns, when provided, are applied before exclude patterns. A
    missing or empty pattern list skips that step. A new list is always returned.
    """
    kept = element_list
    if include_patterns:
        kept = [element for element in kept if include(element, include_patterns)]
    if exclude_patterns:
        kept = [element for element in kept if exclude(element, exclude_patterns)]
    return list(kept)
20 changes: 2 additions & 18 deletions pulp_container/tests/functional/api/test_pull_through_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,11 @@

from pulp_container.tests.functional.constants import (
REGISTRY_V2,
REGISTRY_V2_FEED_URL,
PULP_HELLO_WORLD_REPO,
PULP_FIXTURE_1,
)


@pytest.fixture
def pull_through_distribution(
    gen_object_with_cleanup,
    container_pull_through_remote_api,
    container_pull_through_distribution_api,
):
    """
    Create a pull-through remote pointing at REGISTRY_V2_FEED_URL and a
    pull-through distribution that uses it, both with random names, and
    return the distribution.
    """
    remote_body = {"name": str(uuid4()), "url": REGISTRY_V2_FEED_URL}
    remote = gen_object_with_cleanup(container_pull_through_remote_api, remote_body)

    distribution_body = {
        "name": str(uuid4()),
        "base_path": str(uuid4()),
        "remote": remote.pulp_href,
    }
    return gen_object_with_cleanup(container_pull_through_distribution_api, distribution_body)


@pytest.fixture
def pull_and_verify(
add_to_cleanup,
Expand All @@ -42,6 +24,7 @@ def pull_and_verify(
):
def _pull_and_verify(images, pull_through_distribution):
tags_to_verify = []
pull_through_distribution = pull_through_distribution()
for version, image_path in enumerate(images, start=1):
remote_image_path = f"{REGISTRY_V2}/{image_path}"
local_image_path = f"{pull_through_distribution.base_path}/{image_path}"
Expand Down Expand Up @@ -113,6 +96,7 @@ def test_conflicting_names_and_paths(
local_registry,
monitor_task,
):
pull_through_distribution = pull_through_distribution()
local_image_path = f"{pull_through_distribution.base_path}/{str(uuid4())}"

remote = container_remote_factory(name=local_image_path)
Expand Down
Loading

0 comments on commit 993fe47

Please sign in to comment.