Skip to content

Commit

Permalink
refact(discovery): introduce container class for discovery scope
Browse files Browse the repository at this point in the history
  • Loading branch information
davidlougheed committed Sep 3, 2024
1 parent f2cb8d5 commit 9e03bc4
Show file tree
Hide file tree
Showing 14 changed files with 390 additions and 200 deletions.
4 changes: 2 additions & 2 deletions chord_metadata_service/chord/tests/test_api_bento_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_dataset_data_type_summary(self):

r = self.dt_authz_full_get(
reverse("chord-dataset-data-type-summary", kwargs={"dataset_id": "not-a-uuid"}))
self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)

def _dataset_data_type_url(self, dt: str, ds_id: str = ""):
return reverse("chord-dataset-data-type", kwargs={
Expand Down Expand Up @@ -121,7 +121,7 @@ def test_get_dataset_data_type(self):
def test_get_dataset_data_type_dne(self):
subtest_params = [
(self._dataset_data_type_url(DATA_TYPE_PHENOPACKET, str(uuid.uuid4())), status.HTTP_404_NOT_FOUND),
(self._dataset_data_type_url(DATA_TYPE_PHENOPACKET, "not-a-uuid"), status.HTTP_400_BAD_REQUEST),
(self._dataset_data_type_url(DATA_TYPE_PHENOPACKET, "not-a-uuid"), status.HTTP_404_NOT_FOUND),
(self._dataset_data_type_url("does-not-exist"), status.HTTP_404_NOT_FOUND),
]

Expand Down
56 changes: 40 additions & 16 deletions chord_metadata_service/chord/tests/test_api_data_types.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import uuid

from django.test import override_settings
from django.urls import reverse
from rest_framework import status

from chord_metadata_service.authz.tests.helpers import AuthzAPITestCase, PermissionsTestCaseMixin
from chord_metadata_service.discovery.tests.constants import DISCOVERY_CONFIG_TEST
from chord_metadata_service.discovery.utils import get_discovery_scope
from chord_metadata_service.phenopackets.tests.helpers import PhenoTestCase

from ..data_types import DATA_TYPE_EXPERIMENT, DATA_TYPE_PHENOPACKET, DATA_TYPES
from ..views_data_types import get_count_for_data_type
from ...discovery.exceptions import DiscoveryScopeException

POST_GET = ("POST", "GET")

Expand All @@ -17,38 +20,59 @@

class DataTypeHelperTest(PhenoTestCase, PermissionsTestCaseMixin):
@staticmethod
async def get_count_for_phenopackets(permissions, project=None, dataset=None):
return await get_count_for_data_type(
DATA_TYPE_PHENOPACKET, project, dataset, DISCOVERY_CONFIG_TEST, permissions
)
async def get_count_for_phenopackets(permissions, project: str | None = None, dataset: str | None = None):
scope = await get_discovery_scope(project, dataset)
return await get_count_for_data_type(DATA_TYPE_PHENOPACKET, scope, permissions)

@override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST)
async def test_data_type_count(self):
self.assertEqual(await self.get_count_for_phenopackets(self.permissions_full), 1)

@override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST)
async def test_data_type_count_censored(self):
self.assertEqual(await self.get_count_for_phenopackets(self.permissions_counts), 0) # censored
self.assertEqual(await self.get_count_for_phenopackets(self.permissions_bool), 0) # censored
self.assertEqual(await self.get_count_for_phenopackets(self.permissions_none), 0) # censored

@override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST)
async def test_data_type_count_bad_project_id(self):
with self.assertRaises(ValueError):
with self.assertRaises(DiscoveryScopeException):
await self.get_count_for_phenopackets(self.permissions_full, project="not-uuid")

with self.assertRaises(ValueError):
@override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST)
async def test_data_type_count_bad_project_id_with_dataset_id(self):
with self.assertRaises(DiscoveryScopeException):
await self.get_count_for_phenopackets(self.permissions_full, project="not-uuid", dataset=str(uuid.uuid4()))

@override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST)
async def test_data_type_count_bad_dataset_id(self):
with self.assertRaises(ValueError):
await self.get_count_for_phenopackets(self.permissions_full, project=str(uuid.uuid4()), dataset="not-uuid")
with self.assertRaises(DiscoveryScopeException):
await self.get_count_for_phenopackets(
self.permissions_full, project=str(self.project.identifier), dataset="not-uuid"
)

with self.assertRaises(ValueError):
@override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST)
async def test_data_type_count_project_dne(self):
with self.assertRaises(DiscoveryScopeException):
await self.get_count_for_phenopackets(self.permissions_full, project=str(uuid.uuid4()))

@override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST)
async def test_data_type_count_dataset_dne(self):
with self.assertRaises(DiscoveryScopeException):
await self.get_count_for_phenopackets(
self.permissions_full, project=str(self.project.identifier), dataset=str(uuid.uuid4())
)

@override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST)
async def test_data_type_count_bad_dataset_id_2(self): # TODO: redo
with self.assertRaises(DiscoveryScopeException):
await self.get_count_for_phenopackets(self.permissions_full, project=None, dataset="not-uuid")

@override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST)
async def test_data_type_count_bad_data_type(self):
scope = await get_discovery_scope(None, None)
with self.assertRaises(ValueError):
await get_count_for_data_type(
DATA_TYPE_NOT_REAL, None, None, DISCOVERY_CONFIG_TEST, self.permissions_full
)
await get_count_for_data_type(DATA_TYPE_NOT_REAL, scope, self.permissions_full)


class DataTypeTest(AuthzAPITestCase, PermissionsTestCaseMixin):
Expand All @@ -64,12 +88,12 @@ def test_data_type_list(self):
def test_data_type_list_non_uuid_project(self):
# Non-UUID project
r = self.dt_authz_counts_get(reverse("data-type-list"), {"project": "a"})
self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)

def test_data_type_list_non_uuid_dataset(self):
# Non-UUID dataset
r = self.dt_authz_counts_get(reverse("data-type-list"), {"dataset": "a"})
self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)

def test_data_type_detail(self):
# counts permission
Expand Down Expand Up @@ -108,13 +132,13 @@ def test_data_type_detail_non_uuid_project(self):
# Non-UUID project
r = self.dt_authz_counts_get(
reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_PHENOPACKET}), {"project": "a"})
self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)

def test_data_type_detail_non_uuid_dataset(self):
# Non-UUID dataset
r = self.dt_authz_counts_get(
reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_PHENOPACKET}), {"dataset": "a"})
self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)

def test_data_type_detail_404(self):
r = self.dt_authz_counts_get(reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_NOT_REAL}))
Expand Down
97 changes: 43 additions & 54 deletions chord_metadata_service/chord/views_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,10 @@
from chord_metadata_service.authz.permissions import BentoAllowAny, BentoDeferToHandler
from chord_metadata_service.authz.types import DataPermissionsDict
from chord_metadata_service.discovery.censorship import thresholded_count
from chord_metadata_service.discovery.types import DiscoveryConfig
from chord_metadata_service.discovery.utils import (
get_discovery,
get_project_id_and_dataset_id_from_request,
get_request_discovery,
get_discovery_data_type_permissions,
ValidatedDiscoveryScope,
get_request_discovery_scope,
)
from chord_metadata_service.chord.models import Dataset, Project
from chord_metadata_service.cleanup import run_all_cleanup
Expand All @@ -32,6 +30,7 @@
from chord_metadata_service.phenopackets.models import Phenopacket

from . import data_types as dt
from ..discovery.exceptions import DiscoveryScopeException

QUERYSET_FN: dict[str, Callable] = {
dt.DATA_TYPE_EXPERIMENT: lambda dataset_id: Experiment.objects.filter(dataset_id=dataset_id),
Expand Down Expand Up @@ -67,17 +66,15 @@ async def _filtered_query(data_type: str, project: str | None = None, dataset: s

async def get_count_for_data_type(
data_type: str,
project: str | None,
dataset: str | None,
discovery: DiscoveryConfig,
scope: ValidatedDiscoveryScope,
permissions: DataPermissionsDict,
) -> int:
"""
Returns the count for a particular data type. If dataset is provided, project will be ignored. If neither are
provided, the count will be for the whole node.
"""
q = await _filtered_query(data_type, project, dataset)
return thresholded_count(await q.acount(), discovery, permissions)
q = await _filtered_query(data_type, scope.project_id, scope.dataset_id)
return thresholded_count(await q.acount(), await scope.get_discovery(), permissions)


async def get_last_ingested_for_data_type(data_type: str, project: str | None = None,
Expand All @@ -95,19 +92,17 @@ async def get_last_ingested_for_data_type(data_type: str, project: str | None =
async def make_data_type_response_object(
data_type_id: str,
data_type_details: dict,
project: str | None,
dataset: str | None,
discovery: DiscoveryConfig,
scope: ValidatedDiscoveryScope,
permissions: DataPermissionsDict,
) -> dict:
return {
**data_type_details,
"id": data_type_id,
**(
{"count": await get_count_for_data_type(data_type_id, project, dataset, discovery, permissions)}
{"count": await get_count_for_data_type(data_type_id, scope, permissions)}
if permissions["counts"] else {}
),
"last_ingested": await get_last_ingested_for_data_type(data_type_id, project, dataset)
"last_ingested": await get_last_ingested_for_data_type(data_type_id, scope.project_id, scope.dataset_id)
}


Expand All @@ -116,22 +111,20 @@ async def make_data_type_response_object(
async def data_type_list(request: DrfRequest):
# TODO: Permissions: only return counts when we are authenticated/have access to counts or full data.

project_id, dataset_id = get_project_id_and_dataset_id_from_request(request)

try:
discovery, dt_permissions = await asyncio.gather(
get_request_discovery(request), get_discovery_data_type_permissions(request)
)
except ValidationError as e:
# UUID most likely - used to be handled in get_count_for_data_type inside make_data_type_response_object, but
# now triggered earlier by discovery scoping
return Response(errors.bad_request_error(str(e)), status=status.HTTP_400_BAD_REQUEST)
discovery_scope = await get_request_discovery_scope(request)
except DiscoveryScopeException as e:
# Does not exist, or a UUID validation error - used to be triggered later but scope validation does some of the
# Django validation for us.
return Response(e.message, status=status.HTTP_404_NOT_FOUND)

discovery, dt_permissions = await asyncio.gather(
discovery_scope.get_discovery(), get_discovery_data_type_permissions(request, discovery_scope)
)

dt_response: list[dict] = list(
await asyncio.gather(*(
make_data_type_response_object(
dt_id, dt_d, project_id, dataset_id, discovery, dt_permissions[dt_id]
)
make_data_type_response_object(dt_id, dt_d, discovery_scope, dt_permissions[dt_id])
for dt_id, dt_d in dt.DATA_TYPES.items()
))
)
Expand All @@ -152,21 +145,21 @@ async def data_type_detail(request: DrfRequest, data_type: str):
if data_type not in dt.DATA_TYPES:
return Response(errors.not_found_error(f"Data type {data_type} not found"), status=status.HTTP_404_NOT_FOUND)

project_id, dataset_id = get_project_id_and_dataset_id_from_request(request)

try:
# TODO: just get the one data type
discovery, dt_permissions = await asyncio.gather(
get_request_discovery(request), get_discovery_data_type_permissions(request)
)
except ValidationError as e:
# UUID most likely - used to be handled in get_count_for_data_type inside make_data_type_response_object, but
# now triggered earlier by discovery scoping
return bad_request_from_exc(e)
discovery_scope = await get_request_discovery_scope(request)
except DiscoveryScopeException as e:
# Does not exist, or a UUID validation error - used to be triggered later but scope validation does some of the
# Django validation for us.
return Response(e.message, status=status.HTTP_404_NOT_FOUND)

# TODO: just get the one data type
discovery, dt_permissions = await asyncio.gather(
discovery_scope.get_discovery(), get_discovery_data_type_permissions(request, discovery_scope)
)

return Response(
await make_data_type_response_object(
data_type, dt.DATA_TYPES[data_type], project_id, dataset_id, discovery, dt_permissions[data_type]
data_type, dt.DATA_TYPES[data_type], discovery_scope, dt_permissions[data_type]
)
)

Expand Down Expand Up @@ -195,10 +188,8 @@ async def data_type_metadata_schema(_request: DrfRequest, data_type: str):
async def dataset_data_type(request: DrfRequest, dataset_id: str, data_type: str):
try:
dataset = await Dataset.objects.aget(identifier=dataset_id)
except Dataset.DoesNotExist as e:
except (Dataset.DoesNotExist, ValidationError) as e:
return Response(errors.not_found_error(str(e)), status=status.HTTP_404_NOT_FOUND)
except ValidationError as e:
return bad_request_from_exc(e)

project = await Project.objects.aget(datasets=dataset)
project_id = str(project.identifier)
Expand Down Expand Up @@ -227,15 +218,16 @@ async def dataset_data_type(request: DrfRequest, dataset_id: str, data_type: str

return Response(status=status.HTTP_204_NO_CONTENT)

discovery = await get_discovery(project_id, dataset_id)
dt_permissions = await get_discovery_data_type_permissions(request, project_id, dataset_id)
# we've already validated that the project and dataset exist above, so we are allowed to directly build a validated
# discovery scope.
discovery_scope = ValidatedDiscoveryScope(project, dataset)

dt_permissions = await get_discovery_data_type_permissions(request, discovery_scope)

response_object = await make_data_type_response_object(
data_type,
dt.DATA_TYPES[data_type],
project=str(project.identifier),
dataset=dataset_id,
discovery=discovery,
discovery_scope,
permissions=dt_permissions[data_type],
)

Expand All @@ -248,20 +240,17 @@ async def dataset_data_type(request: DrfRequest, dataset_id: str, data_type: str
async def dataset_data_type_summary(request: DrfRequest, dataset_id: str):
try:
dataset = await Dataset.objects.aget(identifier=dataset_id)
except Dataset.DoesNotExist as e:
except (Dataset.DoesNotExist, ValidationError) as e:
return Response(errors.not_found_error(str(e)), status=status.HTTP_404_NOT_FOUND)
except ValidationError as e:
return bad_request_from_exc(e)

project = await Project.objects.aget(datasets=dataset)
project_id = str(project.identifier)

discovery = await get_discovery(project_id, dataset_id)
dt_permissions = await get_discovery_data_type_permissions(request, project_id, dataset_id)
# we've already validated that the project and dataset exist above, so we can build an instance of
# ValidatedDiscoveryScope directly.
discovery_scope = ValidatedDiscoveryScope(await Project.objects.aget(datasets=dataset), dataset)
dt_permissions = await get_discovery_data_type_permissions(request, discovery_scope)

dt_response = sorted(
await asyncio.gather(*(
make_data_type_response_object(dt_id, dt_d, project_id, dataset_id, discovery, dt_permissions[dt_id])
make_data_type_response_object(dt_id, dt_d, discovery_scope, dt_permissions[dt_id])
for dt_id, dt_d in dt.DATA_TYPES.items()
)),
key=lambda d: d["id"]
Expand Down
13 changes: 7 additions & 6 deletions chord_metadata_service/chord/views_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import logging

from adrf.decorators import api_view as async_api_view
from bento_lib.auth.resources import build_resource
from bento_lib.responses import errors
from bento_lib.search import build_search_response, postgres

Expand All @@ -28,7 +27,7 @@
from chord_metadata_service.authz.types import DataPermissionsDict

from chord_metadata_service.discovery.types import DiscoveryConfig
from chord_metadata_service.discovery.utils import get_request_discovery
from chord_metadata_service.discovery.utils import ValidatedDiscoveryScope

from chord_metadata_service.experiments.api_views import EXPERIMENT_SELECT_REL, EXPERIMENT_PREFETCH
from chord_metadata_service.experiments.models import Experiment
Expand All @@ -46,8 +45,7 @@
from chord_metadata_service.restapi.utils import build_experiments_by_subject, get_biosamples_with_experiment_details

from .data_types import DATA_TYPE_EXPERIMENT, DATA_TYPE_PHENOPACKET, DATA_TYPES
from .models import Dataset

from .models import Dataset, Project

OUTPUT_FORMAT_VALUES_LIST = "values_list"
OUTPUT_FORMAT_BENTO_SEARCH_RESULT = "bento_search_result"
Expand Down Expand Up @@ -531,13 +529,16 @@ def private_dataset_search(request: DrfRequest, dataset_id: str):
@permission_classes([BentoAllowAny])
async def dataset_summary(request: DrfRequest, dataset_id: str):
dataset = await Dataset.objects.aget(identifier=dataset_id)
discovery = await get_request_discovery(request)
project = await Project.objects.aget(identifier=dataset.project_id)
# don't use request scope - the project/dataset are validated by the aget calls above and fixed
discovery_scope = ValidatedDiscoveryScope(project, dataset)
dt_permissions = await get_data_type_query_permissions(
request,
data_types=list(DATASET_DATA_TYPE_SUMMARY_FUNCTIONS.keys()),
resource=build_resource(str(dataset.project_id), dataset_id),
resource=discovery_scope.as_authz_resource(),
)

discovery = await discovery_scope.get_discovery()
summaries = await asyncio.gather(
*map(lambda dt: DATASET_DATA_TYPE_SUMMARY_FUNCTIONS[dt](discovery, dataset, dt_permissions[dt]),
DATASET_DATA_TYPE_SUMMARY_FUNCTIONS.keys())
Expand Down
Loading

0 comments on commit 9e03bc4

Please sign in to comment.