From 9e03bc4e9b537a1cbc8d194796b6aeda4dd08740 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 3 Sep 2024 16:39:46 -0400 Subject: [PATCH] refact(discovery): introduce container class for discovery scope --- .../chord/tests/test_api_bento_datasets.py | 4 +- .../chord/tests/test_api_data_types.py | 56 ++++-- .../chord/views_data_types.py | 97 ++++----- chord_metadata_service/chord/views_search.py | 13 +- chord_metadata_service/discovery/api_views.py | 36 ++-- .../discovery/censorship.py | 15 +- .../discovery/exceptions.py | 8 +- .../discovery/tests/test_api.py | 63 ++++-- .../discovery/tests/test_discovery_utils.py | 49 +++++ chord_metadata_service/discovery/types.py | 5 + chord_metadata_service/discovery/utils.py | 189 ++++++++++++------ chord_metadata_service/metadata/settings.py | 2 + chord_metadata_service/patients/api_views.py | 40 ++-- chord_metadata_service/restapi/api_views.py | 13 +- 14 files changed, 390 insertions(+), 200 deletions(-) create mode 100644 chord_metadata_service/discovery/tests/test_discovery_utils.py diff --git a/chord_metadata_service/chord/tests/test_api_bento_datasets.py b/chord_metadata_service/chord/tests/test_api_bento_datasets.py index 376c0bc8d..f6f13d308 100644 --- a/chord_metadata_service/chord/tests/test_api_bento_datasets.py +++ b/chord_metadata_service/chord/tests/test_api_bento_datasets.py @@ -68,7 +68,7 @@ def test_dataset_data_type_summary(self): r = self.dt_authz_full_get( reverse("chord-dataset-data-type-summary", kwargs={"dataset_id": "not-a-uuid"})) - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) def _dataset_data_type_url(self, dt: str, ds_id: str = ""): return reverse("chord-dataset-data-type", kwargs={ @@ -121,7 +121,7 @@ def test_get_dataset_data_type(self): def test_get_dataset_data_type_dne(self): subtest_params = [ (self._dataset_data_type_url(DATA_TYPE_PHENOPACKET, str(uuid.uuid4())), status.HTTP_404_NOT_FOUND), - (self._dataset_data_type_url(DATA_TYPE_PHENOPACKET, "not-a-uuid"), status.HTTP_400_BAD_REQUEST), + (self._dataset_data_type_url(DATA_TYPE_PHENOPACKET, "not-a-uuid"), status.HTTP_404_NOT_FOUND), (self._dataset_data_type_url("does-not-exist"), status.HTTP_404_NOT_FOUND), ] diff --git a/chord_metadata_service/chord/tests/test_api_data_types.py b/chord_metadata_service/chord/tests/test_api_data_types.py index 7eb3d0338..31799c669 100644 --- a/chord_metadata_service/chord/tests/test_api_data_types.py +++ b/chord_metadata_service/chord/tests/test_api_data_types.py @@ -1,14 +1,17 @@ import uuid +from django.test import override_settings from django.urls import reverse from rest_framework import status from chord_metadata_service.authz.tests.helpers import AuthzAPITestCase, PermissionsTestCaseMixin from chord_metadata_service.discovery.tests.constants import DISCOVERY_CONFIG_TEST +from chord_metadata_service.discovery.utils import get_discovery_scope from chord_metadata_service.phenopackets.tests.helpers import PhenoTestCase from ..data_types import DATA_TYPE_EXPERIMENT, DATA_TYPE_PHENOPACKET, DATA_TYPES from ..views_data_types import get_count_for_data_type +from ...discovery.exceptions import DiscoveryScopeException POST_GET = ("POST", "GET") @@ -17,38 +20,59 @@ class DataTypeHelperTest(PhenoTestCase, PermissionsTestCaseMixin): @staticmethod - async def get_count_for_phenopackets(permissions, project=None, dataset=None): - return await get_count_for_data_type( - DATA_TYPE_PHENOPACKET, project, dataset, DISCOVERY_CONFIG_TEST, permissions - ) + async def get_count_for_phenopackets(permissions, project: str | None = None, dataset: str | None = None): + scope = await get_discovery_scope(project, dataset) + return await get_count_for_data_type(DATA_TYPE_PHENOPACKET, scope, permissions) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_data_type_count(self): self.assertEqual(await self.get_count_for_phenopackets(self.permissions_full), 1) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_data_type_count_censored(self): self.assertEqual(await self.get_count_for_phenopackets(self.permissions_counts), 0) # censored self.assertEqual(await self.get_count_for_phenopackets(self.permissions_bool), 0) # censored self.assertEqual(await self.get_count_for_phenopackets(self.permissions_none), 0) # censored + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_data_type_count_bad_project_id(self): - with self.assertRaises(ValueError): + with self.assertRaises(DiscoveryScopeException): await self.get_count_for_phenopackets(self.permissions_full, project="not-uuid") - with self.assertRaises(ValueError): + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) + async def test_data_type_count_bad_project_id_with_dataset_id(self): + with self.assertRaises(DiscoveryScopeException): await self.get_count_for_phenopackets(self.permissions_full, project="not-uuid", dataset=str(uuid.uuid4())) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_data_type_count_bad_dataset_id(self): - with self.assertRaises(ValueError): - await self.get_count_for_phenopackets(self.permissions_full, project=str(uuid.uuid4()), dataset="not-uuid") + with self.assertRaises(DiscoveryScopeException): + await self.get_count_for_phenopackets( + self.permissions_full, project=str(self.project.identifier), dataset="not-uuid" + ) - with self.assertRaises(ValueError): + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) + async def test_data_type_count_project_dne(self): + with self.assertRaises(DiscoveryScopeException): + await self.get_count_for_phenopackets(self.permissions_full, project=str(uuid.uuid4())) + + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) + async def test_data_type_count_dataset_dne(self): + with self.assertRaises(DiscoveryScopeException): + await self.get_count_for_phenopackets( + self.permissions_full, project=str(self.project.identifier), dataset=str(uuid.uuid4()) + ) + + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) + async def test_data_type_count_bad_dataset_id_2(self): # TODO: redo + with self.assertRaises(DiscoveryScopeException): await self.get_count_for_phenopackets(self.permissions_full, project=None, dataset="not-uuid") + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_data_type_count_bad_data_type(self): + scope = await get_discovery_scope(None, None) with self.assertRaises(ValueError): - await get_count_for_data_type( - DATA_TYPE_NOT_REAL, None, None, DISCOVERY_CONFIG_TEST, self.permissions_full - ) + await get_count_for_data_type(DATA_TYPE_NOT_REAL, scope, self.permissions_full) class DataTypeTest(AuthzAPITestCase, PermissionsTestCaseMixin): @@ -64,12 +88,12 @@ def test_data_type_list(self): def test_data_type_list_non_uuid_project(self): # Non-UUID project r = self.dt_authz_counts_get(reverse("data-type-list"), {"project": "a"}) - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) def test_data_type_list_non_uuid_dataset(self): # Non-UUID dataset r = self.dt_authz_counts_get(reverse("data-type-list"), {"dataset": "a"}) - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) def test_data_type_detail(self): # counts permission @@ -108,13 +132,13 @@ def test_data_type_detail_non_uuid_project(self): # Non-UUID project r = self.dt_authz_counts_get( reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_PHENOPACKET}), {"project": "a"}) - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) def test_data_type_detail_non_uuid_dataset(self): # Non-UUID dataset r = self.dt_authz_counts_get( reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_PHENOPACKET}), {"dataset": "a"}) - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND) def test_data_type_detail_404(self): r = self.dt_authz_counts_get(reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_NOT_REAL})) diff --git a/chord_metadata_service/chord/views_data_types.py b/chord_metadata_service/chord/views_data_types.py index b90e9ac48..9dfeae70a 100644 --- a/chord_metadata_service/chord/views_data_types.py +++ b/chord_metadata_service/chord/views_data_types.py @@ -18,12 +18,10 @@ from chord_metadata_service.authz.permissions import BentoAllowAny, BentoDeferToHandler from chord_metadata_service.authz.types import DataPermissionsDict from chord_metadata_service.discovery.censorship import thresholded_count -from chord_metadata_service.discovery.types import DiscoveryConfig from chord_metadata_service.discovery.utils import ( - get_discovery, - get_project_id_and_dataset_id_from_request, - get_request_discovery, get_discovery_data_type_permissions, + ValidatedDiscoveryScope, + get_request_discovery_scope, ) from chord_metadata_service.chord.models import Dataset, Project from chord_metadata_service.cleanup import run_all_cleanup @@ -32,6 +30,7 @@ from chord_metadata_service.phenopackets.models import Phenopacket from . import data_types as dt +from ..discovery.exceptions import DiscoveryScopeException QUERYSET_FN: dict[str, Callable] = { dt.DATA_TYPE_EXPERIMENT: lambda dataset_id: Experiment.objects.filter(dataset_id=dataset_id), @@ -67,17 +66,15 @@ async def _filtered_query(data_type: str, project: str | None = None, dataset: s async def get_count_for_data_type( data_type: str, - project: str | None, - dataset: str | None, - discovery: DiscoveryConfig, + scope: ValidatedDiscoveryScope, permissions: DataPermissionsDict, ) -> int: """ Returns the count for a particular data type. If dataset is provided, project will be ignored. If neither are provided, the count will be for the whole node. """ - q = await _filtered_query(data_type, project, dataset) - return thresholded_count(await q.acount(), discovery, permissions) + q = await _filtered_query(data_type, scope.project_id, scope.dataset_id) + return thresholded_count(await q.acount(), await scope.get_discovery(), permissions) async def get_last_ingested_for_data_type(data_type: str, project: str | None = None, @@ -95,19 +92,17 @@ async def get_last_ingested_for_data_type(data_type: str, project: str | None = async def make_data_type_response_object( data_type_id: str, data_type_details: dict, - project: str | None, - dataset: str | None, - discovery: DiscoveryConfig, + scope: ValidatedDiscoveryScope, permissions: DataPermissionsDict, ) -> dict: return { **data_type_details, "id": data_type_id, **( - {"count": await get_count_for_data_type(data_type_id, project, dataset, discovery, permissions)} + {"count": await get_count_for_data_type(data_type_id, scope, permissions)} if permissions["counts"] else {} ), - "last_ingested": await get_last_ingested_for_data_type(data_type_id, project, dataset) + "last_ingested": await get_last_ingested_for_data_type(data_type_id, scope.project_id, scope.dataset_id) } @@ -116,22 +111,20 @@ async def make_data_type_response_object( async def data_type_list(request: DrfRequest): # TODO: Permissions: only return counts when we are authenticated/have access to counts or full data. - project_id, dataset_id = get_project_id_and_dataset_id_from_request(request) - try: - discovery, dt_permissions = await asyncio.gather( - get_request_discovery(request), get_discovery_data_type_permissions(request) - ) - except ValidationError as e: - # UUID most likely - used to be handled in get_count_for_data_type inside make_data_type_response_object, but - # now triggered earlier by discovery scoping - return Response(errors.bad_request_error(str(e)), status=status.HTTP_400_BAD_REQUEST) + discovery_scope = await get_request_discovery_scope(request) + except DiscoveryScopeException as e: + # Does not exist, or a UUID validation error - used to be triggered later but scope validation does some of the + # Django validation for us. + return Response(e.message, status=status.HTTP_404_NOT_FOUND) + + discovery, dt_permissions = await asyncio.gather( + discovery_scope.get_discovery(), get_discovery_data_type_permissions(request, discovery_scope) + ) dt_response: list[dict] = list( await asyncio.gather(*( - make_data_type_response_object( - dt_id, dt_d, project_id, dataset_id, discovery, dt_permissions[dt_id] - ) + make_data_type_response_object(dt_id, dt_d, discovery_scope, dt_permissions[dt_id]) for dt_id, dt_d in dt.DATA_TYPES.items() )) ) @@ -152,21 +145,21 @@ async def data_type_detail(request: DrfRequest, data_type: str): if data_type not in dt.DATA_TYPES: return Response(errors.not_found_error(f"Data type {data_type} not found"), status=status.HTTP_404_NOT_FOUND) - project_id, dataset_id = get_project_id_and_dataset_id_from_request(request) - try: - # TODO: just get the one data type - discovery, dt_permissions = await asyncio.gather( - get_request_discovery(request), get_discovery_data_type_permissions(request) - ) - except ValidationError as e: - # UUID most likely - used to be handled in get_count_for_data_type inside make_data_type_response_object, but - # now triggered earlier by discovery scoping - return bad_request_from_exc(e) + discovery_scope = await get_request_discovery_scope(request) + except DiscoveryScopeException as e: + # Does not exist, or a UUID validation error - used to be triggered later but scope validation does some of the + # Django validation for us. + return Response(e.message, status=status.HTTP_404_NOT_FOUND) + + # TODO: just get the one data type + discovery, dt_permissions = await asyncio.gather( + discovery_scope.get_discovery(), get_discovery_data_type_permissions(request, discovery_scope) + ) return Response( await make_data_type_response_object( - data_type, dt.DATA_TYPES[data_type], project_id, dataset_id, discovery, dt_permissions[data_type] + data_type, dt.DATA_TYPES[data_type], discovery_scope, dt_permissions[data_type] ) ) @@ -195,10 +188,8 @@ async def data_type_metadata_schema(_request: DrfRequest, data_type: str): async def dataset_data_type(request: DrfRequest, dataset_id: str, data_type: str): try: dataset = await Dataset.objects.aget(identifier=dataset_id) - except Dataset.DoesNotExist as e: + except (Dataset.DoesNotExist, ValidationError) as e: return Response(errors.not_found_error(str(e)), status=status.HTTP_404_NOT_FOUND) - except ValidationError as e: - return bad_request_from_exc(e) project = await Project.objects.aget(datasets=dataset) project_id = str(project.identifier) @@ -227,15 +218,16 @@ async def dataset_data_type(request: DrfRequest, dataset_id: str, data_type: str return Response(status=status.HTTP_204_NO_CONTENT) - discovery = await get_discovery(project_id, dataset_id) - dt_permissions = await get_discovery_data_type_permissions(request, project_id, dataset_id) + # we've already validated that the project and dataset exist above, so we are allowed to directly build a validated + # discovery scope. + discovery_scope = ValidatedDiscoveryScope(project, dataset) + + dt_permissions = await get_discovery_data_type_permissions(request, discovery_scope) response_object = await make_data_type_response_object( data_type, dt.DATA_TYPES[data_type], - project=str(project.identifier), - dataset=dataset_id, - discovery=discovery, + discovery_scope, permissions=dt_permissions[data_type], ) @@ -248,20 +240,17 @@ async def dataset_data_type(request: DrfRequest, dataset_id: str, data_type: str async def dataset_data_type_summary(request: DrfRequest, dataset_id: str): try: dataset = await Dataset.objects.aget(identifier=dataset_id) - except Dataset.DoesNotExist as e: + except (Dataset.DoesNotExist, ValidationError) as e: return Response(errors.not_found_error(str(e)), status=status.HTTP_404_NOT_FOUND) - except ValidationError as e: - return bad_request_from_exc(e) - - project = await Project.objects.aget(datasets=dataset) - project_id = str(project.identifier) - discovery = await get_discovery(project_id, dataset_id) - dt_permissions = await get_discovery_data_type_permissions(request, project_id, dataset_id) + # we've already validated that the project and dataset exist above, so we can build an instance of + # ValidatedDiscoveryScope directly. + discovery_scope = ValidatedDiscoveryScope(await Project.objects.aget(datasets=dataset), dataset) + dt_permissions = await get_discovery_data_type_permissions(request, discovery_scope) dt_response = sorted( await asyncio.gather(*( - make_data_type_response_object(dt_id, dt_d, project_id, dataset_id, discovery, dt_permissions[dt_id]) + make_data_type_response_object(dt_id, dt_d, discovery_scope, dt_permissions[dt_id]) for dt_id, dt_d in dt.DATA_TYPES.items() )), key=lambda d: d["id"] diff --git a/chord_metadata_service/chord/views_search.py b/chord_metadata_service/chord/views_search.py index 8cd7fa13a..44cc9d77c 100644 --- a/chord_metadata_service/chord/views_search.py +++ b/chord_metadata_service/chord/views_search.py @@ -4,7 +4,6 @@ import logging from adrf.decorators import api_view as async_api_view -from bento_lib.auth.resources import build_resource from bento_lib.responses import errors from bento_lib.search import build_search_response, postgres @@ -28,7 +27,7 @@ from chord_metadata_service.authz.types import DataPermissionsDict from chord_metadata_service.discovery.types import DiscoveryConfig -from chord_metadata_service.discovery.utils import get_request_discovery +from chord_metadata_service.discovery.utils import ValidatedDiscoveryScope from chord_metadata_service.experiments.api_views import EXPERIMENT_SELECT_REL, EXPERIMENT_PREFETCH from chord_metadata_service.experiments.models import Experiment @@ -46,8 +45,7 @@ from chord_metadata_service.restapi.utils import build_experiments_by_subject, get_biosamples_with_experiment_details from .data_types import DATA_TYPE_EXPERIMENT, DATA_TYPE_PHENOPACKET, DATA_TYPES -from .models import Dataset - +from .models import Dataset, Project OUTPUT_FORMAT_VALUES_LIST = "values_list" OUTPUT_FORMAT_BENTO_SEARCH_RESULT = "bento_search_result" @@ -531,13 +529,16 @@ def private_dataset_search(request: DrfRequest, dataset_id: str): @permission_classes([BentoAllowAny]) async def dataset_summary(request: DrfRequest, dataset_id: str): dataset = await Dataset.objects.aget(identifier=dataset_id) - discovery = await get_request_discovery(request) + project = await Project.objects.aget(identifier=dataset.project_id) + # don't use request scope - the project/dataset are validated by the aget calls above and fixed + discovery_scope = ValidatedDiscoveryScope(project, dataset) dt_permissions = await get_data_type_query_permissions( request, data_types=list(DATASET_DATA_TYPE_SUMMARY_FUNCTIONS.keys()), - resource=build_resource(str(dataset.project_id), dataset_id), + resource=discovery_scope.as_authz_resource(), ) + discovery = await discovery_scope.get_discovery() summaries = await asyncio.gather( *map(lambda dt: DATASET_DATA_TYPE_SUMMARY_FUNCTIONS[dt](discovery, dataset, dt_permissions[dt]), DATASET_DATA_TYPE_SUMMARY_FUNCTIONS.keys()) diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index 9eb59d0da..437cacc7d 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -10,8 +10,8 @@ from rest_framework.request import Request as DrfRequest from rest_framework.response import Response -from chord_metadata_service.discovery.exceptions import DiscoveryConfigException -from chord_metadata_service.discovery.utils import get_request_discovery +from chord_metadata_service.discovery.exceptions import DiscoveryScopeException +from chord_metadata_service.discovery.utils import get_request_discovery_scope from ..authz.permissions import BentoAllowAny from ..chord import data_types as dts, models as cm @@ -29,11 +29,7 @@ from .censorship import get_rules from .schemas import DISCOVERY_SCHEMA from .types import BinWithValue -from .utils import ( - get_project_id_and_dataset_id_from_request, - get_discovery_data_type_permissions, - get_discovery_field_set_permissions, -) +from .utils import get_discovery_data_type_permissions, get_discovery_field_set_permissions @extend_schema( @@ -58,16 +54,18 @@ async def public_search_fields(request: DrfRequest): """ try: - discovery = await get_request_discovery(request) - except DiscoveryConfigException as e: + discovery_scope = await get_request_discovery_scope(request) + except DiscoveryScopeException as e: return Response(e.message, status=status.HTTP_404_NOT_FOUND) except ValidationError as e: # UUID error return Response(errors.bad_request_error(*e.messages), status=status.HTTP_400_BAD_REQUEST) + discovery = await discovery_scope.get_discovery() + if not discovery: return Response(dres.NO_PUBLIC_FIELDS_CONFIGURED, status=status.HTTP_404_NOT_FOUND) - dt_permissions = await get_discovery_data_type_permissions(request) + dt_permissions = await get_discovery_data_type_permissions(request, discovery_scope) _, field_permissions = get_discovery_field_set_permissions(discovery, None, dt_permissions) # Note: the array is wrapped in a dictionary structure to help with JSON @@ -123,20 +121,23 @@ async def public_overview(request: DrfRequest): """ try: - discovery = await get_request_discovery(request) - except DiscoveryConfigException as e: + discovery_scope = await get_request_discovery_scope(request) + except DiscoveryScopeException as e: return Response(e.message, status=status.HTTP_404_NOT_FOUND) except ValidationError as e: return Response(errors.bad_request_error(*e.messages), status=status.HTTP_400_BAD_REQUEST) + discovery = await discovery_scope.get_discovery() + if not discovery: return Response(dres.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND) - dt_permissions = await get_discovery_data_type_permissions(request) + dt_permissions = await get_discovery_data_type_permissions(request, discovery_scope) if not any(d["counts"] for d in dt_permissions.values()): return Response(dres.INSUFFICIENT_PRIVILEGES, status=status.HTTP_403_FORBIDDEN) - project_id, dataset_id = get_project_id_and_dataset_id_from_request(request) + project_id = discovery_scope.project_id + dataset_id = discovery_scope.dataset_id async def _counts_for_scoped_model_name(mn: PublicModelNames) -> tuple[PublicModelNames, int]: scope: PublicScopeFilterKeys @@ -263,11 +264,12 @@ async def discovery_schema(_request: DrfRequest): @permission_classes([BentoAllowAny]) async def public_rules(request: DrfRequest): try: - discovery = await get_request_discovery(request) - except DiscoveryConfigException as e: + discovery_scope = await get_request_discovery_scope(request) + except DiscoveryScopeException as e: return Response(e.message, status=status.HTTP_404_NOT_FOUND) - dt_permissions = await get_discovery_data_type_permissions(request) + dt_permissions = await get_discovery_data_type_permissions(request, discovery_scope) + discovery = await discovery_scope.get_discovery() # TODO: allow filtering by fields accessed? fs_permissions, _ = get_discovery_field_set_permissions(discovery, None, dt_permissions) diff --git a/chord_metadata_service/discovery/censorship.py b/chord_metadata_service/discovery/censorship.py index a28af483c..da097233d 100644 --- a/chord_metadata_service/discovery/censorship.py +++ b/chord_metadata_service/discovery/censorship.py @@ -1,7 +1,7 @@ import sys from ..authz.types import DataPermissionsDict -from .types import DiscoveryConfig, DiscoveryRules +from .types import DiscoveryConfig, DiscoveryRules, EmptyConfig __all__ = [ "RULES_NO_PERMISSIONS", @@ -23,7 +23,7 @@ } -def get_rules(discovery: DiscoveryConfig | None, data_permissions: DataPermissionsDict) -> DiscoveryRules: +def get_rules(discovery: DiscoveryConfig | EmptyConfig | None, data_permissions: DataPermissionsDict) -> DiscoveryRules: if data_permissions["data"]: return RULES_FULL_PERMISSIONS elif not data_permissions["counts"] or not (discovery or {}).get("rules"): @@ -38,11 +38,18 @@ def get_threshold(discovery: DiscoveryConfig | None, field_set_permissions: Data return get_rules(discovery, field_set_permissions)["count_threshold"] -def thresholded_count(c: int, discovery: DiscoveryConfig | None, field_set_permissions: DataPermissionsDict) -> int: +def thresholded_count( + c: int, + discovery: DiscoveryConfig | EmptyConfig | None, + field_set_permissions: DataPermissionsDict, +) -> int: return 0 if c <= get_threshold(discovery, field_set_permissions) else c -def get_max_query_parameters(discovery: DiscoveryConfig | None, field_set_permissions: DataPermissionsDict) -> int: +def get_max_query_parameters( + discovery: DiscoveryConfig | EmptyConfig | None, + field_set_permissions: DataPermissionsDict, +) -> int: """ Gets the maximum number of query parameters allowed for discovery. """ diff --git a/chord_metadata_service/discovery/exceptions.py b/chord_metadata_service/discovery/exceptions.py index c72186218..c5c75bc3d 100644 --- a/chord_metadata_service/discovery/exceptions.py +++ b/chord_metadata_service/discovery/exceptions.py @@ -1,15 +1,15 @@ __all__ = [ - "DiscoveryConfigException" + "DiscoveryScopeException", ] -class DiscoveryConfigException(Exception): +class DiscoveryScopeException(Exception): - def __init__(self, dataset_id: str | None = None, project_id: str | None = None, *args: object) -> None: + def __init__(self, dataset_id: str | None = None, project_id: str | None = None, *args) -> None: self.dataset_id = dataset_id self.project_id = project_id - message = "Error retrieving {0} scoped discovery config: {0} {1} does not exist." + message = "Error validating discovery scope: {0} {1} does not exist." if dataset_id and project_id: message = message.format("project-dataset", f"({project_id}, {dataset_id}) pair") elif dataset_id: diff --git a/chord_metadata_service/discovery/tests/test_api.py b/chord_metadata_service/discovery/tests/test_api.py index a3dcca81c..594150289 100644 --- a/chord_metadata_service/discovery/tests/test_api.py +++ b/chord_metadata_service/discovery/tests/test_api.py @@ -7,6 +7,7 @@ from django.urls import reverse from django.test import TestCase, override_settings from rest_framework import status +from typing import Literal, TypedDict from chord_metadata_service.authz.tests.helpers import DTAccessLevel, AuthzAPITestCase from chord_metadata_service.chord import models as ch_m @@ -72,6 +73,16 @@ def setUpTestData(cls) -> None: cls.id_ds_b = cls.dataset_b.identifier +TestDiscoveryConfigKey = Literal["public", "sex_only", "extra_props", "none"] + + +class TestDiscoveryConfigsDict(TypedDict): + public: DiscoveryConfig + sex_only: DiscoveryConfig + extra_props: DiscoveryConfig + none: None + + class PublicSearchFieldsTest(AuthzAPITestCase, ScopedDiscoveryTestCase): def setUp(self) -> None: @@ -105,46 +116,58 @@ def assert_response_section_fields(self, response_obj: dict, config: dict): set(field for section in config["search"] for field in section["fields"]) ) + @staticmethod + def test_discovery_configs() -> TestDiscoveryConfigsDict: + return { + "public": settings.CONFIG_PUBLIC, + "sex_only": CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY, + "extra_props": DISCOVERY_CONFIG_EXTRA_PROPERTIES, + "none": None, + } + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_public_search_fields_configured(self): search_fields_url = reverse("public-search-fields") - subtest_params: list[tuple[DTAccessLevel, str, int, DiscoveryConfig | None]] = [ + subtest_params: list[tuple[DTAccessLevel, str, int, TestDiscoveryConfigKey]] = [ # SCOPE: whole node - ("counts", "", status.HTTP_200_OK, settings.CONFIG_PUBLIC), + ("counts", "", status.HTTP_200_OK, "public"), # SCOPE: project_a (same discovery as whole node) - ("counts", f"?project={str(self.id_proj_a)}", status.HTTP_200_OK, settings.CONFIG_PUBLIC), + ("counts", f"?project={str(self.id_proj_a)}", status.HTTP_200_OK, "public"), # SCOPE: project_b (discovery search sex only) - ("counts", f"?project={str(self.id_proj_b)}", status.HTTP_200_OK, CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY), + ("counts", f"?project={str(self.id_proj_b)}", status.HTTP_200_OK, "sex_only"), # SCOPE: dataset_a (discovery with dataset specific extra_properties) - ("counts", f"?dataset={str(self.id_ds_a)}", status.HTTP_200_OK, DISCOVERY_CONFIG_EXTRA_PROPERTIES), + ("counts", f"?dataset={str(self.id_ds_a)}", status.HTTP_200_OK, "extra_props"), # SCOPE: non-existant dataset - ("counts", f"?dataset={uuid.uuid4()}", status.HTTP_404_NOT_FOUND, None), + ("counts", f"?dataset={uuid.uuid4()}", status.HTTP_404_NOT_FOUND, "none"), # SCOPE: non-existant project - ("counts", f"?project={uuid.uuid4()}", status.HTTP_404_NOT_FOUND, None), + ("counts", f"?project={uuid.uuid4()}", status.HTTP_404_NOT_FOUND, "none"), # SCOPE: dataset_b # - fallback on project's config, responses should be the same # - see above - CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY - ("counts", f"?dataset={self.id_ds_b}", status.HTTP_200_OK, CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY), + ("counts", f"?dataset={self.id_ds_b}", status.HTTP_200_OK, "sex_only"), # SCOPE: project_a + dataset_b (invalid) - ("counts", f"?project={str(self.id_proj_a)}&dataset={self.id_ds_b}", status.HTTP_404_NOT_FOUND, None), + ("counts", f"?project={str(self.id_proj_a)}&dataset={self.id_ds_b}", status.HTTP_404_NOT_FOUND, "none"), # SCOPE: project_a + dataset_a (valid) # - same as dataset_a - DISCOVERY_CONFIG_EXTRA_PROPERTIES - ("counts", f"?project={str(self.id_proj_a)}&dataset={self.id_ds_a}", status.HTTP_200_OK, - DISCOVERY_CONFIG_EXTRA_PROPERTIES), + ("counts", f"?project={str(self.id_proj_a)}&dataset={self.id_ds_a}", status.HTTP_200_OK, "extra_props"), # invalid UUID for project - ("counts", "?project=not-a-uuid", status.HTTP_400_BAD_REQUEST, None), + ("counts", "?project=not-a-uuid", status.HTTP_404_NOT_FOUND, "none"), # invalid UUID for dataset - ("counts", "?dataset=not-a-uuid", status.HTTP_400_BAD_REQUEST, None), + ("counts", "?dataset=not-a-uuid", status.HTTP_404_NOT_FOUND, "none"), ] + # use key aliases for configs to make subtest failure output more readable + tdc = self.test_discovery_configs() # to get injected CONFIG_PUBLIC, need to calculate this in-test + for params in subtest_params: with self.subTest(params=params): - level, qp, expected_status_code, expected_body = params + level, qp, expected_status_code, config_key = params + expected_body_config: DiscoveryConfig | None = tdc[config_key] res = self.dt_get(level, f"{search_fields_url}{qp}") self.assertEqual(res.status_code, expected_status_code) - if expected_body is not None: - self.assert_response_section_fields(res.json(), expected_body) + if expected_body_config is not None: + self.assert_response_section_fields(res.json(), expected_body_config) @override_settings(CONFIG_PUBLIC={}) def test_public_search_fields_not_configured(self): @@ -251,10 +274,10 @@ def test_overview(self): # SCOPE: dataset_b (project_b fallback) (f"?dataset={self.id_ds_b}", status.HTTP_200_OK, self.project_b.discovery, self.data_type_counts_ds_b), # --- INVALID --- - # invalid UUID for project - ("?project=not-a-uuid", status.HTTP_400_BAD_REQUEST, None, None), - # invalid UUID for dataset - ("?dataset=not-a-uuid", status.HTTP_400_BAD_REQUEST, None, None), + # invalid UUID for project (not found; IDs are not of this format) + ("?project=not-a-uuid", status.HTTP_404_NOT_FOUND, None, None), + # invalid UUID for dataset (not found; IDs are not of this format) + ("?dataset=not-a-uuid", status.HTTP_404_NOT_FOUND, None, None), ] for params in subtest_params: diff --git a/chord_metadata_service/discovery/tests/test_discovery_utils.py b/chord_metadata_service/discovery/tests/test_discovery_utils.py new file mode 100644 index 000000000..81830a490 --- /dev/null +++ b/chord_metadata_service/discovery/tests/test_discovery_utils.py @@ -0,0 +1,49 @@ +from chord_metadata_service.chord.tests.helpers import ProjectTestCase +from chord_metadata_service.discovery.exceptions import DiscoveryScopeException +from chord_metadata_service.discovery.utils import ValidatedDiscoveryScope + + +class DiscoveryScopeBuildingTestCase(ProjectTestCase): + + def setUp(self): + self.instance_scope = ValidatedDiscoveryScope(None, None) + self.project_scope = ValidatedDiscoveryScope(self.project, None) + self.project_dataset_scope = ValidatedDiscoveryScope(self.project, self.dataset) + + def test_scope_init_fail(self): + with self.assertRaises(DiscoveryScopeException): + ValidatedDiscoveryScope(None, self.dataset) + + def test_scope_repr(self): + subtest_params = [ + ( + self.instance_scope, + "", + ), + ( + self.project_scope, + f"", + ), + ( + self.project_dataset_scope, + f"", + ), + ] + + for params in subtest_params: + with self.subTest(params=params): + self.assertEqual(repr(params[0]), params[1]) + + def test_scope_authz_repr(self): + subtest_params = [ + (self.instance_scope, {"everything": True}), + (self.project_scope, {"project": str(self.project.identifier)}), + ( + self.project_dataset_scope, + {"project": str(self.project.identifier), "dataset": str(self.dataset.identifier)}, + ), + ] + + for params in subtest_params: + with self.subTest(params=params): + self.assertDictEqual(params[0].as_authz_resource(), params[1]) diff --git a/chord_metadata_service/discovery/types.py b/chord_metadata_service/discovery/types.py index f6677c739..bc3437d1e 100644 --- a/chord_metadata_service/discovery/types.py +++ b/chord_metadata_service/discovery/types.py @@ -5,6 +5,7 @@ "DiscoveryFieldProps", "DiscoveryRules", "DiscoveryConfig", + "EmptyConfig", "OverviewSectionChart", "OverviewSection", ] @@ -46,6 +47,10 @@ class DiscoveryConfig(TypedDict): rules: DiscoveryRules +class EmptyConfig(TypedDict): + pass + + class BinWithValue(TypedDict): label: str value: int diff --git a/chord_metadata_service/discovery/utils.py b/chord_metadata_service/discovery/utils.py index ae5a8c966..deb017380 100644 --- a/chord_metadata_service/discovery/utils.py +++ b/chord_metadata_service/discovery/utils.py @@ -1,4 +1,6 @@ -from bento_lib.auth.resources import RESOURCE_EVERYTHING, build_resource +import uuid + +from bento_lib.auth.resources import build_resource from django.conf import settings from django.core.exceptions import ObjectDoesNotExist, ValidationError from rest_framework.request import Request as DrfRequest @@ -10,68 +12,150 @@ ) from chord_metadata_service.chord import models as cm -from .exceptions import DiscoveryConfigException +from .exceptions import DiscoveryScopeException from .fields_utils import get_public_model_name_and_field_path from .model_lookups import PUBLIC_MODEL_NAMES_TO_DATA_TYPE -from .types import DiscoveryConfig, DiscoveryFieldProps +from .types import DiscoveryConfig, DiscoveryFieldProps, EmptyConfig __all__ = [ - "get_discovery", - "get_request_discovery", - "get_project_id_and_dataset_id_from_request", + "ValidatedDiscoveryScope", + "get_discovery_scope", + "get_request_discovery_scope", "get_discovery_queryable_fields", "get_discovery_data_type_permissions", "get_discovery_field_set_permissions", ] -async def _get_project_discovery(project_id: str | None = None, project: cm.Project | None = None) -> dict: - if not project and project_id: - # retrieve project by ID if not provided - project = await cm.Project.objects.aget(identifier=project_id) - if not project.discovery: - # fallback on global discovery config if project has none - return settings.CONFIG_PUBLIC - return project.discovery - - -async def _get_dataset_discovery(dataset_id: str) -> dict: - dataset = await cm.Dataset.objects.aget(identifier=dataset_id) - if not dataset.discovery: - project = await cm.Project.objects.aget(datasets=dataset_id) - return await _get_project_discovery(project=project) - return dataset.discovery - - -async def get_discovery(project_id: str | None = None, dataset_id: str | None = None) -> DiscoveryConfig: - if dataset_id and project_id: - # check if the dataset belongs to the project - is_scope_valid = await cm.Dataset.objects.filter( - identifier=dataset_id, - project__identifier=project_id, - ).aexists() - if not is_scope_valid: - raise DiscoveryConfigException(dataset_id, project_id) +class ValidatedDiscoveryScope: + """ + Contains discovery scope information (i.e., project and dataset), as well as helper methods for accessing the + scope's discovery configuration, Bento authorization resource representation, and IDs. + + Projects and datasets are passed into the constructor rather than IDs to allow discovery calculations *and* ensure + the project/dataset actually exist before scope object creation, thus the name - the project and dataset's + existences are pre-validated. Of course, a project/dataset could be deleted asynchronously elsewhere, which could + result in this becoming invalid. + """ + + def __init__(self, project: cm.Project | None, dataset: cm.Dataset | None): + """ + Constructor for an already-validated discovery scope - i.e., since we are getting fed project/dataset instances + rather than just string IDs, we know these objects exist at the time of construction. + """ + + self._project = project + self._dataset = dataset + + # Additional validation - make sure we have project set if dataset is set + if self._dataset and not self._project: + raise DiscoveryScopeException(dataset_id=str(self._dataset.identifier)) + + # We can cache get_discovery() after the first call, since instances of this class MUST NOT be mutated. + self._discovery: DiscoveryConfig | EmptyConfig | None = None + + @property + def project_id(self) -> str | None: + """ + String representation of the scope project's ID, if set. + """ + return str(self._project.identifier) if self._project else None + + @property + def dataset_id(self) -> str | None: + """ + String representation of the scope dataset's ID, if set. + """ + return str(self._dataset.identifier) if self._dataset else None + + def __repr__(self): + return f"" + + async def _get_project_discovery_or_fallback(self) -> DiscoveryConfig | EmptyConfig: + if self._project and (d := self._project.discovery): + return d + else: + # fallback on global discovery config if project is not set or has None as discovery + return settings.CONFIG_PUBLIC + + async def _get_dataset_discovery_or_fallback(self) -> DiscoveryConfig | EmptyConfig: + """ + Gets the dataset discovery configuration dictionary, or falls back to the project (and eventually instance) one. + """ + if self._dataset and (d := self._dataset.discovery): + return d + else: + return await self._get_project_discovery_or_fallback() + + async def get_discovery(self) -> DiscoveryConfig | EmptyConfig: + """ + Get the discovery configuration dictionary for this scope, properly handling falling back + (dataset -> project -> instance) as required. + """ + if self._discovery is not None: + return self._discovery + else: + d = await self._get_dataset_discovery_or_fallback() + self._discovery = d + return d + + def as_authz_resource(self) -> dict: + """ + Build a Bento authorization system-compatible resource dictionary from this discovery scope. + """ + return build_resource(self.project_id, self.dataset_id) + + +def _get_project_id_and_dataset_id_from_request(request: DrfRequest) -> tuple[str | None, str | None]: + return request.query_params.get("project") or None, request.query_params.get("dataset") or None + + +async def _get_project_by_id(project_id: str) -> cm.Project: + return await cm.Project.objects.filter(identifier=project_id).aget() + + +async def get_discovery_scope(project_id: str | None, dataset_id: str | None) -> ValidatedDiscoveryScope: + project: cm.Project | None = None + dataset: cm.Dataset | None = None + + is_scope_valid: bool = True + + try: + if project_id: + uuid.UUID(project_id) + if dataset_id: + uuid.UUID(dataset_id) + except ValueError: + # We don't want to facilitate log injection, so replace the true values with placeholders + raise DiscoveryScopeException("", "") + try: if dataset_id: - # get dataset's discovery config if dataset_id is passed - return await _get_dataset_discovery(dataset_id) + qs = cm.Dataset.objects.filter(identifier=dataset_id) + if project_id: + # check if the dataset exists and belongs to the specified project if project ID is specified; + # otherwise, infer the project from the dataset. + qs = qs.filter(project_id=project_id) + + dataset = await qs.aget() + project = await _get_project_by_id(dataset.project_id) + elif project_id: - # get project's discovery config if project_id is passed and dataset_id is not - return await _get_project_discovery(project_id=project_id) + project = await _get_project_by_id(project_id) + except ObjectDoesNotExist: - raise DiscoveryConfigException(dataset_id, project_id) - # fallback to config.json when no dataset or project is in the request - return settings.CONFIG_PUBLIC + is_scope_valid = False + if not is_scope_valid: + # We've already checked these are UUIDs, so they're fine to log + raise DiscoveryScopeException(dataset_id, project_id) -def get_project_id_and_dataset_id_from_request(request: DrfRequest) -> tuple[str | None, str | None]: - return request.query_params.get("project") or None, request.query_params.get("dataset") or None + return ValidatedDiscoveryScope(project=project, dataset=dataset) -async def get_request_discovery(request: DrfRequest) -> DiscoveryConfig: - project_id, dataset_id = get_project_id_and_dataset_id_from_request(request) - return await get_discovery(project_id, dataset_id) +async def get_request_discovery_scope(request: DrfRequest) -> ValidatedDiscoveryScope: + project_id, dataset_id = _get_project_id_and_dataset_id_from_request(request) + return await get_discovery_scope(project_id, dataset_id) def get_discovery_queryable_fields(discovery: DiscoveryConfig) -> dict[str, DiscoveryFieldProps]: @@ -82,21 +166,12 @@ def get_discovery_queryable_fields(discovery: DiscoveryConfig) -> dict[str, Disc async def get_discovery_data_type_permissions( - request: DrfRequest, project_id: str | None = None, dataset_id: str | None = None + request: DrfRequest, scope: ValidatedDiscoveryScope ) -> DataTypeDiscoveryPermissions: # Do here instead of inside get_data_type_query_permissions, since this getter function is specific to discovery - if project_id is None and dataset_id is None: - project_id, dataset_id = get_project_id_and_dataset_id_from_request(request) - - resource: dict = RESOURCE_EVERYTHING - if project_id: - if dataset_id: - resource = build_resource(project_id, dataset_id) - else: - resource = build_resource(project_id) - - dataset_level = "dataset" in resource + resource: dict = scope.as_authz_resource() + dataset_level = scope.dataset_id is not None return await get_data_type_query_permissions( request, diff --git a/chord_metadata_service/metadata/settings.py b/chord_metadata_service/metadata/settings.py index 390b5e6d8..64ce43758 100644 --- a/chord_metadata_service/metadata/settings.py +++ b/chord_metadata_service/metadata/settings.py @@ -21,6 +21,7 @@ from dotenv import load_dotenv from .. import __version__ +from ..discovery.types import DiscoveryConfig, EmptyConfig load_dotenv() @@ -351,6 +352,7 @@ def get_secret(path): # Settings related to the Public APIs # Read project specific config.json that contains custom search fields +CONFIG_PUBLIC: DiscoveryConfig | EmptyConfig if os.path.isfile(os.path.join(BASE_DIR, 'config.json')): with open(os.path.join(BASE_DIR, 'config.json')) as config_file: CONFIG_PUBLIC = json.load(config_file) diff --git a/chord_metadata_service/patients/api_views.py b/chord_metadata_service/patients/api_views.py index 0b26b6b85..644b920dc 100644 --- a/chord_metadata_service/patients/api_views.py +++ b/chord_metadata_service/patients/api_views.py @@ -23,15 +23,14 @@ from chord_metadata_service.chord import data_types as dts from chord_metadata_service.discovery import responses as dres from chord_metadata_service.discovery.censorship import get_max_query_parameters, get_threshold, thresholded_count -from chord_metadata_service.discovery.exceptions import DiscoveryConfigException +from chord_metadata_service.discovery.exceptions import DiscoveryScopeException from chord_metadata_service.discovery.fields import get_field_options, filter_queryset_field_value from chord_metadata_service.discovery.stats import individual_biosample_tissue_stats, individual_experiment_type_stats -from chord_metadata_service.discovery.types import DiscoveryConfig from chord_metadata_service.discovery.utils import ( - get_request_discovery, get_discovery_queryable_fields, get_discovery_data_type_permissions, get_discovery_field_set_permissions, + get_request_discovery_scope, ValidatedDiscoveryScope, ) from chord_metadata_service.logger import logger from chord_metadata_service.phenopackets.api_views import BIOSAMPLE_PREFETCH, PHENOPACKET_PREFETCH @@ -170,7 +169,10 @@ def get_queryset(self): async def public_discovery_filter_queryset( - discovery: DiscoveryConfig, request: DrfRequest, dt_permissions: DataTypeDiscoveryPermissions, queryset: QuerySet + scope: ValidatedDiscoveryScope, + request: DrfRequest, + dt_permissions: DataTypeDiscoveryPermissions, + queryset: QuerySet, ): # Process query parameters and check validity @@ -178,11 +180,17 @@ async def public_discovery_filter_queryset( # - remove project/dataset (i.e., scope) query parameters; otherwise, they get included in the fields and the # response yields an error, as they are (presumably) not queryable fields in the discovery config. + # - store project and dataset before we remove them for logging purposes. if "project" in qp: del qp["project"] if "dataset" in qp: del qp["dataset"] + # log/exception string to re-use for validation errors/log entries. TODO: in the future, these should be structured + scope_str = f"({repr(scope)})" + + discovery = await scope.get_discovery() + queryable_fields = get_discovery_queryable_fields(discovery) queried_fields = list(set(qp.keys())) # deduplicate fields for determining field permissions @@ -191,14 +199,14 @@ async def public_discovery_filter_queryset( # we check against qp, not queried_fields, for max query parameters, since a user may be filtering based on more # than one value for the same field (not that this works most of the time, at the moment.) if len(qp) > get_max_query_parameters(discovery, overall_permissions): - raise ValidationError(f"Wrong number of fields: {len(qp)}") + raise ValidationError(f"Wrong number of fields: {len(qp)} {scope_str}") if not overall_permissions["counts"]: - raise ValidationError("Insufficient permissions to access counts") + raise ValidationError(f"Insufficient permissions to access counts {scope_str}") for field, value in qp.items(): if field not in queryable_fields: - raise ValidationError(f"Unsupported field used in query: {field}") + raise ValidationError(f"Unsupported field used in query: {field} {scope_str}") field_props = queryable_fields[field] options = await get_field_options(field, discovery, qf_permissions[field]) @@ -215,7 +223,7 @@ async def public_discovery_filter_queryset( and field_props["config"]["enum"] is None ) ): - raise ValidationError(f"Invalid value used in query: {value}") + raise ValidationError(f"Invalid value used in query: {value} {scope_str}") # recursion queryset = filter_queryset_field_value(queryset, field_props, value) @@ -242,16 +250,18 @@ class PublicListIndividuals(APIView): async def get(self, request, *_args, **_kwargs): try: - discovery = await get_request_discovery(request) - except DiscoveryConfigException as e: + discovery_scope = await get_request_discovery_scope(request) + except DiscoveryScopeException as e: authz_middleware.mark_authz_done(request) return Response(e.message, status=status.HTTP_404_NOT_FOUND) + discovery = await discovery_scope.get_discovery() + if not discovery: authz_middleware.mark_authz_done(request) return Response(dres.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND) - dt_permissions = await get_discovery_data_type_permissions(request) + dt_permissions = await get_discovery_data_type_permissions(request, discovery_scope) dt_perms_pheno = dt_permissions[dts.DATA_TYPE_PHENOPACKET] dt_perms_exp = dt_permissions[dts.DATA_TYPE_EXPERIMENT] @@ -264,8 +274,9 @@ async def get(self, request, *_args, **_kwargs): base_qs = Individual.objects.all() try: - filtered_qs = await public_discovery_filter_queryset(discovery, request, dt_permissions, base_qs) + filtered_qs = await public_discovery_filter_queryset(discovery_scope, request, dt_permissions, base_qs) except ValidationError as e: + logger.info(f"Public individuals endpoint recieved validation error: {e} ({repr(discovery_scope)})") authz_middleware.mark_authz_done(request) return Response(errors.bad_request_error( *(e.error_list if hasattr(e, "error_list") else e.error_dict.items()), @@ -277,8 +288,9 @@ async def get(self, request, *_args, **_kwargs): # 0 count means insufficient data if we only have counts permissions, but means a true 0 if we have full # data permissions. logger.info( - f"Public individuals endpoint recieved query params {request.query_params} which resulted in " - f"sub-threshold count: {ind_qct} <= {get_threshold(discovery, dt_perms_pheno)}") + f"Public individuals endpoint recieved {len(request.query_params)} query params which resulted in " + f"sub-threshold count: {ind_qct} <= {get_threshold(discovery, dt_perms_pheno)} " + f"({repr(discovery_scope)})") authz_middleware.mark_authz_done(request) return Response(dres.INSUFFICIENT_DATA_AVAILABLE) diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index 9d9f1df78..a818e3280 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -1,7 +1,6 @@ import asyncio from adrf.decorators import api_view -from bento_lib.auth.resources import RESOURCE_EVERYTHING from django.db.models import QuerySet from drf_spectacular.utils import extend_schema, inline_serializer from rest_framework import serializers @@ -14,7 +13,7 @@ from chord_metadata_service.authz.types import DataTypeDiscoveryPermissions from chord_metadata_service.chord.data_types import DATA_TYPE_PHENOPACKET, DATA_TYPE_EXPERIMENT from chord_metadata_service.discovery.types import DiscoveryConfig -from chord_metadata_service.discovery.utils import get_discovery +from chord_metadata_service.discovery.utils import ValidatedDiscoveryScope from chord_metadata_service.experiments import models as experiments_models from chord_metadata_service.experiments.summaries import dt_experiment_summary from chord_metadata_service.metadata.service_info import get_service_info @@ -76,13 +75,14 @@ async def overview(request: DrfRequest): # TODO: permissions based on project - this endpoint should be scrapped / completely rethought # use node level discovery config for private overview - discovery = await get_discovery() + discovery_scope = ValidatedDiscoveryScope(project=None, dataset=None) + discovery = await discovery_scope.get_discovery() phenopackets = pheno_models.Phenopacket.objects.all() experiments = experiments_models.Experiment.objects.all() dt_permissions = await get_data_type_query_permissions( - request, [DATA_TYPE_PHENOPACKET, DATA_TYPE_EXPERIMENT], RESOURCE_EVERYTHING + request, [DATA_TYPE_PHENOPACKET, DATA_TYPE_EXPERIMENT], discovery_scope.as_authz_resource() ) return await build_overview_response(phenopackets, experiments, discovery, dt_permissions) @@ -110,7 +110,8 @@ async def search_overview(request: DrfRequest): # TODO: this should be project / dataset-scoped and probably shouldn't even exist as-is # use node level discovery config for private search overview - discovery = await get_discovery() + discovery_scope = ValidatedDiscoveryScope(project=None, dataset=None) + discovery = await discovery_scope.get_discovery() individual_ids = request.GET.getlist("id") if request.method == "GET" else request.data.get("id", []) phenopackets = pheno_models.Phenopacket.objects.all().filter(subject_id__in=individual_ids) @@ -123,7 +124,7 @@ async def search_overview(request: DrfRequest): # TODO: resource should be tied to search dt_permissions = await get_data_type_query_permissions( - request, [DATA_TYPE_PHENOPACKET, DATA_TYPE_EXPERIMENT], RESOURCE_EVERYTHING + request, [DATA_TYPE_PHENOPACKET, DATA_TYPE_EXPERIMENT], discovery_scope.as_authz_resource() ) return await build_overview_response(phenopackets, experiments, discovery, dt_permissions)