From e189c9b163afe39baa24e10a6e100e28ea971bc6 Mon Sep 17 00:00:00 2001 From: zxenia Date: Thu, 29 Jul 2021 15:16:13 -0400 Subject: [PATCH 1/8] update docs --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index b8e131cfc..2b11d9e3b 100644 --- a/README.md +++ b/README.md @@ -282,4 +282,25 @@ export $(cut -d= -f1 /chord/data/metadata/.environment) DJANGO_SETTINGS_MODULE=chord_metadata_service.metadata.settings django-admin shell ``` +When running katsu within bentoV2: + +``` +docker exec -it bentov2-katsu sh # enter katsu container + +python manage.py shell # activate django shell +``` + From there, you can import models and query the database from the REPL. + +``` +from chord_metadata_service.patients.models import * +from chord_metadata_service.phenopackets.models import * +from chord_metadata_service.resources.models import * +from chord_metadata_service.experiments.models import * + +# e.g. +Individual.objects.all().count() +Phenopacket.objects.all().count() +Resource.objects.all().count() +Experiment.objects.all().count() +``` From d0d67514b2e9ac53705065ac81b328cb26abd858 Mon Sep 17 00:00:00 2001 From: zxenia Date: Thu, 29 Jul 2021 15:18:29 -0400 Subject: [PATCH 2/8] update markup in docs --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2b11d9e3b..17bfd5464 100644 --- a/README.md +++ b/README.md @@ -284,10 +284,14 @@ DJANGO_SETTINGS_MODULE=chord_metadata_service.metadata.settings django-admin she When running katsu within bentoV2: +- enter katsu container +``` +docker exec -it bentov2-katsu sh ``` -docker exec -it bentov2-katsu sh # enter katsu container -python manage.py shell # activate django shell +- activate django shell +``` +python manage.py shell ``` From there, you can import models and query the database from the REPL. From 56ead90a452d49976f7473053a60d66ab227c333 Mon Sep 17 00:00:00 2001 From: zxenia Date: Thu, 29 Jul 2021 15:23:45 -0400 Subject: [PATCH 3/8] update docs 3 --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 17bfd5464..d512b5523 100644 --- a/README.md +++ b/README.md @@ -270,6 +270,8 @@ Builds an ElasticSearch index for Phenopackets in the database. ### Accessing the Django Shell from inside a Bento Container +#### When running katsu with `chord_singularity` + Assuming `chord_singularity` is being used, the following commands can be used to bootstrap your way to a `katsu` environment within a Bento container: @@ -282,14 +284,14 @@ export $(cut -d= -f1 /chord/data/metadata/.environment) DJANGO_SETTINGS_MODULE=chord_metadata_service.metadata.settings django-admin shell ``` -When running katsu within bentoV2: +#### When running katsu with `bentoV2`: -- enter katsu container +- Enter katsu container ``` docker exec -it bentov2-katsu sh ``` -- activate django shell +- Activate django shell ``` python manage.py shell ``` From 8565683ac2359aab4d8706e843933268b6591ce6 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Thu, 29 Jul 2021 17:41:52 -0400 Subject: [PATCH 4/8] dict-ify search dependencies (serializers/query fns) prefetch experiment data add mcode search basic support --- chord_metadata_service/chord/views_search.py | 40 +++++++++++++++++-- .../experiments/api_views.py | 12 +++++- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/chord_metadata_service/chord/views_search.py b/chord_metadata_service/chord/views_search.py index ed48d78f5..a35b04cb6 100644 --- a/chord_metadata_service/chord/views_search.py +++ b/chord_metadata_service/chord/views_search.py @@ -15,12 +15,18 @@ from rest_framework.response import Response from typing import Any, Callable, Dict +from chord_metadata_service.experiments.api_views import EXPERIMENT_PREFETCH from chord_metadata_service.experiments.models import Experiment from chord_metadata_service.experiments.serializers import ExperimentSerializer + from chord_metadata_service.mcode.models import MCodePacket +from chord_metadata_service.mcode.serializers import MCodePacketSerializer + from chord_metadata_service.metadata.elastic import es from chord_metadata_service.metadata.settings import DEBUG, CHORD_SERVICE_ARTIFACT, CHORD_SERVICE_ID + from chord_metadata_service.patients.models import Individual + from chord_metadata_service.phenopackets.api_views import PHENOPACKET_PREFETCH from chord_metadata_service.phenopackets.models import Phenopacket from chord_metadata_service.phenopackets.serializers import PhenopacketSerializer @@ -264,7 +270,21 @@ def data_type_results(query, params, key="id"): def experiment_query_results(query, params): # TODO: possibly a quite inefficient way of doing things... # TODO: Prefetch related biosample or no? - return Experiment.objects.filter(id__in=data_type_results(query, params, "id")) + return Experiment.objects.filter( + id__in=data_type_results(query, params, "id") + ).select_related( + 'instrument', + ).prefetch_related( + *EXPERIMENT_PREFETCH + ) + + +def mcodepacket_query_results(query, params): + # TODO: possibly a quite inefficient way of doing things... + # TODO: select_related / prefetch_related for instant performance boost! + return MCodePacket.objects.filter( + id__in=data_type_results(query, params, "id") + ) def phenopacket_query_results(query, params): @@ -284,6 +304,19 @@ def phenopacket_query_results(query, params): ) +QUERY_RESULTS_FN: Dict[str, Callable] = { + DATA_TYPE_EXPERIMENT: experiment_query_results, + DATA_TYPE_MCODEPACKET: mcodepacket_query_results, + DATA_TYPE_PHENOPACKET: phenopacket_query_results, +} + +QUERY_RESULT_SERIALIZERS = { + DATA_TYPE_EXPERIMENT: ExperimentSerializer, + DATA_TYPE_MCODEPACKET: MCodePacketSerializer, + DATA_TYPE_PHENOPACKET: PhenopacketSerializer, +} + + def search(request, internal_data=False): data_type = request.data.get("data_type") @@ -316,9 +349,8 @@ def search(request, internal_data=False): return Response(build_search_response([{"id": t.identifier, "data_type": DATA_TYPE_PHENOPACKET} for t in tables], start)) - # TODO: Dict-ify - serializer_class = PhenopacketSerializer if data_type == DATA_TYPE_PHENOPACKET else ExperimentSerializer - query_function = phenopacket_query_results if data_type == DATA_TYPE_PHENOPACKET else experiment_query_results + serializer_class = QUERY_RESULT_SERIALIZERS[data_type] + query_function = QUERY_RESULTS_FN[data_type] return Response(build_search_response({ table_id: { diff --git a/chord_metadata_service/experiments/api_views.py b/chord_metadata_service/experiments/api_views.py index cdbfe3fbc..2d383c2a8 100644 --- a/chord_metadata_service/experiments/api_views.py +++ b/chord_metadata_service/experiments/api_views.py @@ -13,6 +13,16 @@ from .filters import ExperimentFilter from chord_metadata_service.restapi.pagination import LargeResultsSetPagination +__all__ = [ + "EXPERIMENT_PREFETCH", + "ExperimentViewSet", + "get_experiment_schema", +] + +EXPERIMENT_PREFETCH = ( + "experiment_results", +) + class ExperimentViewSet(viewsets.ModelViewSet): """ @@ -23,7 +33,7 @@ class ExperimentViewSet(viewsets.ModelViewSet): Create a new experiment """ - queryset = Experiment.objects.all().order_by("id") + queryset = Experiment.objects.all().prefetch_related(*EXPERIMENT_PREFETCH).order_by("id") serializer_class = ExperimentSerializer pagination_class = LargeResultsSetPagination renderer_classes = tuple(api_settings.DEFAULT_RENDERER_CLASSES) From 9820bfabe2e5dbb3c198b8fc3052b03d4c3949fa Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Thu, 29 Jul 2021 17:42:10 -0400 Subject: [PATCH 5/8] remove some hard-coded phenopackets search stuff --- chord_metadata_service/chord/views_search.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chord_metadata_service/chord/views_search.py b/chord_metadata_service/chord/views_search.py index a35b04cb6..830ca29ef 100644 --- a/chord_metadata_service/chord/views_search.py +++ b/chord_metadata_service/chord/views_search.py @@ -346,7 +346,7 @@ def search(request, internal_data=False): params=params, key="table_id" )) # TODO: Maybe can avoid hitting DB here - return Response(build_search_response([{"id": t.identifier, "data_type": DATA_TYPE_PHENOPACKET} + return Response(build_search_response([{"id": t.identifier, "data_type": data_type} for t in tables], start)) serializer_class = QUERY_RESULT_SERIALIZERS[data_type] @@ -491,12 +491,12 @@ def chord_table_search(request, table_id, internal=False): compiled_query, params = postgres.search_query_to_psycopg2_sql(request.data["query"], DATA_TYPES[table.data_type]["schema"]) except (SyntaxError, TypeError, ValueError) as e: - print("[CHORD Metadata] Error encountered compiling query {}:\n {}".format(request.data["query"], str(e))) + print(f"[CHORD Metadata] Error encountered compiling query {request.data['query']}:\n {str(e)}") return Response(errors.bad_request_error(f"Error compiling query (message: {str(e)})"), status=400) debug_log(f"Finished compiling query in {datetime.now() - start}") - query_results = phenopacket_query_results( # TODO: Generic + query_results = QUERY_RESULTS_FN[table.data_type]( query=sql.SQL("{} AND table_id = {}").format(compiled_query, sql.Placeholder()), params=params + (table.identifier,) ) @@ -504,7 +504,7 @@ def chord_table_search(request, table_id, internal=False): debug_log(f"Finished running query in {datetime.now() - start}") if internal: - serialized_data = PhenopacketSerializer(query_results, many=True).data + serialized_data = QUERY_RESULT_SERIALIZERS[table.data_type](query_results, many=True).data debug_log(f"Finished running query and serializing in {datetime.now() - start}") return Response(build_search_response(serialized_data, start)) From 334ac68b96a0023b1a7360c31c57d56fe82ed529 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Fri, 30 Jul 2021 09:45:47 -0400 Subject: [PATCH 6/8] add select related lists for phenopackets/experiments --- chord_metadata_service/chord/views_search.py | 27 +++++++------------ .../experiments/api_views.py | 10 ++++++- .../phenopackets/api_views.py | 6 ++--- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/chord_metadata_service/chord/views_search.py b/chord_metadata_service/chord/views_search.py index 830ca29ef..22611c050 100644 --- a/chord_metadata_service/chord/views_search.py +++ b/chord_metadata_service/chord/views_search.py @@ -15,7 +15,7 @@ from rest_framework.response import Response from typing import Any, Callable, Dict -from chord_metadata_service.experiments.api_views import EXPERIMENT_PREFETCH +from chord_metadata_service.experiments.api_views import EXPERIMENT_SELECT_REL, EXPERIMENT_PREFETCH from chord_metadata_service.experiments.models import Experiment from chord_metadata_service.experiments.serializers import ExperimentSerializer @@ -27,7 +27,7 @@ from chord_metadata_service.patients.models import Individual -from chord_metadata_service.phenopackets.api_views import PHENOPACKET_PREFETCH +from chord_metadata_service.phenopackets.api_views import PHENOPACKET_SELECT_REL, PHENOPACKET_PREFETCH from chord_metadata_service.phenopackets.models import Phenopacket from chord_metadata_service.phenopackets.serializers import PhenopacketSerializer @@ -270,13 +270,10 @@ def data_type_results(query, params, key="id"): def experiment_query_results(query, params): # TODO: possibly a quite inefficient way of doing things... # TODO: Prefetch related biosample or no? - return Experiment.objects.filter( - id__in=data_type_results(query, params, "id") - ).select_related( - 'instrument', - ).prefetch_related( - *EXPERIMENT_PREFETCH - ) + return Experiment.objects \ + .filter(id__in=data_type_results(query, params, "id")) \ + .select_related(*EXPERIMENT_SELECT_REL) \ + .prefetch_related(*EXPERIMENT_PREFETCH) def mcodepacket_query_results(query, params): @@ -294,14 +291,10 @@ def phenopacket_query_results(query, params): # to the DB. prefetch_related works on M2M relationships and makes # sure that, for instance, when querying diseases, we won't make multiple call # for the same set of data - return Phenopacket.objects.filter( - id__in=data_type_results(query, params, "id") - ).select_related( - 'subject', - 'meta_data' - ).prefetch_related( - *PHENOPACKET_PREFETCH - ) + return Phenopacket.objects \ + .filter(id__in=data_type_results(query, params, "id")) \ + .select_related(*PHENOPACKET_SELECT_REL) \ + .prefetch_related(*PHENOPACKET_PREFETCH) QUERY_RESULTS_FN: Dict[str, Callable] = { diff --git a/chord_metadata_service/experiments/api_views.py b/chord_metadata_service/experiments/api_views.py index 2d383c2a8..d99534d27 100644 --- a/chord_metadata_service/experiments/api_views.py +++ b/chord_metadata_service/experiments/api_views.py @@ -14,11 +14,16 @@ from chord_metadata_service.restapi.pagination import LargeResultsSetPagination __all__ = [ + "EXPERIMENT_SELECT_REL", "EXPERIMENT_PREFETCH", "ExperimentViewSet", "get_experiment_schema", ] +EXPERIMENT_SELECT_REL = ( + "instrument", +) + EXPERIMENT_PREFETCH = ( "experiment_results", ) @@ -33,7 +38,10 @@ class ExperimentViewSet(viewsets.ModelViewSet): Create a new experiment """ - queryset = Experiment.objects.all().prefetch_related(*EXPERIMENT_PREFETCH).order_by("id") + queryset = Experiment.objects.all()\ + .select_related(*EXPERIMENT_SELECT_REL)\ + .prefetch_related(*EXPERIMENT_PREFETCH)\ + .order_by("id") serializer_class = ExperimentSerializer pagination_class = LargeResultsSetPagination renderer_classes = tuple(api_settings.DEFAULT_RENDERER_CLASSES) diff --git a/chord_metadata_service/phenopackets/api_views.py b/chord_metadata_service/phenopackets/api_views.py index f78d43a5e..e4dc7f96b 100644 --- a/chord_metadata_service/phenopackets/api_views.py +++ b/chord_metadata_service/phenopackets/api_views.py @@ -158,9 +158,9 @@ class BiosampleViewSet(ExtendedPhenopacketsModelViewSet): post: Create a new biosample """ - queryset = m.Biosample.objects.all()\ - .prefetch_related(*BIOSAMPLE_PREFETCH)\ - .select_related(*BIOSAMPLE_SELECT_REL)\ + queryset = m.Biosample.objects.all() \ + .prefetch_related(*BIOSAMPLE_PREFETCH) \ + .select_related(*BIOSAMPLE_SELECT_REL) \ .order_by("id") serializer_class = s.BiosampleSerializer filter_backends = [DjangoFilterBackend] From c5d7699017ed80dd4e83cdb94ba3f22930e0f2ec Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Fri, 30 Jul 2021 09:46:41 -0400 Subject: [PATCH 7/8] formatting --- chord_metadata_service/experiments/api_views.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chord_metadata_service/experiments/api_views.py b/chord_metadata_service/experiments/api_views.py index d99534d27..77dbe5a13 100644 --- a/chord_metadata_service/experiments/api_views.py +++ b/chord_metadata_service/experiments/api_views.py @@ -38,9 +38,9 @@ class ExperimentViewSet(viewsets.ModelViewSet): Create a new experiment """ - queryset = Experiment.objects.all()\ - .select_related(*EXPERIMENT_SELECT_REL)\ - .prefetch_related(*EXPERIMENT_PREFETCH)\ + queryset = Experiment.objects.all() \ + .select_related(*EXPERIMENT_SELECT_REL) \ + .prefetch_related(*EXPERIMENT_PREFETCH) \ .order_by("id") serializer_class = ExperimentSerializer pagination_class = LargeResultsSetPagination From 669465cbe0614b25572dfff6c30d72564f016524 Mon Sep 17 00:00:00 2001 From: zxenia Date: Fri, 30 Jul 2021 10:12:45 -0400 Subject: [PATCH 8/8] update version to 2.2.3 --- chord_metadata_service/package.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/package.cfg b/chord_metadata_service/package.cfg index 1a2fa0e6f..2a639e26d 100644 --- a/chord_metadata_service/package.cfg +++ b/chord_metadata_service/package.cfg @@ -1,4 +1,4 @@ [package] name = katsu -version = 2.2.2 +version = 2.2.3 authors = Ksenia Zaytseva, David Lougheed, Simon Chénard, Romain Grégoire