Skip to content

Commit

Permalink
Merge pull request #253 from bento-platform/develop
Browse files Browse the repository at this point in the history
Version 2.2.3
  • Loading branch information
zxenia authored Jul 30, 2021
2 parents 65bfc82 + 99c15e4 commit 3aa7e9d
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 22 deletions.
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ Builds an ElasticSearch index for Phenopackets in the database.

### Accessing the Django Shell from inside a Bento Container

#### When running katsu with `chord_singularity`

Assuming `chord_singularity` is being used, the following commands can be used
to bootstrap your way to a `katsu` environment within a Bento
container:
Expand All @@ -282,4 +284,29 @@ export $(cut -d= -f1 /chord/data/metadata/.environment)
DJANGO_SETTINGS_MODULE=chord_metadata_service.metadata.settings django-admin shell
```

#### When running katsu with `bentoV2`

- Enter the katsu container:
```
docker exec -it bentov2-katsu sh
```

- Start the Django shell:
```
python manage.py shell
```

From there, you can import models and query the database from the REPL.

```
from chord_metadata_service.patients.models import *
from chord_metadata_service.phenopackets.models import *
from chord_metadata_service.resources.models import *
from chord_metadata_service.experiments.models import *
# e.g.
Individual.objects.all().count()
Phenopacket.objects.all().count()
Resource.objects.all().count()
Experiment.objects.all().count()
```
59 changes: 42 additions & 17 deletions chord_metadata_service/chord/views_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,19 @@
from rest_framework.response import Response
from typing import Any, Callable, Dict

from chord_metadata_service.experiments.api_views import EXPERIMENT_SELECT_REL, EXPERIMENT_PREFETCH
from chord_metadata_service.experiments.models import Experiment
from chord_metadata_service.experiments.serializers import ExperimentSerializer

from chord_metadata_service.mcode.models import MCodePacket
from chord_metadata_service.mcode.serializers import MCodePacketSerializer

from chord_metadata_service.metadata.elastic import es
from chord_metadata_service.metadata.settings import DEBUG, CHORD_SERVICE_ARTIFACT, CHORD_SERVICE_ID

from chord_metadata_service.patients.models import Individual
from chord_metadata_service.phenopackets.api_views import PHENOPACKET_PREFETCH

from chord_metadata_service.phenopackets.api_views import PHENOPACKET_SELECT_REL, PHENOPACKET_PREFETCH
from chord_metadata_service.phenopackets.models import Phenopacket
from chord_metadata_service.phenopackets.serializers import PhenopacketSerializer

Expand Down Expand Up @@ -264,7 +270,18 @@ def data_type_results(query, params, key="id"):
def experiment_query_results(query, params):
# TODO: possibly a quite inefficient way of doing things...
# TODO: Prefetch related biosample or no?
return Experiment.objects.filter(id__in=data_type_results(query, params, "id"))
return Experiment.objects \
.filter(id__in=data_type_results(query, params, "id")) \
.select_related(*EXPERIMENT_SELECT_REL) \
.prefetch_related(*EXPERIMENT_PREFETCH)


def mcodepacket_query_results(query, params):
    """Return the MCodePacket queryset matching a compiled search query.

    The query and its parameters are handed to ``data_type_results`` with
    ``"id"`` as the key, and the MCodePacket table is then filtered down to
    the rows whose ``id`` is contained in that result.
    """
    # TODO: possibly a quite inefficient way of doing things...
    # TODO: select_related / prefetch_related for instant performance boost!
    matching_ids = data_type_results(query, params, "id")
    return MCodePacket.objects.filter(id__in=matching_ids)


def phenopacket_query_results(query, params):
Expand All @@ -274,14 +291,23 @@ def phenopacket_query_results(query, params):
# to the DB. prefetch_related works on M2M relationships and makes
# sure that, for instance, when querying diseases, we won't make multiple call
# for the same set of data
return Phenopacket.objects.filter(
id__in=data_type_results(query, params, "id")
).select_related(
'subject',
'meta_data'
).prefetch_related(
*PHENOPACKET_PREFETCH
)
return Phenopacket.objects \
.filter(id__in=data_type_results(query, params, "id")) \
.select_related(*PHENOPACKET_SELECT_REL) \
.prefetch_related(*PHENOPACKET_PREFETCH)


# Dispatch table: maps each data type constant to the function that takes a
# query and its params and returns the filtered queryset for that data type.
# Looked up by data type in the search views instead of an if/else chain.
QUERY_RESULTS_FN: Dict[str, Callable] = {
DATA_TYPE_EXPERIMENT: experiment_query_results,
DATA_TYPE_MCODEPACKET: mcodepacket_query_results,
DATA_TYPE_PHENOPACKET: phenopacket_query_results,
}

# Dispatch table: maps each data type constant to the serializer class used to
# serialize query results of that data type. Keys mirror QUERY_RESULTS_FN.
QUERY_RESULT_SERIALIZERS = {
DATA_TYPE_EXPERIMENT: ExperimentSerializer,
DATA_TYPE_MCODEPACKET: MCodePacketSerializer,
DATA_TYPE_PHENOPACKET: PhenopacketSerializer,
}


def search(request, internal_data=False):
Expand Down Expand Up @@ -313,12 +339,11 @@ def search(request, internal_data=False):
params=params,
key="table_id"
)) # TODO: Maybe can avoid hitting DB here
return Response(build_search_response([{"id": t.identifier, "data_type": DATA_TYPE_PHENOPACKET}
return Response(build_search_response([{"id": t.identifier, "data_type": data_type}
for t in tables], start))

# TODO: Dict-ify
serializer_class = PhenopacketSerializer if data_type == DATA_TYPE_PHENOPACKET else ExperimentSerializer
query_function = phenopacket_query_results if data_type == DATA_TYPE_PHENOPACKET else experiment_query_results
serializer_class = QUERY_RESULT_SERIALIZERS[data_type]
query_function = QUERY_RESULTS_FN[data_type]

return Response(build_search_response({
table_id: {
Expand Down Expand Up @@ -459,20 +484,20 @@ def chord_table_search(request, table_id, internal=False):
compiled_query, params = postgres.search_query_to_psycopg2_sql(request.data["query"],
DATA_TYPES[table.data_type]["schema"])
except (SyntaxError, TypeError, ValueError) as e:
print("[CHORD Metadata] Error encountered compiling query {}:\n {}".format(request.data["query"], str(e)))
print(f"[CHORD Metadata] Error encountered compiling query {request.data['query']}:\n {str(e)}")
return Response(errors.bad_request_error(f"Error compiling query (message: {str(e)})"), status=400)

debug_log(f"Finished compiling query in {datetime.now() - start}")

query_results = phenopacket_query_results( # TODO: Generic
query_results = QUERY_RESULTS_FN[table.data_type](
query=sql.SQL("{} AND table_id = {}").format(compiled_query, sql.Placeholder()),
params=params + (table.identifier,)
)

debug_log(f"Finished running query in {datetime.now() - start}")

if internal:
serialized_data = PhenopacketSerializer(query_results, many=True).data
serialized_data = QUERY_RESULT_SERIALIZERS[table.data_type](query_results, many=True).data
debug_log(f"Finished running query and serializing in {datetime.now() - start}")

return Response(build_search_response(serialized_data, start))
Expand Down
20 changes: 19 additions & 1 deletion chord_metadata_service/experiments/api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,21 @@
from .filters import ExperimentFilter
from chord_metadata_service.restapi.pagination import LargeResultsSetPagination

# Names exported by this module.
__all__ = [
"EXPERIMENT_SELECT_REL",
"EXPERIMENT_PREFETCH",
"ExperimentViewSet",
"get_experiment_schema",
]

# Relation names unpacked into select_related() when building Experiment
# querysets.
EXPERIMENT_SELECT_REL = (
"instrument",
)

# Relation names unpacked into prefetch_related() when building Experiment
# querysets.
EXPERIMENT_PREFETCH = (
"experiment_results",
)


class ExperimentViewSet(viewsets.ModelViewSet):
"""
Expand All @@ -23,7 +38,10 @@ class ExperimentViewSet(viewsets.ModelViewSet):
Create a new experiment
"""

queryset = Experiment.objects.all().order_by("id")
queryset = Experiment.objects.all() \
.select_related(*EXPERIMENT_SELECT_REL) \
.prefetch_related(*EXPERIMENT_PREFETCH) \
.order_by("id")
serializer_class = ExperimentSerializer
pagination_class = LargeResultsSetPagination
renderer_classes = tuple(api_settings.DEFAULT_RENDERER_CLASSES)
Expand Down
2 changes: 1 addition & 1 deletion chord_metadata_service/package.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[package]
name = katsu
version = 2.2.2
version = 2.2.3
authors = Ksenia Zaytseva, David Lougheed, Simon Chénard, Romain Grégoire
6 changes: 3 additions & 3 deletions chord_metadata_service/phenopackets/api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,9 @@ class BiosampleViewSet(ExtendedPhenopacketsModelViewSet):
post:
Create a new biosample
"""
queryset = m.Biosample.objects.all()\
.prefetch_related(*BIOSAMPLE_PREFETCH)\
.select_related(*BIOSAMPLE_SELECT_REL)\
queryset = m.Biosample.objects.all() \
.prefetch_related(*BIOSAMPLE_PREFETCH) \
.select_related(*BIOSAMPLE_SELECT_REL) \
.order_by("id")
serializer_class = s.BiosampleSerializer
filter_backends = [DjangoFilterBackend]
Expand Down

0 comments on commit 3aa7e9d

Please sign in to comment.