Skip to content

Commit

Permalink
Merge pull request #261 from CanDIG/daisieh/granularity
Browse files Browse the repository at this point in the history
DIG-1143: SampleDRSObjects
  • Loading branch information
daisieh authored May 29, 2023
2 parents 10a3e40 + da5fe78 commit c92359e
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 459 deletions.
1 change: 1 addition & 0 deletions data/files.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ CREATE TABLE content_object (
id INTEGER NOT NULL,
drs_object_id INTEGER,
name VARCHAR,
contents_id VARCHAR,
drs_uri VARCHAR,
contents VARCHAR,
PRIMARY KEY (id),
Expand Down
19 changes: 11 additions & 8 deletions htsget_server/beacon_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def get_search(
},
"query": {
"requestParameters": {}
},
"meta": {
"requestedGranularity": "record"
}
}
if alternate_bases is not None:
Expand All @@ -104,7 +107,6 @@ def get_search(
if variant_min_length is not None:
req['query']['requestParameters']['variant_min_length'] = variant_min_length

req['requestedGranularity'] = 'record'
try:
result = search(req)
return result, 200
Expand All @@ -124,8 +126,8 @@ def post_search():
# $ref: '#/components/schemas/Granularity'
# testMode:
# $ref: '#/components/schemas/TestMode'
if 'requestedGranularity' not in req:
req['requestedGranularity'] = 'record'
if 'requestedGranularity' not in req['meta']:
req['meta']['requestedGranularity'] = 'record'

params = list(req['query']['requestParameters'].keys())
for param in params:
Expand Down Expand Up @@ -157,9 +159,9 @@ def search(raw_req):
}
if 'pagination' in raw_req:
meta['receivedRequestSummary']['pagination'] = raw_req['pagination']
if 'requestedGranularity' in raw_req:
meta['receivedRequestSummary']['requestedGranularity'] = raw_req['requestedGranularity']
meta['returnedGranularity'] = raw_req['requestedGranularity']
if 'requestedGranularity' in raw_req['meta']:
meta['receivedRequestSummary']['requestedGranularity'] = raw_req['meta']['requestedGranularity']
meta['returnedGranularity'] = raw_req['meta']['requestedGranularity']
## not using includeResultsetResponses for now:
# if 'includeResultsetResponses' in raw_req:
# meta['receivedRequestSummary']['includeResultsetResponses'] = raw_req['includeResultsetResponses']
Expand Down Expand Up @@ -278,11 +280,12 @@ def search(raw_req):
if handover is not None:
handover['handoverType'] = {'id': 'CUSTOM', 'label': 'HTSGET'}
response['beaconHandovers'].append(handover)
if len(response['beaconHandovers']) > 0:
if len(response['beaconHandovers']) > 0 and meta['returnedGranularity'] == 'record':
response['response'] = resultset
else:
meta['returnedGranularity'] = 'count'
response.pop('beaconHandovers')
if meta['returnedGranularity'] == 'boolean':
response['responseSummary'].pop('numTotalResults')
else:
response = {
'error': {
Expand Down
9 changes: 6 additions & 3 deletions htsget_server/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import re
from datetime import datetime
from config import DB_PATH, BUCKET_SIZE
from config import DB_PATH, BUCKET_SIZE, HTSGET_URL


engine = create_engine(DB_PATH, echo=False)
Expand Down Expand Up @@ -347,11 +347,13 @@ class ContentsObject(ObjectDBBase):
drs_object_id = Column(Integer, ForeignKey('drs_object.id'))
drs_object = relationship("DrsObject", back_populates="contents")
name = Column(String, default='') # like a filename
contents_id = Column(String)
drs_uri = Column(String, default='[]') # JSON array of strings of DRS id URIs
contents = Column(String, default='[]') # JSON array of ContentsObject.ids
def __repr__(self):
result = {
'name': self.name,
'id': self.contents_id,
'drs_uri': json.loads(self.drs_uri)
}
if len(json.loads(self.contents)) > 0:
Expand Down Expand Up @@ -399,8 +401,7 @@ def create_drs_object(obj):
new_object.name = obj['id']

# optional string fields
if 'self_uri' in obj:
new_object.self_uri = obj['self_uri']
new_object.self_uri = f'{HTSGET_URL.replace("http://", "drs://").replace("https://", "drs://")}/{new_object.name}'
if 'created_time' in obj:
new_object.created_time = obj['created_time']
if 'updated_time' in obj:
Expand Down Expand Up @@ -456,6 +457,8 @@ def create_drs_object(obj):
new_contents.drs_uri = json.dumps(contents['drs_uri'])
if 'contents' in contents:
new_contents.contents = json.dumps(contents['contents'])
if 'id' in contents:
new_contents.contents_id = contents['id']
session.add(new_contents)
session.add(new_object)
session.commit()
Expand Down
117 changes: 106 additions & 11 deletions htsget_server/drs_openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ paths:
type: array
items:
anyOf:
- $ref: '#/components/schemas/SampleDrsObject'
- $ref: '#/components/schemas/GenomicDrsObject'
- $ref: '#/components/schemas/GenomicDataDrsObject'
- $ref: '#/components/schemas/GenomicIndexDrsObject'
Expand Down Expand Up @@ -242,8 +243,10 @@ components:
'application/json':
schema:
anyOf:
- $ref: "#/components/schemas/SampleDrsObject"
- $ref: "#/components/schemas/GenomicDrsObject"
- $ref: "#/components/schemas/GenomicDataDrsObject"
- $ref: "#/components/schemas/GenomicIndexDrsObject"
DatasetRequest:
content:
'application/json':
Expand All @@ -262,6 +265,88 @@ components:
items:
type: string
description: a DRS object's self_uri
# SampleDrsObject: schema for a DRS object that represents a clinical sample.
# Only `id` and `contents` are required; every `contents` item must match
# GenomicContentsObject and at least one item must be present (minItems: 1).
SampleDrsObject:
type: object
description: A DrsObject that describes the clinical sample used for genomic analysis.
required:
- id
- contents
# The entries below are commented out of the required list, so these fields are optional:
# - self_uri
# - size
# - created_time
# - checksums
properties:
id:
type: string
description: The identifier for the sample, as defined in sample_registration in the MOHCCN data model.
self_uri:
type: string
description: |-
A drs:// hostname-based URI, as defined in the DRS documentation, that tells clients how to access this object.
The intent of this field is to make DRS objects self-contained, and therefore easier for clients to store and pass around. For example, if you arrive at this DRS JSON by resolving a compact identifier-based DRS URI, the `self_uri` presents you with a hostname and properly encoded DRS ID for use in subsequent `access` endpoint calls.
example:
drs://drs.example.org/314159
size:
type: integer
format: int64
description: The cumulative size, in bytes, of items in the `contents` field.
created_time:
type: string
format: date-time
description: |-
Timestamp of content creation in RFC3339.
(This is the creation time of the underlying content, not of the JSON object.)
updated_time:
type: string
format: date-time
description: >-
Timestamp of content update in RFC3339, identical to `created_time` in systems
that do not support updates.
(This is the update time of the underlying content, not of the JSON object.)
version:
type: string
description: >-
A string representing a version.
(Some systems may use checksum, a RFC3339 timestamp, or an incrementing version number.)
checksums:
type: array
# minItems: 1
items:
$ref: '#/components/schemas/Checksum'
description: >-
The checksum of the `DrsObject`. At least one checksum must be provided.
For blobs, the checksum is computed over the bytes in the blob.
For bundles, the checksum is computed over a sorted concatenation of the
checksums of its top-level contained objects (not recursive, names not included).
The list of checksums is sorted alphabetically (hex-code) before concatenation
and a further checksum is performed on the concatenated checksum value.
For example, if a bundle contains blobs with the following checksums:
md5(blob1) = 72794b6d
md5(blob2) = 5e089d29
Then the checksum of the bundle is:
md5( concat( sort( md5(blob1), md5(blob2) ) ) )
= md5( concat( sort( 72794b6d, 5e089d29 ) ) )
= md5( concat( 5e089d29, 72794b6d ) )
= md5( 5e089d2972794b6d )
= f7a29a04
contents:
type: array
description: The specific genomic contents objects that were generated from this sample.
minItems: 1
items:
$ref: '#/components/schemas/GenomicContentsObject'
description:
type: string
description: A human readable description of the `DrsObject`.
aliases:
type: array
items:
type: string
description: >-
A list of strings that can be used to find other metadata
about this `DrsObject` from external metadata sources. These
aliases can be used to represent secondary
accession numbers or external GUIDs.
GenomicDrsObject:
type: object
description: A DrsObject that describes a bundled genomic data entity. It usually will consist of a genomic data file, e.g. a variant or read file, and its associated index file.
Expand Down Expand Up @@ -594,6 +679,24 @@ components:
anyOf:
- $ref: '#/components/schemas/LocalFileAccessMethod'
- $ref: '#/components/schemas/S3AccessMethod'
# GenomicContentsObject: one entry in a SampleDrsObject's `contents` array.
# `name` and `id` are required; `drs_uri` is an optional array of strings.
GenomicContentsObject:
type: object
required:
- name
- id
properties:
name:
type: string
description: The identifier of the genomic object
id:
type: string
# `genomic` is the only value the enum accepts for `id`.
enum:
- genomic
drs_uri:
type: array
description: The DRS uri(s) to the GenomicDrsObject
items:
type: string
GenomicDataContentsObject:
type: object
required:
Expand All @@ -610,11 +713,7 @@ components:
- read
drs_uri:
type: array
description: >-
A list of full DRS identifier URI paths
that may be used to obtain the object.
These URIs may be external to this DRS instance.
example: drs://drs.example.org/314159
description: The DRS uri(s) to the GenomicDataDrsObject
items:
type: string
GenomicIndexContentsObject:
Expand All @@ -632,10 +731,6 @@ components:
- index
drs_uri:
type: array
description: >-
A list of full DRS identifier URI paths
that may be used to obtain the object.
These URIs may be external to this DRS instance.
example: drs://drs.example.org/314159
description: The DRS uri(s) to the GenomicIndexDrsObject
items:
type: string
type: string
Loading

0 comments on commit c92359e

Please sign in to comment.