diff --git a/data/files.sql b/data/files.sql index 028b0c36..e5cac833 100644 --- a/data/files.sql +++ b/data/files.sql @@ -29,6 +29,7 @@ CREATE TABLE content_object ( id INTEGER NOT NULL, drs_object_id INTEGER, name VARCHAR, + contents_id VARCHAR, drs_uri VARCHAR, contents VARCHAR, PRIMARY KEY (id), diff --git a/htsget_server/beacon_operations.py b/htsget_server/beacon_operations.py index 3bf03745..95908e23 100644 --- a/htsget_server/beacon_operations.py +++ b/htsget_server/beacon_operations.py @@ -81,6 +81,9 @@ def get_search( }, "query": { "requestParameters": {} + }, + "meta": { + "requestedGranularity": "record" } } if alternate_bases is not None: @@ -104,7 +107,6 @@ def get_search( if variant_min_length is not None: req['query']['requestParameters']['variant_min_length'] = variant_min_length - req['requestedGranularity'] = 'record' try: result = search(req) return result, 200 @@ -124,8 +126,8 @@ def post_search(): # $ref: '#/components/schemas/Granularity' # testMode: # $ref: '#/components/schemas/TestMode' - if 'requestedGranularity' not in req: - req['requestedGranularity'] = 'record' + if 'requestedGranularity' not in req['meta']: + req['meta']['requestedGranularity'] = 'record' params = list(req['query']['requestParameters'].keys()) for param in params: @@ -157,9 +159,9 @@ def search(raw_req): } if 'pagination' in raw_req: meta['receivedRequestSummary']['pagination'] = raw_req['pagination'] - if 'requestedGranularity' in raw_req: - meta['receivedRequestSummary']['requestedGranularity'] = raw_req['requestedGranularity'] - meta['returnedGranularity'] = raw_req['requestedGranularity'] + if 'requestedGranularity' in raw_req['meta']: + meta['receivedRequestSummary']['requestedGranularity'] = raw_req['meta']['requestedGranularity'] + meta['returnedGranularity'] = raw_req['meta']['requestedGranularity'] ## not using includeResultsetResponses for now: # if 'includeResultsetResponses' in raw_req: # meta['receivedRequestSummary']['includeResultsetResponses'] = raw_req['includeResultsetResponses'] @@ -278,11 +280,12 @@ def search(raw_req): if handover is not None: handover['handoverType'] = {'id': 'CUSTOM', 'label': 'HTSGET'} response['beaconHandovers'].append(handover) - if len(response['beaconHandovers']) > 0: + if len(response['beaconHandovers']) > 0 and meta['returnedGranularity'] == 'record': response['response'] = resultset else: - meta['returnedGranularity'] = 'count' response.pop('beaconHandovers') + if meta['returnedGranularity'] == 'boolean': + response['responseSummary'].pop('numTotalResults') else: response = { 'error': { diff --git a/htsget_server/database.py b/htsget_server/database.py index b32553e0..5616e812 100644 --- a/htsget_server/database.py +++ b/htsget_server/database.py @@ -3,7 +3,7 @@ import json import re from datetime import datetime -from config import DB_PATH, BUCKET_SIZE +from config import DB_PATH, BUCKET_SIZE, HTSGET_URL engine = create_engine(DB_PATH, echo=False) @@ -347,11 +347,13 @@ class ContentsObject(ObjectDBBase): drs_object_id = Column(Integer, ForeignKey('drs_object.id')) drs_object = relationship("DrsObject", back_populates="contents") name = Column(String, default='') # like a filename + contents_id = Column(String) drs_uri = Column(String, default='[]') # JSON array of strings of DRS id URIs contents = Column(String, default='[]') # JSON array of ContentsObject.ids def __repr__(self): result = { 'name': self.name, + 'id': self.contents_id, 'drs_uri': json.loads(self.drs_uri) } if len(json.loads(self.contents)) > 0: @@ -399,8 +401,7 @@ def create_drs_object(obj): new_object.name = obj['id'] # optional string fields - if 'self_uri' in obj: - new_object.self_uri = obj['self_uri'] + new_object.self_uri = f'{HTSGET_URL.replace("http://", "drs://").replace("https://", "drs://")}/{new_object.name}' if 'created_time' in obj: new_object.created_time = obj['created_time'] if 'updated_time' in obj: @@ -456,6 +457,8 @@ def create_drs_object(obj): new_contents.drs_uri = json.dumps(contents['drs_uri']) if 'contents' in contents: new_contents.contents = json.dumps(contents['contents']) + if 'id' in contents: + new_contents.contents_id = contents['id'] session.add(new_contents) session.add(new_object) session.commit() diff --git a/htsget_server/drs_openapi.yaml b/htsget_server/drs_openapi.yaml index 5bf68c1c..a439cd33 100644 --- a/htsget_server/drs_openapi.yaml +++ b/htsget_server/drs_openapi.yaml @@ -42,6 +42,7 @@ paths: type: array items: anyOf: + - $ref: '#/components/schemas/SampleDrsObject' - $ref: '#/components/schemas/GenomicDrsObject' - $ref: '#/components/schemas/GenomicDataDrsObject' - $ref: '#/components/schemas/GenomicIndexDrsObject' @@ -242,8 +243,10 @@ components: 'application/json': schema: anyOf: + - $ref: "#/components/schemas/SampleDrsObject" - $ref: "#/components/schemas/GenomicDrsObject" - $ref: "#/components/schemas/GenomicDataDrsObject" + - $ref: "#/components/schemas/GenomicIndexDrsObject" DatasetRequest: content: 'application/json': @@ -262,6 +265,88 @@ components: items: type: string description: a DRS object's self_uri + SampleDrsObject: + type: object + description: A DrsObject that describes the clinical sample used for genomic analysis. + required: + - id + - contents +# - self_uri +# - size +# - created_time +# - checksums + properties: + id: + type: string + description: The identifier for the sample, as defined in sample_registration in the MOHCCN data model. + self_uri: + type: string + description: |- + A drs:// hostname-based URI, as defined in the DRS documentation, that tells clients how to access this object. + The intent of this field is to make DRS objects self-contained, and therefore easier for clients to store and pass around. For example, if you arrive at this DRS JSON by resolving a compact identifier-based DRS URI, the `self_uri` presents you with a hostname and properly encoded DRS ID for use in subsequent `access` endpoint calls. + example: + drs://drs.example.org/314159 + size: + type: integer + format: int64 + description: The cumulative size, in bytes, of items in the `contents` field. + created_time: + type: string + format: date-time + description: |- + Timestamp of content creation in RFC3339. + (This is the creation time of the underlying content, not of the JSON object.) + updated_time: + type: string + format: date-time + description: >- + Timestamp of content update in RFC3339, identical to `created_time` in systems + that do not support updates. + (This is the update time of the underlying content, not of the JSON object.) + version: + type: string + description: >- + A string representing a version. + (Some systems may use checksum, a RFC3339 timestamp, or an incrementing version number.) + checksums: + type: array + # minItems: 1 + items: + $ref: '#/components/schemas/Checksum' + description: >- + The checksum of the `DrsObject`. At least one checksum must be provided. + For blobs, the checksum is computed over the bytes in the blob. + For bundles, the checksum is computed over a sorted concatenation of the + checksums of its top-level contained objects (not recursive, names not included). + The list of checksums is sorted alphabetically (hex-code) before concatenation + and a further checksum is performed on the concatenated checksum value. + For example, if a bundle contains blobs with the following checksums: + md5(blob1) = 72794b6d + md5(blob2) = 5e089d29 + Then the checksum of the bundle is: + md5( concat( sort( md5(blob1), md5(blob2) ) ) ) + = md5( concat( sort( 72794b6d, 5e089d29 ) ) ) + = md5( concat( 5e089d29, 72794b6d ) ) + = md5( 5e089d2972794b6d ) + = f7a29a04 + contents: + type: array + description: The specific genomic contents objects that were generated from this sample. + minItems: 1 + items: + $ref: '#/components/schemas/GenomicContentsObject' + description: + type: string + description: A human readable description of the `DrsObject`. + aliases: + type: array + items: + type: string + description: >- + A list of strings that can be used to find other metadata + about this `DrsObject` from external metadata sources. These + aliases can be used to represent secondary + accession numbers or external GUIDs. GenomicDrsObject: type: object description: A DrsObject that describes a bundled genomic data entity. It usually will consist of a genomic data file, e.g. a variant or read file, and its associated index file. @@ -594,6 +679,24 @@ components: anyOf: - $ref: '#/components/schemas/LocalFileAccessMethod' - $ref: '#/components/schemas/S3AccessMethod' + GenomicContentsObject: + type: object + required: + - name + - id + properties: + name: + type: string + description: The identifier of the genomic object + id: + type: string + enum: + - genomic + drs_uri: + type: array + description: The DRS uri(s) to the GenomicDrsObject + items: + type: string GenomicDataContentsObject: type: object required: @@ -610,11 +713,7 @@ components: - read drs_uri: type: array - description: >- - A list of full DRS identifier URI paths - that may be used to obtain the object. - These URIs may be external to this DRS instance. - example: drs://drs.example.org/314159 + description: The DRS uri(s) to the GenomicDataDrsObject items: type: string GenomicIndexContentsObject: @@ -632,10 +731,6 @@ components: - index drs_uri: type: array - description: >- - A list of full DRS identifier URI paths - that may be used to obtain the object. - These URIs may be external to this DRS instance. - example: drs://drs.example.org/314159 + description: The DRS uri(s) to the GenomicIndexDrsObject items: - type: string + type: string \ No newline at end of file diff --git a/tests/test_htsget_server.py b/tests/test_htsget_server.py index b32656ba..3f293b1d 100644 --- a/tests/test_htsget_server.py +++ b/tests/test_htsget_server.py @@ -1,4 +1,6 @@ +import json import os +import re import sys import pytest import requests @@ -44,40 +46,6 @@ def test_remove_objects(drs_objects): assert response.status_code == 200 -def get_client(): - # in case we're running on the container itself, which might have secrets - try: - with open("/run/secrets/minio-access-key", "r") as f: - minio_access_key = f.read().strip() - except Exception as e: - minio_access_key = MINIO_ACCESS_KEY - try: - with open("/run/secrets/minio-secret-key", "r") as f: - minio_secret_key = f.read().strip() - except Exception as e: - minio_secret_key = MINIO_SECRET_KEY - - client = None - try: - bucket = 'testhtsget' - if MINIO_URL and minio_access_key and minio_secret_key: - if VAULT_URL: - token = get_access_token(username=USERNAME, password=PASSWORD) - credential, status_code = store_aws_credential(token=token, endpoint=MINIO_URL, bucket=bucket, access=minio_access_key, secret=minio_secret_key, vault_url=VAULT_URL) - if status_code == 200: - client = get_minio_client(token=token, s3_endpoint=credential["endpoint"], bucket=bucket) - else: - client = get_minio_client(token=None, s3_endpoint=MINIO_URL, bucket=bucket, access_key=minio_access_key, secret_key=minio_secret_key) - if client is None: - client = get_minio_client(bucket=bucket) - except Exception as e: - print(str(e)) - assert False - return {"message": str(e)}, 500 - - return client - - def test_post_objects(drs_objects): """ Install test objects. Will fail if any post request returns an error. @@ -87,28 +55,7 @@ def test_post_objects(drs_objects): headers = get_headers() response = requests.request("GET", url, headers=headers) - client = get_client() for obj in drs_objects: - url = f"{HOST}/ga4gh/drs/v1/objects/{obj['id']}" - if "contents" not in obj: - # create access_methods: - access_id = f"{client['endpoint']}/{client['bucket']}/{obj['id']}" - if VAULT_URL is None and client['access'] and client['secret']: - access_id += f"?access={client['access']}&secret={client['secret']}" - obj["access_methods"] = [ - { - "type": "s3", - "access_id": access_id - } - ] - try: - file = Path(LOCAL_FILE_PATH).joinpath(obj['id']) - with Path.open(file, "rb") as fp: - result = client['client'].put_object(client['bucket'], obj['id'], fp, file.stat().st_size) - except Exception as e: - print(str(e)) - assert False - return {"message": str(e)}, 500 url = f"{HOST}/ga4gh/drs/v1/objects" response = requests.request("POST", url, json=obj, headers=headers) print(f"POST {obj['name']}: {response.text}") @@ -169,6 +116,7 @@ def test_install_public_object(): token = None client = get_minio_client(token=token, s3_endpoint="http://s3.us-east-1.amazonaws.com", bucket="1000genomes", access_key=None, secret_key=None, public=True) access_id = f"{client['endpoint']}/{client['bucket']}" + drs_url = HOST.replace("http://", "drs://").replace("https://", "drs://") pieces = [ { "aliases": [], @@ -177,7 +125,6 @@ def test_install_public_object(): "id": "ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi", "mime_type": "application/octet-stream", "name": "ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi", - "self_uri": "drs://localhost/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi", "size": 0, "version": "v1", "access_methods": [ @@ -194,7 +141,6 @@ def test_install_public_object(): "id": "ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz", "mime_type": "application/octet-stream", "name": "ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz", - "self_uri": "drs://localhost/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz", "size": 0, "version": "v1", "access_methods": [ @@ -210,14 +156,14 @@ def test_install_public_object(): "contents": [ { "drs_uri": [ - "drs://localhost/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz" + f"{drs_url}/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz" ], "name": "ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz", "id": "variant" }, { "drs_uri": [ - "drs://localhost/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi" + f"{drs_url}/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi" ], "name": "ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi", "id": "index" @@ -227,7 +173,6 @@ def test_install_public_object(): "id": "ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes", "mime_type": "application/octet-stream", "name": "ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes", - "self_uri": "drs://localhost/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes", "size": 0, "version": "v1" } @@ -442,390 +387,126 @@ def test_vcf_json(): @pytest.fixture def drs_objects(): - return [ - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "NA18537.vcf.gz.tbi", - "mime_type": "application/octet-stream", - "name": "NA18537.vcf.gz.tbi", - "self_uri": "drs://localhost/NA18537.vcf.gz.tbi", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "NA18537.vcf.gz", - "mime_type": "application/octet-stream", - "name": "NA18537.vcf.gz", - "self_uri": "drs://localhost/NA18537.vcf.gz", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "contents": [ - { - "drs_uri": [ - "drs://localhost/NA18537.vcf.gz" - ], - "name": "NA18537.vcf.gz", - "id": "variant" - }, - { - "drs_uri": [ - "drs://localhost/NA18537.vcf.gz.tbi" - ], - "name": "NA18537.vcf.gz.tbi", - "id": "index" - } - ], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "NA18537", - "mime_type": "application/octet-stream", - "name": "NA18537", - "self_uri": "drs://localhost/NA18537", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "multisample_1.vcf.gz.tbi", - "mime_type": "application/octet-stream", - "name": "multisample_1.vcf.gz.tbi", - "self_uri": "drs://localhost/multisample_1.vcf.gz.tbi", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "multisample_1.vcf.gz", - "mime_type": "application/octet-stream", - "name": "multisample_1.vcf.gz", - "self_uri": "drs://localhost/multisample_1.vcf.gz", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "contents": [ - { - "drs_uri": [ - "drs://localhost/multisample_1.vcf.gz" - ], - "name": "multisample_1.vcf.gz", - "id": "variant" - }, - { - "drs_uri": [ - "drs://localhost/multisample_1.vcf.gz.tbi" - ], - "name": "multisample_1.vcf.gz.tbi", - "id": "index" - } - ], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "multisample_1", - "mime_type": "application/octet-stream", - "name": "multisample_1", - "self_uri": "drs://localhost/multisample_1", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "multisample_2.vcf.gz.tbi", - "mime_type": "application/octet-stream", - "name": "multisample_2.vcf.gz.tbi", - "self_uri": "drs://localhost/multisample_2.vcf.gz.tbi", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "multisample_2.vcf.gz", - "mime_type": "application/octet-stream", - "name": "multisample_2.vcf.gz", - "self_uri": "drs://localhost/multisample_2.vcf.gz", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "contents": [ - { - "drs_uri": [ - "drs://localhost/multisample_2.vcf.gz" - ], - "name": "multisample_2.vcf.gz", - "id": "variant" - }, - { - "drs_uri": [ - "drs://localhost/multisample_2.vcf.gz.tbi" - ], - "name": "multisample_2.vcf.gz.tbi", - "id": "index" - } - ], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "multisample_2", - "mime_type": "application/octet-stream", - "name": "multisample_2", - "self_uri": "drs://localhost/multisample_2", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "sample.compressed.vcf.gz.tbi", - "mime_type": "application/octet-stream", - "name": "sample.compressed.vcf.gz.tbi", - "self_uri": "drs://localhost/sample.compressed.vcf.gz.tbi", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "sample.compressed.vcf.gz", - "mime_type": "application/octet-stream", - "name": "sample.compressed.vcf.gz", - "self_uri": "drs://localhost/sample.compressed.vcf.gz", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "contents": [ - { - "drs_uri": [ - "drs://localhost/sample.compressed.vcf.gz" - ], - "name": "sample.compressed.vcf.gz", - "id": "variant" - }, - { - "drs_uri": [ - "drs://localhost/sample.compressed.vcf.gz.tbi" - ], - "name": "sample.compressed.vcf.gz.tbi", - "id": "index" - } - ], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "sample.compressed", - "mime_type": "application/octet-stream", - "name": "sample.compressed", - "self_uri": "drs://localhost/sample.compressed", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:58:56.663378", - "description": "", - "id": "NA20787.vcf.gz.tbi", + drs_objects = {} + for root, dirs, files in os.walk(LOCAL_FILE_PATH): + for f in files: + print(f) + name_match = re.match(r"^(.+?)\.(vcf|vcf\.gz|bcf|bcf\.gz|sam|bam)(\.tbi|\.bai)*$", f) + if name_match is not None: + genomic_id = name_match.group(1) + if genomic_id not in drs_objects: + drs_objects[genomic_id] = {} + if name_match.group(3) is not None: + drs_objects[genomic_id]["index"] = name_match.group(0) + else: + key = "variant" + if name_match.group(2) in ["sam", "bam"]: + key = "read" + drs_objects[genomic_id][key] = name_match.group(0) + break + result = [] + drs_url = HOST.replace("http://", "drs://").replace("https://", "drs://") + for drs_obj in drs_objects: + # make a genomicdrsobj: + genomic_drs_obj = { + "id": drs_obj, "mime_type": "application/octet-stream", - "name": "NA20787.vcf.gz.tbi", - "self_uri": "drs://localhost/NA20787.vcf.gz.tbi", - "size": 0, - "updated_time": "2021-09-27T18:58:56.663442", + "name": drs_obj, + "contents": [], "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:58:56.663378", - "description": "", - "id": "NA20787.vcf.gz", + } + result.append(genomic_drs_obj) + + # make a genomicindexdrsobj: + index_file = drs_objects[drs_obj].pop("index") + result.append({ + "id": index_file, "mime_type": "application/octet-stream", - "name": "NA20787.vcf.gz", - "self_uri": "drs://localhost/NA20787.vcf.gz", - "size": 0, - "updated_time": "2021-09-27T18:58:56.663442", + "name": index_file, "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "contents": [ - { - "drs_uri": [ - "drs://localhost/NA20787.vcf.gz" - ], - "name": "NA20787.vcf.gz", - "id": "variant" - }, - { - "drs_uri": [ - "drs://localhost/NA20787.vcf.gz.tbi" - ], - "name": "NA20787.vcf.gz.tbi", - "id": "index" - } + }) + # add it to the contents of the genomic_drs_obj: + genomic_drs_obj['contents'].append({ + "drs_uri": [ + f"{drs_url}/{index_file}" ], - "created_time": "2021-09-27T18:58:56.663378", - "description": "", - "id": "NA20787", + "name": index_file, + "id": "index" + }) + + # make a genomicdatadrsobj: + type = list(drs_objects[drs_obj].keys()).pop() + data_file = drs_objects[drs_obj].pop(type) + result.append({ + "id": data_file, "mime_type": "application/octet-stream", - "name": "NA20787", - "self_uri": "drs://localhost/NA20787", - "size": 0, - "updated_time": "2021-09-27T18:58:56.663442", + "name": data_file, "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:58:56.663378", - "description": "", - "id": "NA02102.bam.bai", - "mime_type": "application/octet-stream", - "name": "NA02102.bam.bai", - "self_uri": "drs://localhost/NA02102.bam.bai", - "size": 0, - "updated_time": "2021-09-27T18:58:56.663442", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:58:56.663378", - "description": "", - "id": "NA02102.bam", - "mime_type": "application/octet-stream", - "name": "NA02102.bam", - "self_uri": "drs://localhost/NA02102.bam", - "size": 0, - "updated_time": "2021-09-27T18:58:56.663442", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "contents": [ - { - "drs_uri": [ - "drs://localhost/NA02102.bam" - ], - "name": "NA02102.bam", - "id": "read" - }, - { - "drs_uri": [ - "drs://localhost/NA02102.bam.bai" - ], - "name": "NA02102.bam.bai", - "id": "index" - } + }) + # add it to the contents of the genomic_drs_obj: + genomic_drs_obj['contents'].append({ + "drs_uri": [ + f"{drs_url}/{data_file}" ], - "created_time": "2021-09-27T18:58:56.663378", - "description": "", - "id": "NA02102", - "mime_type": "application/octet-stream", - "name": "NA02102", - "self_uri": "drs://localhost/NA02102", - "size": 0, - "updated_time": "2021-09-27T18:58:56.663442", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "test.vcf.gz.tbi", - "mime_type": "application/octet-stream", - "name": "test.vcf.gz.tbi", - "self_uri": "drs://localhost/test.vcf.gz.tbi", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "test.vcf.gz", - "mime_type": "application/octet-stream", - "name": "test.vcf.gz", - "self_uri": "drs://localhost/test.vcf.gz", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - }, - { - "aliases": [], - "checksums": [], - "contents": [ - { - "drs_uri": [ - "drs://localhost/test.vcf.gz" - ], - "name": "test.vcf.gz", - "id": "variant" - }, + "name": data_file, + "id": type + }) + + client = get_client() + + for obj in result: + if "contents" not in obj: + # create access_methods: + access_id = f"{client['endpoint']}/{client['bucket']}/{obj['id']}" + if VAULT_URL is None and client['access'] and client['secret']: + access_id += f"?access={client['access']}&secret={client['secret']}" + obj["access_methods"] = [ { - "drs_uri": [ - "drs://localhost/test.vcf.gz.tbi" - ], - "name": "test.vcf.gz.tbi", - "id": "index" + "type": "s3", + "access_id": access_id } - ], - "created_time": "2021-09-27T18:40:00.538843", - "description": "", - "id": "test", - "mime_type": "application/octet-stream", - "name": "test", - "self_uri": "drs://localhost/test", - "size": 0, - "updated_time": "2021-09-27T18:40:00.539022", - "version": "v1" - } - ] + ] + try: + file = Path(LOCAL_FILE_PATH).joinpath(obj['id']) + obj['size'] = file.stat().st_size + with Path.open(file, "rb") as fp: + res = client['client'].put_object(client['bucket'], obj['id'], fp, file.stat().st_size) + except Exception as e: + print(str(e)) + assert False + return {"message": str(e)}, 500 + return result + + +def get_client(): + # in case we're running on the container itself, which might have secrets + try: + with open("/run/secrets/minio-access-key", "r") as f: + minio_access_key = f.read().strip() + except Exception as e: + minio_access_key = MINIO_ACCESS_KEY + try: + with open("/run/secrets/minio-secret-key", "r") as f: + minio_secret_key = f.read().strip() + except Exception as e: + minio_secret_key = MINIO_SECRET_KEY + + client = None + try: + bucket = 'testhtsget' + if MINIO_URL and minio_access_key and minio_secret_key: + if VAULT_URL: + token = get_access_token(username=USERNAME, password=PASSWORD) + credential, status_code = store_aws_credential(token=token, endpoint=MINIO_URL, bucket=bucket, access=minio_access_key, secret=minio_secret_key, vault_url=VAULT_URL) + if status_code == 200: + client = get_minio_client(token=token, s3_endpoint=credential["endpoint"], bucket=bucket) + else: + client = get_minio_client(token=None, s3_endpoint=MINIO_URL, bucket=bucket, access_key=minio_access_key, secret_key=minio_secret_key) + if client is None: + client = get_minio_client(bucket=bucket) + except Exception as e: + print(str(e)) + assert False + return {"message": str(e)}, 500 + + return client