Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge model_3 into develop #49

Merged
merged 10 commits into from
Aug 21, 2024
2 changes: 2 additions & 0 deletions query_server/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

try:
SERVICE_TOKEN = create_service_token()
if DEBUG_MODE:
print(f"SERVICE_TOKEN: {SERVICE_TOKEN}")
except:
print("Could not obtain a service token")
SERVICE_TOKEN = ""
31 changes: 9 additions & 22 deletions query_server/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ paths:
parameters:
- $ref: "#/components/parameters/treatmentParam"
- $ref: "#/components/parameters/primarySiteParam"
- $ref: "#/components/parameters/chemotherapyParam"
- $ref: "#/components/parameters/immunotherapyParam"
- $ref: "#/components/parameters/hormoneTherapyParam"
- $ref: "#/components/parameters/drugNameParam"
- $ref: "#/components/parameters/chrParam"
- $ref: "#/components/parameters/geneParam"
- $ref: "#/components/parameters/assemblyParam"
Expand Down Expand Up @@ -89,9 +87,7 @@ paths:
parameters:
- $ref: "#/components/parameters/treatmentParam"
- $ref: "#/components/parameters/primarySiteParam"
- $ref: "#/components/parameters/chemotherapyParam"
- $ref: "#/components/parameters/immunotherapyParam"
- $ref: "#/components/parameters/hormoneTherapyParam"
- $ref: "#/components/parameters/drugNameParam"
- $ref: "#/components/parameters/chrParam"
- $ref: "#/components/parameters/geneParam"
- $ref: "#/components/parameters/assemblyParam"
Expand Down Expand Up @@ -140,30 +136,21 @@ components:
required: false
schema:
$ref: '#/components/schemas/Fields'
chemotherapyParam:
drugNameParam:
in: query
name: chemotherapy
name: drug_name
style: pipeDelimited
description: A pipe-separated list of chemotherapy treatments to look for
description: A pipe-separated list of drug names to look for
example: FLUOROURACIL|LEUCOVORIN
required: false
schema:
$ref: '#/components/schemas/Fields'
immunotherapyParam:
systemicTherapyTypeParam:
in: query
name: immunotherapy
name: systemic_therapy_type
style: pipeDelimited
description: A pipe-separated list of immunotherapy treatments to look for
example: Necitumumab|Pembrolizumab
required: false
schema:
$ref: '#/components/schemas/Fields'
hormoneTherapyParam:
in: query
name: hormone_therapy
style: pipeDelimited
description: A pipe-separated list of hormone therapy treatments to look for
example: Goserelin|Leuprolide
description: A pipe-separated list of systemic therapies that we accept
example: Chemotherapy|Immunotherapy
required: false
schema:
$ref: '#/components/schemas/Fields'
Expand Down
93 changes: 62 additions & 31 deletions query_server/query_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,26 +36,43 @@ def safe_get_request_json(request, name):
return request.json()

# Grab a list of donors matching a given filter from the given URL
def get_donors_from_katsu(url, param_name, parameter_list, headers):
def get_donors_from_katsu(url, param_name, parameter_list, headers, therapy_type=None, keep_all=False):
permissible_donors = set()
all_results = []
for parameter in parameter_list:
# TODO: Fix the page_size call here -- use a consume_all() query like in the frontend
parameters = {
param_name: parameter,
'page_size': PAGE_SIZE
}
if therapy_type != None:
parameters['systemic_therapy_type'] = therapy_type
treatments = requests.get(f"{url}?{urllib.parse.urlencode(parameters)}", headers=headers)
results = safe_get_request_json(treatments, f'Katsu {param_name}')['items']
permissible_donors |= set([result['submitter_donor_id'] for result in results])
return permissible_donors
if keep_all:
all_results.extend(results)

# If we are required to return all results, query at least once
if not parameter_list and keep_all:
parameters = {
'page_size': PAGE_SIZE
}
if therapy_type != None:
parameters['systemic_therapy_type'] = therapy_type
treatments = requests.get(f"{url}?{urllib.parse.urlencode(parameters)}", headers=headers)
results = safe_get_request_json(treatments, f'Katsu {param_name}')['items']
permissible_donors |= set([result['submitter_donor_id'] for result in results])
all_results.extend(results)
return permissible_donors, all_results

def add_or_increment(dict, key):
if key in dict:
dict[key] += 1
else:
dict[key] = 1

def get_summary_stats(donors, headers):
def get_summary_stats(donors, primary_diagnoses, headers):
# Perform (and cache) summary statistics
age_at_diagnosis = {}
donors_by_id = {}
Expand All @@ -75,22 +92,23 @@ def get_summary_stats(donors, headers):
else:
add_or_increment(age_at_diagnosis, f'{age}-{age+9} Years')

# Cancer types
if donor['primary_site']:
for cancer_type in donor['primary_site']:
if cancer_type in primary_site_count:
primary_site_count[cancer_type] += 1
else:
primary_site_count[cancer_type] = 1
program_id = donor['program_id']
if program_id in patients_per_cohort:
patients_per_cohort[program_id] += 1
elif program_id is not None:
patients_per_cohort[program_id] = 1

# primary sites
for primary_diagnosis in primary_diagnoses:
primary_site = primary_diagnosis['primary_site']
if primary_site in primary_site_count:
primary_site_count[primary_site] += 1
else:
primary_site_count[primary_site] = 1

# Treatment types
# http://candig.docker.internal:8008/v2/authorized/treatments/
treatments = requests.get(f"{config.KATSU_URL}/v2/authorized/treatments/?page_size={PAGE_SIZE}",
# http://candig.docker.internal:8008/v3/authorized/treatments/
treatments = requests.get(f"{config.KATSU_URL}/v3/authorized/treatments/?page_size={PAGE_SIZE}",
headers=headers)
treatments = safe_get_request_json(treatments, 'Katsu treatments')['items']
treatment_type_count = {}
Expand Down Expand Up @@ -193,7 +211,7 @@ def fix_dicts(to_fix):
return to_fix

@app.route('/query')
def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", hormone_therapy="", chrom="", gene="", page=0, page_size=10, assembly="hg38", exclude_cohorts=[], session_id=""):
def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="", chrom="", gene="", page=0, page_size=10, assembly="hg38", exclude_cohorts=[], session_id=""):
# Add a service token to the headers so that other services will know this is from the query service:
headers = {}
for k in request.headers.keys():
Expand All @@ -205,9 +223,7 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm

# Query the appropriate Katsu endpoint
params = { 'page_size': PAGE_SIZE }
url = f"{config.KATSU_URL}/v2/authorized/donors/"
if primary_site != "":
params['primary_site'] = primary_site
url = f"{config.KATSU_URL}/v3/authorized/donors/"
r = safe_get_request_json(requests.get(f"{url}?{urllib.parse.urlencode(params, True)}",
# Reuse their bearer token
headers=headers), 'Katsu Donors')
Expand All @@ -218,20 +234,32 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm

# Will need to look into how to go about this -- ideally we implement this into the SQL in Katsu's side
filters = [
(treatment, f"{config.KATSU_URL}/v2/authorized/treatments/", 'treatment_type'),
(chemotherapy, f"{config.KATSU_URL}/v2/authorized/chemotherapies/", 'drug_name'),
(immunotherapy, f"{config.KATSU_URL}/v2/authorized/immunotherapies/", 'drug_name'),
(hormone_therapy, f"{config.KATSU_URL}/v2/authorized/hormone_therapies/", 'drug_name')
(treatment, f"{config.KATSU_URL}/v3/authorized/treatments/", 'treatment_type', None)
]
for (this_filter, url, param_name) in filters:
if type(systemic_therapy_type) is list:
for this_type in systemic_therapy_type:
filters.append((drug_name, f"{config.KATSU_URL}/v3/authorized/systemic_therapies/", 'drug_name', this_type))
else:
filters.append((drug_name, f"{config.KATSU_URL}/v3/authorized/systemic_therapies/", 'drug_name', None))

for (this_filter, url, param_name, therapy_type) in filters:
if this_filter != "":
permissible_donors = get_donors_from_katsu(
permissible_donors, _ = get_donors_from_katsu(
url,
param_name,
this_filter,
headers
headers,
therapy_type
)
donors = [donor for donor in donors if donor['submitter_donor_id'] in permissible_donors]
permissible_donors, primary_diagnoses = get_donors_from_katsu(
f"{config.KATSU_URL}/v3/authorized/primary_diagnoses/",
'primary_site',
primary_site if primary_site != "" else [],
headers,
keep_all=True
)
donors = [donor for donor in donors if donor['submitter_donor_id'] in permissible_donors]

# Now we combine this with HTSGet, if any
genomic_query = []
Expand All @@ -241,7 +269,7 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm
htsget = query_htsget(headers, gene, assembly, chrom)

# We need to be able to map specimens, so we'll grab it from Katsu
specimen_query_req = requests.get(f"{config.KATSU_URL}/v2/authorized/sample_registrations/?page_size=10000000", headers=headers)
specimen_query_req = requests.get(f"{config.KATSU_URL}/v3/authorized/sample_registrations/?page_size=10000000", headers=headers)
specimen_query = safe_get_request_json(specimen_query_req, 'Katsu sample registrations')
specimen_mapping = {}
for specimen in specimen_query['items']:
Expand Down Expand Up @@ -295,8 +323,12 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm
except Exception as ex:
print(f"Error while reading HTSGet response: {ex}")

# We also need to cut down the list of primary diagnoses based on the filtered list of donors at this point
permissible_donors = set([donor['submitter_donor_id'] for donor in donors])
primary_diagnoses = [primary_diagnosis for primary_diagnosis in primary_diagnoses if primary_diagnosis['submitter_donor_id'] in permissible_donors]

# TODO: Cache the above list of donor IDs and summary statistics
summary_stats = get_summary_stats(donors, headers)
summary_stats = get_summary_stats(donors, primary_diagnoses, headers)

# Determine which part of the filtered donors to send back
full_data = {
Expand Down Expand Up @@ -341,7 +373,7 @@ def genomic_completeness():
@app.route('/discovery/programs')
def discovery_programs():
# Grab all programs from Katsu
url = f"{config.KATSU_URL}/v2/discovery/programs/"
url = f"{config.KATSU_URL}/v3/discovery/programs/"
r = safe_get_request_json(requests.get(url), 'Katsu sample registrations')

# Aggregate all of the programs' return values into one value for the entire site
Expand Down Expand Up @@ -413,8 +445,8 @@ def discovery_programs():
return fix_dicts(ret_val), 200

@app.route('/discovery/query')
def discovery_query(treatment="", primary_site="", chemotherapy="", immunotherapy="", hormone_therapy="", chrom="", gene="", assembly="hg38", exclude_cohorts=[]):
url = f"{config.KATSU_URL}/v2/explorer/donors/"
def discovery_query(treatment="", primary_site="", systemic_therapy="", systemic_therapy_type="", chrom="", gene="", assembly="hg38", exclude_cohorts=[]):
url = f"{config.KATSU_URL}/v3/explorer/donors/"
headers = {}
for k in request.headers.keys():
headers[k] = request.headers[k]
Expand All @@ -423,9 +455,8 @@ def discovery_query(treatment="", primary_site="", chemotherapy="", immunotherap
param_mapping = [
(treatment, "treatment_type"),
(primary_site, "primary_site"),
(chemotherapy, "chemotherapy_drug_name"),
(immunotherapy, "immunotherapy_drug_name"),
(hormone_therapy, "hormone_therapy_drug_name"),
(systemic_therapy, "systemic_therapy_drug_name"),
(systemic_therapy_type, "systemic_therapy_type"),
(exclude_cohorts, "exclude_cohorts")
]
params = {}
Expand Down