Skip to content

Commit

Permalink
Merge pull request #40 from CanDIG/feature/censoring
Browse files Browse the repository at this point in the history
DIG-1533: cancer_type->primary_site & DIG-1583: Implement censoring on aggregate count threshold
  • Loading branch information
OrdiNeu authored Jun 10, 2024
2 parents cbb5c89 + 6a1236c commit e079eb7
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 7 deletions.
1 change: 1 addition & 0 deletions query_server/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

AUTHZ = config['authz']
QUERY_URL = os.getenv("QUERY_URL", f"http://localhost:{config['DEFAULT']['Port']}")
AGGREGATE_COUNT_THRESHOLD = int(os.getenv("AGGREGATE_COUNT_THRESHOLD", "5"))

PORT = config['DEFAULT']['Port']

Expand Down
34 changes: 27 additions & 7 deletions query_server/query_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def get_summary_stats(donors, headers):
# Perform (and cache) summary statistics
age_at_diagnosis = {}
donors_by_id = {}
cancer_type_count = {}
primary_site_count = {}
patients_per_cohort = {}
for donor in donors:
# A donor's date of birth is defined as the (negative) interval between actual DOB and the date of first diagnosis
Expand All @@ -77,10 +77,10 @@ def get_summary_stats(donors, headers):
# Cancer types
if donor['primary_site']:
for cancer_type in donor['primary_site']:
if cancer_type in cancer_type_count:
cancer_type_count[cancer_type] += 1
if cancer_type in primary_site_count:
primary_site_count[cancer_type] += 1
else:
cancer_type_count[cancer_type] = 1
primary_site_count[cancer_type] = 1
program_id = donor['program_id']
if program_id in patients_per_cohort:
patients_per_cohort[program_id] += 1
Expand All @@ -107,7 +107,7 @@ def get_summary_stats(donors, headers):
return {
'age_at_diagnosis': age_at_diagnosis,
'treatment_type_count': treatment_type_count,
'cancer_type_count': cancer_type_count,
'primary_site_count': primary_site_count,
'patients_per_cohort': patients_per_cohort
}

Expand Down Expand Up @@ -157,6 +157,22 @@ def query_htsget(headers, gene, assembly, chrom):
search = re.search(r'(chr)*([XY0-9]{1,2}):(\d+)-(\d+)', chrom)
return query_htsget_pos(headers, assembly, search.group(2), int(search.group(3)), int(search.group(4)))

# Recursively deep search an object or list for any values less than the aggregate threshold
# NB: does not handle tuples
def censor_response(object):
if type(object) is list:
return [censor_response(value) for value in object]
elif isinstance(object, dict):
new_dict = {}
for key, val in object.items():
new_dict[key] = censor_response(val)
return new_dict
elif isinstance(object, int):
return f"<{config.AGGREGATE_COUNT_THRESHOLD}" if object < config.AGGREGATE_COUNT_THRESHOLD else object

# Unknown -- leave as-is
return object

# The return value does not like None being used as a key, so this helper function recursively
# goes through the dictionary provided, and changes all keys to strings
# NB: This overwrites any keys that were previously not strings, and can cause data deletion
Expand Down Expand Up @@ -453,14 +469,14 @@ def discovery_query(treatment="", primary_site="", chemotherapy="", immunotherap
summary_stats = {
'age_at_diagnosis': {},
'treatment_type_count': {},
'cancer_type_count': {},
'primary_site_count': {},
'patients_per_cohort': {}
}
summary_stat_mapping = [
('age_at_diagnosis', 'age_at_diagnosis'),
('treatment_type_count', 'treatment_type'),
('patients_per_cohort', 'program_id'),
('cancer_type_count', 'primary_site')
('primary_site_count', 'primary_site')
]
for donor in donors:
for mapping in summary_stat_mapping:
Expand All @@ -469,4 +485,8 @@ def discovery_query(treatment="", primary_site="", chemotherapy="", immunotherap
add_or_increment(summary_stats[mapping[0]], item)
else:
add_or_increment(summary_stats[mapping[0]], donor[mapping[1]])

# Censor if necessary
summary_stats = censor_response(summary_stats)

return fix_dicts(summary_stats), 200

0 comments on commit e079eb7

Please sign in to comment.