From 86c99ea52af557f7c04e6fa1b7a94c4dba4e8af3 Mon Sep 17 00:00:00 2001 From: David Brownlee Date: Mon, 19 Feb 2024 16:26:21 -0500 Subject: [PATCH] For site-specific manifests, mappings.csv and functions. No data is to ever be included. --- site_datasets/c3g/manifest.yml | 15 ++ site_datasets/c3g/moh_muhcMelanoma.csv | 184 +++++++++++++++++++++++++ site_datasets/c3g/muhc_mappings.py | 99 +++++++++++++ 3 files changed, 298 insertions(+) create mode 100644 site_datasets/c3g/manifest.yml create mode 100644 site_datasets/c3g/moh_muhcMelanoma.csv create mode 100644 site_datasets/c3g/muhc_mappings.py diff --git a/site_datasets/c3g/manifest.yml b/site_datasets/c3g/manifest.yml new file mode 100644 index 0000000..4c07d65 --- /dev/null +++ b/site_datasets/c3g/manifest.yml @@ -0,0 +1,15 @@ +description: Mapping of MUHC Melanoma (1st batch) dataset to MOHCCN format for CanDIG +# mapping is the csv file that contains the list of fields and mapping functions +mapping: moh_muhcMelanoma.csv +# the name of the top-level identifier column in the input data +identifier: submitter_donor_id +# a link to the openapi schema +schema: https://raw.githubusercontent.com/CanDIG/katsu/develop/chord_metadata_service/mohpackets/docs/schema.yml +# class of schema for validation: +schema_class: MoHSchema +# a reference date used to calculate date intervals, formatted as a mapping entry for the mapping template +reference_date: earliest_date(donor.date_resolution, donor.date_of_birth) # NEEDS TO BE CHANGED, ONCE DATES ARE REAL. 
+# one or more files (dataset_functions.py) that implement the mappings +# described in mapping file +functions: + - muhc_mappings diff --git a/site_datasets/c3g/moh_muhcMelanoma.csv b/site_datasets/c3g/moh_muhcMelanoma.csv new file mode 100644 index 0000000..81710bf --- /dev/null +++ b/site_datasets/c3g/moh_muhcMelanoma.csv @@ -0,0 +1,184 @@ +## Schema generated from https://raw.githubusercontent.com/CanDIG/katsu/develop/chord_metadata_service/mohpackets/docs/schema.yml +## Based on repo commit sha "29fd55d173b7a01daa72fcc89187e3aabd1fb51e" +## MoH template is manually updated to match the MoH clinical data model +## Items are comma separated: element, mapping method +DONOR.INDEX, {indexed_on(donor.submitter_donor_id)} +DONOR.INDEX.submitter_donor_id, {single_val(donor.submitter_donor_id)} +DONOR.INDEX.program_id, {single_val(donor.program_id)} +DONOR.INDEX.date_resolution, {single_val(donor.date_resolution)} +DONOR.INDEX.lost_to_followup_after_clinical_event_identifier, {single_val(donor.lost_to_followup_after_clinical_event_identifier)} +DONOR.INDEX.lost_to_followup_reason, {single_val(donor.lost_to_followup_reason)} +DONOR.INDEX.date_alive_after_lost_to_followup, {date_interval(donor.date_alive_after_lost_to_followup)} +DONOR.INDEX.is_deceased, {boolean(donor.is_deceased)} +DONOR.INDEX.cause_of_death, {single_val(donor.cause_of_death)} +DONOR.INDEX.date_of_birth, {date_interval(donor.date_of_birth)} +DONOR.INDEX.date_of_death, {date_interval(donor.date_of_death)} +DONOR.INDEX.gender, {single_val(donor.gender)} +DONOR.INDEX.sex_at_birth, {single_val(donor.sex_at_birth)} +DONOR.INDEX.primary_site, {muhc_mappings.map_primary_site(donor.primary_site)} +DONOR.INDEX.primary_diagnoses.INDEX, {indexed_on(primary_diagnosis.submitter_donor_id)} +DONOR.INDEX.primary_diagnoses.INDEX.submitter_primary_diagnosis_id, {single_val(primary_diagnosis.submitter_primary_diagnosis_id)} +DONOR.INDEX.primary_diagnoses.INDEX.date_of_diagnosis, 
{date_interval(primary_diagnosis.date_of_diagnosis)} +DONOR.INDEX.primary_diagnoses.INDEX.cancer_type_code, {single_val(primary_diagnosis.cancer_type_code)} +DONOR.INDEX.primary_diagnoses.INDEX.basis_of_diagnosis, {single_val(primary_diagnosis.basis_of_diagnosis)} +DONOR.INDEX.primary_diagnoses.INDEX.lymph_nodes_examined_status, {single_val(primary_diagnosis.lymph_nodes_examined_status)} +DONOR.INDEX.primary_diagnoses.INDEX.lymph_nodes_examined_method, {single_val(primary_diagnosis.lymph_nodes_examined_method)} +DONOR.INDEX.primary_diagnoses.INDEX.number_lymph_nodes_positive, {integer(primary_diagnosis.number_lymph_nodes_positive)} +DONOR.INDEX.primary_diagnoses.INDEX.clinical_tumour_staging_system, {single_val(primary_diagnosis.clinical_tumour_staging_system)} +DONOR.INDEX.primary_diagnoses.INDEX.clinical_t_category, {single_val(primary_diagnosis.clinical_t_category)} +DONOR.INDEX.primary_diagnoses.INDEX.clinical_n_category, {single_val(primary_diagnosis.clinical_n_category)} +DONOR.INDEX.primary_diagnoses.INDEX.clinical_m_category, {single_val(primary_diagnosis.clinical_m_category)} +DONOR.INDEX.primary_diagnoses.INDEX.clinical_stage_group, {single_val(primary_diagnosis.clinical_stage_group)} +DONOR.INDEX.primary_diagnoses.INDEX.laterality, {single_val(primary_diagnosis.laterality)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX, {indexed_on(specimen.submitter_primary_diagnosis_id)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.submitter_specimen_id, {single_val(specimen.submitter_specimen_id)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.pathological_tumour_staging_system, {single_val(specimen.pathological_tumour_staging_system)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.pathological_t_category, {single_val(specimen.pathological_t_category)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.pathological_n_category, {single_val(specimen.pathological_n_category)} 
+DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.pathological_m_category, {single_val(specimen.pathological_m_category)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.pathological_stage_group, {single_val(specimen.pathological_stage_group)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.specimen_collection_date, {date_interval(specimen.specimen_collection_date)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.specimen_storage, {single_val(specimen.specimen_storage)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.tumour_histological_type, {single_val(specimen.tumour_histological_type)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.specimen_anatomic_location, {single_val(specimen.specimen_anatomic_location)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.reference_pathology_confirmed_diagnosis, {single_val(specimen.reference_pathology_confirmed_diagnosis)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.reference_pathology_confirmed_tumour_presence, {single_val(specimen.reference_pathology_confirmed_tumour_presence)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.tumour_grading_system, {single_val(specimen.tumour_grading_system)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.tumour_grade, {single_val(specimen.tumour_grade)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.percent_tumour_cells_range, {single_val(specimen.percent_tumour_cells_range)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.percent_tumour_cells_measurement_method, {single_val(specimen.percent_tumour_cells_measurement_method)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.specimen_processing, {single_val(specimen.specimen_processing)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.specimen_laterality, {single_val(specimen.specimen_laterality)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.sample_registrations.INDEX, {indexed_on(sample_registration.submitter_specimen_id)} 
+DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.sample_registrations.INDEX.submitter_sample_id, {single_val(sample_registration.submitter_sample_id)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.sample_registrations.INDEX.specimen_tissue_source, {single_val(sample_registration.specimen_tissue_source)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.sample_registrations.INDEX.tumour_normal_designation, {single_val(sample_registration.tumour_normal_designation)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.sample_registrations.INDEX.specimen_type, {single_val(sample_registration.specimen_type)} +DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.sample_registrations.INDEX.sample_type, {single_val(sample_registration.sample_type)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX, {indexed_on(treatment.submitter_primary_diagnosis_id)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.submitter_treatment_id, {single_val(treatment.submitter_treatment_id)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.is_primary_treatment, {single_val(treatment.is_primary_treatment)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.treatment_start_date, {date_interval(treatment.treatment_start_date)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.treatment_end_date, {date_interval(treatment.treatment_end_date)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.treatment_setting, {single_val(treatment.treatment_setting)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.treatment_intent, {single_val(treatment.treatment_intent)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.days_per_cycle, {integer(treatment.days_per_cycle)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.number_of_cycles, {integer(treatment.number_of_cycles)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.line_of_treatment, {integer(treatment.line_of_treatment)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.status_of_treatment, 
{single_val(treatment.status_of_treatment)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.treatment_type, {pipe_delim(treatment.treatment_type)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.response_to_treatment_criteria_method, {single_val(treatment.response_to_treatment_criteria_method)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.response_to_treatment, {single_val(treatment.response_to_treatment)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX, {indexed_on(chemotherapy.submitter_treatment_id)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.chemotherapy_drug_dose_units, {single_val(chemotherapy.chemotherapy_drug_dose_units)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.drug_reference_database, {single_val(chemotherapy.drug_reference_database)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.drug_name, {single_val(chemotherapy.drug_name)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.drug_reference_identifier, {single_val(chemotherapy.drug_reference_identifier)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.prescribed_cumulative_drug_dose, {integer(chemotherapy.prescribed_cumulative_drug_dose)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.actual_cumulative_drug_dose, {integer(chemotherapy.actual_cumulative_drug_dose)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX, {indexed_on(hormone_therapy.submitter_treatment_id)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.hormone_drug_dose_units, {single_val(hormone_therapy.hormone_drug_dose_units)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_database, {single_val(hormone_therapy.drug_reference_database)} 
+DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_name, {single_val(hormone_therapy.drug_name)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_identifier, {single_val(hormone_therapy.drug_reference_identifier)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.prescribed_cumulative_drug_dose, {integer(hormone_therapy.prescribed_cumulative_drug_dose)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.actual_cumulative_drug_dose, {integer(hormone_therapy.actual_cumulative_drug_dose)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.immunotherapies.INDEX, {indexed_on(immunotherapy.submitter_treatment_id)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.immunotherapies.INDEX.immunotherapy_type, {single_val(immunotherapy.immunotherapy_type)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.immunotherapies.INDEX.drug_reference_database, {single_val(immunotherapy.drug_reference_database)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.immunotherapies.INDEX.immunotherapy_drug_dose_units, {single_val(immunotherapy.immunotherapy_drug_dose_units)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.immunotherapies.INDEX.drug_name, {single_val(immunotherapy.drug_name)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.immunotherapies.INDEX.drug_reference_identifier, {single_val(immunotherapy.drug_reference_identifier)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.immunotherapies.INDEX.prescribed_cumulative_drug_dose, {integer(immunotherapy.prescribed_cumulative_drug_dose)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.immunotherapies.INDEX.actual_cumulative_drug_dose, {integer(immunotherapy.actual_cumulative_drug_dose)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiations.INDEX, {indexed_on(radiation.submitter_treatment_id)} 
+DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiations.INDEX.radiation_therapy_modality, {single_val(radiation.radiation_therapy_modality)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiations.INDEX.radiation_therapy_type, {single_val(radiation.radiation_therapy_type)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiations.INDEX.anatomical_site_irradiated, {single_val(radiation.anatomical_site_irradiated)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiations.INDEX.radiation_therapy_fractions, {integer(radiation.radiation_therapy_fractions)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiations.INDEX.radiation_therapy_dosage, {integer(radiation.radiation_therapy_dosage)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiations.INDEX.radiation_boost, {boolean(radiation.radiation_boost)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiations.INDEX.reference_radiation_treatment_id, {single_val(radiation.reference_radiation_treatment_id)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX, {indexed_on(surgery.submitter_treatment_id)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.surgery_type, {single_val(surgery.surgery_type)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.surgery_site, {single_val(surgery.surgery_site)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.surgery_location, {single_val(surgery.surgery_location)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.tumour_focality, {single_val(surgery.tumour_focality)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.residual_tumour_classification, {single_val(surgery.residual_tumour_classification)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.margin_types_involved, {pipe_delim(surgery.margin_types_involved)} 
+DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.margin_types_not_involved, {pipe_delim(surgery.margin_types_not_involved)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.margin_types_not_assessed, {pipe_delim(surgery.margin_types_not_assessed)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.lymphovascular_invasion, {single_val(surgery.lymphovascular_invasion)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.perineural_invasion, {single_val(surgery.perineural_invasion)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.submitter_specimen_id, {single_val(surgery.submitter_specimen_id)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.tumour_length, {integer(surgery.tumour_length)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.tumour_width, {integer(surgery.tumour_width)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgeries.INDEX.greatest_dimension_tumour, {integer(surgery.greatest_dimension_tumour)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX, {indexed_on(follow_up.submitter_treatment_id)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.submitter_follow_up_id, {single_val(follow_up.submitter_follow_up_id)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.date_of_followup, {date_interval(follow_up.date_of_followup)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.disease_status_at_followup, {single_val(follow_up.disease_status_at_followup)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.relapse_type, {single_val(follow_up.relapse_type)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.date_of_relapse, {date_interval(follow_up.date_of_relapse)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.method_of_progression_status, 
{pipe_delim(follow_up.method_of_progression_status)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.anatomic_site_progression_or_recurrence, {pipe_delim(follow_up.anatomic_site_progression_or_recurrence)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.recurrence_tumour_staging_system, {single_val(follow_up.recurrence_tumour_staging_system)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.recurrence_t_category, {single_val(follow_up.recurrence_t_category)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.recurrence_n_category, {single_val(follow_up.recurrence_n_category)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.recurrence_m_category, {single_val(follow_up.recurrence_m_category)} +DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.recurrence_stage_group, {single_val(follow_up.recurrence_stage_group)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX, {indexed_on(follow_up.submitter_primary_diagnosis_id)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.submitter_follow_up_id, {single_val(follow_up.submitter_follow_up_id)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.date_of_followup, {date_interval(follow_up.date_of_followup)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.disease_status_at_followup, {single_val(follow_up.disease_status_at_followup)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.relapse_type, {single_val(follow_up.relapse_type)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.date_of_relapse, {date_interval(follow_up.date_of_relapse)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.method_of_progression_status, {pipe_delim(follow_up.method_of_progression_status)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.anatomic_site_progression_or_recurrence, {pipe_delim(follow_up.anatomic_site_progression_or_recurrence)} 
+DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.recurrence_tumour_staging_system, {single_val(follow_up.recurrence_tumour_staging_system)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.recurrence_t_category, {single_val(follow_up.recurrence_t_category)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.recurrence_n_category, {single_val(follow_up.recurrence_n_category)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.recurrence_m_category, {single_val(follow_up.recurrence_m_category)} +DONOR.INDEX.primary_diagnoses.INDEX.followups.INDEX.recurrence_stage_group, {single_val(follow_up.recurrence_stage_group)} +## DONOR.INDEX.comorbidities.INDEX, {indexed_on(COMORBIDITIES_SHEET.submitter_donor_id)} +## DONOR.INDEX.comorbidities.INDEX.prior_malignancy, {single_val(COMORBIDITIES_SHEET.prior_malignancy)} +## DONOR.INDEX.comorbidities.INDEX.laterality_of_prior_malignancy, {single_val(COMORBIDITIES_SHEET.laterality_of_prior_malignancy)} +## DONOR.INDEX.comorbidities.INDEX.comorbidity_type_code, {single_val(COMORBIDITIES_SHEET.comorbidity_type_code)} +## DONOR.INDEX.comorbidities.INDEX.comorbidity_treatment_status, {single_val(COMORBIDITIES_SHEET.comorbidity_treatment_status)} +## DONOR.INDEX.comorbidities.INDEX.comorbidity_treatment, {single_val(COMORBIDITIES_SHEET.comorbidity_treatment)} +## DONOR.INDEX.comorbidities.INDEX.age_at_comorbidity_diagnosis, {integer(COMORBIDITIES_SHEET.age_at_comorbidity_diagnosis)} +## DONOR.INDEX.exposures.INDEX, {indexed_on(EXPOSURES_SHEET.submitter_donor_id)} +## DONOR.INDEX.exposures.INDEX.tobacco_smoking_status, {single_val(EXPOSURES_SHEET.tobacco_smoking_status)} +## DONOR.INDEX.exposures.INDEX.tobacco_type.INDEX, {indexed_on(EXPOSURES_SHEET.tobacco_type)} +## DONOR.INDEX.exposures.INDEX.pack_years_smoked, {float(EXPOSURES_SHEET.pack_years_smoked)} +## DONOR.INDEX.biomarkers.INDEX, {indexed_on(BIOMARKERS_SHEET.submitter_donor_id)} +## DONOR.INDEX.biomarkers.INDEX.er_status, {single_val(BIOMARKERS_SHEET.er_status)} +## 
DONOR.INDEX.biomarkers.INDEX.pr_status, {single_val(BIOMARKERS_SHEET.pr_status)} +## DONOR.INDEX.biomarkers.INDEX.her2_ihc_status, {single_val(BIOMARKERS_SHEET.her2_ihc_status)} +## DONOR.INDEX.biomarkers.INDEX.her2_ish_status, {single_val(BIOMARKERS_SHEET.her2_ish_status)} +## DONOR.INDEX.biomarkers.INDEX.hpv_ihc_status, {single_val(BIOMARKERS_SHEET.hpv_ihc_status)} +## DONOR.INDEX.biomarkers.INDEX.hpv_pcr_status, {single_val(BIOMARKERS_SHEET.hpv_pcr_status)} +## DONOR.INDEX.biomarkers.INDEX.hpv_strain, {pipe_delim(BIOMARKERS_SHEET.hpv_strain)} +## DONOR.INDEX.biomarkers.INDEX.submitter_specimen_id, {single_val(BIOMARKERS_SHEET.submitter_specimen_id)} +## DONOR.INDEX.biomarkers.INDEX.submitter_primary_diagnosis_id, {single_val(BIOMARKERS_SHEET.submitter_primary_diagnosis_id)} +## DONOR.INDEX.biomarkers.INDEX.submitter_treatment_id, {single_val(BIOMARKERS_SHEET.submitter_treatment_id)} +## DONOR.INDEX.biomarkers.INDEX.submitter_follow_up_id, {single_val(BIOMARKERS_SHEET.submitter_follow_up_id)} +## DONOR.INDEX.biomarkers.INDEX.test_date, {date_interval(BIOMARKERS_SHEET.test_date)} +## DONOR.INDEX.biomarkers.INDEX.psa_level, {integer(BIOMARKERS_SHEET.psa_level)} +## DONOR.INDEX.biomarkers.INDEX.ca125, {integer(BIOMARKERS_SHEET.ca125)} +## DONOR.INDEX.biomarkers.INDEX.cea, {integer(BIOMARKERS_SHEET.cea)} +## DONOR.INDEX.biomarkers.INDEX.er_percent_positive, {float(BIOMARKERS_SHEET.er_percent_positive)} +## DONOR.INDEX.biomarkers.INDEX.pr_percent_positive, {float(BIOMARKERS_SHEET.pr_percent_positive)} +DONOR.INDEX.followups.INDEX, {moh_indexed_on_donor_if_others_absent(follow_up.submitter_donor_id, follow_up.submitter_primary_diagnosis_id, follow_up.submitter_treatment_id)} +DONOR.INDEX.followups.INDEX.submitter_follow_up_id, {single_val(follow_up.submitter_follow_up_id)} +DONOR.INDEX.followups.INDEX.date_of_followup, {date_interval(follow_up.date_of_followup)} +DONOR.INDEX.followups.INDEX.disease_status_at_followup, {single_val(follow_up.disease_status_at_followup)} 
+DONOR.INDEX.followups.INDEX.relapse_type, {single_val(follow_up.relapse_type)}
+DONOR.INDEX.followups.INDEX.date_of_relapse, {date_interval(follow_up.date_of_relapse)}
+DONOR.INDEX.followups.INDEX.method_of_progression_status, {pipe_delim(follow_up.method_of_progression_status)}
+DONOR.INDEX.followups.INDEX.anatomic_site_progression_or_recurrence, {pipe_delim(follow_up.anatomic_site_progression_or_recurrence)}
+DONOR.INDEX.followups.INDEX.recurrence_tumour_staging_system, {single_val(follow_up.recurrence_tumour_staging_system)}
+DONOR.INDEX.followups.INDEX.recurrence_t_category, {single_val(follow_up.recurrence_t_category)}
+DONOR.INDEX.followups.INDEX.recurrence_n_category, {single_val(follow_up.recurrence_n_category)}
+DONOR.INDEX.followups.INDEX.recurrence_m_category, {single_val(follow_up.recurrence_m_category)}
+DONOR.INDEX.followups.INDEX.recurrence_stage_group, {single_val(follow_up.recurrence_stage_group)}
diff --git a/site_datasets/c3g/muhc_mappings.py b/site_datasets/c3g/muhc_mappings.py
new file mode 100644
index 0000000..7ec1a79
--- /dev/null
+++ b/site_datasets/c3g/muhc_mappings.py
@@ -0,0 +1,113 @@
+import os
+import sys
+# Include src/ directory in the module search path.
+current_dir = os.path.dirname(os.path.abspath(__file__))
+parent_dir = os.path.dirname(current_dir)
+grandparent_dir = os.path.dirname(parent_dir)
+sys.path.append(os.sep.join([grandparent_dir, "src"]))
+import clinical_etl.mappings as mappings
+
+def map_primary_site(data_values):
+    """Convert ICD-O topography codes into PrimarySiteEnum body-site names.
+
+    The raw value is passed through ``mappings.pipe_delim`` and every resulting
+    code is looked up in the table below. A code that is not an exact key is
+    retried as its three-character site group (the part before the ".", stripped
+    and upper-cased), so "C44.9" resolves like "C44". Codes that still cannot be
+    resolved are left out of the result.
+
+    Args:
+        data_values: values dict with single pipe-delimited string, e.g. "a|b|c"
+
+    Returns:
+        A list of site names in input order, e.g. ["Skin", "Lip", "Gum"], or
+        None when ``mappings.pipe_delim`` yields None or an empty result.
+    """
+    # NOTE(review): labels are meant to match the schema's PrimarySiteEnum;
+    # confirm the exact spelling of the C14 and C72 entries against it.
+    ICDO_dict = {
+        'C31': 'Accessory sinuses',
+        'C74': 'Adrenal gland',
+        'C21': 'Anus and anal canal',
+        'C01': 'Base of tongue',
+        'C67': 'Bladder',
+        'C40': 'Bones, joints and articular cartilage of limbs',
+        'C41': 'Bones, joints and articular cartilage of other and unspecified sites',
+        'C71': 'Brain',
+        'C50': 'Breast',
+        'C34': 'Bronchus and lung',
+        'C53': 'Cervix uteri',
+        'C18': 'Colon',
+        'C49': 'Connective, subcutaneous and other soft tissues',
+        'C54': 'Corpus uteri',
+        'C15': 'Esophagus',
+        'C69': 'Eye and adnexa',
+        'C04': 'Floor of mouth',
+        'C23': 'Gallbladder',
+        'C03': 'Gum',
+        'C38': 'Heart, mediastinum, and pleura',
+        'C42': 'Hematopoietic and reticuloendothelial systems',
+        'C13': 'Hypopharynx',
+        'C64': 'Kidney',
+        'C32': 'Larynx',
+        'C00': 'Lip',
+        'C22': 'Liver and intrahepatic bile ducts',
+        'C77': 'Lymph nodes',
+        'C70': 'Meninges',
+        'C30': 'Nasal cavity and middle ear',
+        'C11': 'Nasopharynx',
+        'C10': 'Oropharynx',
+        'C26': 'Other and ill-defined digestive organs',
+        'C76': 'Other and ill-defined sites',
+        'C14': 'Other and ill-defined sites in lip oral cavity and pharynx',
+        'C39': 'Other and ill-defined sites within respiratory system and intrathoracic organs',
+        'C57': 'Other and unspecified female genital organs',
+        'C08': 'Other and unspecified major salivary glands',
+        'C63': 'Other and unspecified male genital organs',
+        'C24': 'Other and unspecified parts of biliary tract',
+        'C06': 'Other and unspecified parts of mouth',
+        'C02': 'Other and unspecified parts of tongue',
+        'C68': 'Other and unspecified urinary organs',
+        'C75': 'Other endocrine glands and related structures',
+        'C56': 'Ovary',
+        'C05': 'Palate',
+        'C25': 'Pancreas',
+        'C07': 'Parotid gland',
+        'C60': 'Penis',
+        'C47': 'Peripheral nerves and autonomic nervous system',
+        'C58': 'Placenta',
+        'C61': 'Prostate gland',
+        'C12': 'Pyriform sinus',
+        'C19': 'Rectosigmoid junction',
+        'C20': 'Rectum',
+        'C65': 'Renal pelvis',
+        'C48': 'Retroperitoneum and peritoneum',
+        'C44': 'Skin',
+        'C17': 'Small intestine',
+        'C72': 'Spinal cord, cranial nerves,and other parts of the nervous system',
+        'C16': 'Stomach',
+        'C62': 'Testis',
+        'C37': 'Thymus',
+        'C73': 'Thyroid gland',
+        'C09': 'Tonsil',
+        'C33': 'Trachea',
+        'C66': 'Ureter',
+        # Keyed on C55: a second 'C66' key here used to overwrite 'Ureter'.
+        'C55': 'Uterus, NOS',
+        'C52': 'Vagina',
+        'C51': 'Vulva',
+        'C80': 'Unknown primary site',
+    }
+    codes = mappings.pipe_delim(data_values)
+    # Nothing to map: no value at all (None) or an empty result.
+    if not codes:
+        return None
+    result = []
+    for code in codes:
+        site = ICDO_dict.get(code)
+        if site is None and isinstance(code, str):
+            # Fall back to the three-character site group ("C44.9" -> "C44").
+            site = ICDO_dict.get(code.strip().upper().split(".")[0])
+        if site is not None:
+            result.append(site)
+    return result