Skip to content

Commit

Permalink
Merge pull request #68 from CanDIG/yavyx/moh-v3
Browse files Browse the repository at this point in the history
DIG-1684: Update clinical_etl to v3 clinical data model
  • Loading branch information
yavyx authored Aug 9, 2024
2 parents 75046bf + 59c0cfe commit b849e87
Show file tree
Hide file tree
Showing 25 changed files with 874 additions and 83 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ The `manifest.yml` file contains settings for the cohort mapping. There is a sam
| mapping | the mapping template csv file that lists the mappings for each field based on `moh_template.csv`, assumed to be in the same directory as the `manifest.yml` file |
| identifier | the unique identifier for the donor or root node |
| schema | a URL to the openapi schema file |
| schema_class | The name of the class in the schema used as the model for creating the map.json. Currently supported: `MoHSchema` - for clinical MoH data and `GenomicSchema` for creating a genomic ingest linking file. |
| schema_class | The name of the class in the schema used as the model for creating the map.json. Currently supported: `MoHSchemaV2` and `MoHSchemaV3` for clinical MoH data, and `GenomicSchema` for creating a genomic ingest linking file. |
| reference_date | a reference date used to calculate date intervals, formatted as a mapping entry for the mapping template |
| date_format | Specify the format of the dates in your input data. Use any combination of the characters `DMY` to specify the order (e.g. `DMY`, `MDY`, `YMD`, etc). |
| functions | A list of one or more filenames containing additional mapping functions, can be omitted if not needed. Assumed to be in the same directory as the `manifest.yml` file |
Expand Down Expand Up @@ -121,6 +121,7 @@ usage: generate_schema.py [-h] --url URL [--out OUT]
options:
-h, --help show this help message and exit
--url URL URL to openAPI schema file (raw github link)
--schema SCHEMA Name of schema class. Default is MoHSchemaV3
--out OUT name of output file; csv extension will be added. Default is template
```
</details>
Expand Down
File renamed without changes.
168 changes: 168 additions & 0 deletions moh_v3_template.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion sample_inputs/generic_example/manifest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ identifier: submitter_donor_id
# a link to the openapi schema
schema: https://raw.githubusercontent.com/CanDIG/katsu/develop/chord_metadata_service/mohpackets/docs/schema.yml
# class of schema for validation:
schema_class: MoHSchema
schema_class: MoHSchemaV3
# a reference date used to calculate date intervals, formatted as a mapping entry for the mapping template
reference_date: earliest_date(Donor.date_resolution, PrimaryDiagnosis.date_of_diagnosis)
# one or more files (dataset_functions.py) that implement the mappings
Expand Down
2 changes: 1 addition & 1 deletion src/clinical_etl/CSVConvert.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ def check_for_sheet_inconsistencies(template_sheets, csv_sheets):
def load_manifest(manifest_file):
"""Given a manifest file's path, return the data inside it."""
identifier = None
schema_class = "MoHSchema"
schema_class = "MoHSchemaV2"
mapping_path = None
result = {}
try:
Expand Down
5 changes: 3 additions & 2 deletions src/clinical_etl/generate_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
import pandas
import sys
import argparse
from mohschema import MoHSchema
from mohschemav2 import MoHSchemaV2
from mohschemav3 import MoHSchemaV3
import re


def parse_args():
    """Parse the command-line options for the schema template generator.

    Options:
        --url     URL to the openAPI schema file (raw github link); defaults
                  to the katsu develop-branch schema.yml.
        --schema  Name of the schema class; defaults to "MoHSchemaV3".
        --out     Name of the output file (csv extension is added by the
                  caller); defaults to "template".

    Returns:
        argparse.Namespace with the attributes ``url``, ``schema`` and ``out``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--url', type=str, help="URL to openAPI schema file (raw github link)", default="https://raw.githubusercontent.com/CanDIG/katsu/develop/chord_metadata_service/mohpackets/docs/schema.yml")
    # Register --schema exactly once: adding the same option string twice makes
    # argparse raise a conflicting-option error before any parsing happens.
    parser.add_argument('--schema', type=str, help="Name of schema class", default="MoHSchemaV3")
    parser.add_argument('--out', type=str, help="name of output file; csv extension will be added. Default is template", default="template")
    args = parser.parse_args()
    return args
Expand Down
2 changes: 1 addition & 1 deletion src/clinical_etl/mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def int_to_date_interval_json(data_values):
return
# Either month or day date resolutions are permitted.
try:
resolution = INDEXED_DATA["data"]["CALCULATED"][IDENTIFIER]["date_resolution"][0]
resolution = INDEXED_DATA["data"]["Donor"][IDENTIFIER]["date_resolution"][0]
except KeyError:
raise MappingError("No date_resolution found to specify date interval resolution: is there a date_resolution specified in the donor file?", field_level=2)
# Format as JSON. Always include a month_interval. day_interval is optional.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
A class for the representation of a DonorWithClinicalData (MoHCCN data model v2) object in Katsu.
"""

class MoHSchema(BaseSchema):
class MoHSchemaV2(BaseSchema):
schema_name = "DonorWithClinicalDataSchema"
base_name = "DONOR"

Expand Down
Loading

0 comments on commit b849e87

Please sign in to comment.