Skip to content

Commit

Permalink
Merge branch 'develop' into mshadbolt/reference-date-warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
yavyx authored Nov 8, 2024
2 parents e69f63c + 3ae6c6f commit 85e4b5c
Show file tree
Hide file tree
Showing 11 changed files with 21 additions and 26 deletions.
1 change: 0 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ jobs:
- name: Install dependencies
run: |
pip install -r requirements.txt
python -m pip install -e .
- name: Test with pytest
run: |
pytest
Expand Down
8 changes: 1 addition & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@ Install the repo's requirements in your virtual environment
pip install -r requirements.txt
```

>[!NOTE]
> If Python can't find the `clinical_etl` module when running `CSVConvert`, install the dependency manually:
> ```
> pip install -e clinical_ETL_code/
> ```
Before running the script, you will need your input files: clinical data in a tabular format (`xlsx`/`csv`) that can be read into the program, and a cohort directory containing the files that define the schema and mapping configurations.

### Input file/s format
Expand All @@ -65,7 +59,7 @@ If you are working with exports from RedCap, the sample files in the [`sample_in

### Setting up a cohort directory

For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `data` repository. This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are:
For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `clinical_ETL_data` repository. This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are:

* a [`manifest.yml`](#Manifest-file) file with configuration settings for the mapping and schema validation
* a [mapping template](#Mapping-template) csv that lists custom mappings for each field (based on `moh_template.csv`)
Expand Down
2 changes: 1 addition & 1 deletion src/clinical_ETL.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ If you are working with exports from RedCap, the sample files in the [`sample_in

### Setting up a cohort directory

For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `data` repository. This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are:
For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `clinical_ETL_data` repository. This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are:

* a [`manifest.yml`](#Manifest-file) file with configuration settings for the mapping and schema validation
* a [mapping template](#Mapping-template) csv that lists custom mappings for each field (based on `moh_template.csv`)
Expand Down
12 changes: 4 additions & 8 deletions src/clinical_etl/CSVConvert.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,7 @@
import yaml
import argparse
from tqdm import tqdm
from clinical_etl import mappings
# Include clinical_etl parent directory in the module search path.
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)
import mappings


def verbose_print(message):
Expand Down Expand Up @@ -277,7 +273,7 @@ def eval_mapping(node_name, rownum):
"""
verbose_print(f" Evaluating {mappings.IDENTIFIER}: {node_name}")
if "mappings" not in mappings.MODULES:
mappings.MODULES["mappings"] = importlib.import_module("clinical_etl.mappings")
mappings.MODULES["mappings"] = importlib.import_module("mappings")
modulename = "mappings"

method, parameters = parse_mapping_function(node_name)
Expand Down Expand Up @@ -596,7 +592,7 @@ def load_manifest(manifest_file):

# programatically load schema class based on manifest value:
# schema class definition will be in a file named schema_class.lower()
schema_mod = importlib.import_module(f"clinical_etl.{schema_class.lower()}")
schema_mod = importlib.import_module(f"{schema_class.lower()}")
schema = getattr(schema_mod, schema_class)(manifest["schema"])
if schema.json_schema is None:
sys.exit(f"Could not read an openapi schema at {manifest['schema']};\n"
Expand Down Expand Up @@ -633,7 +629,7 @@ def load_manifest(manifest_file):
f"{manifest_dir} and has the correct name.\n---")
sys.exit(e)
# mappings is a standard module: add it
mappings.MODULES["mappings"] = importlib.import_module("clinical_etl.mappings")
mappings.MODULES["mappings"] = importlib.import_module("mappings")
return result


Expand Down
3 changes: 3 additions & 0 deletions src/clinical_etl/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Allows relative imports from current directory to work.
import os, sys
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
6 changes: 6 additions & 0 deletions src/clinical_etl/generate_mapping_docs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Updates the ../../mapping_functions.md
# Prior to running, set the PYTHONPATH for use by the subprocess with:
# export PYTHONPATH="$PWD"
# Then run:
# python generate_mapping_docs.py

import subprocess


Expand Down
2 changes: 1 addition & 1 deletion src/clinical_etl/genomicschema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import dateparser
from clinical_etl.schema import BaseSchema, ValidationError
from schema import BaseSchema, ValidationError


"""
Expand Down
2 changes: 1 addition & 1 deletion src/clinical_etl/mohschemav2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import dateparser
from clinical_etl.schema import BaseSchema, ValidationError
from schema import BaseSchema, ValidationError


"""
Expand Down
2 changes: 1 addition & 1 deletion src/clinical_etl/mohschemav3.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import dateparser
from clinical_etl.schema import BaseSchema, ValidationError
from schema import BaseSchema, ValidationError


"""
Expand Down
9 changes: 3 additions & 6 deletions src/clinical_etl/validate_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,15 @@
import mappings
import importlib.util
import os
# Include clinical_etl parent directory in the module search path for a later import.
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)
# from jsoncomparison import Compare
# from copy import deepcopy
# import yaml
# import jsonschema
# import os
# import re
# import CSVConvert
import mohschemav2
import mohschemav3


def parse_args():
Expand Down Expand Up @@ -215,8 +213,7 @@ def validate_coverage(map_json, verbose=False):
schema_class = "MoHSchemaV3"
if "schema_class" in map_json:
schema_class = map_json["schema_class"]
schema_mod = importlib.import_module(f"clinical_etl.{schema_class.lower()}")
schema = getattr(schema_mod, schema_class)(map_json["openapi_url"])
schema = getattr(eval(schema_class.lower()), schema_class)(map_json["openapi_url"])

if schema.json_schema is None:
sys.exit(f"Did not find an openapi schema at {map_json['openapi_url']}; please check the 'openapi_url' in the map json file.")
Expand Down
Empty file modified update_moh_template.sh
100644 → 100755
Empty file.

0 comments on commit 85e4b5c

Please sign in to comment.