From c4a1624f6f07ae5806c81005a40bc346a1b52651 Mon Sep 17 00:00:00 2001 From: David Brownlee Date: Fri, 9 Feb 2024 16:21:13 -0500 Subject: [PATCH 1/4] Module path changes to allow running standalone (not as an included module.) --- src/clinical_etl/CSVConvert.py | 10 +++++++--- tests/test_data_ingest.py | 9 +++++++-- tests/testmap.py | 6 ++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/clinical_etl/CSVConvert.py b/src/clinical_etl/CSVConvert.py index 4a7c8d6..08256fe 100644 --- a/src/clinical_etl/CSVConvert.py +++ b/src/clinical_etl/CSVConvert.py @@ -1,17 +1,21 @@ #!/usr/bin/env python # coding: utf-8 +import sys +import os from copy import deepcopy import importlib.util import json -from clinical_etl import mappings -import os import pandas import csv import re -import sys import yaml import argparse +# Include clinical_etl parent directory in the module search path. +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) +sys.path.append(parent_dir) +from clinical_etl import mappings def verbose_print(message): diff --git a/tests/test_data_ingest.py b/tests/test_data_ingest.py index 455c220..65d4a16 100644 --- a/tests/test_data_ingest.py +++ b/tests/test_data_ingest.py @@ -1,9 +1,14 @@ import pytest import yaml +import os +import sys +import json +# Include src/clinical_etl directory in the module search path. +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) +sys.path.append(os.sep.join([parent_dir, "src"])) from clinical_etl import CSVConvert from clinical_etl import mappings -import json -import os from clinical_etl.mohschema import MoHSchema # read sheet from given data pathway diff --git a/tests/testmap.py b/tests/testmap.py index 95ef12d..c78abcb 100644 --- a/tests/testmap.py +++ b/tests/testmap.py @@ -1,3 +1,9 @@ +import os +import sys +# Include src/ directory in the module search path. 
+current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) +sys.path.append(os.sep.join([parent_dir, "src"])) import clinical_etl.mappings def indexed_on_if_absent(data_values): From 96d624957325cb1b78b9fa2a79c714d1050e7f72 Mon Sep 17 00:00:00 2001 From: David Brownlee Date: Mon, 12 Feb 2024 12:54:24 -0500 Subject: [PATCH 2/4] module import path fixed for validate_coverage.py as well. --- README.md | 2 +- src/clinical_etl/validate_coverage.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 06b5750..1684f2c 100644 --- a/README.md +++ b/README.md @@ -225,7 +225,7 @@ You can validate the generated json mapping file against the MoH data model. The ``` $ python src/clinical_etl/validate_coverage.py -h -validate_coverage.py [-h] [--input map.json] [--manifest MAPPING] +usage: validate_coverage.py [-h] --json JSON [--verbose] options: -h, --help show this help message and exit diff --git a/src/clinical_etl/validate_coverage.py b/src/clinical_etl/validate_coverage.py index 97ebe21..30ceb14 100644 --- a/src/clinical_etl/validate_coverage.py +++ b/src/clinical_etl/validate_coverage.py @@ -3,6 +3,11 @@ import sys import mappings import importlib.util +import os +# Include clinical_etl parent directory in the module search path for a later import. +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) +sys.path.append(parent_dir) # from jsoncomparison import Compare # from copy import deepcopy # import yaml From c90eca02d4f6a5b0245f515fd2a16bca88272747 Mon Sep 17 00:00:00 2001 From: David Brownlee Date: Mon, 19 Feb 2024 10:44:00 -0500 Subject: [PATCH 3/4] Update sample_inputs to work as standalone (not imported library). 
--- sample_inputs/new_cohort.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sample_inputs/new_cohort.py b/sample_inputs/new_cohort.py index 13a00ed..ef8b5e5 100644 --- a/sample_inputs/new_cohort.py +++ b/sample_inputs/new_cohort.py @@ -1,5 +1,12 @@ -## Additional mappings customised to my special cohort +import os +import sys +# Include src/ directory in the module search path. +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) +sys.path.append(os.sep.join([parent_dir, "src"])) +import clinical_etl.mappings +## Additional mappings customised to my special cohort def sex(data_value): # make sure we only have one value mapping_val = mappings.single_val(data_value) From e25e0cf7b49abc4ed602f8f500f098d5220953af Mon Sep 17 00:00:00 2001 From: David Brownlee Date: Mon, 19 Feb 2024 12:22:13 -0500 Subject: [PATCH 4/4] Added documentation clarity for writing and using custom functions. --- mapping_functions.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/mapping_functions.md b/mapping_functions.md index 850ea6a..ba22a03 100644 --- a/mapping_functions.md +++ b/mapping_functions.md @@ -66,15 +66,23 @@ A detailed index of all standard functions can be viewed below in the [Standard ## Writing your own custom functions -If the data cannot be transformed with one of the standard functions, you can define your own. In your data directory (the one that contains `manifest.yml`) create a python file (let's assume you called it `new_cohort.py`) and add the name of that file as the `mapping` entry in the manifest. +If the data cannot be transformed with one of the standard functions, you can define your own. -Following the format in the generic `mappings.py`, write your own functions in your python file for how to translate the data. 
To specify a custom mapping function in the template: +In your data directory (the one that contains `manifest.yml`) create a python file (let's assume you called it `new_cohort.py`) and add the name of that file as a .yml list after `functions` in the manifest. For example: +``` +functions: + - new_cohort +``` + +Following the format in the generic `mappings.py`, write your own functions in your python file to translate the data. + +To use a custom mapping function in the template, you must specify the file and function using dot-separated notation: -`DONOR.INDEX.primary_diagnoses.INDEX.basis_of_diagnosis,{new_cohort.custom_function(DATA_SHEET.field_name)}` +DONOR.INDEX.primary_diagnoses.INDEX.basis_of_diagnosis,{**new_cohort.custom_function**(DATA_SHEET.field_name)} Examples: -To map input values to output values (in case your data capture used different values than the model): +Map input values to output values (in case your data capture used different values than the model): ``` def sex(data_value):