diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1c3cf2e..5156f5a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -19,7 +19,6 @@ jobs: - name: Install dependencies run: | pip install -r requirements.txt - python -m pip install -e . - name: Test with pytest run: | pytest diff --git a/README.md b/README.md index f5afbf3..91b980a 100644 --- a/README.md +++ b/README.md @@ -45,12 +45,6 @@ Install the repo's requirements in your virtual environment pip install -r requirements.txt ``` ->[!NOTE] -> If Python can't find the `clinical_etl` module when running `CSVConvert`, install the depencency manually: -> ``` -> pip install -e clinical_ETL_code/ -> ``` - Before running the script, you will need to have your input files, this will be clinical data in a tabular format (`xlsx`/`csv`) that can be read into program and a cohort directory containing the files that define the schema and mapping configurations. ### Input file/s format @@ -65,7 +59,7 @@ If you are working with exports from RedCap, the sample files in the [`sample_in ### Setting up a cohort directory -For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `data` repository. This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are: +For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `clinical_ETL_data` repository. 
This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are: * a [`manifest.yml`](#Manifest-file) file with configuration settings for the mapping and schema validation * a [mapping template](#Mapping-template) csv that lists custom mappings for each field (based on `moh_template.csv`) diff --git a/src/clinical_ETL.egg-info/PKG-INFO b/src/clinical_ETL.egg-info/PKG-INFO index fad2ec5..415ea0b 100644 --- a/src/clinical_ETL.egg-info/PKG-INFO +++ b/src/clinical_ETL.egg-info/PKG-INFO @@ -84,7 +84,7 @@ If you are working with exports from RedCap, the sample files in the [`sample_in ### Setting up a cohort directory -For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `data` repository. This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are: +For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `clinical_ETL_data` repository. This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are: * a [`manifest.yml`](#Manifest-file) file with configuration settings for the mapping and schema validation * a [mapping template](#Mapping-template) csv that lists custom mappings for each field (based on `moh_template.csv`) diff --git a/src/clinical_etl/CSVConvert.py b/src/clinical_etl/CSVConvert.py index ae5b31e..7fddfd4 100644 --- a/src/clinical_etl/CSVConvert.py +++ b/src/clinical_etl/CSVConvert.py @@ -12,11 +12,7 @@ import yaml import argparse from tqdm import tqdm -from clinical_etl import mappings -# Include clinical_etl parent directory in the module search path. 
-current_dir = os.path.dirname(os.path.abspath(__file__)) -parent_dir = os.path.dirname(current_dir) -sys.path.append(parent_dir) +import mappings def verbose_print(message): @@ -277,7 +273,7 @@ def eval_mapping(node_name, rownum): """ verbose_print(f" Evaluating {mappings.IDENTIFIER}: {node_name}") if "mappings" not in mappings.MODULES: - mappings.MODULES["mappings"] = importlib.import_module("clinical_etl.mappings") + mappings.MODULES["mappings"] = importlib.import_module("mappings") modulename = "mappings" method, parameters = parse_mapping_function(node_name) @@ -596,7 +592,7 @@ def load_manifest(manifest_file): # programatically load schema class based on manifest value: # schema class definition will be in a file named schema_class.lower() - schema_mod = importlib.import_module(f"clinical_etl.{schema_class.lower()}") + schema_mod = importlib.import_module(f"{schema_class.lower()}") schema = getattr(schema_mod, schema_class)(manifest["schema"]) if schema.json_schema is None: sys.exit(f"Could not read an openapi schema at {manifest['schema']};\n" @@ -633,7 +629,7 @@ def load_manifest(manifest_file): f"{manifest_dir} and has the correct name.\n---") sys.exit(e) # mappings is a standard module: add it - mappings.MODULES["mappings"] = importlib.import_module("clinical_etl.mappings") + mappings.MODULES["mappings"] = importlib.import_module("mappings") return result diff --git a/src/clinical_etl/__init__.py b/src/clinical_etl/__init__.py index e69de29..8cff63f 100644 --- a/src/clinical_etl/__init__.py +++ b/src/clinical_etl/__init__.py @@ -0,0 +1,3 @@ +# Adds this package's directory to sys.path so sibling modules can be imported by bare name (e.g. `import mappings`). 
+import os, sys +sys.path.append(os.path.dirname(os.path.realpath(__file__))) \ No newline at end of file diff --git a/src/clinical_etl/generate_mapping_docs.py b/src/clinical_etl/generate_mapping_docs.py index c0c7b19..4aef862 100644 --- a/src/clinical_etl/generate_mapping_docs.py +++ b/src/clinical_etl/generate_mapping_docs.py @@ -1,3 +1,9 @@ +# Updates the ../../mapping_functions.md +# Prior to running, set the PYTHONPATH for use by the subprocess with: +# export PYTHONPATH="$PWD" +# Then run: +# python generate_mapping_docs.py + import subprocess diff --git a/src/clinical_etl/genomicschema.py b/src/clinical_etl/genomicschema.py index 9a4e548..12ea505 100644 --- a/src/clinical_etl/genomicschema.py +++ b/src/clinical_etl/genomicschema.py @@ -1,6 +1,6 @@ import json import dateparser -from clinical_etl.schema import BaseSchema, ValidationError +from schema import BaseSchema, ValidationError """ diff --git a/src/clinical_etl/mohschemav2.py b/src/clinical_etl/mohschemav2.py index 71733b7..0847e4b 100644 --- a/src/clinical_etl/mohschemav2.py +++ b/src/clinical_etl/mohschemav2.py @@ -1,6 +1,6 @@ import json import dateparser -from clinical_etl.schema import BaseSchema, ValidationError +from schema import BaseSchema, ValidationError """ diff --git a/src/clinical_etl/mohschemav3.py b/src/clinical_etl/mohschemav3.py index e1cd6c1..8476ae5 100644 --- a/src/clinical_etl/mohschemav3.py +++ b/src/clinical_etl/mohschemav3.py @@ -1,6 +1,6 @@ import json import dateparser -from clinical_etl.schema import BaseSchema, ValidationError +from schema import BaseSchema, ValidationError """ diff --git a/src/clinical_etl/validate_coverage.py b/src/clinical_etl/validate_coverage.py index 0d4d67c..0626eff 100644 --- a/src/clinical_etl/validate_coverage.py +++ b/src/clinical_etl/validate_coverage.py @@ -4,10 +4,6 @@ import mappings import importlib.util import os -# Include clinical_etl parent directory in the module search path for a later import. 
-current_dir = os.path.dirname(os.path.abspath(__file__)) -parent_dir = os.path.dirname(current_dir) -sys.path.append(parent_dir) # from jsoncomparison import Compare # from copy import deepcopy # import yaml @@ -15,6 +11,8 @@ # import os # import re # import CSVConvert +import mohschemav2 +import mohschemav3 def parse_args(): @@ -215,8 +213,7 @@ def validate_coverage(map_json, verbose=False): schema_class = "MoHSchemaV3" if "schema_class" in map_json: schema_class = map_json["schema_class"] - schema_mod = importlib.import_module(f"clinical_etl.{schema_class.lower()}") - schema = getattr(schema_mod, schema_class)(map_json["openapi_url"]) + schema = getattr(eval(schema_class.lower()), schema_class)(map_json["openapi_url"]) if schema.json_schema is None: sys.exit(f"Did not find an openapi schema at {map_json['openapi_url']}; please check the 'openapi_url' in the map json file.") diff --git a/update_moh_template.sh b/update_moh_template.sh old mode 100644 new mode 100755