Skip to content

Commit

Permalink
Merge branch 'develop' into mshadbolt/reference-date-warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
yavyx authored Nov 8, 2024
2 parents e69f63c + 3ae6c6f commit 85e4b5c
Show file tree
Hide file tree
Showing 11 changed files with 21 additions and 26 deletions.
1 change: 0 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ jobs:
- name: Install dependencies
run: |
pip install -r requirements.txt
python -m pip install -e .
- name: Test with pytest
run: |
pytest
Expand Down
8 changes: 1 addition & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@ Install the repo's requirements in your virtual environment
pip install -r requirements.txt
```

>[!NOTE]
> If Python can't find the `clinical_etl` module when running `CSVConvert`, install the dependency manually:
> ```
> pip install -e clinical_ETL_code/
> ```
Before running the script, you will need your input files: clinical data in a tabular format (`xlsx`/`csv`) that can be read into the program, and a cohort directory containing the files that define the schema and mapping configurations.

### Input file/s format
Expand All @@ -65,7 +59,7 @@ If you are working with exports from RedCap, the sample files in the [`sample_in

### Setting up a cohort directory

For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `data` repository. This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are:
For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `clinical_ETL_data` repository. This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are:

* a [`manifest.yml`](#Manifest-file) file with configuration settings for the mapping and schema validation
* a [mapping template](#Mapping-template) csv that lists custom mappings for each field (based on `moh_template.csv`)
Expand Down
2 changes: 1 addition & 1 deletion src/clinical_ETL.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ If you are working with exports from RedCap, the sample files in the [`sample_in

### Setting up a cohort directory

For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `data` repository. This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are:
For each dataset (cohort) that you want to convert, create a directory outside of this repository. For CanDIG devs, this will be in the private `clinical_ETL_data` repository. This cohort directory should contain the same files as shown in the [`sample_inputs/generic_example`](sample_inputs/generic_example) directory, which are:

* a [`manifest.yml`](#Manifest-file) file with configuration settings for the mapping and schema validation
* a [mapping template](#Mapping-template) csv that lists custom mappings for each field (based on `moh_template.csv`)
Expand Down
12 changes: 4 additions & 8 deletions src/clinical_etl/CSVConvert.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,7 @@
import yaml
import argparse
from tqdm import tqdm
from clinical_etl import mappings
# Include clinical_etl parent directory in the module search path.
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)
import mappings


def verbose_print(message):
Expand Down Expand Up @@ -277,7 +273,7 @@ def eval_mapping(node_name, rownum):
"""
verbose_print(f" Evaluating {mappings.IDENTIFIER}: {node_name}")
if "mappings" not in mappings.MODULES:
mappings.MODULES["mappings"] = importlib.import_module("clinical_etl.mappings")
mappings.MODULES["mappings"] = importlib.import_module("mappings")
modulename = "mappings"

method, parameters = parse_mapping_function(node_name)
Expand Down Expand Up @@ -596,7 +592,7 @@ def load_manifest(manifest_file):

# programatically load schema class based on manifest value:
# schema class definition will be in a file named schema_class.lower()
schema_mod = importlib.import_module(f"clinical_etl.{schema_class.lower()}")
schema_mod = importlib.import_module(f"{schema_class.lower()}")
schema = getattr(schema_mod, schema_class)(manifest["schema"])
if schema.json_schema is None:
sys.exit(f"Could not read an openapi schema at {manifest['schema']};\n"
Expand Down Expand Up @@ -633,7 +629,7 @@ def load_manifest(manifest_file):
f"{manifest_dir} and has the correct name.\n---")
sys.exit(e)
# mappings is a standard module: add it
mappings.MODULES["mappings"] = importlib.import_module("clinical_etl.mappings")
mappings.MODULES["mappings"] = importlib.import_module("mappings")
return result


Expand Down
3 changes: 3 additions & 0 deletions src/clinical_etl/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Allows relative imports from current directory to work.
import os, sys
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
6 changes: 6 additions & 0 deletions src/clinical_etl/generate_mapping_docs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Updates the ../../mapping_functions.md
# Prior to running, set the PYTHONPATH for use by the subprocess with:
# export PYTHONPATH="$PWD"
# Then run:
# python generate_mapping_docs.py

import subprocess


Expand Down
2 changes: 1 addition & 1 deletion src/clinical_etl/genomicschema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import dateparser
from clinical_etl.schema import BaseSchema, ValidationError
from schema import BaseSchema, ValidationError


"""
Expand Down
2 changes: 1 addition & 1 deletion src/clinical_etl/mohschemav2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import dateparser
from clinical_etl.schema import BaseSchema, ValidationError
from schema import BaseSchema, ValidationError


"""
Expand Down
2 changes: 1 addition & 1 deletion src/clinical_etl/mohschemav3.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import dateparser
from clinical_etl.schema import BaseSchema, ValidationError
from schema import BaseSchema, ValidationError


"""
Expand Down
9 changes: 3 additions & 6 deletions src/clinical_etl/validate_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,15 @@
import mappings
import importlib.util
import os
# Include clinical_etl parent directory in the module search path for a later import.
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)
# from jsoncomparison import Compare
# from copy import deepcopy
# import yaml
# import jsonschema
# import os
# import re
# import CSVConvert
import mohschemav2
import mohschemav3


def parse_args():
Expand Down Expand Up @@ -215,8 +213,7 @@ def validate_coverage(map_json, verbose=False):
schema_class = "MoHSchemaV3"
if "schema_class" in map_json:
schema_class = map_json["schema_class"]
schema_mod = importlib.import_module(f"clinical_etl.{schema_class.lower()}")
schema = getattr(schema_mod, schema_class)(map_json["openapi_url"])
schema = getattr(eval(schema_class.lower()), schema_class)(map_json["openapi_url"])

if schema.json_schema is None:
sys.exit(f"Did not find an openapi schema at {map_json['openapi_url']}; please check the 'openapi_url' in the map json file.")
Expand Down
Empty file modified update_moh_template.sh
100644 → 100755
Empty file.

0 comments on commit 85e4b5c

Please sign in to comment.