From 077b6170ce8a760ad49d21bc31b91eb71dcdd7f2 Mon Sep 17 00:00:00 2001 From: David Brownlee Date: Fri, 25 Oct 2024 10:00:38 -0400 Subject: [PATCH 1/2] Trying to fix .github/workflow Compare to moh_v3_template.csv test. Incomplete. --- .github/workflows/test.yml | 14 ++++++++------ README.md | 2 +- src/clinical_etl/generate_schema.py | 4 ++-- tests/moh_diffs.txt | 4 ++++ update_moh_template.sh | 2 +- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5156f5a..a0175c5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,14 +22,16 @@ jobs: - name: Test with pytest run: | pytest - - name: Compare moh_template.csv + - name: Compare to moh_v3_template.csv shell: bash {0} + # Script based largely on update_moh_template.sh run: | - python generate_schema.py - diff template.csv moh_template.csv > curr_diff.txt - bytes=$(head -5 curr_diff.txt | wc -c) - dd if=curr_diff.txt bs="$bytes" skip=1 conv=notrunc of=new_diff.txt - diff new_diff.txt test_data/moh_diffs.txt + python src/clinical_etl/generate_schema.py --out tmp_template + diff tmp_template.csv moh_v3_template.csv > tests/moh_diffs.txt + rm tmp_template.csv + bytes=$(head -5 tests/moh_diffs.txt | wc -c) + dd if=tests/moh_diffs.txt bs="$bytes" skip=1 conv=notrunc of=tests/moh_diffs1.txt + diff tests/moh_diffs1.txt tests/moh_diffs.txt if [[ $? == 1 ]]; then echo MoH template checking needs to be updated! See https://github.com/CanDIG/clinical_ETL_code#mapping-template for information. exit 1 fi \ No newline at end of file diff --git a/README.md b/README.md index 91b980a..70f9230 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ You'll need to create a mapping template that defines the mapping between the fi Each line in the mapping template is composed of comma separated values with two components. The first value is an `element` or field from the target schema and the second value contains a suggested `mapping method` or function to map a field from an input sheet to a valid value for the identified `element`. Each `element`, shows the full object linking path to each field required by the model. These values should not be edited. -If you are generating a mapping for the current CanDIG MoH model, you can use the pre-generated [`moh_template.csv`](moh_template.csv) file. This file is modified from the auto-generated template to update a few fields that require specific handling. +If you are generating a mapping for the current CanDIG MoH model, you can use the pre-generated [`moh_v3_template.csv`](moh_v3_template.csv) file. This file is modified from the auto-generated template to update a few fields that require specific handling. You will need to edit the `mapping method` values in each line in the following ways: 1. Replace the generic sheet names (e.g. `DONOR_SHEET`, `SAMPLE_REGISTRATIONS_SHEET`) with the sheet/csv names you are using as your input to `CSVConvert.py` diff --git a/src/clinical_etl/generate_schema.py b/src/clinical_etl/generate_schema.py index 4da6914..1808219 100644 --- a/src/clinical_etl/generate_schema.py +++ b/src/clinical_etl/generate_schema.py @@ -21,8 +21,8 @@ def parse_args(): default="https://raw.githubusercontent.com/CanDIG/katsu/develop/chord_metadata_service/mohpackets/docs/schemas/schema.json") parser.add_argument('--schema', type=str, help="Name of schema class", default="MoHSchemaV3") parser.add_argument('--out', type=str, - help="name of output file; csv extension will be added. Default is template", - default="template") + help="name of output file; csv extension will be added. Default is moh_template", + default="moh_template") args = parser.parse_args() return args diff --git a/tests/moh_diffs.txt b/tests/moh_diffs.txt index fcf6f49..196bc70 100644 --- a/tests/moh_diffs.txt +++ b/tests/moh_diffs.txt @@ -37,3 +37,7 @@ < DONOR.INDEX.biomarkers.INDEX.pr_percent_positive, {floating(BIOMARKERS_SHEET.pr_percent_positive)} --- > DONOR.INDEX.biomarkers.INDEX.pr_percent_positive, {set_neg_99_blank_float(BIOMARKERS_SHEET.pr_percent_positive)} +161c161 +< DONOR.INDEX.followups.INDEX, {indexed_on(FOLLOWUPS_SHEET.submitter_donor_id)} +--- +> DONOR.INDEX.followups.INDEX, {moh_indexed_on_donor_if_others_absent(FOLLOWUPS_SHEET.submitter_donor_id)} diff --git a/update_moh_template.sh b/update_moh_template.sh index 6d7b7b2..3c8c3d4 100755 --- a/update_moh_template.sh +++ b/update_moh_template.sh @@ -3,7 +3,7 @@ python src/clinical_etl/generate_schema.py --out tmp_template diff tmp_template.csv moh_v3_template.csv > tests/moh_diffs.txt +rm tmp_template.csv bytes=$(head -5 tests/moh_diffs.txt | wc -c) dd if=tests/moh_diffs.txt bs="$bytes" skip=1 conv=notrunc of=tests/moh_diffs1.txt mv tests/moh_diffs1.txt tests/moh_diffs.txt -rm tmp_template.csv From 4da62907d39729a2e2854cf9fcd3c8aa0f93faf6 Mon Sep 17 00:00:00 2001 From: David Brownlee Date: Fri, 25 Oct 2024 15:04:32 -0400 Subject: [PATCH 2/2] github workflow corrected. --- .github/workflows/test.yml | 13 ++++++------- update_moh_template.sh | 13 +++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a0175c5..f80aa57 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,14 +24,13 @@ jobs: pytest - name: Compare to moh_v3_template.csv shell: bash {0} - # Script based largely on update_moh_template.sh run: | - python src/clinical_etl/generate_schema.py --out tmp_template - diff tmp_template.csv moh_v3_template.csv > tests/moh_diffs.txt - rm tmp_template.csv - bytes=$(head -5 tests/moh_diffs.txt | wc -c) - dd if=tests/moh_diffs.txt bs="$bytes" skip=1 conv=notrunc of=tests/moh_diffs1.txt - diff tests/moh_diffs1.txt tests/moh_diffs.txt + # Script based largely on update_moh_template.sh + python src/clinical_etl/generate_schema.py --out moh_template + diff moh_template.csv moh_v3_template.csv > curr_diff.txt + bytes=$(head -5 curr_diff.txt | wc -c) + dd if=curr_diff.txt bs="$bytes" skip=1 conv=notrunc of=new_diff.txt + diff new_diff.txt tests/moh_diffs.txt if [[ $? == 1 ]]; then echo MoH template checking needs to be updated! See https://github.com/CanDIG/clinical_ETL_code#mapping-template for information. exit 1 fi \ No newline at end of file diff --git a/update_moh_template.sh b/update_moh_template.sh index 3c8c3d4..95f4e8c 100755 --- a/update_moh_template.sh +++ b/update_moh_template.sh @@ -1,9 +1,10 @@ #!/usr/bin/env bash +# Updates the moh_template based on the schema. +# Manual differences are recorded in tests/moh_diffs.txt - -python src/clinical_etl/generate_schema.py --out tmp_template -diff tmp_template.csv moh_v3_template.csv > tests/moh_diffs.txt -rm tmp_template.csv -bytes=$(head -5 tests/moh_diffs.txt | wc -c) -dd if=tests/moh_diffs.txt bs="$bytes" skip=1 conv=notrunc of=tests/moh_diffs1.txt +python src/clinical_etl/generate_schema.py --out moh_template +diff moh_template.csv moh_v3_template.csv > curr_diff.txt +bytes=$(head -5 curr_diff.txt | wc -c) +dd if=curr_diff.txt bs="$bytes" skip=1 conv=notrunc of=tests/moh_diffs1.txt mv tests/moh_diffs1.txt tests/moh_diffs.txt +rm curr_diff.txt