Skip to content

Commit

Permalink
Add index and minify command line arguments (#55)
Browse files (browse the repository at this point in the history)
* add minification option

* add index arg

* check before opening file

* fix args

* Update src/clinical_etl/CSVConvert.py

Co-authored-by: Daisie Huang <[email protected]>

---------

Co-authored-by: Daisie Huang <[email protected]>
  • Loading branch information
mshadbolt and daisieh authored Mar 4, 2024
1 parent 815cbfc commit 8a01ff9
Showing 1 changed file with 24 additions and 9 deletions.
33 changes: 24 additions & 9 deletions src/clinical_etl/CSVConvert.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,6 +32,8 @@ def parse_args():
parser.add_argument('--manifest', type=str, required=True, help="Path to a manifest file describing the mapping. See README for more information")
parser.add_argument('--test', action="store_true", help="Use exact template specified in manifest: do not remove extra lines")
parser.add_argument('--verbose', '--v', action="store_true", help="Print extra information, useful for debugging and understanding how the code runs.")
parser.add_argument('--index', '--i', action="store_true", help="Output 'indexed' file, useful for debugging and seeing relationships.")
parser.add_argument('--minify', action="store_true", help="Remove white space and line breaks from json outputs to reduce file size. Less readable for humans.")
args = parser.parse_args()
return args

Expand Down Expand Up @@ -619,7 +621,7 @@ def load_manifest(manifest_file):
return result


def csv_convert(input_path, manifest_file, verbose=False):
def csv_convert(input_path, manifest_file, minify=False, index_output=False, verbose=False):
mappings.VERBOSE = verbose
# read manifest data
print("Starting conversion")
Expand Down Expand Up @@ -651,8 +653,12 @@ def csv_convert(input_path, manifest_file, verbose=False):

print("Indexing data")
mappings.INDEXED_DATA = process_data(raw_csv_dfs)
with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
json.dump(mappings.INDEXED_DATA, f, indent=4)
if index_output:
with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
if minify:
json.dump(mappings.INDEXED_DATA, f)
else:
json.dump(mappings.INDEXED_DATA, f, indent=4)

# if verbose flag is set, warn if column name is present in multiple sheets:
for col in mappings.INDEXED_DATA["columns"]:
Expand Down Expand Up @@ -693,9 +699,12 @@ def csv_convert(input_path, manifest_file, verbose=False):
if mappings._pop_from_stack() is not None:
raise Exception(
f"Stack not empty\n{mappings.IDENTIFIER_FIELD}: {mappings.IDENTIFIER}\n {mappings.INDEX_STACK}")

with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
json.dump(mappings.INDEXED_DATA, f, indent=4)
if index_output:
with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
if minify:
json.dump(mappings.INDEXED_DATA, f)
else:
json.dump(mappings.INDEXED_DATA, f, indent=4)

result_key = list(schema.validation_schema.keys()).pop(0)

Expand All @@ -707,15 +716,21 @@ def csv_convert(input_path, manifest_file, verbose=False):
if schema.katsu_sha is not None:
result["katsu_sha"] = schema.katsu_sha
with open(f"{mappings.OUTPUT_FILE}_map.json", 'w') as f: # write to json file for ingestion
json.dump(result, f, indent=4)
if minify:
json.dump(result, f)
else:
json.dump(result, f, indent=4)

# add validation data:
schema.validate_ingest_map(result)
result["validation_errors"] = schema.validation_errors
result["validation_warnings"] = schema.validation_warnings
result["statistics"] = schema.statistics
with open(f"{mappings.OUTPUT_FILE}_map.json", 'w') as f: # write to json file for ingestion
json.dump(result, f, indent=4)
if minify:
json.dump(result, f)
else:
json.dump(result, f, indent=4)

if len(result["validation_warnings"]) > 0:
print(
Expand All @@ -732,4 +747,4 @@ def csv_convert(input_path, manifest_file, verbose=False):
args = parse_args()
input_path = args.input
manifest_file = args.manifest
csv_convert(input_path, manifest_file, verbose=args.verbose)
csv_convert(input_path, manifest_file, minify=args.minify, index_output=args.index, verbose=args.verbose)

0 comments on commit 8a01ff9

Please sign in to comment.