diff --git a/src/clinical_etl/CSVConvert.py b/src/clinical_etl/CSVConvert.py
index 19bdd68..08256fe 100644
--- a/src/clinical_etl/CSVConvert.py
+++ b/src/clinical_etl/CSVConvert.py
@@ -32,8 +32,6 @@ def parse_args():
     parser.add_argument('--manifest', type=str, required=True, help="Path to a manifest file describing the mapping. See README for more information")
     parser.add_argument('--test', action="store_true", help="Use exact template specified in manifest: do not remove extra lines")
     parser.add_argument('--verbose', '--v', action="store_true", help="Print extra information, useful for debugging and understanding how the code runs.")
-    parser.add_argument('--index', '--i', action="store_true", help="Output 'indexed' file, useful for debugging and seeing relationships.")
-    parser.add_argument('--minify', action="store_true", help="Remove white space and line breaks from json outputs to reduce file size. Less readable for humans.")
     args = parser.parse_args()
     return args
 
@@ -621,7 +619,7 @@ def load_manifest(manifest_file):
     return result
 
 
-def csv_convert(input_path, manifest_file, minify=False, index_output=False, verbose=False):
+def csv_convert(input_path, manifest_file, verbose=False):
     mappings.VERBOSE = verbose
     # read manifest data
     print("Starting conversion")
@@ -653,12 +651,8 @@ def csv_convert(input_path, manifest_file, minify=False, index_output=False, ver
     print("Indexing data")
     mappings.INDEXED_DATA = process_data(raw_csv_dfs)
 
-    if index_output:
-        with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
-            if minify:
-                json.dump(mappings.INDEXED_DATA, f)
-            else:
-                json.dump(mappings.INDEXED_DATA, f, indent=4)
+    with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
+        json.dump(mappings.INDEXED_DATA, f, indent=4)
 
     # if verbose flag is set, warn if column name is present in multiple sheets:
     for col in mappings.INDEXED_DATA["columns"]:
@@ -699,12 +693,9 @@ def csv_convert(input_path, manifest_file, minify=False, index_output=False, ver
     if mappings._pop_from_stack() is not None:
         raise Exception(
             f"Stack not empty\n{mappings.IDENTIFIER_FIELD}: {mappings.IDENTIFIER}\n {mappings.INDEX_STACK}")
-    if index_output:
-        with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
-            if minify:
-                json.dump(mappings.INDEXED_DATA, f)
-            else:
-                json.dump(mappings.INDEXED_DATA, f, indent=4)
+
+    with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
+        json.dump(mappings.INDEXED_DATA, f, indent=4)
 
     result_key = list(schema.validation_schema.keys()).pop(0)
 
@@ -716,10 +707,7 @@ def csv_convert(input_path, manifest_file, minify=False, index_output=False, ver
     if schema.katsu_sha is not None:
         result["katsu_sha"] = schema.katsu_sha
     with open(f"{mappings.OUTPUT_FILE}_map.json", 'w') as f: # write to json file for ingestion
-        if minify:
-            json.dump(result, f)
-        else:
-            json.dump(result, f, indent=4)
+        json.dump(result, f, indent=4)
 
     # add validation data:
     schema.validate_ingest_map(result)
@@ -727,10 +715,7 @@ def csv_convert(input_path, manifest_file, minify=False, index_output=False, ver
     result["validation_warnings"] = schema.validation_warnings
     result["statistics"] = schema.statistics
     with open(f"{mappings.OUTPUT_FILE}_map.json", 'w') as f: # write to json file for ingestion
-        if minify:
-            json.dump(result, f)
-        else:
-            json.dump(result, f, indent=4)
+        json.dump(result, f, indent=4)
 
     if len(result["validation_warnings"]) > 0:
         print(
@@ -747,4 +732,4 @@ def csv_convert(input_path, manifest_file, minify=False, index_output=False, ver
     args = parse_args()
     input_path = args.input
     manifest_file = args.manifest
-    csv_convert(input_path, manifest_file, minify=args.minify, index_output=args.index, verbose=args.verbose)
+    csv_convert(input_path, manifest_file, verbose=args.verbose)
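
Note on usage after this patch: csv_convert() now always writes the
_indexed.json output and pretty-prints (indent=4) both JSON files, since
the minify and index_output options were removed. A minimal sketch of a
call against the new signature follows; the input path and manifest
filename are hypothetical placeholders, not values from this repository:

    # Sketch only: "raw_csvs/" and "manifest.yml" are assumed example
    # arguments; the import path follows the src/clinical_etl/ layout.
    from clinical_etl.CSVConvert import csv_convert

    # minify= and index_output= no longer exist as parameters; passing
    # either now raises a TypeError.
    csv_convert("raw_csvs/", "manifest.yml", verbose=True)

The equivalent command line simply drops the removed flags (assuming an
--input argument is defined earlier in parse_args(), which is implied by
args.input but not shown in this diff):

    python src/clinical_etl/CSVConvert.py --input raw_csvs/ --manifest manifest.yml --verbose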