Revert "Add index and minify command line arguments (#55)"
This reverts commit 8a01ff9.
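
For reference, the pattern being reverted is summarized below: a minimal, self-contained sketch of how the removed --index and --minify flags drove JSON output. The argparse calls are copied from the diff that follows; the write_json helper is a hypothetical condensation of the dump logic that was inlined at each output site, not a function in the module.

import argparse
import json


def parse_args():
    parser = argparse.ArgumentParser()
    # Flags removed by this revert (copied from the diff below):
    parser.add_argument('--index', '--i', action="store_true",
                        help="Output 'indexed' file, useful for debugging and seeing relationships.")
    parser.add_argument('--minify', action="store_true",
                        help="Remove white space and line breaks from json outputs to reduce file size.")
    return parser.parse_args()


def write_json(data, path, minify=False):
    # Hypothetical helper: the module repeated this branch at each dump site.
    with open(path, 'w') as f:
        if minify:
            json.dump(data, f)            # compact: no indentation or line breaks
        else:
            json.dump(data, f, indent=4)  # pretty-printed, the only behavior after the revert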
mshadbolt authored Mar 4, 2024
1 parent 8a01ff9 commit 8c93c09
Showing 1 changed file with 9 additions and 24 deletions.
33 changes: 9 additions & 24 deletions src/clinical_etl/CSVConvert.py
@@ -32,8 +32,6 @@ def parse_args():
     parser.add_argument('--manifest', type=str, required=True, help="Path to a manifest file describing the mapping. See README for more information")
     parser.add_argument('--test', action="store_true", help="Use exact template specified in manifest: do not remove extra lines")
     parser.add_argument('--verbose', '--v', action="store_true", help="Print extra information, useful for debugging and understanding how the code runs.")
-    parser.add_argument('--index', '--i', action="store_true", help="Output 'indexed' file, useful for debugging and seeing relationships.")
-    parser.add_argument('--minify', action="store_true", help="Remove white space and line breaks from json outputs to reduce file size. Less readable for humans.")
     args = parser.parse_args()
     return args

@@ -621,7 +619,7 @@ def load_manifest(manifest_file):
     return result


-def csv_convert(input_path, manifest_file, minify=False, index_output=False, verbose=False):
+def csv_convert(input_path, manifest_file, verbose=False):
     mappings.VERBOSE = verbose
     # read manifest data
     print("Starting conversion")
@@ -653,12 +651,8 @@ def csv_convert(input_path, manifest_file, minify=False, index_output=False, verbose=False):

     print("Indexing data")
     mappings.INDEXED_DATA = process_data(raw_csv_dfs)
-    if index_output:
-        with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
-            if minify:
-                json.dump(mappings.INDEXED_DATA, f)
-            else:
-                json.dump(mappings.INDEXED_DATA, f, indent=4)
+    with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
+        json.dump(mappings.INDEXED_DATA, f, indent=4)

     # if verbose flag is set, warn if column name is present in multiple sheets:
     for col in mappings.INDEXED_DATA["columns"]:
@@ -699,12 +693,9 @@ def csv_convert(input_path, manifest_file, minify=False, index_output=False, verbose=False):
     if mappings._pop_from_stack() is not None:
         raise Exception(
             f"Stack not empty\n{mappings.IDENTIFIER_FIELD}: {mappings.IDENTIFIER}\n {mappings.INDEX_STACK}")
-    if index_output:
-        with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
-            if minify:
-                json.dump(mappings.INDEXED_DATA, f)
-            else:
-                json.dump(mappings.INDEXED_DATA, f, indent=4)
+
+    with open(f"{mappings.OUTPUT_FILE}_indexed.json", 'w') as f:
+        json.dump(mappings.INDEXED_DATA, f, indent=4)

     result_key = list(schema.validation_schema.keys()).pop(0)

@@ -716,21 +707,15 @@ def csv_convert(input_path, manifest_file, minify=False, index_output=False, verbose=False):
     if schema.katsu_sha is not None:
         result["katsu_sha"] = schema.katsu_sha
     with open(f"{mappings.OUTPUT_FILE}_map.json", 'w') as f: # write to json file for ingestion
-        if minify:
-            json.dump(result, f)
-        else:
-            json.dump(result, f, indent=4)
+        json.dump(result, f, indent=4)

     # add validation data:
     schema.validate_ingest_map(result)
     result["validation_errors"] = schema.validation_errors
     result["validation_warnings"] = schema.validation_warnings
     result["statistics"] = schema.statistics
     with open(f"{mappings.OUTPUT_FILE}_map.json", 'w') as f: # write to json file for ingestion
-        if minify:
-            json.dump(result, f)
-        else:
-            json.dump(result, f, indent=4)
+        json.dump(result, f, indent=4)

     if len(result["validation_warnings"]) > 0:
         print(
@@ -747,4 +732,4 @@ def csv_convert(input_path, manifest_file, minify=False, index_output=False, verbose=False):
     args = parse_args()
     input_path = args.input
     manifest_file = args.manifest
-    csv_convert(input_path, manifest_file, minify=args.minify, index_output=args.index, verbose=args.verbose)
+    csv_convert(input_path, manifest_file, verbose=args.verbose)
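
After this revert, the indexed file is always written rather than gated behind a flag, and both the _indexed.json and _map.json outputs are always pretty-printed with indent=4. A hypothetical invocation with the remaining flags (both paths are placeholders; --input and --manifest are the arguments the script actually reads):

python src/clinical_etl/CSVConvert.py --input raw_csv_dir --manifest manifest.yml --verbose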
