Skip to content

Commit

Permalink
Suppress Excessive Errors in File Check Script (#20)
Browse files Browse the repository at this point in the history
* Converted the empty_criteria row mask to int dtype and added break statements to limit the number of errors reported

* Changed the empty_criteria dtype cast from int to bool
  • Loading branch information
jp3477 authored Nov 22, 2021
1 parent 9b05310 commit fffb648
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 20 deletions.
9 changes: 7 additions & 2 deletions omop_file_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,12 @@ def find_blank_lines(f):
:rtype: list
"""
df = pd.read_csv(f)
indices = df.index[df.apply(
indices = []
empty_criteria = df.apply(
lambda row: all(row.apply(lambda col: pd.isnull(col))),
axis=1)].tolist()
axis=1).astype(bool)

indices = df.index[empty_criteria].tolist()

return [i + 1 for i in indices]

Expand Down Expand Up @@ -243,10 +246,12 @@ def check_csv_format(f, column_names):
'Please replace newline "\\n" characters with space " "' % (str(idx), line)
print(newline_msg)
results.append([newline_msg, None, None])
break
if len(line) != len(column_names):
column_mismatch_msg = 'Incorrect number of columns on line %s: %s' % (
str(idx), line)
results.append([column_mismatch_msg, None, None])
break
except (ValueError, csv.Error):
print(traceback.format_exc())
if not line:
Expand Down
2 changes: 0 additions & 2 deletions tests/resources/examples_erroneous/errors/results.csv
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
"drug_exposure.csv","Drug Exposure","Column missing in file","visit_detail_id","","visit_detail_id"
"drug_exposure.csv","Drug Exposure","Column not in expected order","drug_concept_id","drug_concept_id","person_id"
"death.csv","Death","Incorrect number of columns on line 2: ['', '', '', '', '', '']","","",""
"death.csv","Death","Incorrect number of columns on line 3: ['', '', '', '', '', '']","","",""
"death.csv","Death","File contains blank lines on lines 1,2. If there is no data, please only submit the header line.","","",""
"device_exposure.csv","Device Exposure","Please add/fix incorrect headers at the top of the file, enclosed in double quotes","","['device_exposure_id', 'person_id', 'device_concept_id', 'device_exposure_start_date', 'device_exposure_start_datetime', 'device_exposure_end_date', 'device_exposure_end_datetime', 'device_type_concept_id', 'unique_device_id', 'quantity', 'provider_id', 'visit_occurrence_id', 'device_source_value', 'device_source_concept_id']","['device_exposure_id', 'person_id', 'device_concept_id', 'device_exposure_start_date', 'device_exposure_start_datetime', 'device_exposure_end_date', 'device_exposure_end_datetime', 'device_type_concept_id', 'unique_device_id', 'quantity', 'provider_id', 'visit_occurrence_id', 'visit_detail_id', 'device_source_value', 'device_source_concept_id']"
"device_exposure.csv","Device Exposure","Incorrect number of columns on line 2: ['11', '2', '31', '2020-01-01', '2020-01-01 01:00:00', '', '', '4', '', '5', '6', '7', 'Supply/PACK BASIC:Supply/PACK BASIC', '']","","",""
"device_exposure.csv","Device Exposure","Incorrect number of columns on line 3: ['12', '2', '32', '2020-02-02', '2020-02-02 02:00:00', '2020-02-03', '2020-02-03 03:00:00', '4', '', ' ', ' ', ' 8', '77334:PR 77334 TREATMENT DEVICES, DESIGN AND CONST', '10']","","",""
"device_exposure.csv","Device Exposure","Column missing in file","visit_detail_id","","visit_detail_id"
"device_exposure.csv","Device Exposure","Column not in expected order","device_source_value","device_source_value","visit_detail_id"
"measurement.csv","Measurement","NULL values are not allowed for column","person_id","",""
Expand Down
16 changes: 0 additions & 16 deletions tests/resources/examples_erroneous/errors/results.html
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,6 @@ <h1>Local File Validation Error Results</h1><table id="dataframe" style="width:8
<td></td>
<td></td>
</tr>
<tr>
<td>death.csv</td>
<td>Death</td>
<td>Incorrect number of columns on line 3: ['', '', '', '', '', '']</td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td>death.csv</td>
<td>Death</td>
Expand All @@ -154,14 +146,6 @@ <h1>Local File Validation Error Results</h1><table id="dataframe" style="width:8
<td></td>
<td></td>
</tr>
<tr>
<td>device_exposure.csv</td>
<td>Device Exposure</td>
<td>Incorrect number of columns on line 3: ['12', '2', '32', '2020-02-02', '2020-02-02 02:00:00', '2020-02-03', '2020-02-03 03:00:00', '4', '', ' ', ' ', ' 8', '77334:PR 77334 TREATMENT DEVICES, DESIGN AND CONST', '10']</td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td>device_exposure.csv</td>
<td>Device Exposure</td>
Expand Down

0 comments on commit fffb648

Please sign in to comment.