diff --git a/omop_file_validator.py b/omop_file_validator.py
index f52b237..69d1a75 100644
--- a/omop_file_validator.py
+++ b/omop_file_validator.py
@@ -160,6 +160,22 @@ def find_error_in_file(column_name, cdm_column_type, submission_column_type,
return index
+def find_blank_lines(f):
+    """Return the 1-based positions of data rows whose values are all empty
+
+    Rows are numbered from the first row after the header, as parsed by
+    ``pd.read_csv`` with its default settings.
+
+    :param f: A file object
+    :type f: file-like object
+    :return: List of rows with all empty values
+    :rtype: list
+    """
+    df = pd.read_csv(f)
+    # Vectorized mask; stays boolean even when the file has only a header
+    return [i + 1 for i in df.index[df.isnull().all(axis=1)].tolist()]
+
+
def check_csv_format(f, column_names):
results = []
idx = 1
@@ -255,6 +271,18 @@ def run_checks(file_path, f):
]
f.seek(0)
+ blank_lines = find_blank_lines(f)
+ if blank_lines:
+ blank_lines_str = ",".join(map(str, blank_lines))
+ line_str = 'lines' if len(blank_lines) > 1 else 'line'
+ blank_lines_msg = f'File contains blank {line_str} on {line_str} {blank_lines_str}. ' \
+ 'If there is no data, please only submit the header line.'
+
+ result['errors'].append(dict(message=blank_lines_msg))
+ return result
+
+ f.seek(0)
+
# check columns if looks good process file
if not _check_columns(cdm_column_names, csv_columns, result):
return result
diff --git a/tests/resources/examples_erroneous/death.csv b/tests/resources/examples_erroneous/death.csv
new file mode 100644
index 0000000..b53712a
--- /dev/null
+++ b/tests/resources/examples_erroneous/death.csv
@@ -0,0 +1,3 @@
+"person_id","death_date","death_datetime","death_type_concept_id","cause_concept_id","cause_source_value","cause_source_concept_id"
+,,,,,
+,,,,,
\ No newline at end of file
diff --git a/tests/resources/examples_erroneous/errors/results.csv b/tests/resources/examples_erroneous/errors/results.csv
index 2cfbedf..c371fd6 100644
--- a/tests/resources/examples_erroneous/errors/results.csv
+++ b/tests/resources/examples_erroneous/errors/results.csv
@@ -9,8 +9,13 @@
"drug_exposure.csv","Drug Exposure","Column not in table definition","drug_id","drug_id",""
"drug_exposure.csv","Drug Exposure","Column missing in file","person_id","","person_id"
"drug_exposure.csv","Drug Exposure","Column not in expected order","drug_concept_id","drug_concept_id","person_id"
-"measurement.csv","Measurement","Type mismatch line number 3","person_id","","integer"
+"death.csv","Death","Incorrect number of columns on line 2: ['', '', '', '', '', '']","","",""
+"death.csv","Death","Incorrect number of columns on line 3: ['', '', '', '', '', '']","","",""
+"death.csv","Death","File contains blank lines on lines 1,2. If there is no data, please only submit the header line.","","",""
"measurement.csv","Measurement","NULL values are not allowed for column","person_id","",""
+"measurement.csv","Measurement","Invalid timestamp format. Expecting ""YYYY-MM-DD hh:mm:ss"": line numbers (1,2,3,4,5)","measurement_datetime","",""
"person.csv","Person","Please add/fix incorrect headers at the top of the file, enclosed in double quotes","","['person_id', 'gender_concept_id', 'year_of_birth', 'month_of_birth', 'birth_datetime', 'day_of_birth', 'race_concept_id', 'ethnicity_concept_id', 'location_id', 'provider_id', 'care_site_id', 'person_source_value', 'gender_source_value', 'gender_source_concept_id', 'race_source_value', 'race_source_concept_id', 'ethnicity_source_value', 'ethnicity_source_concept_id']","['person_id', 'gender_concept_id', 'year_of_birth', 'month_of_birth', 'day_of_birth', 'birth_datetime', 'race_concept_id', 'ethnicity_concept_id', 'location_id', 'provider_id', 'care_site_id', 'person_source_value', 'gender_source_value', 'gender_source_concept_id', 'race_source_value', 'race_source_concept_id', 'ethnicity_source_value', 'ethnicity_source_concept_id']"
"person.csv","Person","Column not in expected order","birth_datetime","birth_datetime","day_of_birth"
+"observation.csv","Observation","Invalid date format. Expecting ""YYYY-MM-DD"": line numbers (4,5)","observation_date","",""
+"observation.csv","Observation","Invalid timestamp format. Expecting ""YYYY-MM-DD hh:mm:ss"": line numbers (1,3,5)","observation_datetime","",""
"observation.csv","Observation","Type mismatch line number 3","observation_type_concept_id","unknown","integer"
diff --git a/tests/resources/examples_erroneous/errors/results.html b/tests/resources/examples_erroneous/errors/results.html
index 443b9ee..043f7d3 100644
--- a/tests/resources/examples_erroneous/errors/results.html
+++ b/tests/resources/examples_erroneous/errors/results.html
@@ -146,19 +146,43 @@
Local File Validation Error Results
+ measurement_datetime |
|
|
@@ -178,6 +202,22 @@ Local File Validation Error Results
+ observation_date |
+ |
+ |
+
+
+ observation.csv |
+ Observation |
+ Invalid timestamp format. Expecting "YYYY-MM-DD hh:mm:ss": line numbers (1,3,5) |
+ observation_datetime |
+ |
+ |
+
observation.csv |
Observation |