From 995356456a761661e76b08a3b38dad09f941cc07 Mon Sep 17 00:00:00 2001 From: Daisie Huang Date: Tue, 31 Oct 2023 17:46:50 -0700 Subject: [PATCH 1/3] comment out radiation/surgery counts --- mohschema.py | 12 ++++++------ test_data_ingest.py | 5 ++--- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/mohschema.py b/mohschema.py index d00edc8..3343450 100644 --- a/mohschema.py +++ b/mohschema.py @@ -359,9 +359,9 @@ def validate_immunotherapies(self, map_json): def validate_radiations(self, map_json): - index = self.validation_schema["radiations"]["extra_args"]["index"] - if index > 0: - self.fail("Only one radiation is allowed per treatment") + # index = self.validation_schema["radiations"]["extra_args"]["index"] + # if index > 0: + # self.fail("Only one radiation is allowed per treatment") for prop in map_json: match prop: @@ -373,9 +373,9 @@ def validate_radiations(self, map_json): def validate_surgeries(self, map_json): specimen_ids = self.validation_schema["primary_diagnoses"]["extra_args"]["specimen_ids"] - index = self.validation_schema["surgeries"]["extra_args"]["index"] - if index > 0: - self.fail("Only one surgery is allowed per treatment") + # index = self.validation_schema["surgeries"]["extra_args"]["index"] + # if index > 0: + # self.fail("Only one surgery is allowed per treatment") if "submitter_specimen_id" not in map_json: if "surgery_site" not in map_json or map_json["surgery_site"] is None: diff --git a/test_data_ingest.py b/test_data_ingest.py index 6b393ca..51d4fe1 100644 --- a/test_data_ingest.py +++ b/test_data_ingest.py @@ -114,9 +114,8 @@ def test_validation(packets, schema): # DONOR_5 > PD_5 > TR_10: treatment type Immunotherapy should have one or more immunotherapies submitted print(schema.validation_errors) - assert len(schema.validation_errors) == 3 - # should be the following 3 errors: - # DONOR_5 > PD_5 > TR_5 > Radiation 1: Only one radiation is allowed per treatment + assert len(schema.validation_errors) == 2 + # should be the following 2 errors: # DONOR_6 > PD_6 > TR_9 > Surgery 0: submitter_specimen_id SPECIMEN_43 does not correspond to one of the available specimen_ids ['SPECIMEN_3'] # Duplicated IDs: in schema followups, FOLLOW_UP_4 occurs 2 times From 12b515e16b47d55c71562af7e41c12dd74969d33 Mon Sep 17 00:00:00 2001 From: Daisie Huang Date: Mon, 6 Nov 2023 22:03:52 -0800 Subject: [PATCH 2/3] add statistics to validation output --- validate_coverage.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/validate_coverage.py b/validate_coverage.py index b317951..070c764 100644 --- a/validate_coverage.py +++ b/validate_coverage.py @@ -202,10 +202,13 @@ def validate_coverage(map_json, input_path=None, verbose=False): mappings.VERBOSE = True # read the schema and generate a scaffold + if "openapi_url" not in map_json: + print("No openapi_url schema available") + return None schema = MoHSchema(map_json["openapi_url"]) if schema is None: print(f"Did not find an openapi schema at {map_json['openapi_url']}; please check link") - return + return None # if --input was specified, we can check data frame completeness coverage: # if input_path is not None: @@ -221,7 +224,8 @@ def validate_coverage(map_json, input_path=None, verbose=False): # print(json.dumps(schema.validation_results, indent=4)) return { "errors": schema.validation_errors, - "warnings": schema.validation_warnings + "warnings": schema.validation_warnings, + "statistics": schema.statistics } def main(args): From 97c31b51118a6538c878d5aff77d09be618fe00d Mon Sep 17 00:00:00 2001 From: Daisie Huang Date: Mon, 6 Nov 2023 22:19:32 -0800 Subject: [PATCH 3/3] better errors --- validate_coverage.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/validate_coverage.py b/validate_coverage.py index 070c764..df0d17b 100644 --- a/validate_coverage.py +++ b/validate_coverage.py @@ -203,12 +203,10 @@ def validate_coverage(map_json, input_path=None, verbose=False): # read the schema and generate a scaffold if "openapi_url" not in map_json: - print("No openapi_url schema available") - return None + return {"message": "No openapi_url schema available"} schema = MoHSchema(map_json["openapi_url"]) if schema is None: - print(f"Did not find an openapi schema at {map_json['openapi_url']}; please check link") - return None + return {"message": f"Did not find an openapi schema at {map_json['openapi_url']}; please check link"} # if --input was specified, we can check data frame completeness coverage: # if input_path is not None: