From 9a195d7dde675e2138f2a9663c264be413c0e3ca Mon Sep 17 00:00:00 2001 From: Julian Date: Tue, 29 Oct 2024 11:24:59 -0400 Subject: [PATCH 1/4] add HTML support to document ingestion workflow --- chord_metadata_service/chord/workflows/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/chord/workflows/metadata.py b/chord_metadata_service/chord/workflows/metadata.py index 81e416568..74d91b999 100644 --- a/chord_metadata_service/chord/workflows/metadata.py +++ b/chord_metadata_service/chord/workflows/metadata.py @@ -147,7 +147,7 @@ def boolean_input(id_: str, required: bool = True): wm.WorkflowFileArrayInput( id="document_files", required=True, - pattern=r"^.*\.(pdf|csv|tsv|txt|docx|xlsx|jpeg|jpg|png|gif|md|markdown|mp3|m4a|mp4)$", + pattern=r"^.*\.(pdf|csv|tsv|txt|docx|xlsx|jpeg|jpg|png|gif|md|html|markdown|mp3|m4a|mp4)$", ), ], )) From 491d13eefb3d689dcb51cce545933592401afeda Mon Sep 17 00:00:00 2001 From: Julian Date: Tue, 29 Oct 2024 11:25:39 -0400 Subject: [PATCH 2/4] expand experiment result schema for html --- chord_metadata_service/experiments/schemas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index cad38f7c4..0ab643503 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -86,7 +86,7 @@ "file_format": { "type": "string", "enum": ["SAM", "BAM", "CRAM", "VCF", "BCF", "MAF", "GVCF", "BigWig", "BigBed", "FASTA", "FASTQ", "TAB", - "SRA", "SRF", "SFF", "GFF", "PDF", "CSV", "TSV", "JPEG", "PNG", "GIF", "MARKDOWN", "MP3", "M4A", + "SRA", "SRF", "SFF", "GFF", "PDF", "CSV", "TSV", "JPEG", "PNG", "GIF", "HTML", "MARKDOWN", "MP3", "M4A", "MP4", "DOCX", "XLS", "XLSX", "UNKNOWN", "OTHER"] }, "data_output_type": { From 3967cafb642bb9024cb6e10b0e12ec92d9984c25 Mon Sep 17 00:00:00 2001 From: Julian Date: Tue, 29 Oct 2024 11:32:32 -0400 Subject: [PATCH 3/4] lint --- chord_metadata_service/experiments/schemas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index 0ab643503..798a7decf 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -86,8 +86,8 @@ "file_format": { "type": "string", "enum": ["SAM", "BAM", "CRAM", "VCF", "BCF", "MAF", "GVCF", "BigWig", "BigBed", "FASTA", "FASTQ", "TAB", - "SRA", "SRF", "SFF", "GFF", "PDF", "CSV", "TSV", "JPEG", "PNG", "GIF", "HTML", "MARKDOWN", "MP3", "M4A", - "MP4", "DOCX", "XLS", "XLSX", "UNKNOWN", "OTHER"] + "SRA", "SRF", "SFF", "GFF", "PDF", "CSV", "TSV", "JPEG", "PNG", "GIF", "HTML", "MARKDOWN", + "MP3", "M4A", "MP4", "DOCX", "XLS", "XLSX", "UNKNOWN", "OTHER"] }, "data_output_type": { "type": "string", From 1ba0364616f2712bdb1d97db6dbcb311fe9240c2 Mon Sep 17 00:00:00 2001 From: Julian Date: Tue, 29 Oct 2024 11:57:52 -0400 Subject: [PATCH 4/4] reorder files extension in regex pattern --- chord_metadata_service/chord/workflows/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/chord/workflows/metadata.py b/chord_metadata_service/chord/workflows/metadata.py index 74d91b999..48a353e87 100644 --- a/chord_metadata_service/chord/workflows/metadata.py +++ b/chord_metadata_service/chord/workflows/metadata.py @@ -147,7 +147,7 @@ def boolean_input(id_: str, required: bool = True): wm.WorkflowFileArrayInput( id="document_files", required=True, - pattern=r"^.*\.(pdf|csv|tsv|txt|docx|xlsx|jpeg|jpg|png|gif|md|html|markdown|mp3|m4a|mp4)$", + pattern=r"^.*\.(pdf|csv|tsv|txt|docx|xlsx|jpeg|jpg|png|gif|md|markdown|html|mp3|m4a|mp4)$", ), ], ))