diff --git a/README.md b/README.md index c031286..bb97dbf 100644 --- a/README.md +++ b/README.md @@ -184,7 +184,7 @@ These files are hosted publicly in each of the cloud backends; see `backends/${b | Type | Name | Description | Notes | | :- | :- | :- | :- | | String | name | Reference name; used to name outputs (e.g., "GRCh38") | | -| [IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl) | fasta | Reference genome and index | | +| Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)] | fastas | Reference genomes and associated indices | | ## Other inputs @@ -209,9 +209,9 @@ These files will be output if `cohort.samples[sample]` is set to `true` for any | Array[Array[File]?] | assembly_noseq_gfas | Assembly graphs in [GFA format](https://github.com/chhylp123/hifiasm/blob/master/docs/source/interpreting-output.rst). | | | Array[Array[File]?] | assembly_lowQ_beds | Coordinates of low quality regions in BED format. | | | Array[Array[File]?] | assembly_stats | Assembly size and NG50 stats generated by [calN50](https://github.com/lh3/calN50). | | -| Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)?] | asm_bam | [minimap2](https://github.com/lh3/minimap2) alignment of assembly to reference. | | -| Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)?] | htsbox_vcf | Naive pileup variant calling of assembly against reference with [`htsbox`](https://github.com/lh3/htsbox) | | -| Array[File?] | htsbox_vcf_stats | [`bcftools stats`](https://samtools.github.io/bcftools/bcftools.html#stats) summary statistics for `htsbox` variant calls | | +| Array[Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)]?] | asm_bam | [minimap2](https://github.com/lh3/minimap2) alignment of assembly to reference. 
| | +| Array[Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)]?] | htsbox_vcf | Naive pileup variant calling of assembly against reference with [`htsbox`](https://github.com/lh3/htsbox) | | +| Array[Array[File]?] | htsbox_vcf_stats | [`bcftools stats`](https://samtools.github.io/bcftools/bcftools.html#stats) summary statistics for `htsbox` variant calls | | ## De novo assembly - trio @@ -223,7 +223,7 @@ These files will be output if `cohort.de_novo_assembly_trio` is set to `true` an | Array[Array[File]]? | trio_assembly_noseq_gfas | Assembly graphs in [GFA format](https://github.com/chhylp123/hifiasm/blob/master/docs/source/interpreting-output.rst). | | | Array[Array[File]]? | trio_assembly_lowQ_beds | Coordinates of low quality regions in BED format. | | | Array[Array[File]]? | trio_assembly_stats | Assembly size and NG50 stats generated by [calN50](https://github.com/lh3/calN50). | | -| Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)]? | trio_asm_bams | [minimap2](https://github.com/lh3/minimap2) alignment of assembly to reference. | | +| Array[Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)]]? | trio_asm_bams | [minimap2](https://github.com/lh3/minimap2) alignment of assembly to reference. | | | Array[Map[String, String]]? | haplotype_key | Indication of which haplotype (`hap1`/`hap2`) corresponds to which parent. 
| | # Tool versions and Docker images diff --git a/backends/aws/inputs.aws.json b/backends/aws/inputs.aws.json index bd89ebf..44c4a52 100644 --- a/backends/aws/inputs.aws.json +++ b/backends/aws/inputs.aws.json @@ -13,13 +13,15 @@ ], "run_de_novo_assembly_trio": "Boolean" }, - "de_novo_assembly.reference": { - "name": "GRCh38", + "de_novo_assembly.references": [ + { + "name": "String", "fasta": { - "data": "s3://dnastack-resources/dataset/GRCh38/human_GRCh38_no_alt_analysis_set.fasta", - "data_index": "s3://dnastack-resources/dataset/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai" + "data": "File", + "data_index": "File" } - }, + } + ], "de_novo_assembly.backend": "AWS", "de_novo_assembly.zones": "us-east-2a us-east-2b us-east-2c", "de_novo_assembly.aws_spot_queue_arn": "", diff --git a/backends/azure/inputs.azure.json b/backends/azure/inputs.azure.json index 64a1911..5d63fa6 100644 --- a/backends/azure/inputs.azure.json +++ b/backends/azure/inputs.azure.json @@ -13,13 +13,15 @@ ], "run_de_novo_assembly_trio": "Boolean" }, - "de_novo_assembly.reference": { - "name": "GRCh38", + "de_novo_assembly.references": [ + { + "name": "String", "fasta": { - "data": "/datasetpbrarediseases/dataset/GRCh38/human_GRCh38_no_alt_analysis_set.fasta", - "data_index": "/datasetpbrarediseases/dataset/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai" + "data": "File", + "data_index": "File" } - }, + } + ], "de_novo_assembly.backend": "Azure", "de_novo_assembly.preemptible": "Boolean" } diff --git a/backends/gcp/inputs.gcp.json b/backends/gcp/inputs.gcp.json index f4cc3c5..723742a 100644 --- a/backends/gcp/inputs.gcp.json +++ b/backends/gcp/inputs.gcp.json @@ -13,13 +13,15 @@ ], "run_de_novo_assembly_trio": "Boolean" }, - "de_novo_assembly.reference": { - "name": "GRCh38", + "de_novo_assembly.references": [ + { + "name": "String", "fasta": { - "data": "gs:///dataset/GRCh38/human_GRCh38_no_alt_analysis_set.fasta", - "data_index": 
"gs:///dataset/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai" + "data": "File", + "data_index": "File" } - }, + } + ], "de_novo_assembly.backend": "GCP", "de_novo_assembly.zones": "String", "de_novo_assembly.preemptible": "Boolean" diff --git a/backends/hpc/inputs.hpc.json b/backends/hpc/inputs.hpc.json index 338e58d..e0979de 100644 --- a/backends/hpc/inputs.hpc.json +++ b/backends/hpc/inputs.hpc.json @@ -13,13 +13,15 @@ ], "run_de_novo_assembly_trio": "Boolean" }, - "de_novo_assembly.reference": { - "name": "GRCh38", + "de_novo_assembly.references": [ + { + "name": "String", "fasta": { - "data": "/dataset/GRCh38/human_GRCh38_no_alt_analysis_set.fasta", - "data_index": "/dataset/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai" + "data": "File", + "data_index": "File" } - }, + } + ], "de_novo_assembly.backend": "HPC", "de_novo_assembly.preemptible": false } diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 97c0734..081474a 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -137,7 +137,7 @@ }, "yak_count": { "key": "yak_count", - "digest": "2ovi7jh4btl4sb7xr23ga6mxtd7nlq4s", + "digest": "6hlh6n3b3cqohtmjweg57of626he4c4v", "tests": [ { "inputs": { @@ -150,6 +150,8 @@ "${resources_file_path}/m64017_200108_232219.hifi_reads.fasta", "${resources_file_path}/m64017_200112_090459.hifi_reads.fasta" ], + "yak_params": "-b37", + "mem_gb": 70, "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { @@ -173,7 +175,7 @@ "tasks": { "hifiasm_assemble": { "key": "hifiasm_assemble", - "digest": "r4ikydzmdaed4hzsmc3t7efh6mz5e4mx", + "digest": "vhkzwee3f754jcjksog22uyps3j6myow", "tests": [ { "inputs": { @@ -230,7 +232,7 @@ }, "gfa2fa": { "key": "gfa2fa", - "digest": "liyb2m4cbkovxctcgaxwunqkn5az77ev", + "digest": "es7l5kyje3fiy5vxjnnsqg4fw6sitmdo", "tests": [ { "inputs": { @@ -262,7 +264,7 @@ }, "align_hifiasm": { "key": "align_hifiasm", - "digest": "77gs34t4c2i6epsg2epukfoaign2fmnt", + "digest": "4qf5jeepfn3jv3g2socql6xh7vmd4b7s", "tests": [ { 
"inputs": { @@ -325,6 +327,156 @@ "name": "", "description": "", "tasks": {} + }, + "workflows/wdl-common/wdl/tasks/glnexus.wdl": { + "key": "workflows/wdl-common/wdl/tasks/glnexus.wdl", + "name": "", + "description": "", + "tasks": { + "glnexus": { + "key": "glnexus", + "digest": "", + "tests": [] + } + } + }, + "workflows/wdl-common/wdl/tasks/mosdepth.wdl": { + "key": "workflows/wdl-common/wdl/tasks/mosdepth.wdl", + "name": "", + "description": "", + "tasks": { + "mosdepth": { + "key": "mosdepth", + "digest": "", + "tests": [] + } + } + }, + "workflows/wdl-common/wdl/tasks/pbsv_call.wdl": { + "key": "workflows/wdl-common/wdl/tasks/pbsv_call.wdl", + "name": "", + "description": "", + "tasks": { + "pbsv_call": { + "key": "pbsv_call", + "digest": "", + "tests": [] + } + } + }, + "workflows/wdl-common/wdl/tasks/pbsv_discover.wdl": { + "key": "workflows/wdl-common/wdl/tasks/pbsv_discover.wdl", + "name": "", + "description": "", + "tasks": { + "pbsv_discover": { + "key": "pbsv_discover", + "digest": "", + "tests": [] + } + } + }, + "workflows/wdl-common/wdl/tasks/pharmcat.wdl": { + "key": "workflows/wdl-common/wdl/tasks/pharmcat.wdl", + "name": "", + "description": "", + "tasks": { + "pangu_cyp2d6": { + "key": "pangu_cyp2d6", + "digest": "", + "tests": [] + }, + "pharmcat_preprocess": { + "key": "pharmcat_preprocess", + "digest": "", + "tests": [] + }, + "filter_preprocessed_vcf": { + "key": "filter_preprocessed_vcf", + "digest": "", + "tests": [] + }, + "run_pharmcat": { + "key": "run_pharmcat", + "digest": "", + "tests": [] + } + } + }, + "workflows/wdl-common/wdl/tasks/whatshap_haplotag.wdl": { + "key": "workflows/wdl-common/wdl/tasks/whatshap_haplotag.wdl", + "name": "", + "description": "", + "tasks": { + "whatshap_haplotag": { + "key": "whatshap_haplotag", + "digest": "", + "tests": [] + } + } + }, + "workflows/wdl-common/wdl/tasks/whatshap_phase.wdl": { + "key": "workflows/wdl-common/wdl/tasks/whatshap_phase.wdl", + "name": "", + "description": "", + "tasks": { 
+ "whatshap_phase": { + "key": "whatshap_phase", + "digest": "", + "tests": [] + } + } + }, + "workflows/wdl-common/wdl/tasks/whatshap_stats.wdl": { + "key": "workflows/wdl-common/wdl/tasks/whatshap_stats.wdl", + "name": "", + "description": "", + "tasks": { + "whatshap_stats": { + "key": "whatshap_stats", + "digest": "", + "tests": [] + } + } + }, + "workflows/wdl-common/wdl/workflows/deepvariant/deepvariant.wdl": { + "key": "workflows/wdl-common/wdl/workflows/deepvariant/deepvariant.wdl", + "name": "", + "description": "", + "tasks": { + "deepvariant_make_examples": { + "key": "deepvariant_make_examples", + "digest": "", + "tests": [] + }, + "deepvariant_call_variants": { + "key": "deepvariant_call_variants", + "digest": "", + "tests": [] + }, + "deepvariant_postprocess_variants": { + "key": "deepvariant_postprocess_variants", + "digest": "", + "tests": [] + } + } + }, + "workflows/wdl-common/wdl/workflows/phase_vcf/phase_vcf.wdl": { + "key": "workflows/wdl-common/wdl/workflows/phase_vcf/phase_vcf.wdl", + "name": "", + "description": "", + "tasks": { + "split_vcf": { + "key": "split_vcf", + "digest": "", + "tests": [] + }, + "bcftools_concat": { + "key": "bcftools_concat", + "digest": "", + "tests": [] + } + } } }, "engines": { diff --git a/workflows/assemble_genome/assemble_genome.wdl b/workflows/assemble_genome/assemble_genome.wdl index 96a733d..1e6807f 100644 --- a/workflows/assemble_genome/assemble_genome.wdl +++ b/workflows/assemble_genome/assemble_genome.wdl @@ -9,7 +9,7 @@ workflow assemble_genome { String sample_id Array[File] reads_fastas - ReferenceData reference + Array[ReferenceData] references String? hifiasm_extra_params File? 
father_yak @@ -38,32 +38,42 @@ workflow assemble_genome { call gfa2fa { input: gfa = gfa, - reference_index = reference.fasta.data_index, - runtime_attributes = default_runtime_attributes + runtime_attributes = default_runtime_attributes } } + + scatter (ref in references) { + call align_hifiasm { + input: + sample_id = sample_id, + query_sequences = gfa2fa.zipped_fasta, + reference = ref.fasta.data, + reference_name = ref.name, + runtime_attributes = default_runtime_attributes + } - call align_hifiasm { - input: - sample_id = sample_id, - query_sequences = gfa2fa.zipped_fasta, - reference = reference.fasta.data, - reference_name = reference.name, - runtime_attributes = default_runtime_attributes + IndexData sample_aligned_bam = { + "data": align_hifiasm.asm_bam, + "data_index": align_hifiasm.asm_bam_index + } + + Pair[ReferenceData,IndexData] align_data = (ref, sample_aligned_bam) } + output { Array[File] assembly_noseq_gfas = hifiasm_assemble.assembly_noseq_gfas Array[File] assembly_lowQ_beds = hifiasm_assemble.assembly_lowQ_beds Array[File] zipped_assembly_fastas = gfa2fa.zipped_fasta Array[File] assembly_stats = gfa2fa.assembly_stats - IndexData asm_bam = {"data": align_hifiasm.asm_bam, "data_index": align_hifiasm.asm_bam_index} + Array[IndexData] asm_bams = sample_aligned_bam + Array[Pair[ReferenceData,IndexData]] alignments = align_data } parameter_meta { sample_id: {help: "Sample ID; used for naming files"} reads_fastas: {help: "Reads in fasta format to be used for assembly; one for each movie bam to be used in assembly. 
Reads fastas from one or more sample may be combined to use in the assembly"} - reference: {help: "Reference genome data"} + references: {help: "Array of Reference genomes data"} hiiasm_extra_params: {help: "[OPTIONAL] Additional parameters to pass to hifiasm assembly"} father_yak: {help: "[OPTIONAL] kmer counts for the father; required if running trio-based assembly"} mother_yak: {help: "[OPTIONAL] kmer counts for the mother; required if running trio-based assembly"} @@ -88,7 +98,7 @@ task hifiasm_assemble { String prefix = "~{sample_id}.asm" Int threads = 48 Int mem_gb = threads * 6 - Int disk_size = ceil((size(reads_fastas[0], "GB") * length(reads_fastas)) * 4 + 20) + Int disk_size = ceil(size(reads_fastas, "GB") * 4 + 20) command <<< set -euo pipefail @@ -132,8 +142,6 @@ task gfa2fa { input { File gfa - File reference_index - RuntimeAttributes runtime_attributes } @@ -157,11 +165,12 @@ task gfa2fa { # Calculate assembly stats k8 \ /opt/calN50/calN50.js \ - -f ~{reference_index} \ ~{gfa_basename}.fasta.gz \ > ~{gfa_basename}.fasta.stats.txt >>> + + output { File zipped_fasta = "~{gfa_basename}.fasta.gz" File assembly_stats = "~{gfa_basename}.fasta.stats.txt" @@ -193,7 +202,8 @@ task align_hifiasm { } Int threads = 16 - Int disk_size = ceil((size(query_sequences[0], "GB") * length(query_sequences) + size(reference, "GB")) * 2 + 20) + Int mem_gb = threads * 8 + Int disk_size = ceil((size(query_sequences, "GB") + size(reference, "GB")) * 2 + 20) command <<< set -euo pipefail @@ -209,7 +219,7 @@ task align_hifiasm { ~{reference} \ ~{sep=' ' query_sequences} \ | samtools sort \ - -@ 4 \ + -@ 3 \ -T ./TMP \ -m 8G \ -O BAM \ @@ -226,7 +236,7 @@ task align_hifiasm { runtime { docker: "~{runtime_attributes.container_registry}/align_hifiasm@sha256:3968cb152a65163005ffed46297127536701ec5af4c44e8f3e7051f7b01f80fe" cpu: threads - memory: "128 GB" + memory: mem_gb + " GB" disk: disk_size + " GB" disks: "local-disk " + disk_size + " HDD" preemptible: 
runtime_attributes.preemptible_tries diff --git a/workflows/de_novo_assembly_sample/de_novo_assembly_sample.wdl b/workflows/de_novo_assembly_sample/de_novo_assembly_sample.wdl index 72f7957..43354fc 100644 --- a/workflows/de_novo_assembly_sample/de_novo_assembly_sample.wdl +++ b/workflows/de_novo_assembly_sample/de_novo_assembly_sample.wdl @@ -12,7 +12,7 @@ workflow de_novo_assembly_sample { input { Sample sample - ReferenceData reference + Array[ReferenceData] references String backend RuntimeAttributes default_runtime_attributes @@ -31,48 +31,57 @@ workflow de_novo_assembly_sample { input: sample_id = sample.sample_id, reads_fastas = samtools_fasta.reads_fasta, - reference = reference, - hifiasm_extra_params = "", + references = references, backend = backend, default_runtime_attributes = default_runtime_attributes, on_demand_runtime_attributes = on_demand_runtime_attributes } - call htsbox { - input: - bam = assemble_genome.asm_bam.data, - bam_index = assemble_genome.asm_bam.data_index, - reference = reference.fasta.data, - runtime_attributes = default_runtime_attributes - } + scatter (aln in assemble_genome.alignments) { + ReferenceData ref = aln.left + IndexData bam = aln.right + call htsbox { + input: + bam = bam.data, + bam_index = bam.data_index, + reference = ref.fasta.data, + runtime_attributes = default_runtime_attributes + } - call ZipIndexVcf.zip_index_vcf { - input: - vcf = htsbox.htsbox_vcf, - runtime_attributes = default_runtime_attributes - } + call ZipIndexVcf.zip_index_vcf { + input: + vcf = htsbox.htsbox_vcf, + runtime_attributes = default_runtime_attributes + } - call BcftoolsStats.bcftools_stats { - input: - vcf = zip_index_vcf.zipped_vcf, - params = "--samples ~{basename(assemble_genome.asm_bam.data)}", - reference = reference.fasta.data, - runtime_attributes = default_runtime_attributes - } + IndexData htsbox_vcf = { + "data": zip_index_vcf.zipped_vcf, + "data_index": zip_index_vcf.zipped_vcf_index + } + call BcftoolsStats.bcftools_stats { + 
input: + vcf = zip_index_vcf.zipped_vcf, + params = "--samples ~{basename(bam.data)}", + reference = ref.fasta.data, + runtime_attributes = default_runtime_attributes + } + + } output { Array[File] assembly_noseq_gfas = assemble_genome.assembly_noseq_gfas Array[File] assembly_lowQ_beds = assemble_genome.assembly_lowQ_beds Array[File] zipped_assembly_fastas = assemble_genome.zipped_assembly_fastas Array[File] assembly_stats = assemble_genome.assembly_stats - IndexData asm_bam = assemble_genome.asm_bam - IndexData htsbox_vcf = {"data": zip_index_vcf.zipped_vcf, "data_index": zip_index_vcf.zipped_vcf_index} - File htsbox_vcf_stats = bcftools_stats.stats + Array[IndexData] asm_bams = assemble_genome.asm_bams + + Array[IndexData] htsbox_vcfs = htsbox_vcf + Array[File] htsbox_vcf_stats = bcftools_stats.stats } parameter_meta { sample: {help: "Sample information and associated data files"} - reference: {help: "Reference genome data"} + references: {help: "Array of Reference genomes data"} default_runtime_attributes: {help: "Default RuntimeAttributes; spot if preemptible was set to true, otherwise on_demand"} on_demand_runtime_attributes: {help: "RuntimeAttributes for tasks that require dedicated instances"} } diff --git a/workflows/de_novo_assembly_trio/de_novo_assembly_trio.wdl b/workflows/de_novo_assembly_trio/de_novo_assembly_trio.wdl index b5c9bcc..aa07ffc 100644 --- a/workflows/de_novo_assembly_trio/de_novo_assembly_trio.wdl +++ b/workflows/de_novo_assembly_trio/de_novo_assembly_trio.wdl @@ -11,7 +11,7 @@ workflow de_novo_assembly_trio { input { Cohort cohort - ReferenceData reference + Array[ReferenceData] references String backend RuntimeAttributes default_runtime_attributes @@ -41,13 +41,6 @@ workflow de_novo_assembly_trio { } } - call yak_count as yak_count_father { - input: - sample_id = father.sample_id, - reads_fastas = samtools_fasta_father.reads_fasta, - runtime_attributes = default_runtime_attributes - } - scatter (movie_bam in mother.movie_bams) { call 
SamtoolsFasta.samtools_fasta as samtools_fasta_mother { input: @@ -56,10 +49,32 @@ workflow de_novo_assembly_trio { } } + # if parental coverage is low (<15x), keep singleton kmers from parents and use them to bin child reads + # if parental coverage is high (>=15x), use bloom filter and require that a kmer occur >= 5 times in + # one parent and <2 times in the other parent to be used for binning + # 60GB uncompressed FASTA ~= 10x coverage (this is not robust to big changes in mean read length) + # memory for 24 threads is 48GB with bloom filter (<=50x coverage) and 65GB without bloom filter (<=30x coverage) + Boolean low_depth = if ((size(samtools_fasta_father.reads_fasta, "GB") < 90) && (size(samtools_fasta_mother.reads_fasta, "GB") < 90)) then true else false + + String yak_params = if (low_depth) then "-b0" else "-b37" + Int yak_mem_gb = if (low_depth) then 70 else 50 + String hifiasm_extra_params = if (low_depth) then "-c1 -d1" else "-c2 -d5" + + call yak_count as yak_count_father { + input: + sample_id = father.sample_id, + reads_fastas = samtools_fasta_father.reads_fasta, + yak_params = yak_params, + mem_gb = yak_mem_gb, + runtime_attributes = default_runtime_attributes + } + call yak_count as yak_count_mother { input: sample_id = mother.sample_id, reads_fastas = samtools_fasta_mother.reads_fasta, + yak_params = yak_params, + mem_gb = yak_mem_gb, runtime_attributes = default_runtime_attributes } @@ -84,8 +99,8 @@ workflow de_novo_assembly_trio { input: sample_id = "~{cohort.cohort_id}.~{child.sample_id}", reads_fastas = samtools_fasta_child.reads_fasta, - reference = reference, - hifiasm_extra_params = "-c1 -d1", + references = references, + hifiasm_extra_params = hifiasm_extra_params, father_yak = yak_count_father.yak, mother_yak = yak_count_mother.yak, backend = backend, @@ -101,12 +116,12 @@ workflow de_novo_assembly_trio { Array[Array[File]] assembly_lowQ_beds = flatten(assemble_genome.assembly_lowQ_beds) Array[Array[File]] zipped_assembly_fastas = 
flatten(assemble_genome.zipped_assembly_fastas) Array[Array[File]] assembly_stats = flatten(assemble_genome.assembly_stats) - Array[IndexData] asm_bams = flatten(assemble_genome.asm_bam) + Array[Array[IndexData]] asm_bams = flatten(assemble_genome.asm_bams) } parameter_meta { cohort: {help: "Sample information for the cohort"} - reference: {help: "Reference genome data"} + references: {help: "Array of Reference genomes data"} default_runtime_attributes: {help: "Default RuntimeAttributes; spot if preemptible was set to true, otherwise on_demand"} on_demand_runtime_attributes: {help: "RuntimeAttributes for tasks that require dedicated instances"} } @@ -150,13 +165,13 @@ task yak_count { String sample_id Array[File] reads_fastas + String yak_params + Int mem_gb + RuntimeAttributes runtime_attributes } - Int threads = 10 - - # Usage up to 140 GB @ 10 threads for Revio samples - Int mem_gb = 16 * threads + Int threads = 24 Int disk_size = ceil(size(reads_fastas, "GB") * 2 + 20) command <<< @@ -165,6 +180,7 @@ task yak_count { yak count \ -t ~{threads} \ -o ~{sample_id}.yak \ + ~{yak_params} \ ~{sep=' ' reads_fastas} >>> diff --git a/workflows/input_template.json b/workflows/input_template.json index 148b817..64e5d62 100644 --- a/workflows/input_template.json +++ b/workflows/input_template.json @@ -1,28 +1,31 @@ { - "de_novo_assembly.cohort": { - "cohort_id": "String", - "samples": [ - { - "sample_id": "String", - "movie_bams": "Array[File]", - "sex": "String?", - "father_id": "String?", - "mother_id": "String?", - "run_de_novo_assembly": "Boolean" - } + "de_novo_assembly.cohort": { + "cohort_id": "String", + "samples": [ + { + "sample_id": "String", + "movie_bams": "Array[File]", + "sex": "String?", + "father_id": "String?", + "mother_id": "String?", + "run_de_novo_assembly": "Boolean" + } + ], + "run_de_novo_assembly_trio": "Boolean" + }, + "de_novo_assembly.references": [ + { + "name": "String", + "fasta": { + "data": "File", + "data_index": "File" + } + } ], - 
"run_de_novo_assembly_trio": "Boolean" - }, - "de_novo_assembly.reference": { - "name": "String", - "fasta": { - "data": "File", - "data_index": "File" - } - }, - "de_novo_assembly.backend": "String ['GCP', 'Azure', 'AWS', or 'HPC']", - "de_novo_assembly.zones": "String? (optional); required if backend is set to 'GCP' or 'AWS'", - "de_novo_assembly.aws_spot_queue_arn": "String? (optional); required if backend is set to 'AWS'", - "de_novo_assembly.aws_on_demand_queue_arn": "String? (optional); required if backend is set to 'AWS'", - "de_novo_assembly.preemptible": "Boolean" + "de_novo_assembly.zones": "String? (optional); required if backend is set to 'AWS'", + "de_novo_assembly.aws_spot_queue_arn": "String? (optional); required if backend is set to 'AWS'", + "de_novo_assembly.aws_on_demand_queue_arn": "String? (optional)", + "de_novo_assembly.preemptible": "Boolean", + "de_novo_assembly.backend": "String ['GCP', 'Azure', 'AWS', or 'HPC']", + "de_novo_assembly.container_registry": "String? 
(optional)" } \ No newline at end of file diff --git a/workflows/main.wdl b/workflows/main.wdl index bafe340..7647f34 100644 --- a/workflows/main.wdl +++ b/workflows/main.wdl @@ -10,7 +10,7 @@ workflow de_novo_assembly { input { Cohort cohort - ReferenceData reference + Array[ReferenceData] references # Backend configuration String backend @@ -38,7 +38,7 @@ workflow de_novo_assembly { call DeNovoAssemblySample.de_novo_assembly_sample { input: sample = sample, - reference = reference, + references = references, backend = backend, default_runtime_attributes = default_runtime_attributes, on_demand_runtime_attributes = backend_configuration.on_demand_runtime_attributes @@ -51,7 +51,7 @@ workflow de_novo_assembly { call DeNovoAssemblyTrio.de_novo_assembly_trio { input: cohort = cohort, - reference = reference, + references = references, backend = backend, default_runtime_attributes = default_runtime_attributes, on_demand_runtime_attributes = backend_configuration.on_demand_runtime_attributes @@ -65,9 +65,9 @@ workflow de_novo_assembly { Array[Array[File]?] assembly_lowQ_beds = de_novo_assembly_sample.assembly_lowQ_beds Array[Array[File]?] zipped_assembly_fastas = de_novo_assembly_sample.zipped_assembly_fastas Array[Array[File]?] assembly_stats = de_novo_assembly_sample.assembly_stats - Array[IndexData?] asm_bam = de_novo_assembly_sample.asm_bam - Array[IndexData?] htsbox_vcf = de_novo_assembly_sample.htsbox_vcf - Array[File?] htsbox_vcf_stats = de_novo_assembly_sample.htsbox_vcf_stats + Array[Array[IndexData]?] asm_bam = de_novo_assembly_sample.asm_bams + Array[Array[IndexData]?] htsbox_vcf = de_novo_assembly_sample.htsbox_vcfs + Array[Array[File]?] htsbox_vcf_stats = de_novo_assembly_sample.htsbox_vcf_stats # de_novo_assembly_trio output Array[Map[String, String]]? haplotype_key = de_novo_assembly_trio.haplotype_key @@ -75,7 +75,7 @@ workflow de_novo_assembly { Array[Array[File]]? trio_assembly_lowQ_beds = de_novo_assembly_trio.assembly_lowQ_beds Array[Array[File]]? 
trio_zipped_assembly_fastas = de_novo_assembly_trio.zipped_assembly_fastas Array[Array[File]]? trio_assembly_stats = de_novo_assembly_trio.assembly_stats - Array[IndexData]? trio_asm_bams = de_novo_assembly_trio.asm_bams + Array[Array[IndexData]]? trio_asm_bams = de_novo_assembly_trio.asm_bams } parameter_meta {