diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6f3161eb42..3e23d4c1d3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
-- [1759](https://github.com/nf-core/sarek/pull/1759) - Back to dev
+- [1682](https://github.com/nf-core/sarek/pull/1682) - Add `bcftools_norm` in `POST_VARIANTCALLING` for normalization of all vcf files; edit vcf_concatenate_germline subworkflow
+- [1760](https://github.com/nf-core/sarek/pull/1760) - Back to dev
### Changed
diff --git a/README.md b/README.md
index c4f7b5443b..9b6b608903 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,7 @@ Depending on the options and samples provided, the pipeline can currently perfor
- `Strelka2`
- `TIDDIT`
- `Lofreq`
+- Post-variant calling options (`BCFtools concat` for germline vcfs, `BCFtools norm` for all vcfs)
- Variant filtering and annotation (`SnpEff`, `Ensembl VEP`, `BCFtools annotate`)
- Summarise and represent QC (`MultiQC`)
@@ -183,6 +184,7 @@ We thank the following people for their extensive assistance in the development
- [Szilveszter Juhos](https://github.com/szilvajuhos)
- [Tobias Koch](https://github.com/KochTobi)
- [Winni Kretzschmar](https://github.com/winni2k)
+- [Patricie Skaláková](https://github.com/Patricie34)
## Acknowledgements
diff --git a/conf/modules/post_variant_calling.config b/conf/modules/post_variant_calling.config
index 3354d4671f..9b9bd0b7bc 100644
--- a/conf/modules/post_variant_calling.config
+++ b/conf/modules/post_variant_calling.config
@@ -16,7 +16,7 @@
process {
- withName: 'GERMLINE_VCFS_CONCAT'{
+ withName: 'GERMLINE_VCFS_CONCAT' {
ext.args = { "-a" }
ext.when = { params.concatenate_vcfs }
publishDir = [
@@ -25,18 +25,41 @@ process {
]
}
- withName: 'GERMLINE_VCFS_CONCAT_SORT'{
+ withName: 'GERMLINE_VCFS_CONCAT_SORT' {
ext.prefix = { "${meta.id}.germline" }
ext.when = { params.concatenate_vcfs }
publishDir = [
mode: params.publish_dir_mode,
- path: { "${params.outdir}/variant_calling/concat/${meta.id}/" }
+ path: { "${params.outdir}/variant_calling/concat/${meta.id}/" },
+ pattern: "*vcf.gz"
+ ]
+ }
+
+ withName: 'VCFS_NORM_SORT' {
+ ext.prefix = { "${meta.id}.${meta.variantcaller}.norm" }
+ ext.when = { params.normalize_vcfs }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/normalized/${meta.id}/" },
+ pattern: "*vcf.gz"
+ ]
+ }
+
+ withName: 'VCFS_NORM' {
+ ext.args = { [
+ '--multiallelics -both', //split multiallelic sites into biallelic records and both SNPs and indels should be merged separately into two records
+ '--rm-dup all' //output only the first instance of a record which is present multiple times
+ ].join(' ') }
+ ext.when = { params.normalize_vcfs }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/normalized/${meta.id}/" }
]
}
withName: 'TABIX_EXT_VCF' {
ext.prefix = { "${input.baseName}" }
- ext.when = { params.concatenate_vcfs }
+ ext.when = { params.concatenate_vcfs || params.normalize_vcfs }
}
withName: 'TABIX_GERMLINE_VCFS_CONCAT_SORT'{
@@ -44,7 +67,19 @@ process {
ext.when = { params.concatenate_vcfs }
publishDir = [
mode: params.publish_dir_mode,
- path: { "${params.outdir}/variant_calling/concat/${meta.id}/" }
+ path: { "${params.outdir}/variant_calling/concat/${meta.id}/" },
+ pattern: "*.tbi"
+ ]
+ }
+
+ withName: 'TABIX_VCFS_NORM_SORT' { // index step for the sorted, normalized VCFs
+ ext.prefix = { "${meta.id}.${meta.variantcaller}.norm" } // same prefix expression as VCFS_NORM_SORT
+ ext.when = { params.normalize_vcfs } // only runs when normalization is requested
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/normalized/${meta.id}/" },
+ pattern: "*.tbi" // publish only the index file from this step
+ ]
+ }
}
+
diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png
index a381343500..2bc9fad810 100644
Binary files a/docs/images/sarek_subway.png and b/docs/images/sarek_subway.png differ
diff --git a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg
index 6d8d172652..1dca7b2985 100644
--- a/docs/images/sarek_subway.svg
+++ b/docs/images/sarek_subway.svg
@@ -2,103 +2,17 @@
+ id="path5684-4-7" />UMImappingfastqcfastpfasfastqmosdepth, samtoolsbcftools, vcftoolsmosdepth, samtoolsngscheckmateconcatenate(germline)convertbam/crambam/crambam/cramvcfmarkduplicatesbam/cramvariant callingpre-processingvariant callingprepare recalibrationapplybqsrnormalizevcfvcfvcfensemblvepsnpeffmultiqcannotationbcftools annotateconvertubamspring
diff --git a/docs/output.md b/docs/output.md
index 6d723ba03b..72200f256c 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -880,6 +880,20 @@ Germline VCFs from `DeepVariant`, `FreeBayes`, `HaplotypeCaller`, `Haplotyper`,
+### Normalization
+
+All VCFs from `DeepVariant`, `FreeBayes`, `HaplotypeCaller`, `Haplotyper`, `Manta`, `bcftools mpileup`, `Strelka`, or `Tiddit` are normalized with `bcftools norm`. The field `SOURCE` is added to the VCF header to report the variant caller.
+
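+<details markdown="1">
+<summary>Normalized VCF-files for normal and tumor samples</summary>
+
+**Output directory: `{outdir}/variant_calling/normalized/<sample>/`**
+
+- `<sample>.<variantcaller>.norm.vcf.gz` and `<sample>.<variantcaller>.norm.vcf.gz.tbi`
+  - VCF with tabix index
+
+</details>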
+
## Variant annotation
This directory contains results from the final annotation steps: two tools are used for annotation, [snpEff](http://snpeff.sourceforge.net/) and [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html). Both results can also be combined by setting `--tools merge`.
diff --git a/modules.json b/modules.json
index ad4fd57616..2a4b8e5145 100644
--- a/modules.json
+++ b/modules.json
@@ -26,6 +26,11 @@
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["bam_ngscheckmate"]
},
+ "bcftools/norm": {
+ "branch": "master",
+ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
+ "installed_by": ["modules"]
+ },
"bcftools/sort": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
diff --git a/modules/nf-core/bcftools/norm/environment.yml b/modules/nf-core/bcftools/norm/environment.yml
new file mode 100644
index 0000000000..5c00b116ad
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/environment.yml
@@ -0,0 +1,5 @@
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::bcftools=1.20
diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf
new file mode 100644
index 0000000000..bd7a250127
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/main.nf
@@ -0,0 +1,70 @@
+process BCFTOOLS_NORM { // runs `bcftools norm` on one VCF against a reference FASTA
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0':
+ 'biocontainers/bcftools:1.20--h8b25389_0' }"
+
+ input:
+ tuple val(meta), path(vcf), path(tbi) // tbi is staged next to the VCF; the command below never names it
+ tuple val(meta2), path(fasta) // meta2 is not used by the script or stub
+
+ output:
+ tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf
+ tuple val(meta), path("*.tbi") , emit: tbi, optional: true
+ tuple val(meta), path("*.csi") , emit: csi, optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '--output-type z' // this default applies only when ext.args is unset
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : // suffix picked by substring match on the output-type flag
+ args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+ args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+ args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+ "vcf.gz" // NOTE(review): fallback names the file .vcf.gz but adds no --output-type; presumably bcftools infers the type from the suffix — confirm
+
+ """
+ bcftools norm \\
+ --fasta-ref ${fasta} \\
+ --output ${prefix}.${extension} \\
+ $args \\
+ --threads $task.cpus \\
+ ${vcf}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: '--output-type z' // same defaulting as the script section
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : // same suffix selection as the script section
+ args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+ args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+ args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+ "vcf.gz"
+ def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : // index suffix mirrors the --write-index / -W flag
+ args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" :
+ args.contains("--write-index") || args.contains("-W") ? "csi" : // a bare flag maps to csi
+ ""
+ def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" // .gz stubs are a gzipped newline, others an empty file
+ def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" // index stub only for .gz outputs
+
+ """
+ ${create_cmd} ${prefix}.${extension}
+ ${create_index}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml
new file mode 100644
index 0000000000..b6edeb4aae
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/meta.yml
@@ -0,0 +1,85 @@
+name: bcftools_norm
+description: Normalize VCF file
+keywords:
+ - normalize
+ - norm
+ - variant calling
+ - VCF
+tools:
+ - norm:
+ description: |
+ Normalize VCF files.
+ homepage: http://samtools.github.io/bcftools/bcftools.html
+ documentation: http://www.htslib.org/doc/bcftools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence: ["MIT"]
+ identifier: biotools:bcftools
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - vcf:
+ type: file
+ description: |
+ The vcf file to be normalized
+ e.g. 'file1.vcf'
+ pattern: "*.{vcf,vcf.gz}"
+ - tbi:
+ type: file
+ description: |
+ An optional index of the VCF file (for when the VCF is compressed)
+ pattern: "*.vcf.gz.tbi"
+ - - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'genome' ]
+ - fasta:
+ type: file
+ description: FASTA reference file
+ pattern: "*.{fasta,fa}"
+output:
+ - vcf:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.{vcf,vcf.gz,bcf,bcf.gz}":
+ type: file
+ description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed
+ BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file
+ pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
+ - tbi:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.tbi":
+ type: file
+ description: Alternative VCF file index
+ pattern: "*.tbi"
+ - csi:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.csi":
+ type: file
+ description: Default VCF file index
+ pattern: "*.csi"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@abhi18av"
+ - "@ramprasadn"
+maintainers:
+ - "@abhi18av"
+ - "@ramprasadn"
diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test b/modules/nf-core/bcftools/norm/tests/main.nf.test
new file mode 100644
index 0000000000..dbc4150237
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/tests/main.nf.test
@@ -0,0 +1,563 @@
+nextflow_process {
+
+ name "Test Process BCFTOOLS_NORM"
+ script "../main.nf"
+ process "BCFTOOLS_NORM"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "bcftools"
+ tag "bcftools/norm"
+
+ test("sarscov2 - [ vcf, [] ], fasta") {
+
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ []
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index") {
+
+ config "./vcf_gz_index.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ []
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.vcf,
+ process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }
+ ).match() },
+ { assert process.out.csi[0][1].endsWith(".csi") }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi") {
+
+ config "./vcf_gz_index_csi.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ []
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.vcf,
+ process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }
+ ).match() },
+ { assert process.out.csi[0][1].endsWith(".csi") }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi") {
+
+ config "./vcf_gz_index_tbi.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ []
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.vcf,
+ process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } // NOTE(review): snapshots the csi channel, which is recorded as an empty list for this test; the assertion below checks tbi — consider snapshotting process.out.tbi
+ ).match() },
+ { assert process.out.tbi[0][1].endsWith(".tbi") } // only check that actually covers the tbi output
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, tbi ], fasta") {
+
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, tbi ], fasta - vcf output") {
+
+ config "./nextflow.vcf.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output") {
+
+ config "./nextflow.vcf.config" // NOTE(review): test is named vcf_gz but loads the plain-VCF config (snapshot records test_norm.vcf); nextflow.vcf_gz.config is not referenced by any test in this file
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.vcf,
+ process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, // index channels are reduced to file names before snapshotting
+ process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } },
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, tbi ], fasta - bcf output") {
+
+ config "./nextflow.bcf.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output") {
+
+ config "./nextflow.bcf_gz.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, [] ], fasta - stub") {
+
+ config "./nextflow.config"
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ []
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, tbi ], fasta -stub") {
+
+ config "./nextflow.config"
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, tbi ], fasta - vcf output -stub") {
+
+ config "./nextflow.vcf.config"
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub") {
+
+ config "./nextflow.vcf.config" // NOTE(review): named "- stub" but, unlike the other stub tests, no options "-stub" follows, so the real command runs (snapshot md5 equals the non-stub vcf run); the config is also the plain-VCF one, not vcf_gz
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() } // compares every output channel against the stored snapshot
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, tbi ], fasta - bcf output - stub") {
+
+ config "./nextflow.bcf.config"
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub") {
+
+ config "./nextflow.bcf_gz.config"
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub") {
+
+ config "./vcf_gz_index.config"
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ []
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.csi[0][1].endsWith(".csi") }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub") {
+
+ config "./vcf_gz_index_csi.config"
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ []
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.csi[0][1].endsWith(".csi") }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub") {
+
+ config "./vcf_gz_index_tbi.config"
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ []
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.tbi[0][1].endsWith(".tbi") }
+ )
+ }
+
+ }
+
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test.snap b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap
new file mode 100644
index 0000000000..3be52116a9
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap
@@ -0,0 +1,758 @@
+{
+ "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:38:42.639095032"
+ },
+ "sarscov2 - [ vcf, [] ], fasta - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:38:05.448449893"
+ },
+ "sarscov2 - [ vcf, tbi ], fasta - vcf output": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:37:12.741719961"
+ },
+ "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:39:22.875147941"
+ },
+ "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-05T08:15:23.38765384"
+ },
+ "sarscov2 - [ vcf, [] ], fasta": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:36:21.519977754"
+ },
+ "sarscov2 - [ vcf, tbi ], fasta - vcf output -stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:38:27.8230994"
+ },
+ "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.bcf:md5,f35545c26a788b5eb697d9c0490339d9"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.bcf:md5,f35545c26a788b5eb697d9c0490339d9"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:37:53.942403192"
+ },
+ "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-05T13:56:05.3799488"
+ },
+ "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ [
+
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-05T13:53:28.356741947"
+ },
+ "sarscov2 - [ vcf, tbi ], fasta": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:36:58.39445154"
+ },
+ "sarscov2 - [ vcf, tbi ], fasta -stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:38:16.259516142"
+ },
+ "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:39:10.503208929"
+ },
+ "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz.csi"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-05T07:52:58.381931979"
+ },
+ "sarscov2 - [ vcf, tbi ], fasta - bcf output - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:38:59.121377258"
+ },
+ "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-05T13:56:16.404380471"
+ },
+ "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test_vcf.vcf.gz.csi"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-05T13:53:09.808834237"
+ },
+ "sarscov2 - [ vcf, tbi ], fasta - bcf output": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ff760495922469e56d0fc3372773000d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T14:37:42.141945244"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config
new file mode 100644
index 0000000000..b79af86817
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config
@@ -0,0 +1,4 @@
+process {
+ ext.args = '-m -any --output-type b --no-version'
+ ext.prefix = "test_norm"
+}
diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config
new file mode 100644
index 0000000000..f36f397c2c
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config
@@ -0,0 +1,4 @@
+process {
+ ext.args = '-m -any --output-type u --no-version'
+ ext.prefix = "test_norm"
+}
diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.config b/modules/nf-core/bcftools/norm/tests/nextflow.config
new file mode 100644
index 0000000000..510803b407
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/tests/nextflow.config
@@ -0,0 +1,4 @@
+process {
+ ext.args = '-m -any --no-version'
+ ext.prefix = "test_norm"
+}
diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config
new file mode 100644
index 0000000000..10bf93e320
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config
@@ -0,0 +1,4 @@
+process {
+ ext.args = '-m -any --output-type v --no-version'
+ ext.prefix = "test_norm"
+}
diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config
new file mode 100644
index 0000000000..b31dd2de22
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config
@@ -0,0 +1,4 @@
+process {
+ ext.args = '-m -any --output-type z --no-version' // bgzip-compressed VCF output; --no-version keeps version/command lines out of the header
+ ext.prefix = "test_norm"
+}
diff --git a/modules/nf-core/bcftools/norm/tests/tags.yml b/modules/nf-core/bcftools/norm/tests/tags.yml
new file mode 100644
index 0000000000..f6f5e35616
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/tests/tags.yml
@@ -0,0 +1,2 @@
+bcftools/norm:
+ - "modules/nf-core/bcftools/norm/**"
diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config
new file mode 100644
index 0000000000..7dd696ee26
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config
@@ -0,0 +1,4 @@
+process {
+ ext.prefix = { "${meta.id}_vcf" }
+ ext.args = "--output-type z --write-index --no-version"
+}
diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config
new file mode 100644
index 0000000000..aebffb6fb7
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config
@@ -0,0 +1,4 @@
+process {
+ ext.prefix = { "${meta.id}_vcf" }
+ ext.args = "--output-type z --write-index=csi --no-version"
+}
diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config
new file mode 100644
index 0000000000..b192ae7d19
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config
@@ -0,0 +1,4 @@
+process {
+ ext.prefix = { "${meta.id}_vcf" }
+ ext.args = "--output-type z --write-index=tbi --no-version"
+}
diff --git a/nextflow.config b/nextflow.config
index de5e6d4ca6..b847d21f47 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -73,6 +73,7 @@ params {
ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2
joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected
joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling
+ normalize_vcfs = false // by default we don't normalize the vcf-files
only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired sample
sentieon_dnascope_emit_mode = 'variant' // default value for Sentieon dnascope
sentieon_dnascope_pcr_indel_model = 'CONSERVATIVE'
@@ -477,4 +478,3 @@ includeConfig 'conf/modules/lofreq.config'
//annotate
includeConfig 'conf/modules/annotate.config'
-
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 5cdf35d555..f416eeeff9 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -402,6 +402,12 @@
"fa_icon": "fas fa-tape",
"description": "Option for concatenating germline vcf-files.",
"help_text": "Concatenating the germline vcf-files from each applied variant-caller into one vcf-file using bfctools concat."
+ },
+ "normalize_vcfs": {
+ "type": "boolean",
+ "fa_icon": "fas fa-tape",
+ "description": "Option for normalization of vcf-files.",
+ "help_text": "Normalization of all vcf-files from each applied variant-caller using bcftools norm."
}
}
},
diff --git a/subworkflows/local/post_variantcalling/main.nf b/subworkflows/local/post_variantcalling/main.nf
index 6b75d2c6b8..86bcd47353 100644
--- a/subworkflows/local/post_variantcalling/main.nf
+++ b/subworkflows/local/post_variantcalling/main.nf
@@ -3,23 +3,36 @@
//
include { CONCATENATE_GERMLINE_VCFS } from '../vcf_concatenate_germline/main'
+include { NORMALIZE_VCFS } from '../vcf_normalization/main'
workflow POST_VARIANTCALLING {
take:
- vcfs
+ germline_vcfs
+ tumor_only_vcfs
+ somatic_vcfs
+ fasta
concatenate_vcfs
+ normalize_vcfs
main:
versions = Channel.empty()
+ vcfs = Channel.empty()
if (concatenate_vcfs){
- CONCATENATE_GERMLINE_VCFS(vcfs)
+ CONCATENATE_GERMLINE_VCFS(germline_vcfs)
vcfs = vcfs.mix(CONCATENATE_GERMLINE_VCFS.out.vcfs)
versions = versions.mix(CONCATENATE_GERMLINE_VCFS.out.versions)
}
+ if (normalize_vcfs){
+ NORMALIZE_VCFS(germline_vcfs, tumor_only_vcfs, somatic_vcfs, fasta)
+
+ vcfs = vcfs.mix(NORMALIZE_VCFS.out.vcfs)
+ versions = versions.mix(NORMALIZE_VCFS.out.versions)
+ }
+
emit:
vcfs // post processed vcfs
diff --git a/subworkflows/local/vcf_concatenate_germline/main.nf b/subworkflows/local/vcf_concatenate_germline/main.nf
index 87f46b22e1..9d24420a2f 100644
--- a/subworkflows/local/vcf_concatenate_germline/main.nf
+++ b/subworkflows/local/vcf_concatenate_germline/main.nf
@@ -22,7 +22,7 @@ workflow CONCATENATE_GERMLINE_VCFS {
TABIX_EXT_VCF(ADD_INFO_TO_VCF.out.vcf)
// Gather vcfs and vcf-tbis for concatenating germline-vcfs
- germline_vcfs_with_tbis = TABIX_EXT_VCF.out.gz_tbi.map{ meta, vcf, tbi -> [ meta.subMap('id'), vcf, tbi ] }.groupTuple()
+ germline_vcfs_with_tbis = TABIX_EXT_VCF.out.gz_tbi.map{ meta, vcf, tbi -> [ meta.subMap('id'), vcf, tbi ] }.groupTuple() // key on sample id only: other meta fields may differ per caller and would split the group
GERMLINE_VCFS_CONCAT(germline_vcfs_with_tbis)
GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT.out.vcf)
@@ -32,11 +32,12 @@ workflow CONCATENATE_GERMLINE_VCFS {
versions = versions.mix(ADD_INFO_TO_VCF.out.versions)
versions = versions.mix(TABIX_EXT_VCF.out.versions)
versions = versions.mix(GERMLINE_VCFS_CONCAT.out.versions)
- versions = versions.mix(GERMLINE_VCFS_CONCAT.out.versions)
+ versions = versions.mix(GERMLINE_VCFS_CONCAT_SORT.out.versions)
versions = versions.mix(TABIX_GERMLINE_VCFS_CONCAT_SORT.out.versions)
emit:
- vcfs = germline_vcfs_with_tbis // post processed vcfs
+ vcfs = GERMLINE_VCFS_CONCAT_SORT.out.vcf // concatenated vcfs
versions // channel: [ versions.yml ]
}
+
diff --git a/subworkflows/local/vcf_normalization/main.nf b/subworkflows/local/vcf_normalization/main.nf
new file mode 100644
index 0000000000..3e1f9edf1e
--- /dev/null
+++ b/subworkflows/local/vcf_normalization/main.nf
@@ -0,0 +1,50 @@
+// Normalize all unannotated VCFs
+
+// Import modules
+include { ADD_INFO_TO_VCF } from '../../../modules/local/add_info_to_vcf/main'
+include { TABIX_BGZIPTABIX as TABIX_EXT_VCF } from '../../../modules/nf-core/tabix/bgziptabix/main'
+include { BCFTOOLS_NORM as VCFS_NORM } from '../../../modules/nf-core/bcftools/norm/main'
+include { BCFTOOLS_SORT as VCFS_NORM_SORT } from '../../../modules/nf-core/bcftools/sort/main'
+include { TABIX_TABIX as TABIX_VCFS_NORM_SORT } from '../../../modules/nf-core/tabix/tabix/main'
+
+// Workflow to normalize, compress, and index VCF files
+workflow NORMALIZE_VCFS {
+ // Runs ADD_INFO_TO_VCF -> bgzip/tabix -> bcftools norm -> bcftools sort -> tabix on every incoming VCF
+ take:
+ germline_vcfs // VCFs from germline variant calling
+ tumor_only_vcfs // VCFs from tumor-only variant calling
+ somatic_vcfs // VCFs from somatic (paired) variant calling
+ fasta // reference, handed to VCFS_NORM as its second input
+
+ main:
+ versions = Channel.empty()
+ // Pool the three input channels so every VCF goes through the same steps
+ vcfs = germline_vcfs.mix(tumor_only_vcfs, somatic_vcfs)
+
+ // Run the local ADD_INFO_TO_VCF module on each VCF before compression
+ ADD_INFO_TO_VCF(vcfs)
+
+ // Compress and index the VCF files (TABIX_BGZIPTABIX; result read from out.gz_tbi)
+ TABIX_EXT_VCF(ADD_INFO_TO_VCF.out.vcf)
+
+ // Normalize the compressed, indexed VCF files against the reference with BCFTOOLS_NORM
+ VCFS_NORM(TABIX_EXT_VCF.out.gz_tbi, fasta)
+
+ // Sort the normalized VCF files with BCFTOOLS_SORT
+ VCFS_NORM_SORT(VCFS_NORM.out.vcf)
+
+ // Index the sorted normalized VCF files (the index is not part of this workflow's emits)
+ TABIX_VCFS_NORM_SORT(VCFS_NORM_SORT.out.vcf)
+
+ // Gather versions of all tools used
+ versions = versions.mix(ADD_INFO_TO_VCF.out.versions)
+ versions = versions.mix(VCFS_NORM.out.versions)
+ versions = versions.mix(TABIX_EXT_VCF.out.versions)
+ versions = versions.mix(VCFS_NORM_SORT.out.versions)
+ versions = versions.mix(TABIX_VCFS_NORM_SORT.out.versions)
+
+ emit:
+ vcfs = VCFS_NORM_SORT.out.vcf // sorted, normalized vcfs (without their index)
+ versions // Channel: [versions.yml]
+}
+
diff --git a/tests/config/pytesttags.yml b/tests/config/pytesttags.yml
index 6be50502d1..1976d6e6d3 100644
--- a/tests/config/pytesttags.yml
+++ b/tests/config/pytesttags.yml
@@ -373,6 +373,38 @@ concatenate_vcfs:
- tests/csv/3.0/mapped_joint_bam.csv
- tests/test_concat_germline_vcfs.yml
+## normalize all vcfs
+normalize_vcfs:
+ - conf/modules/post_variant_calling.config
+ - modules/nf-core/bcftools/concat/**
+ - modules/nf-core/bcftools/mpileup/**
+ - modules/nf-core/bcftools/norm/**
+ - modules/nf-core/bcftools/sort/**
+ - modules/nf-core/deepvariant/**
+ - modules/nf-core/freebayes/**
+ - modules/nf-core/gatk4/haplotypecaller/**
+ - modules/nf-core/gatk4/mergevcfs/**
+ - modules/nf-core/manta/germline/**
+ - modules/nf-core/samtools/mpileup/**
+ - modules/nf-core/strelka/germline/**
+ - modules/nf-core/tabix/bgziptabix/**
+ - modules/nf-core/tabix/tabix/**
+ - modules/nf-core/tiddit/sv/**
+ - subworkflows/local/bam_variant_calling_deepvariant/**
+ - subworkflows/local/bam_variant_calling_freebayes/**
+ - subworkflows/local/bam_variant_calling_germline_all/**
+ - subworkflows/local/bam_variant_calling_germline_manta/**
+ - subworkflows/local/bam_variant_calling_haplotypecaller/**
+ - subworkflows/local/bam_variant_calling_mpileup/**
+ - subworkflows/local/bam_variant_calling_single_strelka/**
+ - subworkflows/local/bam_variant_calling_single_tiddit/**
+ - subworkflows/local/bam_variant_calling_somatic_all/**
+ - subworkflows/local/bam_variant_calling_tumor_only_all/**
+ - subworkflows/local/post_variantcalling/**
+ - subworkflows/local/vcf_normalization/**
+ - tests/csv/3.0/mapped_joint_bam.csv
+ - tests/test_normalize_vcfs.yml
+
# sampleqc
## ngscheckmate
diff --git a/tests/test_concat_germline_vcfs.yml b/tests/test_concat_germline_vcfs.yml
index 97a2453fbc..bbc1ca3113 100644
--- a/tests/test_concat_germline_vcfs.yml
+++ b/tests/test_concat_germline_vcfs.yml
@@ -4,19 +4,9 @@
- concatenate_vcfs
files:
- path: results/variant_calling/concat/testN/testN.germline.vcf.gz
- # binary changes md5sums on reruns
- contains:
- [
- "SOURCE=testN.deepvariant.vcf.gz",
- "AB=0.167832;ABP=277.102;AC=1;AF=0.5;AN=2;AO=48;CIGAR=1X;DP=286;DPB=286;DPRA=0;EPP=3.0103;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=105.855;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=2017;QR=9863;RO=238;RPL=0;RPP=107.241;RPPR=519.821;RPR=48;RUN=1;SAF=24;SAP=3.0103;SAR=24;SRF=119;SRP=3.0103;SRR=119;TYPE=snp;technology.illumina=1;",
- "SOURCE=testN.freebayes.vcf.gz",
- "SNVHPOL=7;MQ=60;",
- "SOURCE=testN.strelka.variants.vcf.gz",
- "SOURCE=testN.bcftools.vcf.gz",
- ]
- path: results/variant_calling/concat/testT/testT.germline.vcf.gz
- # binary changes md5sums on reruns
+ # binary changes md5sums on reruns
- path: results/variant_calling/concat/testN/testN.germline.vcf.gz.tbi
- # binary changes md5sums on reruns
+ # binary changes md5sums on reruns
- path: results/variant_calling/concat/testT/testT.germline.vcf.gz.tbi
- # binary changes md5sums on reruns
+ # binary changes md5sums on reruns
diff --git a/tests/test_normalize_vcfs.yml b/tests/test_normalize_vcfs.yml
new file mode 100644
index 0000000000..d2e6813fd3
--- /dev/null
+++ b/tests/test_normalize_vcfs.yml
@@ -0,0 +1,33 @@
+- name: Run all variant callers and check for existence of normalized vcf-files
+ command: nextflow run main.nf -profile test --input ./tests/csv/3.0/mapped_joint_bam.csv --normalize_vcfs --tools deepvariant,freebayes,haplotypecaller,manta,mpileup,strelka,tiddit --step variant_calling --outdir results
+ tags:
+ - normalize_vcfs
+ files:
+ - path: results/variant_calling/normalized/testN/testN.deepvariant.norm.vcf.gz
+ - path: results/variant_calling/normalized/testT/testT.deepvariant.norm.vcf.gz
+ - path: results/variant_calling/normalized/testN/testN.deepvariant.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testT/testT.deepvariant.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testN/testN.freebayes.norm.vcf.gz
+ - path: results/variant_calling/normalized/testT/testT.freebayes.norm.vcf.gz
+ - path: results/variant_calling/normalized/testN/testN.freebayes.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testT/testT.freebayes.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testN/testN.haplotypecaller.norm.vcf.gz
+ - path: results/variant_calling/normalized/testT/testT.haplotypecaller.norm.vcf.gz
+ - path: results/variant_calling/normalized/testN/testN.haplotypecaller.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testT/testT.haplotypecaller.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testN/testN.manta.norm.vcf.gz
+ - path: results/variant_calling/normalized/testT/testT.manta.norm.vcf.gz
+ - path: results/variant_calling/normalized/testN/testN.manta.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testT/testT.manta.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testN/testN.bcftools.norm.vcf.gz
+ - path: results/variant_calling/normalized/testT/testT.bcftools.norm.vcf.gz
+ - path: results/variant_calling/normalized/testN/testN.bcftools.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testT/testT.bcftools.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testN/testN.strelka.norm.vcf.gz
+ - path: results/variant_calling/normalized/testT/testT.strelka.norm.vcf.gz
+ - path: results/variant_calling/normalized/testN/testN.strelka.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testT/testT.strelka.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testN/testN.tiddit.norm.vcf.gz
+ - path: results/variant_calling/normalized/testT/testT.tiddit.norm.vcf.gz
+ - path: results/variant_calling/normalized/testN/testN.tiddit.norm.vcf.gz.tbi
+ - path: results/variant_calling/normalized/testT/testT.tiddit.norm.vcf.gz.tbi
diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf
index f554cd9ddd..9c852ca2be 100644
--- a/workflows/sarek/main.nf
+++ b/workflows/sarek/main.nf
@@ -778,21 +778,32 @@ workflow SAREK {
// POST VARIANTCALLING
POST_VARIANTCALLING(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all,
- params.concatenate_vcfs)
+ BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all,
+ BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all,
+ fasta,
+ params.concatenate_vcfs,
+ params.normalize_vcfs)
// Gather vcf files for annotation and QC
vcf_to_annotate = Channel.empty()
- vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_deepvariant)
- vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_freebayes)
- vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_haplotypecaller)
- vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_manta)
- vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_sentieon_dnascope)
- vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_sentieon_haplotyper)
- vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_strelka)
- vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_tiddit)
- vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_mpileup)
- vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all)
- vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all)
+
+ // Check if normalization is requested
+ if (params.normalize_vcfs) {
+ vcf_to_annotate = vcf_to_annotate.mix(POST_VARIANTCALLING.out.vcfs)
+ } else {
+ // If not normalized, gather existing VCFs
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_deepvariant)
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_freebayes)
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_haplotypecaller)
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_manta)
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_sentieon_dnascope)
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_sentieon_haplotyper)
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_strelka)
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_tiddit)
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_mpileup)
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all)
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all)
+ }
// QC
VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_annotate, intervals_bed_combined)