diff --git a/CHANGELOG.md b/CHANGELOG.md
index f251b17d20..058f5c97f9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ A set of connecting glaciers.
### Added
+- [1613](https://github.com/nf-core/sarek/pull/1613) - Add `indexcov`
- [1638](https://github.com/nf-core/sarek/pull/1638) - Added additional documentation detailing ASCAT WES usage.
- [1640](https://github.com/nf-core/sarek/pull/1620) - Add `lofreq` as a tumor-only variant caller
- [1642](https://github.com/nf-core/sarek/pull/1642) - Back to dev
diff --git a/README.md b/README.md
index 33892e0a6b..c4f7b5443b 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,7 @@ Depending on the options and samples provided, the pipeline can currently perfor
- `freebayes`
- `GATK HaplotypeCaller`
- `Manta`
+ - `indexcov`
- `mpileup`
- `MSIsensor-pro`
- `Mutect2`
@@ -171,6 +172,7 @@ We thank the following people for their extensive assistance in the development
- [pallolason](https://github.com/pallolason)
- [Paul Cantalupo](https://github.com/pcantalupo)
- [Phil Ewels](https://github.com/ewels)
+- [Pierre Lindenbaum](https://github.com/lindenb)
- [Sabrina Krakau](https://github.com/skrakau)
- [Sam Minot](https://github.com/sminot)
- [Sebastian-D](https://github.com/Sebastian-D)
diff --git a/conf/modules/indexcov.config b/conf/modules/indexcov.config
new file mode 100644
index 0000000000..082ea3b7cc
--- /dev/null
+++ b/conf/modules/indexcov.config
@@ -0,0 +1,21 @@
+
+// INDEXCOV
+// The selectors below are only applied when 'indexcov' is one of the comma-separated values of params.tools.
+process {
+    if (params.tools && params.tools.split(',').contains('indexcov')) {
+
+        withName: 'SAMTOOLS_REINDEX_BAM' {
+            ext.args = { ' -F 3844 -q 30 ' } // keep MAPQ >= 30; drop unmapped, secondary, QC-fail, duplicate and supplementary reads (3844 = 4+256+512+1024+2048)
+        }
+
+        withName: 'GOLEFT_INDEXCOV' {
+            publishDir = [
+                mode: params.publish_dir_mode,
+                path: { "${params.outdir}/variant_calling/indexcov/" }
+            ]
+            // NOTE(review): no pattern/saveAs filter is set here, so presumably every emitted file (versions.yml included) is published - confirm this is intended
+        }
+
+    }
+
+}
diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png
index f2f20ffc01..a381343500 100644
Binary files a/docs/images/sarek_subway.png and b/docs/images/sarek_subway.png differ
diff --git a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg
index 62544b70fc..6d8d172652 100644
--- a/docs/images/sarek_subway.svg
+++ b/docs/images/sarek_subway.svg
@@ -32,12 +32,12 @@
inkscape:pagecheckerboard="false"
inkscape:document-units="mm"
showgrid="true"
- inkscape:zoom="0.61695405"
- inkscape:cx="709.12898"
- inkscape:cy="403.59569"
- inkscape:window-width="1440"
- inkscape:window-height="847"
- inkscape:window-x="0"
+ inkscape:zoom="1.8101934"
+ inkscape:cx="659.04562"
+ inkscape:cy="459.61941"
+ inkscape:window-width="2560"
+ inkscape:window-height="1027"
+ inkscape:window-x="1512"
inkscape:window-y="25"
inkscape:window-maximized="1"
inkscape:current-layer="layer4"
@@ -795,13 +795,27 @@
id="rect6693-4"
style="display:inline;fill:#e6e6e6;fill-opacity:1;stroke:none;stroke-width:4.00201;stroke-linecap:butt;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:normal" />indexcovdeepvariantfreebayeshaplotypecallerstrelka2tidditmutect2freebayesmantalofreqlofreqExample analysis pathwaysmpileupmpileupSentieon haplotyperSentieon dnascopeSNPs & IndelsMSI
+ style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:4.00004;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" />strelka2
diff --git a/docs/output.md b/docs/output.md
index 6204ada6a5..6d723ba03b 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -45,6 +45,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [Strelka](#strelka)
- [Lofreq](#lofreq)
- [Structural Variants](#structural-variants)
+ - [Indexcov](#indexcov)
- [Manta](#manta)
- [TIDDIT](#tiddit)
- [Sample heterogeneity, ploidy and CNVs](#sample-heterogeneity-ploidy-and-cnvs)
@@ -592,6 +593,30 @@ For further downstream analysis, take a look [here](https://github.com/Illumina/
### Structural Variants
+#### indexcov
+
+[indexcov](https://github.com/brentp/goleft/tree/master/indexcov) quickly estimates coverage from a whole-genome BAM or CRAM index.
+A BAM index has 16KB resolution, and that resolution is used for the coverage estimate.
+The output is scaled to around 1, so a long stretch with values of 1.5 would be a heterozygous duplication. This is useful as a quick QC to get coverage values across the genome.
+
+**Output directory: `{outdir}/variant_calling/indexcov/`**
+
+In addition to the interactive HTML files, `indexcov` outputs a number of text files:
+
+- `-indexcov.ped`: a .ped/.fam file with the inferred sex in the appropriate column if the sex chromosomes were found. It also contains the following columns:
+  - `CNX` and `CNY`: the floating-point estimate of copy-number for the sex chromosomes.
+  - `bins.out`: how many bins had a coverage value outside of (0.85, 1.15). High values can indicate high-bias samples.
+  - `bins.lo`: number of bins with value < 0.15. High values indicate missing data.
+  - `bins.hi`: number of bins with value > 1.15.
+  - `bins.in`: number of bins with value inside of (0.85, 1.15).
+  - `p.out`: `bins.out/bins.in`.
+  - `PC1...PC5`: PCA projections calculated with depth of autosomes.
+
+- `-indexcov.roc`: tab-delimited columns of chrom, scaled coverage cutoff, and one column per sample, where each value indicates the
+  proportion of 16KB blocks at or above that scaled coverage value.
+- `-indexcov.bed.gz`: a bed file with columns of chrom, start, end, and a column per sample where the values indicate the
+  scaled coverage for that sample in that 16KB chunk.
+
#### Manta
[Manta](https://github.com/Illumina/manta) calls structural variants (SVs) and indels from mapped paired-end sequencing reads.
diff --git a/docs/usage.md b/docs/usage.md
index bedacabd11..0a279cd6c5 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -585,6 +585,7 @@ This list is by no means exhaustive and it will depend on the specific analysis
| [mpileup](https://www.htslib.org/doc/samtools-mpileup.html) | x | x | x | x | x | - |
| [Strelka](https://github.com/Illumina/strelka) | x | x | x | x | - | x |
| [Manta](https://github.com/Illumina/manta) | x | x | x | x | x | x |
+| [indexcov](https://github.com/brentp/goleft/tree/master/indexcov) | x | - | - | x | - | x |
| [TIDDIT](https://github.com/SciLifeLab/TIDDIT) | x | x | x | x | x | x |
| [ASCAT](https://github.com/VanLoo-lab/ascat) | x | x | - | - | - | x |
| [CNVKit](https://cnvkit.readthedocs.io/en/stable/) | x | x | - | x | x | x |
@@ -921,30 +922,30 @@ nextflow run nf-core/sarek --known_indels false --genome GRCh38.GATK
For GATK.GRCh38 the links for each reference file and the corresponding processes that use them is listed below. For GATK.GRCh37 the files originate from the same sources:
-| File | Tools | Origin | Docs |
-| :-------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------- |
-| ascat_alleles | ASCAT | https://www.dropbox.com/s/uouszfktzgoqfy7/G1000_alleles_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS |
-| ascat_loci | ASCAT | https://www.dropbox.com/s/80cq0qgao8l1inj/G1000_loci_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS |
-| ascat_loci_gc | ASCAT | https://www.dropbox.com/s/80cq0qgao8l1inj/G1000_loci_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS |
-| ascat_loci_rt | ASCAT | https://www.dropbox.com/s/xlp99uneqh6nh6p/RT_G1000_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS |
-| bwa | bwa-mem | `bwa index -p bwa/${fasta.baseName} $fasta` | |
-| bwamem2 | bwa-mem2 | `bwa-mem2 index -p bwamem2/${fasta} $fasta` | |
-| dragmap | DragMap | `dragen-os --build-hash-table true --ht-reference $fasta --output-directory dragmap` | |
-| dbsnp | Baserecalibrator, ControlFREEC, GenotypeGVCF, HaplotypeCaller | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle |
-| dbsnp_tbi | Baserecalibrator, ControlFREEC, GenotypeGVCF, HaplotypeCaller | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
-| dict | Baserecalibrator(Spark), CNNScoreVariant, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, MarkDulpicates(Spark), MergeVCFs, Mutect2, Variantrecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle |
-| fasta | ApplyBQSR(Spark), ApplyVQSR, ASCAT, Baserecalibrator(Spark), BWA, BWAMem2, CNNScoreVariant, CNVKit, ControlFREEC, DragMap, DEEPVariant, EnsemblVEP, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, FreeBayes, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, interval building, Manta, MarkDuplicates(Spark),MergeVCFs,MSISensorPro, Mutect2, Samtools, SnpEff, Strelka, Tiddit, Variantrecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle |
-| fasta_fai | ApplyBQSR(Spark), ApplyVQSR, ASCAT, Baserecalibrator(Spark), BWA, BWAMem2, CNNScoreVariant, CNVKit, ControlFREEC, DragMap, DEEPVariant, EnsemblVEP, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, FreeBayes, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, interval building, Manta, MarkDuplicates(Spark),MergeVCFs,MSISensorPro, Mutect2, Samtools, SnpEff, Strelka, Tiddit, Variantrecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle |
-| germline_resource | GetPileupsummaries,Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
-| germline_resource_tbi | GetPileupsummaries,Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
-| intervals | ApplyBQSR(Spark), ASCAT, Baserecalibrator(Spark), BCFTools, CNNScoreVariants, ControlFREEC, Deepvariant, FilterVariantTranches, FreeBayes, GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, Strelka, mpileup, MSISensorPro, Mutect2, VCFTools | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
-| known_indels | BaseRecalibrator(Spark), FilterVariantTranches | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
-| known_indels_tbi | BaseRecalibrator(Spark), FilterVariantTranches | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
-| known_snps | BaseRecalibrator(Spark), FilterVariantTranches, VariantRecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
-| known_snps_tbi | BaseRecalibrator(Spark), FilterVariantTranches, VariantRecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) |
-| mappability | ControlFREEC | http://xfer.curie.fr/get/vyIi4w8EONl/out100m2_hg38.zip | http://boevalab.inf.ethz.ch/FREEC/tutorial.html |
-| pon | Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- |
-| pon_tbi | Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- |
+| File | Tools | Origin | Docs |
+| :-------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------- |
+| ascat_alleles | ASCAT | https://www.dropbox.com/s/uouszfktzgoqfy7/G1000_alleles_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS |
+| ascat_loci | ASCAT | https://www.dropbox.com/s/80cq0qgao8l1inj/G1000_loci_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS |
+| ascat_loci_gc | ASCAT | https://www.dropbox.com/s/80cq0qgao8l1inj/G1000_loci_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS |
+| ascat_loci_rt | ASCAT | https://www.dropbox.com/s/xlp99uneqh6nh6p/RT_G1000_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS |
+| bwa | bwa-mem | `bwa index -p bwa/${fasta.baseName} $fasta` | |
+| bwamem2 | bwa-mem2 | `bwa-mem2 index -p bwamem2/${fasta} $fasta` | |
+| dragmap | DragMap | `dragen-os --build-hash-table true --ht-reference $fasta --output-directory dragmap` | |
+| dbsnp | Baserecalibrator, ControlFREEC, GenotypeGVCF, HaplotypeCaller | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle |
+| dbsnp_tbi | Baserecalibrator, ControlFREEC, GenotypeGVCF, HaplotypeCaller | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
+| dict | Baserecalibrator(Spark), CNNScoreVariant, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, MarkDulpicates(Spark), MergeVCFs, Mutect2, Variantrecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle |
+| fasta | ApplyBQSR(Spark), ApplyVQSR, ASCAT, Baserecalibrator(Spark), BWA, BWAMem2, CNNScoreVariant, CNVKit, ControlFREEC, DragMap, DEEPVariant, EnsemblVEP, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, FreeBayes, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, indexcov, interval building, Manta, MarkDuplicates(Spark),MergeVCFs,MSISensorPro, Mutect2, Samtools, SnpEff, Strelka, Tiddit, Variantrecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle |
+| fasta_fai | ApplyBQSR(Spark), ApplyVQSR, ASCAT, Baserecalibrator(Spark), BWA, BWAMem2, CNNScoreVariant, CNVKit, ControlFREEC, DragMap, DEEPVariant, EnsemblVEP, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, FreeBayes, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, indexcov, interval building, Manta, MarkDuplicates(Spark),MergeVCFs,MSISensorPro, Mutect2, Samtools, SnpEff, Strelka, Tiddit, Variantrecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle |
+| germline_resource | GetPileupsummaries,Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
+| germline_resource_tbi | GetPileupsummaries,Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
+| intervals | ApplyBQSR(Spark), ASCAT, Baserecalibrator(Spark), BCFTools, CNNScoreVariants, ControlFREEC, Deepvariant, FilterVariantTranches, FreeBayes, GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, Strelka, mpileup, MSISensorPro, Mutect2, VCFTools | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
+| known_indels | BaseRecalibrator(Spark), FilterVariantTranches | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
+| known_indels_tbi | BaseRecalibrator(Spark), FilterVariantTranches | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
+| known_snps | BaseRecalibrator(Spark), FilterVariantTranches, VariantRecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | |
+| known_snps_tbi | BaseRecalibrator(Spark), FilterVariantTranches, VariantRecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) |
+| mappability | ControlFREEC | http://xfer.curie.fr/get/vyIi4w8EONl/out100m2_hg38.zip | http://boevalab.inf.ethz.ch/FREEC/tutorial.html |
+| pon | Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- |
+| pon_tbi | Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- |
## How to customise SnpEff and VEP annotation
diff --git a/modules.json b/modules.json
index 1b00aac7e4..ad4fd57616 100644
--- a/modules.json
+++ b/modules.json
@@ -310,6 +310,11 @@
"git_sha": "97321eded31a12598837a476d3615300af413bb7",
"installed_by": ["modules"]
},
+ "goleft/indexcov": {
+ "branch": "master",
+ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
+ "installed_by": ["modules"]
+ },
"lofreq/callparallel": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
diff --git a/modules/local/samtools/reindex_bam/environment.yml b/modules/local/samtools/reindex_bam/environment.yml
new file mode 100644
index 0000000000..da2df5e43a
--- /dev/null
+++ b/modules/local/samtools/reindex_bam/environment.yml
@@ -0,0 +1,6 @@
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::samtools=1.20
+ - bioconda::htslib=1.20
diff --git a/modules/local/samtools/reindex_bam/main.nf b/modules/local/samtools/reindex_bam/main.nf
new file mode 100644
index 0000000000..153f9093d6
--- /dev/null
+++ b/modules/local/samtools/reindex_bam/main.nf
@@ -0,0 +1,57 @@
+/**
+ * Prepares input for goleft/indexcov: emits a header-only BAM `${meta.id}.reindex.bam` (so indexcov can get the sample name) and a fresh index `${meta.id}.reindex.bam.bai`.
+ * The index is built from the reads kept by `task.ext.args` (e.g. without duplicate, supplementary or unmapped reads) while the alignments themselves are sent to /dev/null; `input_index` and `fai` are staged but not referenced by the script.
+ */
+process SAMTOOLS_REINDEX_BAM {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' :
+        'biocontainers/samtools:1.20--h50ea8bc_0' }"
+
+    input:
+    tuple val(meta), path(input), path(input_index)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+
+    output:
+    tuple val(meta), path("${meta.id}.reindex.bam"), path("${meta.id}.reindex.bam.bai"),emit: output
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def reference = fasta ? "--reference ${fasta}" : ""
+    """
+    # write header only
+    samtools \\
+        view \\
+        --header-only \\
+        --threads ${task.cpus} \\
+        -O BAM \\
+        -o "${meta.id}.reindex.bam" \\
+        ${reference} \\
+        ${input}
+
+    # write BAM index only, remove unmapped, supplementary, etc...
+    samtools \\
+        view \\
+        --uncompressed \\
+        --write-index \\
+        --threads ${task.cpus} \\
+        -O BAM \\
+        -o "/dev/null##idx##${meta.id}.reindex.bam.bai" \\
+        ${reference} \\
+        ${args} \\
+        ${input}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/goleft/indexcov/environment.yml b/modules/nf-core/goleft/indexcov/environment.yml
new file mode 100644
index 0000000000..813146929c
--- /dev/null
+++ b/modules/nf-core/goleft/indexcov/environment.yml
@@ -0,0 +1,6 @@
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::goleft=0.2.4
+ - bioconda::htslib=1.12
diff --git a/modules/nf-core/goleft/indexcov/main.nf b/modules/nf-core/goleft/indexcov/main.nf
new file mode 100644
index 0000000000..5d0ed5dfb0
--- /dev/null
+++ b/modules/nf-core/goleft/indexcov/main.nf
@@ -0,0 +1,65 @@
+process GOLEFT_INDEXCOV {
+ tag "${meta.id}"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/goleft:0.2.4--h9ee0642_1':
+ 'biocontainers/goleft:0.2.4--h9ee0642_1' }"
+
+ input:
+ tuple val(meta), path(bams), path(indexes)
+ tuple val(meta2), path(fai)
+
+ output:
+ tuple val(meta), path("${prefix}/*") , emit: output
+ tuple val(meta), path("${prefix}/*ped") , emit: ped , optional: true
+ tuple val(meta), path("${prefix}/*bed.gz") , emit: bed , optional: true
+ tuple val(meta), path("${prefix}/*bed.gz.tbi"), emit: bed_index , optional: true
+ tuple val(meta), path("${prefix}/*roc") , emit: roc , optional: true
+ tuple val(meta), path("${prefix}/*html") , emit: html, optional: true
+ tuple val(meta), path("${prefix}/*png") , emit: png , optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}"
+ // indexcov uses BAM files or CRAI
+ def input_files = bams.findAll{it.name.endsWith(".bam")} + indexes.findAll{it.name.endsWith(".crai")}
+ def extranormalize = input_files.any{it.name.endsWith(".crai")} ? " --extranormalize " : ""
+ """
+ goleft indexcov \\
+ --fai ${fai} \\
+ --directory ${prefix} \\
+ ${extranormalize} \\
+ $args \\
+ ${input_files.join(" ")}
+
+ if [ -f "${prefix}/${prefix}-indexcov.bed.gz" ] ; then
+ tabix -p bed "${prefix}/${prefix}-indexcov.bed.gz"
+ fi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ goleft: \$(goleft --version 2>&1 | head -n 1 | sed 's/^.*goleft Version: //')
+ tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+ END_VERSIONS
+ """
+ stub:
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ mkdir "${prefix}"
+ echo "" | gzip > "${prefix}/${prefix}-indexcov.bed.gz"
+ touch "${prefix}/${prefix}-indexcov.bed.gz.tbi"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ goleft: \$(goleft --version 2>&1 | head -n 1 | sed 's/^.*goleft Version: //')
+ tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/goleft/indexcov/meta.yml b/modules/nf-core/goleft/indexcov/meta.yml
new file mode 100644
index 0000000000..1619caf32d
--- /dev/null
+++ b/modules/nf-core/goleft/indexcov/meta.yml
@@ -0,0 +1,122 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: "goleft_indexcov"
+description: Quickly estimate coverage from a whole-genome bam or cram index. A bam
+ index has 16KB resolution so that's what this gives, but it provides what appears
+ to be a high-quality coverage estimate in seconds per genome.
+keywords:
+ - coverage
+ - cnv
+ - genomics
+ - depth
+tools:
+ - "goleft":
+ description: "goleft is a collection of bioinformatics tools distributed under
+ MIT license in a single static binary"
+ homepage: "https://github.com/brentp/goleft"
+ documentation: "https://github.com/brentp/goleft"
+ tool_dev_url: "https://github.com/brentp/goleft"
+ doi: "10.1093/gigascience/gix090"
+ licence: ["MIT"]
+ identifier: ""
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false]
+ - bams:
+ type: file
+ description: Sorted BAM/CRAM/SAM files
+ pattern: "*.{bam,cram,sam}"
+ - indexes:
+ type: file
+ description: BAI/CRAI files
+ pattern: "*.{bai,crai}"
+ - - meta2:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false]
+ - fai:
+ type: file
+ description: FASTA index
+ pattern: "*.{fai}"
+output:
+ - output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}/*:
+ type: file
+ description: Files generated by indexcov
+ - ped:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}/*ped:
+ type: file
+ description: ped files
+ pattern: "*ped"
+ - bed:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}/*bed.gz:
+ type: file
+ description: bed files
+ pattern: "*bed.gz"
+ - bed_index:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}/*bed.gz.tbi:
+ type: file
+ description: bed index files
+ pattern: "*bed.gz.tbi"
+ - roc:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}/*roc:
+ type: file
+ description: roc files
+ pattern: "*roc"
+ - html:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}/*html:
+ type: file
+ description: html files
+ pattern: "*html"
+ - png:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}/*png:
+ type: file
+ description: png files
+ pattern: "*png"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@lindenb"
+maintainers:
+ - "@lindenb"
diff --git a/modules/nf-core/goleft/indexcov/tests/main.nf.test b/modules/nf-core/goleft/indexcov/tests/main.nf.test
new file mode 100644
index 0000000000..1296c644cd
--- /dev/null
+++ b/modules/nf-core/goleft/indexcov/tests/main.nf.test
@@ -0,0 +1,131 @@
+nextflow_process {
+
+ name "Test Process GOLEFT_INDEXCOV"
+ script "../main.nf"
+ process "GOLEFT_INDEXCOV"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "goleft"
+ tag "goleft/indexcov"
+
+ test("sarscov2 - bam") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test' ], // meta map
+ [
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam", checkIfExists: true),
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true)
+ ],
+ [
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai", checkIfExists: true),
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true)
+ ],
+ ])
+
+ input[1] = Channel.of(
+ [
+ [:],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta.fai", checkIfExists: true)
+ ]
+ )
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.ped,
+ process.out.bed,
+ file(process.out.bed_index[0][1]).name,
+ process.out.roc,
+ process.out.html,
+ process.out.png,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+
+ test("sarscov2 - crai") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test' ], // meta map
+ [
+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram", checkIfExists: true),
+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram", checkIfExists: true)
+ ],
+ [
+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram.crai", checkIfExists: true),
+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai", checkIfExists: true)
+ ]
+ ])
+
+ input[1] = Channel.of(
+ [
+ [:],
+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai", checkIfExists: true)
+ ]
+ )
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.ped,
+ process.out.bed,
+ file(process.out.bed_index[0][1]).name,
+ process.out.roc,
+ process.out.html,
+ process.out.png,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test' ], // meta map
+ [],
+ []
+ ])
+
+ input[1] = Channel.of([
+ [:],
+ []
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/goleft/indexcov/tests/main.nf.test.snap b/modules/nf-core/goleft/indexcov/tests/main.nf.test.snap
new file mode 100644
index 0000000000..1c79232db0
--- /dev/null
+++ b/modules/nf-core/goleft/indexcov/tests/main.nf.test.snap
@@ -0,0 +1,205 @@
+{
+ "sarscov2 - crai": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test-indexcov.ped:md5,8737714b6ea160e06d5282391f89f791"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test-indexcov.bed.gz:md5,04aa3637cffca5d99316df7741c06589"
+ ]
+ ],
+ "test-indexcov.bed.gz.tbi",
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test-indexcov.roc:md5,548b76fdf16e97768b0c9b8ecbfd5bef"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ [
+ "index.html:md5,41840ede180b20cdf6074c431269929e",
+ "test-indexcov-depth-chr21.html:md5,4c839b03f2f41e3fdca5642903c35008",
+ "test-indexcov-roc-chr21.html:md5,f84b547328a23196f16f71d093eb7450"
+ ]
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ [
+ "test-indexcov-depth-chr21.png:md5,1999b0bf1cd0680f6d107d438e7257cf",
+ "test-indexcov-roc-chr21.png:md5,41f1460535b255fff053da59fcccf698"
+ ]
+ ]
+ ],
+ [
+ "versions.yml:md5,f9c06c1c05a2a31854b4e04e449a24c5"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-08-22T06:40:17.142801459"
+ },
+ "sarscov2 - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ [
+ "test-indexcov.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test-indexcov.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test-indexcov.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test"
+ },
+ "test-indexcov.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+
+ ],
+ "6": [
+
+ ],
+ "7": [
+ "versions.yml:md5,f9c06c1c05a2a31854b4e04e449a24c5"
+ ],
+ "bed": [
+ [
+ {
+ "id": "test"
+ },
+ "test-indexcov.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "bed_index": [
+ [
+ {
+ "id": "test"
+ },
+ "test-indexcov.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "html": [
+
+ ],
+ "output": [
+ [
+ {
+ "id": "test"
+ },
+ [
+ "test-indexcov.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test-indexcov.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "ped": [
+
+ ],
+ "png": [
+
+ ],
+ "roc": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,f9c06c1c05a2a31854b4e04e449a24c5"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-08-22T06:44:59.203730744"
+ },
+ "sarscov2 - bam": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test-indexcov.ped:md5,da2bd9882474d2f00f8ad2ab20b140c9"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test-indexcov.bed.gz:md5,eab7a78287e261d600c06def12a33029"
+ ]
+ ],
+ "test-indexcov.bed.gz.tbi",
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test-indexcov.roc:md5,3f460308bb86203d1ada71b7c84d995d"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "index.html:md5,d1cc28023cd827446e0f9c905c94fe3e"
+ ]
+ ],
+ [
+
+ ],
+ [
+ "versions.yml:md5,f9c06c1c05a2a31854b4e04e449a24c5"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-08-22T06:39:48.470187823"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/goleft/indexcov/tests/tags.yml b/modules/nf-core/goleft/indexcov/tests/tags.yml
new file mode 100644
index 0000000000..c27c4b9d5e
--- /dev/null
+++ b/modules/nf-core/goleft/indexcov/tests/tags.yml
@@ -0,0 +1,2 @@
+goleft/indexcov:
+ - "modules/nf-core/goleft/indexcov/**"
diff --git a/nextflow.config b/nextflow.config
index d3baa4fdb4..1268aab63a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -460,6 +460,7 @@ includeConfig 'conf/modules/controlfreec.config'
includeConfig 'conf/modules/deepvariant.config'
includeConfig 'conf/modules/freebayes.config'
includeConfig 'conf/modules/haplotypecaller.config'
+includeConfig 'conf/modules/indexcov.config'
includeConfig 'conf/modules/joint_germline.config'
includeConfig 'conf/modules/manta.config'
includeConfig 'conf/modules/mpileup.config'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 2e66ccdf53..5cdf35d555 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -111,8 +111,8 @@
"type": "string",
"fa_icon": "fas fa-toolbox",
"description": "Tools to use for duplicate marking, variant calling and/or for annotation.",
- "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Lofreq, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
- "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|lofreq|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
+ "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|lofreq|sentieon_dnascope|sentieon_haplotyper|manta|indexcov|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? [ counts ] })
reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_qual.collect{ meta, qual -> [ qual ] })
reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_filter_summary.collect{ meta, summary -> [ summary ] })
+ reports = reports.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.out_indexcov.collect{ meta, indexcov -> indexcov.flatten() })
+ reports = reports.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.out_indexcov.collect{ meta, indexcov -> indexcov.flatten() })
CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_annotate, params.outdir)