diff --git a/CHANGELOG.md b/CHANGELOG.md index 6db667ce..fef1c805 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- Restrict deepvariant analysis of WES samples to bait regions [#633](https://github.com/nf-core/raredisease/pull/633) +- Restrict deepvariant analysis of WES samples to bait regions [#633](https://github.com/nf-core/raredisease/pull/633), [#658](https://github.com/nf-core/raredisease/pull/658) - bcftools annotate declaration in annotate CADD subworkflow [#624](https://github.com/nf-core/raredisease/pull/624) - Rhocallviz subworkflow will only be invocated once per sample [#621](https://github.com/nf-core/raredisease/pull/621) - Updated createCaseChannel function to include a check for maternal and paternal ids being set to a numeric 0 [#643](https://github.com/nf-core/raredisease/pull/643) diff --git a/CITATIONS.md b/CITATIONS.md index c28f2ec2..27a30628 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -16,6 +16,10 @@ > Danecek P, Bonfield JK, Liddle J, et al. Twelve years of SAMtools and BCFtools. GigaScience. 2021;10(2):giab008. doi:10.1093/gigascience/giab008 +- [BEDTools](https://academic.oup.com/bioinformatics/article/26/6/841/244688) + + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010;26(6):841-842. doi:10.1093/bioinformatics/btq033 + - [BWA-MEM](https://arxiv.org/abs/1303.3997) > Li H. Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. Published online May 26, 2013. Accessed March 14, 2023. 
http://arxiv.org/abs/1303.3997 diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 89a2a1eb..894b5959 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -104,6 +104,16 @@ process { ext.args2 = '--csi' } + withName: '.*PREPARE_REFERENCES:BEDTOOLS_PAD_TARGET_BED' { + ext.when = { !params.target_bed.equals(null) && params.bait_padding > 0 } + ext.prefix = { "${meta.id}_pad${params.bait_padding}" } + ext.args = { "-b ${params.bait_padding}" } + } + + withName: '.*PREPARE_REFERENCES:TABIX_BGZIPINDEX_PADDED_BED' { + ext.prefix = { "${meta.id}_pad${params.bait_padding}" } + } + withName: '.*PREPARE_REFERENCES:GATK_BILT' { ext.when = { !params.target_bed.equals(null) } ext.prefix = { "${meta.id}_target" } diff --git a/modules.json b/modules.json index ad388fcf..82de9d5b 100644 --- a/modules.json +++ b/modules.json @@ -55,6 +55,11 @@ "git_sha": "bfa8975eefb8df3e480a44ac9e594f23f52b2963", "installed_by": ["modules"] }, + "bedtools/slop": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "bwa/index": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/bedtools/slop/environment.yml b/modules/nf-core/bedtools/slop/environment.yml new file mode 100644 index 00000000..5683bc05 --- /dev/null +++ b/modules/nf-core/bedtools/slop/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/slop/main.nf b/modules/nf-core/bedtools/slop/main.nf new file mode 100644 index 00000000..e5b8e1ef --- /dev/null +++ b/modules/nf-core/bedtools/slop/main.nf @@ -0,0 +1,49 @@ +process BEDTOOLS_SLOP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bed) + path sizes + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + slop \\ + -i $bed \\ + -g $sizes \\ + $args \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/slop/meta.yml b/modules/nf-core/bedtools/slop/meta.yml new file mode 100644 index 00000000..762db534 --- /dev/null +++ b/modules/nf-core/bedtools/slop/meta.yml @@ -0,0 +1,51 @@ +name: bedtools_slop +description: Adds a specified number of bases in each direction (unique values may + be specified for either -l or -r) +keywords: + - bed + - slopBed + - bedtools +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/slop.html + licence: ["MIT"] + identifier: biotools:bedtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bed: + type: file + description: Input BED file + pattern: "*.{bed}" + - - sizes: + type: file + description: Chromosome sizes file +output: + - bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bed": + type: file + description: Slopped BED file + pattern: "*.{bed}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" diff --git a/modules/nf-core/bedtools/slop/tests/main.nf.test b/modules/nf-core/bedtools/slop/tests/main.nf.test new file mode 100644 index 00000000..c8dccc8e --- /dev/null +++ b/modules/nf-core/bedtools/slop/tests/main.nf.test @@ -0,0 +1,36 @@ + +nextflow_process { + + name "Test Process BEDTOOLS_SLOP" + script "../main.nf" + process "BEDTOOLS_SLOP" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/slop" + + test("test-bedtools-slop") { + + when { + process { + """ + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/bedtools/slop/tests/main.nf.test.snap b/modules/nf-core/bedtools/slop/tests/main.nf.test.snap new file mode 100644 index 00000000..899ac21b --- /dev/null +++ b/modules/nf-core/bedtools/slop/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test-bedtools-slop": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_out.bed:md5,4f1d8924925fe5d205c9e1981fe290a4" + ] + ], + "1": [ + 
"versions.yml:md5,ee6210f0a2c4a60d9cad324bfe18e0cf" + ], + "bed": [ + [ + { + "id": "test" + }, + "test_out.bed:md5,4f1d8924925fe5d205c9e1981fe290a4" + ] + ], + "versions": [ + "versions.yml:md5,ee6210f0a2c4a60d9cad324bfe18e0cf" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-26T13:52:04.945029" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/slop/tests/nextflow.config b/modules/nf-core/bedtools/slop/tests/nextflow.config new file mode 100644 index 00000000..fef75481 --- /dev/null +++ b/modules/nf-core/bedtools/slop/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: BEDTOOLS_SLOP { + ext.args = '-l 15 -r 30' + ext.prefix = { "${meta.id}_out" } + } +} diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 99b291c9..2ea912a1 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -2,6 +2,7 @@ // Prepare reference files // +include { BEDTOOLS_SLOP as BEDTOOLS_PAD_TARGET_BED } from '../../modules/nf-core/bedtools/slop/main' include { BWA_INDEX as BWA_INDEX_GENOME } from '../../modules/nf-core/bwa/index/main' include { BWA_INDEX as BWA_INDEX_MT } from '../../modules/nf-core/bwa/index/main' include { BWA_INDEX as BWA_INDEX_MT_SHIFT } from '../../modules/nf-core/bwa/index/main' @@ -24,6 +25,7 @@ include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modul include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT } from '../../modules/nf-core/sentieon/bwaindex/main' include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT_SHIFT } from '../../modules/nf-core/sentieon/bwaindex/main' include { TABIX_BGZIPTABIX as TABIX_PBT } from '../../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_BGZIPINDEX_PADDED_BED } from '../../modules/nf-core/tabix/bgziptabix/main' include { TABIX_BGZIPTABIX as TABIX_BGZIPINDEX_VCFANNOEXTRA } from 
'../../modules/nf-core/tabix/bgziptabix/main' include { TABIX_TABIX as TABIX_VCFANNOEXTRA } from '../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_DBSNP } from '../../modules/nf-core/tabix/tabix/main' @@ -97,8 +99,18 @@ workflow PREPARE_REFERENCES { // Vcf, tab and bed indices TABIX_DBSNP(ch_known_dbsnp) TABIX_GNOMAD_AF(ch_gnomad_af_tab) - TABIX_PT(ch_target_bed).tbi.set { ch_tbi } - TABIX_PBT(ch_target_bed).gz_tbi.set { ch_bgzip_tbi } + + // Index target bed file in case of gz input + TABIX_PT(ch_target_bed) + ch_target_bed + .join(TABIX_PT.out.tbi) + .set{ ch_trgt_bed_tbi } + // Compress and index target bed file in case of uncompressed input + TABIX_PBT(ch_target_bed).gz_tbi + .set { ch_bgzip_tbi } + ch_target_bed_gz_tbi = Channel.empty() + .mix(ch_trgt_bed_tbi, ch_bgzip_tbi) + ch_vcfanno_extra_unprocessed .branch { it -> bgzipindex: !it[1].toString().endsWith(".gz") @@ -121,6 +133,15 @@ workflow PREPARE_REFERENCES { .mix(ch_vcfanno_bgzip, ch_vcfanno_index) .collect() .set{ch_vcfanno_extra} + + // Pad bed file + BEDTOOLS_PAD_TARGET_BED( + ch_target_bed, + ch_fai.map { _meta, fai -> return fai } + ) + TABIX_BGZIPINDEX_PADDED_BED(BEDTOOLS_PAD_TARGET_BED.out.bed).gz_tbi + .set { ch_target_bed_gz_tbi } + // Generate bait and target intervals GATK_BILT(ch_target_bed, ch_dict).interval_list GATK_ILT(GATK_BILT.out.interval_list) @@ -163,6 +184,8 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(TABIX_BGZIPINDEX_VCFANNOEXTRA.out.versions) ch_versions = ch_versions.mix(TABIX_VCFANNOEXTRA.out.versions) ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions) + ch_versions = ch_versions.mix(BEDTOOLS_PAD_TARGET_BED.out.versions) + ch_versions = ch_versions.mix(TABIX_BGZIPINDEX_PADDED_BED.out.versions) ch_versions = ch_versions.mix(GATK_BILT.out.versions) ch_versions = ch_versions.mix(GATK_ILT.out.versions) ch_versions = ch_versions.mix(CAT_CAT_BAIT.out.versions) @@ -190,10 +213,9 @@ workflow PREPARE_REFERENCES { mtshift_fasta = 
GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fasta) ] mtshift_bwa_index = ch_bwa_mtshift // channel: [ val(meta), path(index) ] mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] - gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] - target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ] + target_bed = ch_target_bed_gz_tbi.collect() // channel: [ val(meta), path(bed), path(tbi) ] vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ [path(vcf), path(tbi)] ] bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect().ifEmpty([[]]) // channel: [ path(intervals) ] target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ] diff --git a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf index 00a176ab..24e7ed73 100644 --- a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf @@ -302,6 +302,7 @@ def toolCitationText() { ] other_citation_text = [ "BCFtools (Danecek et al., 2021),", + "BEDTools (Quinlan & Hall, 2010),", "GATK (McKenna et al., 2010),", "MultiQC (Ewels et al. 2016),", params.skip_peddy ? "" : "Peddy (Pedersen & Quinlan, 2017),", @@ -432,7 +433,8 @@ def toolBibliographyText() { params.run_rtgvcfeval ? "
  • Cleary, J. G., Braithwaite, R., Gaastra, K., Hilbush, B. S., Inglis, S., Irvine, S. A., Jackson, A., Littin, R., Rathod, M., Ware, D., Zook, J. M., Trigg, L., & Vega, F. M. D. L. (2015). Comparing Variant Call Files for Performance Benchmarking of Next-Generation Sequencing Variant Calling Pipelines (p. 023754). bioRxiv. https://doi.org/10.1101/023754
  • " : "", "
  • Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., Marth, G., Abecasis, G., Durbin, R., & 1000 Genome Project Data Processing Subgroup. (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics, 25(16), 2078–2079. https://doi.org/10.1093/bioinformatics/btp352
  • ", (!params.skip_smncopynumbercaller && params.analysis_type.equals("wgs")) ? "
  • Chen, X., Sanchis-Juan, A., French, C. E., Connell, A. J., Delon, I., Kingsbury, Z., Chawla, A., Halpern, A. L., Taft, R. J., Bentley, D. R., Butchbach, M. E. R., Raymond, F. L., & Eberle, M. A. (2020). Spinal muscular atrophy diagnosis and carrier screening from genome sequencing data. Genetics in Medicine, 22(5), 945–953. https://doi.org/10.1038/s41436-020-0754-0
  • " : "", - "
  • Li, H. (2011). Tabix: Fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics, 27(5), 718–719. https://doi.org/10.1093/bioinformatics/btq671
  • " + "
  • Li, H. (2011). Tabix: Fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics, 27(5), 718–719. https://doi.org/10.1093/bioinformatics/btq671
  • ", + "
  • Quinlan, A. R., & Hall, I. M. (2010). BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics, 26(6), 841–842. https://doi.org/10.1093/bioinformatics/btq033
  • " ] def concat_text = align_text +