From b0cf83853f9ee3c30f70ee687875d474539dccde Mon Sep 17 00:00:00 2001 From: alxndrdiaz Date: Mon, 14 Oct 2024 21:53:46 -0600 Subject: [PATCH 01/31] add module parameters --- conf/modules.config | 28 ++++++++++++++++++++++++++++ nextflow.config | 8 ++++++++ nextflow_schema.json | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 2ed4477..8c4bd93 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -50,4 +50,32 @@ process { withName: MALT_BUILD { ext.args = { "--sequenceType ${params.malt_sequencetype}" } } + + withName: BRACKEN_BUILD { + ext.args = { "${params.bracken_build_params}" } + } + + withName: KRAKEN2_BUILD { + ext.args = { "${params.kraken2_build_params}" } + } + + withName: KRAKENUNIQ_BUILD { + ext.args = { "${params.krakenuniq_build_params}" } + } + + withName: CENTRIFUGE_BUILD { + ext.args = { "${params.centrifuge_build_params}" } + } + + withName: DIAMOND_MAKEDB { + ext.args = { "${params.diamond_build_params}" } + } + + withName: KAIJU_MKFMI { + ext.args = { "${params.kaiju_build_params}" } + } + + withName: MALT_BUILD { + ext.args = { "${params.malt_build_params}" } + } } diff --git a/nextflow.config b/nextflow.config index 6eac678..336e537 100644 --- a/nextflow.config +++ b/nextflow.config @@ -67,6 +67,14 @@ params { build_kraken2 = false kraken2_keepintermediate = false build_krakenuniq = false + bracken_build_params = null + kraken2_build_params = null + krakenuniq_build_params = null + centrifuge_build_params = null + diamond_build_params = null + kaiju_build_params = null + malt_build_params = null + } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 0553172..b5be852 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -155,6 +155,41 @@ "type": "boolean", "fa_icon": "fas fa-toggle-on", "description": "Turn on building of KrakenUniq database. Requires nucleotide FASTA file input." 
+ }, + "bracken_build_params": { + "type": "string", + "description": "Specify parameters being given to bracken build", + "help_text": "See [Bracken documentation](https://github.com/jenniferlu717/Bracken?tab=readme-ov-file#step-1-generate-the-bracken-database-file-databasexmerskmer_distrib)" + }, + "kraken2_build_params": { + "type": "string", + "description": "Specify parameters being given to kraken2 build", + "help_text": "See [Kraken2 documentation](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown#custom-databases)" + }, + "krakenuniq_build_params": { + "type": "string", + "description": "Specify parameters being given to krakenuniq-build", + "help_text": "See [KrakenUniq documentation](https://github.com/fbreitwieser/krakenuniq#database-building)" + }, + "centrifuge_build_params": { + "type": "string", + "description": "Specify parameters being given to centrifuge-build", + "help_text": "See [Centrifuge documentation](https://github.com/DaehwanKimLab/centrifuge/blob/master/MANUAL.markdown#database-download-and-index-building)" + }, + "diamond_build_params": { + "type": "string", + "description": "Specify parameters being given to diamond makedb", + "help_text": "See [diamond documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options)" + }, + "kaiju_build_params": { + "type": "string", + "description": "Specify parameters being given to kaiju-mkbwt", + "help_text": "See [Kaiju documentation](https://github.com/bioinformatics-centre/kaiju/tree/master#custom-database)" + }, + "malt_build_params": { + "type": "string", + "description": "Specify parameters being given to malt-build, exception: sequenceType", + "help_text": "See [The MALT index builder, in the MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/welcome.html)" } }, "fa_icon": "fas fa-database" From ff2c1a2b2cda96e94ae5d714e7c508de0fb2fd7d Mon Sep 17 00:00:00 2001 From: alxndrdiaz Date: Tue, 15 Oct 2024 10:37:18 -0600 Subject: [PATCH 02/31] linting --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 8c4bd93..8b03ecc 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -74,7 +74,7 @@ process { withName: KAIJU_MKFMI { ext.args = { "${params.kaiju_build_params}" } } - + withName: MALT_BUILD { ext.args = { "${params.malt_build_params}" } } From 55ff9fdbfa16739541e1e2d6f65ce6165b3fdabf Mon Sep 17 00:00:00 2001 From: alxndrdiaz Date: Tue, 15 Oct 2024 10:46:10 -0600 Subject: [PATCH 03/31] linting --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 8b03ecc..8863307 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -62,7 +62,7 @@ process { withName: KRAKENUNIQ_BUILD { ext.args = { "${params.krakenuniq_build_params}" } } - + withName: CENTRIFUGE_BUILD { ext.args = { "${params.centrifuge_build_params}" } } From 66133af6399d939036f86cd16db26fc519194a99 Mon Sep 17 00:00:00 2001 From: alxndrdiaz Date: Wed, 16 Oct 2024 12:01:45 -0600 Subject: [PATCH 04/31] empty string instead of null --- conf/modules.config | 7 ------- nextflow.config | 12 +++++------- nextflow_schema.json | 10 ---------- 3 files changed, 5 insertions(+), 24 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 8863307..f239716 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -59,10 +59,6 @@ process { ext.args = { "${params.kraken2_build_params}" } } - withName: 
KRAKENUNIQ_BUILD { - ext.args = { "${params.krakenuniq_build_params}" } - } - withName: CENTRIFUGE_BUILD { ext.args = { "${params.centrifuge_build_params}" } } @@ -75,7 +71,4 @@ process { ext.args = { "${params.kaiju_build_params}" } } - withName: MALT_BUILD { - ext.args = { "${params.malt_build_params}" } - } } diff --git a/nextflow.config b/nextflow.config index 336e537..32581e2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -67,13 +67,11 @@ params { build_kraken2 = false kraken2_keepintermediate = false build_krakenuniq = false - bracken_build_params = null - kraken2_build_params = null - krakenuniq_build_params = null - centrifuge_build_params = null - diamond_build_params = null - kaiju_build_params = null - malt_build_params = null + bracken_build_params = '' + kraken2_build_params = '' + centrifuge_build_params = '' + diamond_build_params = '' + kaiju_build_params = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index b5be852..98fe471 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -166,11 +166,6 @@ "description": "Specify parameters being given to kraken2 build", "help_text": "See [Kraken2 documentation](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown#custom-databases)" }, - "krakenuniq_build_params": { - "type": "string", - "description": "Specify parameters being given to krakenuniq-build", - "help_text": "See [KrakenUniq documentation](https://github.com/fbreitwieser/krakenuniq#database-building)" - }, "centrifuge_build_params": { "type": "string", "description": "Specify parameters being given to centrifuge-build", @@ -185,11 +180,6 @@ "type": "string", "description": "Specify parameters being given to kaiju-mkbwt", "help_text": "See [Kaiju documentation](https://github.com/bioinformatics-centre/kaiju/tree/master#custom-database)" - }, - "malt_build_params": { - "type": "string", - "description": "Specify parameters being given to malt-build, exception: sequenceType", - "help_text": "See [The MALT index builder, in the MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/welcome.html)" } }, "fa_icon": "fas fa-database" From bcc8571aff8219e53fc4ebc9359904fd32e97999 Mon Sep 17 00:00:00 2001 From: alxndrdiaz Date: Sat, 16 Nov 2024 14:46:33 -0600 Subject: [PATCH 05/31] malt params update --- conf/modules.config | 9 +++++++-- nextflow.config | 2 +- nextflow_schema.json | 9 +++------ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f239716..58cee72 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -47,8 +47,13 @@ process { ] } - withName: MALT_BUILD { - ext.args = { "--sequenceType ${params.malt_sequencetype}" } + if(params.build_malt && (params.malt_build_params.contains('--sequenceType DNA') || params.malt_build_params.contains('--sequenceType Protein'))) { + withName: MALT_BUILD { + ext.args = { "${params.malt_build_params}" } + } + } + else { + println "Please specify the input sequence type using --sequenceType DNA or --sequenceType Protein" } withName: BRACKEN_BUILD { diff --git a/nextflow.config b/nextflow.config index 32581e2..91abfce 100644 --- a/nextflow.config +++ b/nextflow.config @@ -62,7 +62,7 @@ params { build_diamond = false build_kaiju = false build_malt = false - malt_sequencetype = "DNA" + malt_build_params = "--sequenceType DNA" build_centrifuge = false build_kraken2 = false kraken2_keepintermediate = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 98fe471..c7b8a34 100644 --- 
a/nextflow_schema.json +++ b/nextflow_schema.json @@ -133,13 +133,10 @@ "fa_icon": "fas fa-toggle-on", "description": "Turn on building of MALT database. Requires nucleotide FASTA file input." }, - "malt_sequencetype": { + "malt_build_params": { "type": "string", - "default": "DNA", - "description": "Specify type of input sequence being given to MALT", - "enum": ["DNA", "Protein"], - "help_text": "Use to specify whether the reference sequences are DNA or Protein sequences. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\n> Modifies tool(s) parameter(s)\n> - malt-build: `--sequenceType` ", - "fa_icon": "fas fa-dna" + "description": "Specify parameters for malt-build", + "help_text": "At least the type of reference sequences should be specified. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\n> Modifies tool(s) parameter(s)\n> - malt-build: `--sequenceType` " }, "build_kraken2": { "type": "boolean", From c808195a4795904e7f76044d8732a56f0b6f30e0 Mon Sep 17 00:00:00 2001 From: alxndrdiaz Date: Thu, 12 Dec 2024 07:23:30 -0600 Subject: [PATCH 06/31] fix malt warning --- workflows/createtaxdb.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 51c2fe6..d2c33c4 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -183,7 +183,7 @@ workflow CREATETAXDB { ch_malt_mapdb = file(params.malt_mapdb) } - if (params.malt_sequencetype == 'Protein') { + if (params.malt_build_params.contains('--sequenceType Protein')) { ch_input_for_malt = ch_prepped_aa_fastas.map { meta, file -> file } } else { From 671329f9cd87553384c1eeb0228533327584a824 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Tue, 29 Oct 2024 13:03:39 +0100 Subject: [PATCH 07/31] Start adding gagnon --- modules.json | 5 ++ .../nf-core/ganon/buildcustom/environment.yml | 5 ++ modules/nf-core/ganon/buildcustom/main.nf | 60 +++++++++++++ modules/nf-core/ganon/buildcustom/meta.yml | 77 +++++++++++++++++ .../ganon/buildcustom/tests/main.nf.test | 69 +++++++++++++++ .../ganon/buildcustom/tests/main.nf.test.snap | 72 ++++++++++++++++ .../ganon/buildcustom/tests/nextflow.config | 5 ++ .../nf-core/ganon/buildcustom/tests/tags.yml | 2 + workflows/createtaxdb.nf | 85 ++++++++++++------- 9 files changed, 350 insertions(+), 30 deletions(-) create mode 100644 modules/nf-core/ganon/buildcustom/environment.yml create mode 100644 modules/nf-core/ganon/buildcustom/main.nf create mode 100644 modules/nf-core/ganon/buildcustom/meta.yml create mode 100644 modules/nf-core/ganon/buildcustom/tests/main.nf.test create mode 100644 modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap create mode 100644 modules/nf-core/ganon/buildcustom/tests/nextflow.config create mode 100644 modules/nf-core/ganon/buildcustom/tests/tags.yml diff --git a/modules.json b/modules.json index cd86b5a..f681212 100644 --- a/modules.json +++ b/modules.json @@ -30,6 +30,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "ganon/buildcustom": { + "branch": "master", + "git_sha": "58b4d685b1c93429917fec530f5d656aca3f2ef6", + "installed_by": ["modules"] + }, "gunzip": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/ganon/buildcustom/environment.yml b/modules/nf-core/ganon/buildcustom/environment.yml new file mode 100644 index 0000000..0e073d5 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ganon=2.1.0 diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf new file mode 100644 index 0000000..212e49f --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -0,0 +1,60 @@ +process GANON_BUILDCUSTOM { + tag "${meta.id}" + label 'process_high' + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/ganon:2.1.0--py310hab1bfa5_1' + : 'biocontainers/ganon:2.1.0--py310hab1bfa5_1'}" + + input: + tuple val(meta), path(input) + val input_type + path taxonomy_files + path genome_size_files + + output: + tuple val(meta), path("*.{hibf,ibf,tax}"), emit: db + tuple val(meta), path("*.info.tsv"), emit: info, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_cmd = input_type == 'fasta' ? "--input ${input}" : input_type == 'tsv' ? "--input-file ${input}" : error("Invalid input type: ${input_type}. Options: fasta, tsv") + def taxonomy_args = taxonomy_files ? "--taxonomy-files ${taxonomy_files}" : "" + def genome_size_args = genome_size_files ? 
"--genome-size-files ${genome_size_files}" : "" + """ + ganon \\ + build-custom \\ + --threads ${task.cpus} \\ + --input ${input} \\ + --db-prefix ${prefix} \\ + ${taxonomy_args} \\ + ${genome_size_args} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ganon: \$(echo \$(ganon --version 2>1) | sed 's/.*ganon //g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def taxonomy_args = taxonomy_files ? "--taxonomy-files ${taxonomy_files}" : "" + def genome_size_args = genome_size_files ? "--genome-size-files ${genome_size_files}" : "" + """ + touch ${prefix}.hibf + touch ${prefix}.tax + touch ${prefix}.info.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ganon: \$(echo \$(ganon --version 2>1) | sed 's/.*ganon //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ganon/buildcustom/meta.yml b/modules/nf-core/ganon/buildcustom/meta.yml new file mode 100644 index 0000000..39bc073 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/meta.yml @@ -0,0 +1,77 @@ +name: "ganon_buildcustom" +description: Build ganon database using custom reference sequences. +keywords: + - ganon + - metagenomics + - profiling + - taxonomy + - k-mer + - database +tools: + - "ganon": + description: "ganon classifies short DNA sequences against large sets of genomic + reference sequences efficiently" + homepage: "https://github.com/pirovc/ganon" + documentation: "https://github.com/pirovc/ganon" + tool_dev_url: "https://github.com/pirovc/ganon" + doi: "10.1093/bioinformatics/btaa458" + licence: ["MIT"] + identifier: biotools:ganon +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: | + List of input FASTA files, or a directory containing input FASTA files. + Note you must supply --input-extension via ext.args if FASTA extensions do not end in the default `fna.gz`. + pattern: "*" + - - input_type: + type: string + description: | + Specify whether the file(s) given to the input channel are in FASTA format (and will be supplied as --input) + or in TSV format (and will be supplied as --input-file). For TSV format, the 'file' column should be just the + file name so that it's local to the working directory of this process. + pattern: "fasta|tsv" + - - taxonomy_files: + type: file + description: Pre-downloaded taxonomy files of input sequences. See ganon docs + for formats + - - genome_size_files: + type: file + description: Pre-downloaded NCBI or GTDB genome size files of input sequences. + See ganon docs for formats + pattern: "{species_genome_size.txt.gz,*_metadata.tar.gz}" +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{hibf,ibf,tax}": + type: file + description: ganon database files + pattern: "*.{ibf,tax}" + - info: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.info.tsv": + type: file + description: Copy of target info generated. Can be used for updating database. 
+ pattern: "*info.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test b/modules/nf-core/ganon/buildcustom/tests/main.nf.test new file mode 100644 index 0000000..8fa4227 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process GANON_BUILDCUSTOM" + script "../main.nf" + process "GANON_BUILDCUSTOM" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "ganon" + tag "ganon/buildcustom" + + test("sarscov2 - genome fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + ] + input[1] = 'fasta' + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db.get(0).get(1).findAll { file(it).name != "test.tax" }, + process.out.versions + ).match() + }, + { assert file(process.out.db.get(0).get(1).find { file(it).name == "test.tax" }).text.contains("MT192765.1") }, + ) + } + } + + test("sarscov2 - genome fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + ] + input[1] = 'fasta' + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap new file mode 100644 index 0000000..2c3243f --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - genome fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test.hibf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tax:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.info.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ], + "db": [ + [ + { + "id": "test" + }, + [ + "test.hibf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tax:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "info": [ + [ + { + "id": "test" + }, + "test.info.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-07T17:00:22.98042261" + }, + "sarscov2 - genome fasta": { + "content": [ + [ + "test.hibf:md5,d10fe6fc6d198696bc15ca85a1459614" + ], + [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-07T19:03:25.060306554" + } +} \ No newline at end of file diff --git a/modules/nf-core/ganon/buildcustom/tests/nextflow.config b/modules/nf-core/ganon/buildcustom/tests/nextflow.config new file mode 100644 index 0000000..a12988e --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GANON_BUILDCUSTOM { + ext.args = "--input-target sequence" + } +} diff --git a/modules/nf-core/ganon/buildcustom/tests/tags.yml 
b/modules/nf-core/ganon/buildcustom/tests/tags.yml new file mode 100644 index 0000000..46c2aa4 --- /dev/null +++ b/modules/nf-core/ganon/buildcustom/tests/tags.yml @@ -0,0 +1,2 @@ +ganon/buildcustom: + - "modules/nf-core/ganon/buildcustom/**" diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index d2c33c4..8c4cdf1 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -4,11 +4,11 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createtaxdb_pipeline' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createtaxdb_pipeline' // Preprocessing include { GUNZIP as GUNZIP_DNA } from '../modules/nf-core/gunzip/main' @@ -20,6 +20,7 @@ include { CAT_CAT as CAT_CAT_AA } from '../modules/nf-core/cat/cat/ // Database building (with specific auxiliary modules) include { CENTRIFUGE_BUILD } from '../modules/nf-core/centrifuge/build/main' include { DIAMOND_MAKEDB } from '../modules/nf-core/diamond/makedb/main' +include { GANON_BUILDCUSTOM } from '../modules/nf-core/ganon/buildcustom/main' include { KAIJU_MKFMI } from '../modules/nf-core/kaiju/mkfmi/main' include { KRAKENUNIQ_BUILD } from '../modules/nf-core/krakenuniq/build/main' include { UNZIP } from '../modules/nf-core/unzip/main' @@ -75,8 +76,9 @@ workflow CREATETAXDB { ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) // Place in single file - ch_singleref_for_dna = CAT_CAT_DNA(ch_prepped_dna_fastas) + CAT_CAT_DNA(ch_prepped_dna_fastas) ch_versions = ch_versions.mix(CAT_CAT_DNA.out.versions.first()) + ch_singleref_for_dna = CAT_CAT_DNA.out } // TODO: Possibly need to have a modification step to get header correct to actually run with kaiju... 
@@ -101,7 +103,8 @@ workflow CREATETAXDB { ch_prepped_aa_fastas = PIGZ_COMPRESS_AA.out.archive.mix(ch_aa_for_zipping.zipped).groupTuple() //ch_versions = ch_versions.mix( PIGZ_COMPRESS_AA.versions.first() ) - ch_singleref_for_aa = CAT_CAT_AA(ch_prepped_aa_fastas) + CAT_CAT_AA(ch_prepped_aa_fastas) + ch_singleref_for_aa = CAT_CAT_AA.out_file ch_versions = ch_versions.mix(CAT_CAT_AA.out.versions.first()) } @@ -114,7 +117,7 @@ workflow CREATETAXDB { // Module: Run CENTRIFUGE/BUILD if (params.build_centrifuge) { - CENTRIFUGE_BUILD(CAT_CAT_DNA.out.file_out, ch_nucl2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp, []) + CENTRIFUGE_BUILD(ch_singleref_for_dna, ch_nucl2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp, []) ch_versions = ch_versions.mix(CENTRIFUGE_BUILD.out.versions.first()) ch_centrifuge_output = CENTRIFUGE_BUILD.out.cf } @@ -125,7 +128,7 @@ workflow CREATETAXDB { // MODULE: Run DIAMOND/MAKEDB if (params.build_diamond) { - DIAMOND_MAKEDB(CAT_CAT_AA.out.file_out, ch_prot2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp) + DIAMOND_MAKEDB(ch_singleref_for_aa, ch_prot2taxid, ch_taxonomy_nodesdmp, ch_taxonomy_namesdmp) ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions.first()) ch_diamond_output = DIAMOND_MAKEDB.out.db } @@ -133,10 +136,27 @@ workflow CREATETAXDB { ch_diamond_output = Channel.empty() } + if (params.build_ganon) { + ch_ganon_input_tsv = ch_prepped_dna_fastas + .map { meta, file -> + [meta, file] + [file.name(), meta.id, meta.taxid] + } + .map { it.values().join("\t") } + .collectFile { + name: "ganon_input.tsv" + newLine: true + } + + GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', tax_file, []) + ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) + ch_ganon_output = GANON_BUILDCUSTOM.out.db + } + // MODULE: Run KAIJU/MKFMI if (params.build_kaiju) { - KAIJU_MKFMI(CAT_CAT_AA.out.file_out) + KAIJU_MKFMI(ch_singleref_for_aa) ch_versions = ch_versions.mix(KAIJU_MKFMI.out.versions.first()) ch_kaiju_output = KAIJU_MKFMI.out.fmi } @@ -149,7 +169,7 @@ workflow CREATETAXDB { // Condition is inverted because subworkflow asks if you want to 'clean' (true) or not, but pipeline says to 'keep' if (params.build_kraken2 || params.build_bracken) { def k2_keepintermediates = params.kraken2_keepintermediate || params.build_bracken ? false : true - FASTA_BUILD_ADD_KRAKEN2_BRACKEN(CAT_CAT_DNA.out.file_out, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, k2_keepintermediates, params.build_bracken) + FASTA_BUILD_ADD_KRAKEN2_BRACKEN(ch_singleref_for_dna, ch_taxonomy_namesdmp, ch_taxonomy_nodesdmp, ch_accession2taxid, k2_keepintermediates, params.build_bracken) ch_versions = ch_versions.mix(FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.versions.first()) ch_kraken2_bracken_output = FASTA_BUILD_ADD_KRAKEN2_BRACKEN.out.db } @@ -214,25 +234,31 @@ workflow CREATETAXDB { // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? - Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() - - summary_params = paramsSummaryMap( - workflow, parameters_schema: "nextflow_schema.json") + ch_multiqc_config = Channel.fromPath( + "${projectDir}/assets/multiqc_config.yml", + checkIfExists: true + ) + ch_multiqc_custom_config = params.multiqc_config + ? 
Channel.fromPath(params.multiqc_config, checkIfExists: true) + : Channel.empty() + ch_multiqc_logo = params.multiqc_logo + ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) + : Channel.empty() + + summary_params = paramsSummaryMap( + workflow, + parameters_schema: "nextflow_schema.json" + ) ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? - file(params.multiqc_methods_description, checkIfExists: true) : - file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( - methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + ) + ch_multiqc_custom_methods_description = params.multiqc_methods_description + ? file(params.multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description) + ) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix( @@ -250,14 +276,13 @@ workflow CREATETAXDB { [], [] ) - multiqc_report = MULTIQC.out.report.toList() emit: - multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html centrifuge_database = ch_centrifuge_output diamond_database = ch_diamond_output + ganon_database = ch_ganon_output kaiju_database = ch_kaiju_output kraken2_bracken_database = ch_kraken2_bracken_output krakenuniq_database = ch_krakenuniq_output From 52473c48c649b6b31cc0d9df46b5cd9a870a08ee Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Sat, 9 Nov 2024 16:24:17 +0100 Subject: [PATCH 08/31] Continue work --- conf/modules.config | 4 +++ modules/nf-core/ganon/buildcustom/main.nf | 2 +- nextflow.config | 3 ++- nextflow_schema.json | 23 ++++++++++------- workflows/createtaxdb.nf | 30 ++++++++++++++--------- 5 files changed, 40 insertions(+), 22 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 58cee72..274ca3c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -56,6 +56,10 @@ process { println "Please specify the input sequence type using --sequenceType DNA or --sequenceType Protein" } + withName: GANON_BUILD { + ext.args = { "--verbose" } + } + withName: BRACKEN_BUILD { ext.args = { "${params.bracken_build_params}" } } diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf index 212e49f..396f8e1 100644 --- a/modules/nf-core/ganon/buildcustom/main.nf +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -30,7 +30,7 @@ process GANON_BUILDCUSTOM { ganon \\ build-custom \\ --threads ${task.cpus} \\ - --input ${input} \\ + $input_cmd \\ --db-prefix ${prefix} \\ ${taxonomy_args} \\ ${genome_size_args} \\ diff --git a/nextflow.config b/nextflow.config index 91abfce..883603e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -60,6 +60,7 @@ params { // tool specific options build_bracken = false build_diamond = false + build_ganon = false build_kaiju = false build_malt = false malt_build_params = "--sequenceType DNA" @@ -288,4 +289,4 @@ validation { } // Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +includeConfig 'conf/modules.config' \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index c7b8a34..964ca2f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -123,20 +123,20 @@ "fa_icon": "fas fa-toggle-on", "description": "Turn on building of DIAMOND database. Requires amino-acid FASTA file input." }, - "build_kaiju": { - "type": "boolean", - "description": "Turn on building of Kaiju database. Requires amino-acid FASTA file input.", - "fa_icon": "fas fa-toggle-on" - }, "build_malt": { "type": "boolean", "fa_icon": "fas fa-toggle-on", "description": "Turn on building of MALT database. Requires nucleotide FASTA file input." }, - "malt_build_params": { - "type": "string", - "description": "Specify parameters for malt-build", - "help_text": "At least the type of reference sequences should be specified. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\n> Modifies tool(s) parameter(s)\n> - malt-build: `--sequenceType` " + "build_ganon": { + "type": "boolean", + "description": "Turn on building of ganon database. Requires nucleotide FASTA file input.", + "fa_icon": "fas fa-toggle-on" + }, + "build_kaiju": { + "type": "boolean", + "description": "Turn on building of Kaiju database. Requires amino-acid FASTA file input.", + "fa_icon": "fas fa-toggle-on" }, "build_kraken2": { "type": "boolean", @@ -177,6 +177,11 @@ "type": "string", "description": "Specify parameters being given to kaiju-mkbwt", "help_text": "See [Kaiju documentation](https://github.com/bioinformatics-centre/kaiju/tree/master#custom-database)" + }, + "malt_build_params": { + "type": "string", + "description": "Specify parameters for malt-build", + "help_text": "At least the type of reference sequences should be specified. 
(For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\n> Modifies tool(s) parameter(s)\n> - malt-build: `--sequenceType` " } }, "fa_icon": "fas fa-database" diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 8c4cdf1..3852991 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -57,11 +57,11 @@ workflow CREATETAXDB { // PREPARE: Prepare input for single file inputs modules - if ([params.build_malt, params.build_centrifuge, params.build_kraken2, params.build_bracken, params.build_krakenuniq].any()) { + if ([params.build_malt, params.build_centrifuge, params.build_kraken2, params.build_bracken, params.build_krakenuniq, params.build_ganon].any()) { // Pull just DNA sequences ch_dna_refs_for_singleref = ch_samplesheet - .map { meta, fasta_dna, fasta_aa -> [[id: params.dbname], fasta_dna] } + .map { meta, fasta_dna, fasta_aa -> [meta, fasta_dna] } .filter { meta, fasta_dna -> fasta_dna } @@ -72,7 +72,7 @@ workflow CREATETAXDB { } GUNZIP_DNA(ch_dna_for_unzipping.zipped) - ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped).groupTuple() + ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped).tap { ch_prepped_dna_fastas_ungrouped }.map { meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) // Place in single file @@ -137,18 +137,26 @@ workflow CREATETAXDB { } if (params.build_ganon) { - ch_ganon_input_tsv = ch_prepped_dna_fastas - .map { meta, file -> - [meta, file] - [file.name(), meta.id, meta.taxid] + ch_ganon_input_tsv = ch_prepped_dna_fastas_ungrouped + .map { meta, fasta -> + // I tried with .name() but it kept giving error of `Unknown method invocation `name` on XPath type... not sure why + def fasta_name = fasta.toString().split('/').last() + [fasta_name, meta.id, meta.taxid] } - .map { it.values().join("\t") } - .collectFile { - name: "ganon_input.tsv" + .map { it.join("\t") } + .collectFile ( + name: "ganon_fasta_input.tsv", newLine: true + ) + .map{ + [[id: params.dbname], it] } - GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', tax_file, []) + // Nodes must come first + ch_ganon_tax_files = Channel.fromPath(ch_taxonomy_nodesdmp).combine(Channel.fromPath(ch_taxonomy_namesdmp)) + + // TODO Fix module so `input_cmd` is used and add test! + GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', ch_ganon_tax_files, []) ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) ch_ganon_output = GANON_BUILDCUSTOM.out.db } From 1bed5477a481625289a73d51cbea8f962f919af8 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Sat, 9 Nov 2024 16:24:50 +0100 Subject: [PATCH 09/31] Revert manual change to ganonbuild module (to upstreeam the fix) --- modules/nf-core/ganon/buildcustom/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf index 396f8e1..212e49f 100644 --- a/modules/nf-core/ganon/buildcustom/main.nf +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -30,7 +30,7 @@ process GANON_BUILDCUSTOM { ganon \\ build-custom \\ --threads ${task.cpus} \\ - $input_cmd \\ + --input ${input} \\ --db-prefix ${prefix} \\ ${taxonomy_args} \\ ${genome_size_args} \\ From 08aa4c18c237c27cc2d60133ab29dbcb2668e763 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 28 Nov 2024 10:13:18 +0100 Subject: [PATCH 10/31] Add working ganon-build (missing docs) --- modules.json | 2 +- modules/nf-core/ganon/buildcustom/main.nf | 6 +- modules/nf-core/ganon/buildcustom/meta.yml | 12 ++-- .../ganon/buildcustom/tests/main.nf.test | 55 +++++++++++++++++-- .../ganon/buildcustom/tests/main.nf.test.snap | 47 +++++++++++----- .../ganon/buildcustom/tests/nextflow.config | 2 +- modules/nf-core/malt/build/main.nf | 18 +++--- nextflow.config | 2 +- workflows/createtaxdb.nf | 17 +++--- 9 files changed, 112 insertions(+), 49 deletions(-) diff --git a/modules.json b/modules.json index f681212..ebd0e4f 100644 --- a/modules.json +++ b/modules.json @@ -32,7 +32,7 @@ }, "ganon/buildcustom": { "branch": "master", - "git_sha": "58b4d685b1c93429917fec530f5d656aca3f2ef6", + "git_sha": "4265ef4b3b9af8877671715b081f102041c64cfd", "installed_by": ["modules"] }, "gunzip": { diff --git a/modules/nf-core/ganon/buildcustom/main.nf b/modules/nf-core/ganon/buildcustom/main.nf index 212e49f..41ffd68 100644 --- a/modules/nf-core/ganon/buildcustom/main.nf +++ b/modules/nf-core/ganon/buildcustom/main.nf @@ -8,7 +8,7 @@ process GANON_BUILDCUSTOM { input: tuple val(meta), path(input) - val input_type + path input_tsv path taxonomy_files path genome_size_files @@ -23,14 +23,14 @@ process GANON_BUILDCUSTOM { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input_cmd = input_type == 'fasta' ? "--input ${input}" : input_type == 'tsv' ? "--input-file ${input}" : error("Invalid input type: ${input_type}. Options: fasta, tsv") + def input_cmd = input_tsv ? "--input-file ${input_tsv}" : "--input ${input}" def taxonomy_args = taxonomy_files ? "--taxonomy-files ${taxonomy_files}" : "" def genome_size_args = genome_size_files ? "--genome-size-files ${genome_size_files}" : "" """ ganon \\ build-custom \\ --threads ${task.cpus} \\ - --input ${input} \\ + ${input_cmd} \\ --db-prefix ${prefix} \\ ${taxonomy_args} \\ ${genome_size_args} \\ diff --git a/modules/nf-core/ganon/buildcustom/meta.yml b/modules/nf-core/ganon/buildcustom/meta.yml index 39bc073..5c481ec 100644 --- a/modules/nf-core/ganon/buildcustom/meta.yml +++ b/modules/nf-core/ganon/buildcustom/meta.yml @@ -28,14 +28,14 @@ input: description: | List of input FASTA files, or a directory containing input FASTA files. Note you must supply --input-extension via ext.args if FASTA extensions do not end in the default `fna.gz`. - pattern: "*" - - - input_type: + pattern: "*.{fasta,fna,fa,fa,fasta.gz,fna.gz,fa.gz,fa.gz}" + - - input_tsv: type: string description: | - Specify whether the file(s) given to the input channel are in FASTA format (and will be supplied as --input) - or in TSV format (and will be supplied as --input-file). For TSV format, the 'file' column should be just the - file name so that it's local to the working directory of this process. - pattern: "fasta|tsv" + (Optional) Specify an TSV file containing the paths, and relevant metadata to the input FASTA files to use the `--input-file` option. + The 'file' column should be just the file name of each FASTA file (so that it's local to the working directory of the process). + See ganon documentation for more more information on the other columns. + pattern: "*tsv" - - taxonomy_files: type: file description: Pre-downloaded taxonomy files of input sequences. 
See ganon docs diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test b/modules/nf-core/ganon/buildcustom/tests/main.nf.test index 8fa4227..9fe3948 100644 --- a/modules/nf-core/ganon/buildcustom/tests/main.nf.test +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test @@ -10,16 +10,19 @@ nextflow_process { tag "ganon" tag "ganon/buildcustom" - test("sarscov2 - genome fasta") { + test("sarscov2 - genome - fasta") { when { + params { + module_args = '--input-target sequence' + } process { """ input[0] = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), ] - input[1] = 'fasta' + input[1] = [] input[2] = [] input[3] = [] """ @@ -31,10 +34,49 @@ nextflow_process { { assert process.success }, { assert snapshot( process.out.db.get(0).get(1).findAll { file(it).name != "test.tax" }, - process.out.versions + process.out.versions, + file(process.out.db.get(0).get(1).find { file(it).name == "test.tax" }).text.contains("MT192765.1") ).match() }, - { assert file(process.out.db.get(0).get(1).find { file(it).name == "test.tax" }).text.contains("MT192765.1") }, + ) + } + } + +test("sarscov2 - genome - tsv") { + + when { + params { + module_args = '--input-target file' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + ] + input[1] = ch_ganon_input_tsv = Channel.of(["genome.fasta", "Severe_acute_respiratory_syndrome_coronavirus_2", "2697049"]). + map { it.join("\t") } + .collectFile ( + name: "ganon_fasta_input.tsv", + newLine: true + ) + input[2] = [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) + ] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db.get(0).get(1), + process.out.versions + ).match() + } ) } } @@ -44,13 +86,16 @@ nextflow_process { options "-stub" when { + params { + module_args = '' + } process { """ input[0] = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), ] - input[1] = 'fasta' + input[1] = [] input[2] = [] input[3] = [] """ diff --git a/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap index 2c3243f..e27a749 100644 --- a/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap +++ b/modules/nf-core/ganon/buildcustom/tests/main.nf.test.snap @@ -1,4 +1,36 @@ { + "sarscov2 - genome - tsv": { + "content": [ + [ + "test.hibf:md5,9edfe4c3873d621a88ebcad438dca42c", + "test.tax:md5,e15400a1e43cce61545834695da46465" + ], + [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-13T13:37:14.320278404" + }, + "sarscov2 - genome - fasta": { + "content": [ + [ + "test.hibf:md5,d10fe6fc6d198696bc15ca85a1459614" + ], + [ + "versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" + ], + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-13T13:36:52.317157496" + }, "sarscov2 - genome fasta - stub": { "content": [ { @@ -53,20 +85,5 @@ "nextflow": "24.04.4" }, "timestamp": "2024-10-07T17:00:22.98042261" - }, - "sarscov2 - genome fasta": { - "content": [ - [ - "test.hibf:md5,d10fe6fc6d198696bc15ca85a1459614" - ], - [ - 
"versions.yml:md5,9c73293ae36914c6ce3718ad6728ad9e" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-07T19:03:25.060306554" } } \ No newline at end of file diff --git a/modules/nf-core/ganon/buildcustom/tests/nextflow.config b/modules/nf-core/ganon/buildcustom/tests/nextflow.config index a12988e..15c20b6 100644 --- a/modules/nf-core/ganon/buildcustom/tests/nextflow.config +++ b/modules/nf-core/ganon/buildcustom/tests/nextflow.config @@ -1,5 +1,5 @@ process { withName: GANON_BUILDCUSTOM { - ext.args = "--input-target sequence" + ext.args = params.module_args } } diff --git a/modules/nf-core/malt/build/main.nf b/modules/nf-core/malt/build/main.nf index 6f05e9e..710c82e 100644 --- a/modules/nf-core/malt/build/main.nf +++ b/modules/nf-core/malt/build/main.nf @@ -1,11 +1,9 @@ process MALT_BUILD { - label 'process_high' - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' : - 'biocontainers/malt:0.61--hdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' + : 'biocontainers/malt:0.61--hdfd78af_0'}" input: path fastas @@ -13,8 +11,8 @@ process MALT_BUILD { path mapping_db output: - path "malt_index/" , emit: index - path "versions.yml" , emit: versions + path "malt_index/", emit: index + path "versions.yml", emit: versions path "malt-build.log", emit: log when: @@ -28,10 +26,10 @@ process MALT_BUILD { malt-build \\ -v \\ --input ${fastas.join(' ')} \\ - $igff \\ + ${igff} \\ -d 'malt_index/' \\ - -t $task.cpus \\ - $args \\ + -t ${task.cpus} \\ + ${args} \\ -mdb ${mapping_db}/*.db |&tee malt-build.log cat <<-END_VERSIONS > versions.yml diff --git a/nextflow.config b/nextflow.config index 883603e..4c4adb4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -223,7 +223,7 @@ set -C # No clobber - prevent output redirection from overwriting files. // Disable process selector warnings by default. Use debug profile to enable warnings. 
nextflow.enable.configProcessNamesValidation = false -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 3852991..56fa77a 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -61,8 +61,8 @@ workflow CREATETAXDB { // Pull just DNA sequences ch_dna_refs_for_singleref = ch_samplesheet - .map { meta, fasta_dna, fasta_aa -> [meta, fasta_dna] } - .filter { meta, fasta_dna -> + .map { meta, fasta_dna, _fasta_aa -> [meta, fasta_dna] } + .filter { _meta, fasta_dna -> fasta_dna } @@ -72,7 +72,8 @@ workflow CREATETAXDB { } GUNZIP_DNA(ch_dna_for_unzipping.zipped) - ch_prepped_dna_fastas = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped).tap { ch_prepped_dna_fastas_ungrouped }.map { meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() + ch_prepped_dna_fastas_ungrouped = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped) + ch_prepped_dna_fastas = ch_prepped_dna_fastas_ungrouped.map { meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) // Place in single file @@ -137,6 +138,9 @@ workflow CREATETAXDB { } if (params.build_ganon) { + + ch_ganon_input_fastas = ch_prepped_dna_fastas_ungrouped.collect() + ch_ganon_input_tsv = ch_prepped_dna_fastas_ungrouped .map { meta, fasta -> // I tried with .name() but it kept giving error of `Unknown method invocation `name` on XPath type... not sure why @@ -144,19 +148,18 @@ workflow CREATETAXDB { [fasta_name, meta.id, meta.taxid] } .map { it.join("\t") } - .collectFile ( + .collectFile( name: "ganon_fasta_input.tsv", newLine: true ) - .map{ + .map { [[id: params.dbname], it] } // Nodes must come first ch_ganon_tax_files = Channel.fromPath(ch_taxonomy_nodesdmp).combine(Channel.fromPath(ch_taxonomy_namesdmp)) - // TODO Fix module so `input_cmd` is used and add test! - GANON_BUILDCUSTOM(ch_ganon_input_tsv, 'tsv', ch_ganon_tax_files, []) + GANON_BUILDCUSTOM(ch_prepped_dna_fastas, ch_ganon_input_tsv.map { _meta, tsv -> tsv }, ch_ganon_tax_files, []) ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) ch_ganon_output = GANON_BUILDCUSTOM.out.db } From 784a38da988accb533d0ba28b08bdae4f2f997b3 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 12:13:25 +0100 Subject: [PATCH 11/31] Add better input validation tests --- conf/modules.config | 4 +- nextflow.config | 215 +++++++++--------- .../utils_nfcore_createtaxdb_pipeline/main.nf | 27 ++- workflows/createtaxdb.nf | 4 +- 4 files changed, 139 insertions(+), 111 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 274ca3c..c85a699 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,8 +18,8 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + withName: MULTIQC { + ext.args = { params.multiqc_title ? 
"--title \"${params.multiqc_title}\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, diff --git a/nextflow.config b/nextflow.config index 4c4adb4..4124e98 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,14 +11,14 @@ params { // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null + input = null // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options outdir = null @@ -35,27 +35,27 @@ params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Config options - config_profile_name = null - config_profile_description = null + config_profile_name = null + config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Schema validation default options - validate_params = true + validate_params = true // General parameters - dbname = null - save_concatenated_fastas = false + dbname = null + save_concatenated_fastas = false - accession2taxid = null - prot2taxid = null - nucl2taxid = null - nodesdmp = null - namesdmp = null - malt_mapdb = null + accession2taxid = null + prot2taxid = null + nucl2taxid = null + nodesdmp = null + namesdmp = null + malt_mapdb = null // tool specific options build_bracken = false @@ -81,90 +81,90 @@ includeConfig 'conf/base.config' profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - conda.channels = ['conda-forge', 'bioconda'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u 
$(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } wave { apptainer.ociAutoPull = true @@ -174,14 +174,19 @@ profiles { wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB + } + test { + includeConfig 'conf/test.config' + } + test_full { + includeConfig 'conf/test_full.config' + } + test_nothing { + includeConfig 'conf/test_nothing.config' } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } - test_nothing { includeConfig 'conf/test_nothing.config' } - } // Load nf-core custom profiles from different Institutions @@ -193,10 +198,10 @@ includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? 
"${pa // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled // Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -223,7 +228,7 @@ set -C # No clobber - prevent output redirection from overwriting files. // Disable process selector warnings by default. Use debug profile to enable warnings. nextflow.enable.configProcessNamesValidation = false -trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" @@ -254,17 +259,17 @@ manifest { // Nextflow plugins plugins { - id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.1.1' } validation { defaultIgnoreParams = ["genomes"] help { - enabled = true - command = "nextflow run $manifest.name -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" + enabled = true + command = "nextflow run ${manifest.name} -profile --input samplesheet.csv --outdir " + fullParameter = "help_full" showHiddenParameter = "show_hidden" - beforeText = """ + beforeText = """ -\033[2m----------------------------------------------------\033[0m- \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m \033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m @@ -274,7 +279,7 @@ validation { \033[0;35m ${manifest.name} ${manifest.version}\033[0m -\033[2m----------------------------------------------------\033[0m- """ - afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} + afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/', '')}" }.join("\n")}${manifest.doi ? 
"\n" : ""} * The nf-core framework https://doi.org/10.1038/s41587-020-0439-x @@ -284,7 +289,7 @@ validation { } summary { beforeText = validation.help.beforeText - afterText = validation.help.afterText + afterText = validation.help.afterText } } diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf index c86caa0..589b799 100644 --- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf @@ -130,10 +130,35 @@ workflow PIPELINE_COMPLETION { // def validateInputParameters() { - // Validate DIAMOND parameter combinations + // Validate CENTRIFUGE auxiliary file combinations + if (params.build_centrifuge && [!params.nucl2taxid, !params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_centrifuge, but missing at least one of: --nucl2taxid, --nodesdmp, or --namesdmp (all are mandatory for CENTRIFUGE)') + } + + // Validate DIAMOND auxiliary file combinations if (params.build_diamond && [!params.prot2taxid, !params.nodesdmp, !params.namesdmp].any()) { error('[nf-core/createtaxdb] Supplied --build_diamond, but missing at least one of: --prot2taxid, --nodesdmp, or --namesdmp (all are mandatory for DIAMOND)') } + + // Validate GANON parameter combinations + if (params.build_ganon && [!params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_ganon, but missing at least one of: --nodesdmp, or --namesdmp (all are mandatory for GANON)') + } + + // Validate BRACKEN/KRAKEN parameter combinations + if ((params.build_bracken || params.build_kraken2) && [!params.accession2taxid, !params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_kraken2 or --bracken, but missing at least one of: --accession2taxid, --nodesdmp, or --namesdmp (all are mandatory for BRACKEN/KRAKEN2)') + } + + // Validate KRAKENUNIQ auxiliary file combinations + if (params.build_krakenuniq && [!params.nucl2taxid, !params.nodesdmp, !params.namesdmp].any()) { + error('[nf-core/createtaxdb] Supplied --build_krakenuniq, but missing at least one of: --nucl2taxid, --nodesdmp, or --namesdmp (all are mandatory for KRAKENUNIQ)') + } + + // Validate MALT auxiliary file combinations + if (params.build_krakenuniq && [!params.malt_mapdb].any()) { + error('[nf-core/createtaxdb] Supplied --build_malt, but missing: --malt_mapdb (all are mandatory for MALT)') + } } // diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 56fa77a..10ea9aa 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -139,8 +139,6 @@ workflow CREATETAXDB { if (params.build_ganon) { - ch_ganon_input_fastas = ch_prepped_dna_fastas_ungrouped.collect() - ch_ganon_input_tsv = ch_prepped_dna_fastas_ungrouped .map { meta, fasta -> // I tried with .name() but it kept giving error of `Unknown method invocation `name` on XPath type... 
not sure why @@ -192,7 +190,7 @@ workflow CREATETAXDB { if (params.build_krakenuniq) { ch_taxdmpfiles_for_krakenuniq = Channel.of(ch_taxonomy_namesdmp).combine(Channel.of(ch_taxonomy_nodesdmp)).map { [it] } - ch_input_for_krakenuniq = ch_prepped_dna_fastas.combine(ch_taxdmpfiles_for_krakenuniq).map { meta, reads, taxdump -> [meta, reads, taxdump, ch_nucl2taxid] } + ch_input_for_krakenuniq = ch_prepped_dna_fastas.combine(ch_taxdmpfiles_for_krakenuniq).map { meta, fastas, taxdump -> [meta, fastas, taxdump, ch_nucl2taxid] } KRAKENUNIQ_BUILD(ch_input_for_krakenuniq) ch_versions = ch_versions.mix(KRAKENUNIQ_BUILD.out.versions.first()) From 30aace6d8600cb456a59b0887383f3c3c7f21c1e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 13:26:35 +0100 Subject: [PATCH 12/31] Add reference to ganon across all test profiles and fix typo --- conf/test.config | 1 + conf/test_full.config | 17 ++++---- conf/test_nothing.config | 1 + main.nf | 89 ++++++++++++++++++---------------------- tests/test.nf.test | 2 + workflows/createtaxdb.nf | 7 +++- 6 files changed, 58 insertions(+), 59 deletions(-) diff --git a/conf/test.config b/conf/test.config index 93cf208..d582ba1 100644 --- a/conf/test.config +++ b/conf/test.config @@ -31,6 +31,7 @@ params { build_bracken = true build_diamond = true + build_ganon = true build_kaiju = true build_malt = true build_centrifuge = true diff --git a/conf/test_full.config b/conf/test_full.config index a628fd1..f14adca 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,13 +17,14 @@ params { // Input data for full size test // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' - build_bracken = true - build_diamond = true - build_kaiju = true - build_malt = true - build_centrifuge = true - build_kraken2 = true - build_krakenuniq = true + build_bracken = true + build_diamond = true + build_ganon = true + build_kaiju = true + build_malt = true + build_centrifuge = true + build_kraken2 = true + build_krakenuniq = true } diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 1f22ce2..b39e675 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -22,6 +22,7 @@ params { build_bracken = false build_diamond = false + build_ganon = false build_kaiju = false build_malt = false build_centrifuge = false diff --git a/main.nf b/main.nf index 81aefc5..ec245d9 100644 --- a/main.nf +++ b/main.nf @@ -15,51 +15,9 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { CREATETAXDB } from './workflows/createtaxdb' +include { CREATETAXDB } from './workflows/createtaxdb' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createtaxdb_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createtaxdb_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOWS FOR PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// WORKFLOW: Run main analysis pipeline depending on type of input -// -workflow NFCORE_CREATETAXDB { - - take: - 
samplesheet // channel: samplesheet read in from --input - - main: - - // - // WORKFLOW: Run pipeline - // - ch_samplesheet = samplesheet - ch_taxonomy_namesdmp = file(params.namesdmp) - ch_taxonomy_nodesdmp = file(params.nodesdmp) - ch_accession2taxid = file(params.accession2taxid) - ch_nucl2taxid = file(params.nucl2taxid) - ch_prot2taxid = file(params.prot2taxid) - ch_malt_mapdb = file(params.malt_mapdb) - - - CREATETAXDB ( - ch_samplesheet, - ch_taxonomy_namesdmp, - ch_taxonomy_nodesdmp, - ch_accession2taxid, - ch_nucl2taxid, - ch_prot2taxid, - ch_malt_mapdb, - - ) - emit: - multiqc_report = CREATETAXDB.out.multiqc_report // channel: /path/to/multiqc_report.html -} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -67,12 +25,10 @@ workflow NFCORE_CREATETAXDB { */ workflow { - - main: // // SUBWORKFLOW: Run initialisation tasks // - PIPELINE_INITIALISATION ( + PIPELINE_INITIALISATION( params.version, params.validate_params, params.monochrome_logs, @@ -84,13 +40,13 @@ workflow { // // WORKFLOW: Run main workflow // - NFCORE_CREATETAXDB ( + NFCORE_CREATETAXDB( PIPELINE_INITIALISATION.out.samplesheet ) // // SUBWORKFLOW: Run completion tasks // - PIPELINE_COMPLETION ( + PIPELINE_COMPLETION( params.email, params.email_on_fail, params.plaintext_email, @@ -103,6 +59,41 @@ workflow { /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +// +// WORKFLOW: Run main analysis pipeline depending on type of input +// +workflow NFCORE_CREATETAXDB { + take: + samplesheet // channel: samplesheet read in from --input + + main: + + // + // WORKFLOW: Run pipeline + // + ch_samplesheet = samplesheet + ch_taxonomy_namesdmp = file(params.namesdmp, checkIfExists: true) + ch_taxonomy_nodesdmp = file(params.nodesdmp, checkIfExists: true) + ch_accession2taxid = file(params.accession2taxid, checkIfExists: true) + ch_nucl2taxid = file(params.nucl2taxid, checkIfExists: true) + ch_prot2taxid = file(params.prot2taxid, checkIfExists: true) + ch_malt_mapdb = file(params.malt_mapdb, checkIfExists: true) + + + CREATETAXDB( + ch_samplesheet, + ch_taxonomy_namesdmp, + ch_taxonomy_nodesdmp, + ch_accession2taxid, + ch_nucl2taxid, + ch_prot2taxid, + ch_malt_mapdb + ) + + emit: + multiqc_report = CREATETAXDB.out.multiqc_report // channel: /path/to/multiqc_report.html +} diff --git a/tests/test.nf.test b/tests/test.nf.test index 76a073c..c8c6632 100644 --- a/tests/test.nf.test +++ b/tests/test.nf.test @@ -23,6 +23,8 @@ nextflow_pipeline { file("$outputDir/bracken/database/database.kraken").name, path("$outputDir/centrifuge/"), path("$outputDir/diamond/database.dmnd"), + path("$outputDir/ganon/database.hibf"), + path("$outputDir/ganon/database.tax"), path("$outputDir/kaiju/database.fmi"), path("$outputDir/kraken2/database/hash.k2d"), file("$outputDir/kraken2/database/opts.k2d").name, diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index 10ea9aa..62c50e9 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -79,7 +79,7 @@ workflow CREATETAXDB { // Place in single file CAT_CAT_DNA(ch_prepped_dna_fastas) ch_versions = ch_versions.mix(CAT_CAT_DNA.out.versions.first()) - ch_singleref_for_dna = CAT_CAT_DNA.out + ch_singleref_for_dna = CAT_CAT_DNA.out.file_out } // TODO: Possibly need to have a modification step to get header correct to actually run with kaiju... 
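The relocated NFCORE_CREATETAXDB workflow above now resolves every auxiliary taxonomy file with file(..., checkIfExists: true), so each of those parameters must point at an existing file before the run starts. A minimal sketch of what a matching configuration could look like — the parameter names are the ones read above, but every path below is a placeholder, not a value shipped with the pipeline:

    // Illustrative values only; replace each placeholder path with a real file.
    params {
        namesdmp        = '/path/to/taxdump/names.dmp'        // NCBI taxonomy names
        nodesdmp        = '/path/to/taxdump/nodes.dmp'        // NCBI taxonomy nodes
        accession2taxid = '/path/to/nucl_gb.accession2taxid'  // accession -> taxid table
        nucl2taxid      = '/path/to/nucl2taxid.map'           // nucleotide ID -> taxid map
        prot2taxid      = '/path/to/prot2taxid.map'           // protein ID -> taxid map
        malt_mapdb      = '/path/to/megan-map.db.zip'         // MEGAN mapping db for MALT
    }

Because of checkIfExists: true, a typo in any of these paths now fails at initialisation rather than part-way through a run.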
@@ -105,7 +105,7 @@ workflow CREATETAXDB { //ch_versions = ch_versions.mix( PIGZ_COMPRESS_AA.versions.first() ) CAT_CAT_AA(ch_prepped_aa_fastas) - ch_singleref_for_aa = CAT_CAT_AA.out_file + ch_singleref_for_aa = CAT_CAT_AA.out.file_out ch_versions = ch_versions.mix(CAT_CAT_AA.out.versions.first()) } @@ -161,6 +161,9 @@ workflow CREATETAXDB { ch_versions = ch_versions.mix(GANON_BUILDCUSTOM.out.versions.first()) ch_ganon_output = GANON_BUILDCUSTOM.out.db } + else { + ch_ganon_output = Channel.empty() + } // MODULE: Run KAIJU/MKFMI From dc28bc9bc7f80ab8884ebfc46fe1293d6735d4fe Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 13:30:43 +0100 Subject: [PATCH 13/31] Update test --- tests/test.nf.test.snap | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap index 0bd549f..dc60a3f 100644 --- a/tests/test.nf.test.snap +++ b/tests/test.nf.test.snap @@ -11,6 +11,8 @@ "database.4.cf:md5,2902ec5df0db6da41a91b40d2f46b30d" ], "database.dmnd:md5,b2ea49ef5490c526e2c56cae19bcb462", + "database.hibf:md5,af913cecda744b02751e2f5320c35c7c", + "database.tax:md5,30f327fbe453aa1a981363fd9f4df21b", "database.fmi:md5,54fd89f5e4eab61af30175e8aa389598", "hash.k2d:md5,941118164b4bcc010593f7a7c7b30029", "opts.k2d", @@ -30,8 +32,8 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.2" }, - "timestamp": "2024-10-08T16:33:06.699148849" + "timestamp": "2024-11-28T13:27:57.851046024" } } \ No newline at end of file From 086499cd8dac219ce143eadcbf3eb5f4a6e9f106 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 13:47:01 +0100 Subject: [PATCH 14/31] Add current required documentation --- CITATIONS.md | 4 ++++ README.md | 1 + docs/output.md | 17 ++++++++++++++++- .../utils_nfcore_createtaxdb_pipeline/main.nf | 2 ++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/CITATIONS.md b/CITATIONS.md index 354566c..1e56a20 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -40,6 +40,10 @@ > Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. https://doi.org/10.7717/peerj-cs.104 +- [ganon](https://doi.org/10.1093/bioinformatics/btaa458) + + > Piro, V. C., Dadi, T. H., Seiler, E., Reinert, K., & Renard, B. Y. (2020). Ganon: Precise metagenomics classification against large and up-to-date sets of reference sequences. Bioinformatics (Oxford, England), 36(Suppl_1), i12–i20. https://doi.org/10.1093/bioinformatics/btaa458 + - [Centrifuge](https://doi.org/10.1101/gr.210641.116) > Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729. https://doi.org/10.1101/gr.210641.116 diff --git a/README.md b/README.md index 16bd3fa..a5fffbe 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ 2. 
Builds databases for: - [Bracken](https://doi.org/10.7717/peerj-cs.104) - [Centrifuge](https://doi.org/10.1101/gr.210641.116) + - [ganon](https://doi.org/10.1093/bioinformatics/btaa458) - [DIAMOND](https://doi.org/10.1038/nmeth.3176) - [Kaiju](https://doi.org/10.1038/ncomms11257) - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0) diff --git a/docs/output.md b/docs/output.md index 9f5ddb0..762991f 100644 --- a/docs/output.md +++ b/docs/output.md @@ -14,7 +14,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution -- [Bracken](#bracken) - Database files for Brakcen +- [Bracken](#bracken) - Database files for Bracken +- [ganon](#ganon) - Database files for ganon - [Centrifuge](#centrifuge) - Database files for Centrifuge - [DIAMOND](#diamond) - Database files for DIAMOND - [Kaiju](#kaiju) - Database files for Kaiju @@ -92,6 +93,20 @@ The resulting `/` directory can be given to Bracken itself with `bracke A directory and `cf` files can be given to the Centrifuge command with `centrifuge -x ///` etc. +### Ganon + +[ganon](https://github.com/pirovc/ganon/) classifies genomic sequences against large sets of references efficiently, with integrated download and update of databases (refseq/genbank), taxonomic profiling (ncbi/gtdb), binning and hierarchical classification, customized reporting and more. + +
+Output files
+
+- `ganon/`
+  - `.hibf`: main bloom filter index file
+  - `.tax`: taxonomy tree used for taxonomy assignment
+
+
+The directory containing these two files can be given to ganon itself using the database name as a prefix, e.g., `ganon classify -d ///`.
+
 ### Diamond

 [DIAMOND](https://github.com/bbuchfink/diamond) is an accelerated BLAST compatible local sequence aligner particularly used for protein alignment.
diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
index 589b799..ce1edde 100644
--- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf
@@ -186,6 +186,7 @@ def toolCitationText() {
        "Tools used in the workflow included:",
        params.build_bracken ? "Bracken (Lu et al. 2017)," : "",
        params.build_centrifuge ? "Centrifuge (Kim et al. 2016)," : "",
+        params.build_ganon ? "ganon (Piro et al. 2020)," : "",
        params.build_diamond ? "DIAMOND (Buchfink et al. 2015)," : "",
        params.build_kaiju ? "Kaiju (Menzel et al. 2016)," : "",
        params.build_kraken2 ? "Kraken2 (Wood et al. 2019)," : "",
@@ -205,6 +206,7 @@ def toolBibliographyText() {
    def reference_text = [
        params.build_bracken ? '
  • Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. 10.7717/peerj-cs.104
  • ' : "", params.build_centrifuge ? '
  • Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729. 10.1101/gr.210641.116
  • ' : "", + params.build_ganon ? "
  • Piro, V. C., Dadi, T. H., Seiler, E., Reinert, K., & Renard, B. Y. (2020). Ganon: Precise metagenomics classification against large and up-to-date sets of reference sequences. Bioinformatics (Oxford, England), 36(Suppl_1), i12–i20. 10.1093/bioinformatics/btaa458
  • " : "", params.build_diamond ? '
  • Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. 10.1038/nmeth.3176
  • ' : "", params.build_kaiju ? '
  • Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. 10.1038/ncomms11257
  • ' : "", params.build_kraken2 ? '
  • Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257. 10.1186/s13059-019-1891-0
  • ' : "", From 6ac547b9ea63231c79435bd963127e33ee06c7e5 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Nov 2024 14:01:03 +0100 Subject: [PATCH 15/31] Fix mangled regex strings --- subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf index ce1edde..7fb98b7 100644 --- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf @@ -245,7 +245,7 @@ def methodsDescriptionText(mqc_methods_yaml) { meta["tool_citations"] = "" meta["tool_bibliography"] = "" - meta["tool_citations"] = toolCitationText().replaceAll(', .', ".").replaceAll('. .', ' .').replaceAll(', .', '.') + meta["tool_citations"] = toolCitationText().replaceAll(', \\.', ".").replaceAll('. \\.', ' .').replaceAll(', \\.', '.') meta["tool_bibliography"] = toolBibliographyText() From 99ca0f21dde57139a96b92bf6f68c15c99005e79 Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Thu, 28 Nov 2024 09:52:43 +0100 Subject: [PATCH 16/31] update malt/build module --- modules.json | 2 +- modules/nf-core/malt/build/environment.yml | 2 +- modules/nf-core/malt/build/main.nf | 10 ++++---- modules/nf-core/malt/build/meta.yml | 2 ++ modules/nf-core/malt/build/tests/main.nf.test | 19 ++++++++++----- .../malt/build/tests/main.nf.test.snap | 24 +++++++++---------- .../nf-core/malt/build/tests/nextflow.config | 5 +--- 7 files changed, 35 insertions(+), 29 deletions(-) diff --git a/modules.json b/modules.json index ebd0e4f..fc50554 100644 --- a/modules.json +++ b/modules.json @@ -63,7 +63,7 @@ }, "malt/build": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "47f728e7a9af5a327def60928a43cb9b20da61ba", "installed_by": ["modules"] }, "multiqc": { diff --git a/modules/nf-core/malt/build/environment.yml b/modules/nf-core/malt/build/environment.yml index 15a7750..2e3a66a 100644 --- a/modules/nf-core/malt/build/environment.yml +++ b/modules/nf-core/malt/build/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::malt=0.61 + - bioconda::malt=0.62 diff --git a/modules/nf-core/malt/build/main.nf b/modules/nf-core/malt/build/main.nf index 710c82e..b0f2249 100644 --- a/modules/nf-core/malt/build/main.nf +++ b/modules/nf-core/malt/build/main.nf @@ -2,11 +2,11 @@ process MALT_BUILD { label 'process_high' conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' - : 'biocontainers/malt:0.61--hdfd78af_0'}" + ? 
'https://depot.galaxyproject.org/singularity/malt:0.62--hdfd78af_0' + : 'biocontainers/malt:0.62--hdfd78af_0'}" input: - path fastas + path fastas, stageAs: 'fa_folder/' path gff path mapping_db @@ -24,12 +24,12 @@ process MALT_BUILD { """ malt-build \\ + ${args} \\ -v \\ - --input ${fastas.join(' ')} \\ + --input fa_folder \\ ${igff} \\ -d 'malt_index/' \\ -t ${task.cpus} \\ - ${args} \\ -mdb ${mapping_db}/*.db |&tee malt-build.log cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/malt/build/meta.yml b/modules/nf-core/malt/build/meta.yml index da1fc6c..099030a 100644 --- a/modules/nf-core/malt/build/meta.yml +++ b/modules/nf-core/malt/build/meta.yml @@ -51,5 +51,7 @@ output: pattern: "malt-build.log" authors: - "@jfy133" + - "@LilyAnderssonLee" maintainers: - "@jfy133" + - "@LilyAnderssonLee" diff --git a/modules/nf-core/malt/build/tests/main.nf.test b/modules/nf-core/malt/build/tests/main.nf.test index 2294602..a2a0db8 100644 --- a/modules/nf-core/malt/build/tests/main.nf.test +++ b/modules/nf-core/malt/build/tests/main.nf.test @@ -3,7 +3,6 @@ nextflow_process { name "Test Process MALT_BUILD" script "../main.nf" process "MALT_BUILD" - config "./nextflow.config" tag "modules" tag "modules_nfcore" @@ -16,18 +15,24 @@ nextflow_process { script "../../../unzip/main.nf" process { """ - input[0] = [[], file("s3://ngi-igenomes/test-data/createtaxdb/taxonomy/megan-nucl-Feb2022.db.zip", checkIfExists: true)] + input[0] = [[], file("https://ngi-igenomes.s3.eu-west-1.amazonaws.com/test-data/createtaxdb/taxonomy/megan-nucl-Feb2022.db.zip", checkIfExists: true)] """ } } } test("sarscov2 - fastq") { - + config "./nextflow.config" when { + params { + module_args = '-J-Xmx6G --sequenceType DNA' + } process { """ - input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + def genome_1 = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/fasta/sarscov2.fasta', checkIfExists: true ) + def genome_2 = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true ) + + input[0] = [genome_1,genome_2] input[1] = [] input[2] = UNZIP.out.unzipped_archive.map { it[1] } """ @@ -59,13 +64,15 @@ nextflow_process { } test("sarscov2 - fastq - stub") { - options "-stub" when { process { """ - input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + def genome_1 = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/fasta/sarscov2.fasta', checkIfExists: true ) + def genome_2 = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true ) + + input[0] = [genome_1,genome_2] input[1] = [] input[2] = UNZIP.out.unzipped_archive.map { it[1] } """ diff --git a/modules/nf-core/malt/build/tests/main.nf.test.snap b/modules/nf-core/malt/build/tests/main.nf.test.snap index 421a154..9ff080c 100644 --- a/modules/nf-core/malt/build/tests/main.nf.test.snap +++ b/modules/nf-core/malt/build/tests/main.nf.test.snap @@ -8,7 +8,7 @@ ] ], "1": [ - "versions.yml:md5,52c299d59c90219b9b442ee54f1acc97" + "versions.yml:md5,4ad582e415ed27dd4a275a149209961b" ], "2": [ "malt-build.log:md5,d41d8cd98f00b204e9800998ecf8427e" @@ -22,33 +22,33 @@ "malt-build.log:md5,d41d8cd98f00b204e9800998ecf8427e" ], "versions": [ - "versions.yml:md5,52c299d59c90219b9b442ee54f1acc97" + "versions.yml:md5,4ad582e415ed27dd4a275a149209961b" ] } ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": 
"24.10.0" }, - "timestamp": "2024-01-26T11:49:19.685017271" + "timestamp": "2024-11-15T12:14:24.942518" }, "sarscov2 - fastq": { "content": [ - "index0.idx:md5,1954f2c00b418d00112829b0a6adb8ce", - "ref.db:md5,772a09aeb162515485b037604399f2bd", - "ref.idx:md5,7dea362b3fac8e00956a4952a3d4f474", - "ref.inf:md5,b146842067cf278ef1d23e6c2e7c0c35", - "taxonomy.idx:md5,bb335e7c378a5bd85761b6eeed16d984", + "index0.idx:md5,0ba3d8bfb7ef28d08e2a005dd3405c55", + "ref.db:md5,6b36ae031c49feaae50f4cea07d9c7f4", + "ref.idx:md5,8ba66cdf65181c7efee1d366574cb9d7", + "ref.inf:md5,042712533a0187b6566db67c6503a71e", + "taxonomy.idx:md5,e7ce35e6238f39fa0c236fcf991546e4", "taxonomy.map:md5,5bb3f2192e925bca2e61e4b54f1671e0", "taxonomy.tre:md5,f76fb2d5aa9b0d637234d48175841e0e", [ - "versions.yml:md5,52c299d59c90219b9b442ee54f1acc97" + "versions.yml:md5,4ad582e415ed27dd4a275a149209961b" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "24.10.0" }, - "timestamp": "2024-06-30T19:04:45.72181253" + "timestamp": "2024-11-15T12:13:59.078459" } } \ No newline at end of file diff --git a/modules/nf-core/malt/build/tests/nextflow.config b/modules/nf-core/malt/build/tests/nextflow.config index c538bb5..f1c7169 100644 --- a/modules/nf-core/malt/build/tests/nextflow.config +++ b/modules/nf-core/malt/build/tests/nextflow.config @@ -1,8 +1,5 @@ process { - - withName: MALT_BUILD { - ext.args = "--sequenceType DNA" + ext.args = params.module_args } - } From 195570784ac646984c1b65c146dc974a22b87eee Mon Sep 17 00:00:00 2001 From: alxndrdiaz Date: Mon, 14 Oct 2024 21:53:46 -0600 Subject: [PATCH 17/31] add module parameters --- conf/modules.config | 9 ++++++--- nextflow.config | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c85a699..d94994a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -66,8 +66,8 @@ process { withName: KRAKEN2_BUILD { ext.args = { "${params.kraken2_build_params}" } - } - + } + withName: CENTRIFUGE_BUILD { ext.args = { "${params.centrifuge_build_params}" } } @@ -79,5 +79,8 @@ process { withName: KAIJU_MKFMI { ext.args = { "${params.kaiju_build_params}" } } - + + withName: MALT_BUILD { + ext.args = { "${params.malt_build_params}" } + } } diff --git a/nextflow.config b/nextflow.config index 4124e98..359cca8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -60,7 +60,7 @@ params { // tool specific options build_bracken = false build_diamond = false - build_ganon = false + build_ganon = false build_kaiju = false build_malt = false malt_build_params = "--sequenceType DNA" From 2d944fd49726377a483034a7505e56a892978221 Mon Sep 17 00:00:00 2001 From: alxndrdiaz Date: Wed, 16 Oct 2024 12:01:45 -0600 Subject: [PATCH 18/31] empty string instead of null --- conf/modules.config | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index d94994a..be445a2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -66,8 +66,8 @@ process { withName: KRAKEN2_BUILD { ext.args = { "${params.kraken2_build_params}" } - } - + } + withName: CENTRIFUGE_BUILD { ext.args = { "${params.centrifuge_build_params}" } } @@ -83,4 +83,5 @@ process { withName: MALT_BUILD { ext.args = { "${params.malt_build_params}" } } + } From 7ac5212f44628c9d379ddf0e426c31e2e2949f8c Mon Sep 17 00:00:00 2001 From: alxndrdiaz Date: Thu, 12 Dec 2024 09:32:26 -0600 Subject: [PATCH 19/31] linting --- nextflow.config | 2 +- nextflow_schema.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) 
diff --git a/nextflow.config b/nextflow.config index 359cca8..7d82c12 100644 --- a/nextflow.config +++ b/nextflow.config @@ -294,4 +294,4 @@ validation { } // Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' \ No newline at end of file +includeConfig 'conf/modules.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 24b8648..964ca2f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -178,7 +178,7 @@ "description": "Specify parameters being given to kaiju-mkbwt", "help_text": "See [Kaiju documentation](https://github.com/bioinformatics-centre/kaiju/tree/master#custom-database)" }, - "malt_build_params": { + "malt_build_params": { "type": "string", "description": "Specify parameters for malt-build", "help_text": "At least the type of reference sequences should be specified. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\n> Modifies tool(s) parameter(s)\n> - malt-build: `--sequenceType` " From 9d09377fff1ffb6f6d43e86b3722c13fbeb5bc11 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 19 Dec 2024 11:40:42 +0000 Subject: [PATCH 20/31] Move MALT check to pipeline initilaisation --- conf/modules.config | 11 ++-------- .../utils_nfcore_createtaxdb_pipeline/main.nf | 21 ++++++++++++++----- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 5533b21..708bf7f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -47,17 +47,10 @@ process { ] } - - if(params.build_malt && (params.malt_build_params.contains('--sequenceType DNA') || params.malt_build_params.contains('--sequenceType Protein'))) { - withName: MALT_BUILD { - ext.args = { "${params.malt_build_params}" } - } - } - else { - println "Please specify the input sequence type using --sequenceType DNA or --sequenceType Protein" + withName: MALT_BUILD { + ext.args = { "${params.malt_build_params}" } } - withName: GANON_BUILD { ext.args = { "--verbose" } } diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf index 7fb98b7..9b9b7a8 100644 --- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf @@ -43,7 +43,7 @@ workflow PIPELINE_INITIALISATION { version, true, outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1, ) // @@ -52,7 +52,7 @@ workflow PIPELINE_INITIALISATION { UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, - null + null, ) // @@ -62,6 +62,11 @@ workflow PIPELINE_INITIALISATION { nextflow_cli_args ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + // // Create channel from input file provided through params.input // @@ -104,7 +109,7 @@ workflow PIPELINE_COMPLETION { plaintext_email, outdir, monochrome_logs, - multiqc_report.toList() + multiqc_report.toList(), ) } @@ -130,6 +135,8 @@ workflow PIPELINE_COMPLETION { // def validateInputParameters() { + println('CHECKING INPUT') + // Validate CENTRIFUGE auxiliary file combinations if (params.build_centrifuge && [!params.nucl2taxid, !params.nodesdmp, !params.namesdmp].any()) { error('[nf-core/createtaxdb] Supplied --build_centrifuge, but missing at least one of: --nucl2taxid, --nodesdmp, or --namesdmp (all are mandatory for CENTRIFUGE)') @@ -159,6 +166,10 @@ def 
validateInputParameters() { if (params.build_krakenuniq && [!params.malt_mapdb].any()) { error('[nf-core/createtaxdb] Supplied --build_malt, but missing: --malt_mapdb (all are mandatory for MALT)') } + + if (params.build_malt && !(params.malt_build_params.contains('--sequenceType DNA') || params.malt_build_params.contains('--sequenceType Protein'))) { + error('[nf-core/createtaxdb] Supplied --build_malt, but --malt_build_params must contain at a minimum malt-build parameters --sequenceType DNA or --sequenceType Protein') + } } // @@ -193,7 +204,7 @@ def toolCitationText() { params.build_krakenuniq ? "KrakenUniq (Breitwieser et al. 2018)," : "", params.build_malt ? "MALT (Vågene et al. 2018)," : "", "and MultiQC (Ewels et al. 2016)", - "." + ".", ].join(' ').trim() return citation_text @@ -212,7 +223,7 @@ def toolBibliographyText() { params.build_kraken2 ? '
  • Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257. 10.1186/s13059-019-1891-0
  • ' : "", params.build_krakenuniq ? '
  • Breitwieser, F. P., Baker, D. N., & Salzberg, S. L. (2018). KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology, 19(1), 198. 10.1186/s13059-018-1568-0
  • ' : "", params.build_malt ? '
  • Vågene, Å. J., Herbig, A., Campana, M. G., Robles García, N. M., Warinner, C., Sabin, S., Spyrou, M. A., Andrades Valtueña, A., Huson, D., Tuross, N., Bos, K. I., & Krause, J. (2018). Salmonella enterica genomes from victims of a major sixteenth-century epidemic in Mexico. Nature Ecology & Evolution, 2(3), 520–528. 10.1038/s41559-017-0446-6
  • ' : "", - '
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • ' + '
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • ', ].join(' ').trim() return reference_text From 590e51e84c101fb5cc101ad9812b0711d520e8b8 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 19 Dec 2024 12:02:46 +0000 Subject: [PATCH 21/31] Add missing flags --- conf/modules.config | 36 ++-- nextflow.config | 33 +-- nextflow_schema.json | 34 ++-- .../nf-core/utils_nfcore_pipeline/main.nf | 191 +++++++++--------- tests/test.nf.test.snap | 8 +- 5 files changed, 159 insertions(+), 143 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 708bf7f..e839bb0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -15,7 +15,7 @@ process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] withName: MULTIQC { @@ -23,7 +23,7 @@ process { publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } @@ -33,7 +33,7 @@ process { path: { "${params.outdir}/cat" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_concatenated_fastas + enabled: params.save_concatenated_fastas, ] } @@ -43,26 +43,14 @@ process { path: { "${params.outdir}/cat" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_concatenated_fastas + enabled: params.save_concatenated_fastas, ] } - withName: MALT_BUILD { - ext.args = { "${params.malt_build_params}" } - } - - withName: GANON_BUILD { - ext.args = { "--verbose" } - } - withName: BRACKEN_BUILD { ext.args = { "${params.bracken_build_params}" } } - withName: KRAKEN2_BUILD { - ext.args = { "${params.kraken2_build_params}" } - } - withName: CENTRIFUGE_BUILD { ext.args = { "${params.centrifuge_build_params}" } } @@ -71,7 +59,23 @@ process { ext.args = { "${params.diamond_build_params}" } } + withName: GANON_BUILD { + ext.args = { "${params.ganon_build_params}" } + } + withName: KAIJU_MKFMI { ext.args = { "${params.kaiju_build_params}" } } + + withName: KRAKEN2_BUILD { + ext.args = { "${params.kraken2_build_params}" } + } + + withName: KRAKENUNIQ_BUILD { + ext.args = { "${params.krakenuniq_build_params}" } + } + + withName: MALT_BUILD { + ext.args = { "${params.malt_build_params}" } + } } diff --git a/nextflow.config b/nextflow.config index 7d82c12..25cd41f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -58,22 +58,23 @@ params { malt_mapdb = null // tool specific options - build_bracken = false - build_diamond = false - build_ganon = false - build_kaiju = false - build_malt = false - malt_build_params = "--sequenceType DNA" - build_centrifuge = false - build_kraken2 = false - kraken2_keepintermediate = false - build_krakenuniq = false - bracken_build_params = '' - kraken2_build_params = '' - centrifuge_build_params = '' - diamond_build_params = '' - kaiju_build_params = '' - + build_bracken = false + build_diamond = false + build_ganon = false + build_kaiju = false + build_malt = false + build_centrifuge = false + build_kraken2 = false + kraken2_keepintermediate = false + build_krakenuniq = false + bracken_build_params = '' + centrifuge_build_params = '' + diamond_build_params = '' + ganon_build_params = '' + 
kaiju_build_params = '' + kraken2_build_params = '' + krakenuniq_build_params = '' + malt_build_params = "--sequenceType DNA" } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 964ca2f..fb8ca8b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -123,11 +123,6 @@ "fa_icon": "fas fa-toggle-on", "description": "Turn on building of DIAMOND database. Requires amino-acid FASTA file input." }, - "build_malt": { - "type": "boolean", - "fa_icon": "fas fa-toggle-on", - "description": "Turn on building of MALT database. Requires nucleotide FASTA file input." - }, "build_ganon": { "type": "boolean", "description": "Turn on building of ganon database. Requires nucleotide FASTA file input.", @@ -153,16 +148,16 @@ "fa_icon": "fas fa-toggle-on", "description": "Turn on building of KrakenUniq database. Requires nucleotide FASTA file input." }, + "build_malt": { + "type": "boolean", + "fa_icon": "fas fa-toggle-on", + "description": "Turn on building of MALT database. Requires nucleotide FASTA file input." + }, "bracken_build_params": { "type": "string", "description": "Specify parameters being given to bracken build", "help_text": "See [Bracken documentation](https://github.com/jenniferlu717/Bracken?tab=readme-ov-file#step-1-generate-the-bracken-database-file-databasexmerskmer_distrib)" }, - "kraken2_build_params": { - "type": "string", - "description": "Specify parameters being given to kraken2 build", - "help_text": "See [Kraken2 documentation](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown#custom-databases)" - }, "centrifuge_build_params": { "type": "string", "description": "Specify parameters being given to centrifuge-build", @@ -173,15 +168,30 @@ "description": "Specify parameters being given to diamond makedb", "help_text": "See [diamond documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options)" }, + "ganon_build_params": { + "type": "string", + "description": "Specify parameters being given to ganon buildcustom", + "help_text": "See [ganon documentation](https://pirovc.github.io/ganon/custom_databases/)" + }, "kaiju_build_params": { "type": "string", "description": "Specify parameters being given to kaiju-mkbwt", "help_text": "See [Kaiju documentation](https://github.com/bioinformatics-centre/kaiju/tree/master#custom-database)" }, + "kraken2_build_params": { + "type": "string", + "description": "Specify parameters being given to kraken2 build", + "help_text": "See [Kraken2 documentation](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown#custom-databases)" + }, + "krakenuniq_build_params": { + "type": "string", + "description": "Specify parameters being given to krakenuniq build", + "help_text": "See [KrakenUniq documentation](https://github.com/fbreitwieser/krakenuniq?tab=readme-ov-file#database-building)" + }, "malt_build_params": { "type": "string", - "description": "Specify parameters for malt-build", - "help_text": "At least the type of reference sequences should be specified. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\n> Modifies tool(s) parameter(s)\n> - malt-build: `--sequenceType` " + "description": "Specify parameters given to malt-build. Must include --sequenceType DNA or Protein.", + "help_text": "At least the type of reference sequences should be specified. 
(For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/)." } }, "fa_icon": "fas fa-database" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 5cb7baf..521135d 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -140,12 +140,12 @@ def paramsSummaryMultiqc(summary_params) { } def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" return yaml_file_text } @@ -153,7 +153,7 @@ def paramsSummaryMultiqc(summary_params) { // // nf-core logo // -def nfCoreLogo(monochrome_logs=true) { +def nfCoreLogo(monochrome_logs = true) { def colors = logColours(monochrome_logs) as Map String.format( """\n @@ -172,7 +172,7 @@ def nfCoreLogo(monochrome_logs=true) { // // Return dashed line // -def dashedLine(monochrome_logs=true) { +def dashedLine(monochrome_logs = true) { def colors = logColours(monochrome_logs) as Map return "-${colors.dim}----------------------------------------------------${colors.reset}-" } @@ -180,67 +180,67 @@ def dashedLine(monochrome_logs=true) { // // ANSII colours used for terminal logging // -def logColours(monochrome_logs=true) { +def logColours(monochrome_logs = true) { def colorcodes = [:] as Map // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? 
'' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" return colorcodes } @@ -272,7 +272,7 @@ def attachMultiqcReport(multiqc_report) { // // Construct and send completion email // -def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs = true, multiqc_report = null) { // Set up the e-mail variables def subject = "[${workflow.manifest.name}] Successful: ${workflow.runName}" @@ -289,35 +289,35 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi } def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId + misc_fields['Pipeline script hash ID'] = workflow.scriptId if (workflow.repository) { - misc_fields['Pipeline repository Git URL'] = workflow.repository + misc_fields['Pipeline repository Git URL'] = workflow.repository } if (workflow.commitId) { misc_fields['Pipeline repository Git Commit'] = workflow.commitId } if (workflow.revision) { - misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Pipeline Git branch/tag'] = workflow.revision } - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp def email_fields = [:] - email_fields['version'] = getWorkflowVersion() - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields // On success try attach the multiqc report def mqc_report = attachMultiqcReport(multiqc_report) @@ -329,29 +329,30 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi } // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() + def email_txt = txt_template.toString() // 
Render the HTML template - def hf = new File("${workflow.projectDir}/assets/email_template.html") + def hf = new File("${workflow.projectDir}/assets/email_template.html") def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() + def email_html = html_template.toString() // Render the sendmail template def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] - def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() // Send the HTML e-mail def colors = logColours(monochrome_logs) as Map if (email_address) { try { if (plaintext_email) { -new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } + new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') + } // Try to send HTML e-mail using sendmail def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") sendmail_tf.withWriter { w -> w << sendmail_html } @@ -382,7 +383,7 @@ new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') // // Print pipeline summary on completion // -def completionSummary(monochrome_logs=true) { +def completionSummary(monochrome_logs = true) { def colors = logColours(monochrome_logs) as Map if (workflow.success) { if (workflow.stats.ignoredCount == 0) { @@ -410,44 +411,44 @@ def imNotification(summary_params, hook_url) { } def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId if (workflow.repository) { misc_fields['repository'] = workflow.repository } if (workflow.commitId) { - misc_fields['commitid'] = workflow.commitId + misc_fields['commitid'] = workflow.commitId } if (workflow.revision) { - misc_fields['revision'] = workflow.revision + misc_fields['revision'] = workflow.revision } - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp def msg_fields = [:] - msg_fields['version'] = getWorkflowVersion() - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success msg_fields['dateComplete'] = 
workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() + def engine = new groovy.text.GStringTemplateEngine() // Different JSON depending on the service provider // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() + def json_message = json_template.toString() // POST def post = new URL(hook_url).openConnection() diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap index dc60a3f..a1ce2b0 100644 --- a/tests/test.nf.test.snap +++ b/tests/test.nf.test.snap @@ -12,7 +12,7 @@ ], "database.dmnd:md5,b2ea49ef5490c526e2c56cae19bcb462", "database.hibf:md5,af913cecda744b02751e2f5320c35c7c", - "database.tax:md5,30f327fbe453aa1a981363fd9f4df21b", + "database.tax:md5,e041b05ce29813656f529560dc8a19ae", "database.fmi:md5,54fd89f5e4eab61af30175e8aa389598", "hash.k2d:md5,941118164b4bcc010593f7a7c7b30029", "opts.k2d", @@ -31,9 +31,9 @@ "taxonomy.tre:md5,f76fb2d5aa9b0d637234d48175841e0e" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-11-28T13:27:57.851046024" + "timestamp": "2024-12-19T11:45:30.380109094" } } \ No newline at end of file From 52cac9157d0ef52568b44bc39c63f55cafee4728 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 19 Dec 2024 12:18:13 +0000 Subject: [PATCH 22/31] Add caveat about double/single quotes for build params --- nextflow_schema.json | 83 ++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index fb8ca8b..a54d509 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -113,31 +113,67 @@ "description": "Turn on extending of Kraken2 database to include Bracken files. Requires nucleotide FASTA File input.", "help_text": "Bracken2 databases are simply just a Kraken2 database with two additional files.\n\nNote however this requires a Kraken2 database _with_ intermediate files still in it, thus can result in large database directories." }, + "bracken_build_params": { + "type": "string", + "description": "Specify parameters being given to bracken build. 
Must be wrapped in double and single quotes: --bracken_build_params \"'--your_param'\"", + "help_text": "See [Bracken documentation](https://github.com/jenniferlu717/Bracken?tab=readme-ov-file#step-1-generate-the-bracken-database-file-databasexmerskmer_distrib)", + "fa_icon": "fas fa-users-cog" + }, "build_centrifuge": { "type": "boolean", "description": "Turn on building of Centrifuge database. Requires nucleotide FASTA file input.", "fa_icon": "fas fa-toggle-on" }, + "centrifuge_build_params": { + "type": "string", + "description": "Specify parameters being given to centrifuge-build. Must be wrapped in double and single quotes: --centrifuge_build_params \"'--your_param'\"", + "help_text": "See [Centrifuge documentation](https://github.com/DaehwanKimLab/centrifuge/blob/master/MANUAL.markdown#database-download-and-index-building)", + "fa_icon": "fas fa-users-cog" + }, "build_diamond": { "type": "boolean", "fa_icon": "fas fa-toggle-on", "description": "Turn on building of DIAMOND database. Requires amino-acid FASTA file input." }, + "diamond_build_params": { + "type": "string", + "description": "Specify parameters being given to diamond makedb. Must be wrapped in double and single quotes: --diamond_build_params \"'--your_param'\"", + "help_text": "See [diamond documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options)", + "fa_icon": "fas fa-users-cog" + }, "build_ganon": { "type": "boolean", "description": "Turn on building of ganon database. Requires nucleotide FASTA file input.", "fa_icon": "fas fa-toggle-on" }, + "ganon_build_params": { + "type": "string", + "description": "Specify parameters being given to ganon buildcustom. Must be wrapped in double and single quotes: --ganon_build_params \"'--your_param'\"", + "help_text": "See [ganon documentation](https://pirovc.github.io/ganon/custom_databases/)", + "fa_icon": "fas fa-users-cog" + }, "build_kaiju": { "type": "boolean", "description": "Turn on building of Kaiju database. Requires amino-acid FASTA file input.", "fa_icon": "fas fa-toggle-on" }, + "kaiju_build_params": { + "type": "string", + "description": "Specify parameters being given to kaiju-mkbwt. Must be wrapped in double and single quotes: --kaiju_build_params \"'--your_param'\"", + "help_text": "See [Kaiju documentation](https://github.com/bioinformatics-centre/kaiju/tree/master#custom-database)", + "fa_icon": "fas fa-users-cog" + }, "build_kraken2": { "type": "boolean", "description": "Turn on building of Kraken2 database. Requires nucleotide FASTA file input.", "fa_icon": "fas fa-toggle-on" }, + "kraken2_build_params": { + "type": "string", + "description": "Specify parameters being given to kraken2 build. Must be wrapped in double and single quotes: --kraken2_build_params \"'--your_param'\"", + "help_text": "See [Kraken2 documentation](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown#custom-databases)", + "fa_icon": "fas fa-users-cog" + }, "kraken2_keepintermediate": { "type": "boolean", "fa_icon": "fas fa-save", @@ -148,50 +184,23 @@ "fa_icon": "fas fa-toggle-on", "description": "Turn on building of KrakenUniq database. Requires nucleotide FASTA file input." }, + "krakenuniq_build_params": { + "type": "string", + "description": "Specify parameters being given to krakenuniq build. 
Must be wrapped in double and single quotes: --krakenuniq_build_params \"'--your_param'\"", + "help_text": "See [KrakenUniq documentation](https://github.com/fbreitwieser/krakenuniq?tab=readme-ov-file#database-building)", + "fa_icon": "fas fa-users-cog" + }, "build_malt": { "type": "boolean", "fa_icon": "fas fa-toggle-on", "description": "Turn on building of MALT database. Requires nucleotide FASTA file input." }, - "bracken_build_params": { - "type": "string", - "description": "Specify parameters being given to bracken build", - "help_text": "See [Bracken documentation](https://github.com/jenniferlu717/Bracken?tab=readme-ov-file#step-1-generate-the-bracken-database-file-databasexmerskmer_distrib)" - }, - "centrifuge_build_params": { - "type": "string", - "description": "Specify parameters being given to centrifuge-build", - "help_text": "See [Centrifuge documentation](https://github.com/DaehwanKimLab/centrifuge/blob/master/MANUAL.markdown#database-download-and-index-building)" - }, - "diamond_build_params": { - "type": "string", - "description": "Specify parameters being given to diamond makedb", - "help_text": "See [diamond documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options)" - }, - "ganon_build_params": { - "type": "string", - "description": "Specify parameters being given to ganon buildcustom", - "help_text": "See [ganon documentation](https://pirovc.github.io/ganon/custom_databases/)" - }, - "kaiju_build_params": { - "type": "string", - "description": "Specify parameters being given to kaiju-mkbwt", - "help_text": "See [Kaiju documentation](https://github.com/bioinformatics-centre/kaiju/tree/master#custom-database)" - }, - "kraken2_build_params": { - "type": "string", - "description": "Specify parameters being given to kraken2 build", - "help_text": "See [Kraken2 documentation](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown#custom-databases)" - }, - "krakenuniq_build_params": { - "type": "string", - "description": "Specify parameters being given to krakenuniq build", - "help_text": "See [KrakenUniq documentation](https://github.com/fbreitwieser/krakenuniq?tab=readme-ov-file#database-building)" - }, "malt_build_params": { "type": "string", - "description": "Specify parameters given to malt-build. Must include --sequenceType DNA or Protein.", - "help_text": "At least the type of reference sequences should be specified. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/)." + "description": "Specify parameters given to malt-build. Must include --sequenceType DNA or Protein and be wrapped in double and single quotes: --malt_build_params \"'--sequenceType DNA --your_param'\"", + "help_text": "At least the type of reference sequences should be specified. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).", + "default": "--sequenceType DNA", + "fa_icon": "fas fa-users-cog" } }, "fa_icon": "fas fa-database" From 79739e0812bbe7013ab8ac9c45920f38e30d9bba Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 19 Dec 2024 12:19:32 +0000 Subject: [PATCH 23/31] Remove debuggin code --- subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf index 9b9b7a8..19fe859 100644 --- a/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createtaxdb_pipeline/main.nf @@ -135,8 +135,6 @@ workflow PIPELINE_COMPLETION { // def validateInputParameters() { - println('CHECKING INPUT') - // Validate CENTRIFUGE auxiliary file combinations if (params.build_centrifuge && [!params.nucl2taxid, !params.nodesdmp, !params.namesdmp].any()) { error('[nf-core/createtaxdb] Supplied --build_centrifuge, but missing at least one of: --nucl2taxid, --nodesdmp, or --namesdmp (all are mandatory for CENTRIFUGE)') From e3b8e9b5568323f35bbb2cb8adca3a95d26a33d3 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 19 Dec 2024 12:26:54 +0000 Subject: [PATCH 24/31] Add more documentation --- nextflow_schema.json | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index a54d509..eb59c15 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -115,8 +115,8 @@ }, "bracken_build_params": { "type": "string", - "description": "Specify parameters being given to bracken build. Must be wrapped in double and single quotes: --bracken_build_params \"'--your_param'\"", - "help_text": "See [Bracken documentation](https://github.com/jenniferlu717/Bracken?tab=readme-ov-file#step-1-generate-the-bracken-database-file-databasexmerskmer_distrib)", + "description": "Specify parameters being given to bracken build. Must be wrapped in single and double quotes: --bracken_build_params \"'--your_param'\"", + "help_text": "See [Bracken documentation](https://github.com/jenniferlu717/Bracken?tab=readme-ov-file#step-1-generate-the-bracken-database-file-databasexmerskmer_distrib).\n\nParameters must be wrapped in _both_ single and then double quotes outside these, to ensure the parameters are interpreted as a string for internal use rather than interpreted by the terminal as a pipeline-level parameter.", "fa_icon": "fas fa-users-cog" }, "build_centrifuge": { @@ -126,8 +126,8 @@ }, "centrifuge_build_params": { "type": "string", - "description": "Specify parameters being given to centrifuge-build. Must be wrapped in double and single quotes: --centrifuge_build_params \"'--your_param'\"", - "help_text": "See [Centrifuge documentation](https://github.com/DaehwanKimLab/centrifuge/blob/master/MANUAL.markdown#database-download-and-index-building)", + "description": "Specify parameters being given to centrifuge-build. Must be wrapped in single and double quotes: --centrifuge_build_params \"'--your_param'\"", + "help_text": "See [Centrifuge documentation](https://github.com/DaehwanKimLab/centrifuge/blob/master/MANUAL.markdown#database-download-and-index-building).\n\nParameters must be wrapped in _both_ single and then double quotes outside these, to ensure the parameters are interpreted as a string for internal use rather than interpreted by the terminal as a pipeline-level parameter.", "fa_icon": "fas fa-users-cog" }, "build_diamond": { @@ -137,8 +137,8 @@ }, "diamond_build_params": { "type": "string", - "description": "Specify parameters being given to diamond makedb. 
Must be wrapped in double and single quotes: --diamond_build_params \"'--your_param'\"", - "help_text": "See [diamond documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options)", + "description": "Specify parameters being given to diamond makedb. Must be wrapped in single and double quotes: --diamond_build_params \"'--your_param'\"", + "help_text": "See [diamond documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options).\n\nParameters must be wrapped in _both_ single and then double quotes outside these, to ensure the parameters are interpreted as a string for internal use rather than interpreted by the terminal as a pipeline-level parameter.", "fa_icon": "fas fa-users-cog" }, "build_ganon": { @@ -148,8 +148,8 @@ }, "ganon_build_params": { "type": "string", - "description": "Specify parameters being given to ganon buildcustom. Must be wrapped in double and single quotes: --ganon_build_params \"'--your_param'\"", - "help_text": "See [ganon documentation](https://pirovc.github.io/ganon/custom_databases/)", + "description": "Specify parameters being given to ganon buildcustom. Must be wrapped in single and double quotes: --ganon_build_params \"'--your_param'\"", + "help_text": "See [ganon documentation](https://pirovc.github.io/ganon/custom_databases/).\n\nParameters must be wrapped in _both_ single and then double quotes outside these, to ensure the parameters are interpreted as a string for internal use rather than interpreted by the terminal as a pipeline-level parameter.", "fa_icon": "fas fa-users-cog" }, "build_kaiju": { @@ -159,8 +159,8 @@ }, "kaiju_build_params": { "type": "string", - "description": "Specify parameters being given to kaiju-mkbwt. Must be wrapped in double and single quotes: --kaiju_build_params \"'--your_param'\"", - "help_text": "See [Kaiju documentation](https://github.com/bioinformatics-centre/kaiju/tree/master#custom-database)", + "description": "Specify parameters being given to kaiju-mkbwt. Must be wrapped in single and double quotes: --kaiju_build_params \"'--your_param'\"", + "help_text": "See [Kaiju documentation](https://github.com/bioinformatics-centre/kaiju/tree/master#custom-database).\n\nParameters must be wrapped in _both_ single and then double quotes outside these, to ensure the parameters are interpreted as a string for internal use rather than interpreted by the terminal as a pipeline-level parameter.", "fa_icon": "fas fa-users-cog" }, "build_kraken2": { @@ -170,8 +170,8 @@ }, "kraken2_build_params": { "type": "string", - "description": "Specify parameters being given to kraken2 build. Must be wrapped in double and single quotes: --kraken2_build_params \"'--your_param'\"", - "help_text": "See [Kraken2 documentation](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown#custom-databases)", + "description": "Specify parameters being given to kraken2 build. 
Must be wrapped in single and double quotes: --kraken2_build_params \"'--your_param'\"", + "help_text": "See [Kraken2 documentation](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown#custom-databases).\n\nParameters must be wrapped in _both_ single and then double quotes outside these, to ensure the parameters are interpreted as a string for internal use rather than interpreted by the terminal as a pipeline-level parameter.", "fa_icon": "fas fa-users-cog" }, "kraken2_keepintermediate": { @@ -186,8 +186,8 @@ }, "krakenuniq_build_params": { "type": "string", - "description": "Specify parameters being given to krakenuniq build. Must be wrapped in double and single quotes: --krakenuniq_build_params \"'--your_param'\"", - "help_text": "See [KrakenUniq documentation](https://github.com/fbreitwieser/krakenuniq?tab=readme-ov-file#database-building)", + "description": "Specify parameters being given to krakenuniq build. Must be wrapped in single and double quotes: --krakenuniq_build_params \"'--your_param'\"", + "help_text": "See [KrakenUniq documentation](https://github.com/fbreitwieser/krakenuniq?tab=readme-ov-file#database-building).\n\nParameters must be wrapped in _both_ single and then double quotes outside these, to ensure the parameters are interpreted as a string for internal use rather than interpreted by the terminal as a pipeline-level parameter.", "fa_icon": "fas fa-users-cog" }, "build_malt": { @@ -198,7 +198,7 @@ "malt_build_params": { "type": "string", "description": "Specify parameters given to malt-build. Must include --sequenceType DNA or Protein and be wrapped in double and single quotes: --malt_build_params \"'--sequenceType DNA --your_param'\"", - "help_text": "At least the type of reference sequences should be specified. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).", + "help_text": "At least the type of reference sequences should be specified. (For RNA sequences, use the DNA setting) - from [MALT manual](https://software-ab.cs.uni-tuebingen.de/download/malt/).\n\nParameters must be wrapped in _both_ single and then double quotes outside these, to ensure the parameters are interpreted as a string for internal use rather than interpreted by the terminal as a pipeline-level parameter.", "default": "--sequenceType DNA", "fa_icon": "fas fa-users-cog" } From f91664f0e57f00e78028a8712d1828d3936238d2 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 19 Dec 2024 12:30:55 +0000 Subject: [PATCH 25/31] Try and get Krakenuniq towork --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index e839bb0..b40ddaa 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -72,7 +72,7 @@ process { } withName: KRAKENUNIQ_BUILD { - ext.args = { "${params.krakenuniq_build_params}" } + ext.args = { "${params.krakenuniq_build_params} --jellyfish-bin type -P -a jellyfish)" } } withName: MALT_BUILD { From 58a0f702cb99b0c72869be7ae3058ec9fb2f4a60 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 19 Dec 2024 13:57:44 +0100 Subject: [PATCH 26/31] Apply suggestions from code review --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 3c6d679..7521f54 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -73,7 +73,7 @@ process { } withName: KRAKENUNIQ_BUILD { - ext.args = { "${params.krakenuniq_build_params} --jellyfish-bin type -P -a jellyfish)" } + ext.args = { "${params.krakenuniq_build_params}" } } withName: MALT_BUILD { From 1559578d1bb7733f9189de91ad2335e9fe11af21 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 20 Dec 2024 09:38:10 +0100 Subject: [PATCH 27/31] Remove affiliation Alexander --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index a22030c..fcf4b63 100644 --- a/nextflow.config +++ b/nextflow.config @@ -268,7 +268,7 @@ manifest { ], [ name: 'Alexander Ramos Díaz', - affiliation: 'Solena, Leon, Mexico', + affiliation: '', email: '', github: 'https://github.com/alxndrdiaz', contribution: ['contributor'], From 479f5d580721eca8f4ce6f3534c6a0794247c720 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 20 Dec 2024 09:39:43 +0100 Subject: [PATCH 28/31] Specify path to jellyfish bin --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 7521f54..7ea477a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -73,7 +73,7 @@ process { } withName: KRAKENUNIQ_BUILD { - ext.args = { "${params.krakenuniq_build_params}" } + ext.args = { "${params.krakenuniq_build_params} --jellyfish-bin $(type -P -a jellyfish)" } } withName: MALT_BUILD { From 167a25da67fdf49d1e4f8011f4eb9ab86123e605 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 20 Dec 2024 09:43:31 +0100 Subject: [PATCH 29/31] Escape bash variable --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 7ea477a..32f6b6c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -73,7 +73,7 @@ process { } withName: KRAKENUNIQ_BUILD { - ext.args = { "${params.krakenuniq_build_params} --jellyfish-bin $(type -P -a jellyfish)" } + ext.args = { "${params.krakenuniq_build_params} --jellyfish-bin \$(type -P -a jellyfish)" } } withName: MALT_BUILD { From 4d4a905c551463cbbae961ee33c5d58142bf1ec4 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 20 Dec 2024 09:29:13 +0000 Subject: [PATCH 30/31] Get jellyfish working with the bin --- conf/modules.config | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 32f6b6c..e8c4992 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,9 +18,8 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } - + withName: MULTIQC { + ext.args = { params.multiqc_title ? 
"--title \"${params.multiqc_title}\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, @@ -73,7 +72,7 @@ process { } withName: KRAKENUNIQ_BUILD { - ext.args = { "${params.krakenuniq_build_params} --jellyfish-bin \$(type -P -a jellyfish)" } + ext.args = { "--jellyfish-bin \"\$(which jellyfish)\" ${params.krakenuniq_build_params}" } } withName: MALT_BUILD { From 8c7c80392d82a6f91af643f3fbd824e9530f78b2 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 20 Dec 2024 11:56:35 +0000 Subject: [PATCH 31/31] Move test profile params for KU to new build params --- conf/modules.config | 2 +- conf/test.config | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index e8c4992..e839bb0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -72,7 +72,7 @@ process { } withName: KRAKENUNIQ_BUILD { - ext.args = { "--jellyfish-bin \"\$(which jellyfish)\" ${params.krakenuniq_build_params}" } + ext.args = { "${params.krakenuniq_build_params}" } } withName: MALT_BUILD { diff --git a/conf/test.config b/conf/test.config index d582ba1..5bd05e9 100644 --- a/conf/test.config +++ b/conf/test.config @@ -14,7 +14,7 @@ process { resourceLimits = [ cpus: 4, memory: '15.GB', - time: '1.h' + time: '1.h', ] } @@ -38,6 +38,8 @@ params { build_kraken2 = true build_krakenuniq = true + krakenuniq_build_params = "--work-on-disk --max-db-size 14 --kmer-len 15 --minimizer-len 13 --jellyfish-bin \"\$(which jellyfish)\"" + accession2taxid = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/nucl_gb.accession2taxid' nucl2taxid = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/nucl2tax.map' prot2taxid = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/prot.accession2taxid.gz' @@ -48,7 +50,6 @@ params { process { withName: KRAKENUNIQ_BUILD { - memory = { 12.GB * task.attempt } - ext.args = '--work-on-disk --max-db-size 14 --kmer-len 15 --minimizer-len 13 --jellyfish-bin $(which jellyfish)' + memory = { 12.GB * task.attempt } } }