From b442f47045bd0dcbb2754552ae0adb5adcae625b Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 9 Jan 2025 14:58:51 +0100 Subject: [PATCH 1/4] Decompress AA not compress to allow MALT building (need to test other profilers) --- workflows/createtaxdb.nf | 81 ++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf index f49d318..6bc79c1 100644 --- a/workflows/createtaxdb.nf +++ b/workflows/createtaxdb.nf @@ -4,29 +4,28 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createtaxdb_pipeline' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createtaxdb_pipeline' // Preprocessing -include { GUNZIP as GUNZIP_DNA } from '../modules/nf-core/gunzip/main' -include { PIGZ_COMPRESS as PIGZ_COMPRESS_DNA } from '../modules/nf-core/pigz/compress/main' -include { PIGZ_COMPRESS as PIGZ_COMPRESS_AA } from '../modules/nf-core/pigz/compress/main' -include { CAT_CAT as CAT_CAT_DNA } from '../modules/nf-core/cat/cat/main' -include { CAT_CAT as CAT_CAT_AA } from '../modules/nf-core/cat/cat/main' +include { GUNZIP as GUNZIP_DNA } from '../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_AA } from '../modules/nf-core/gunzip/main' +include { CAT_CAT as 
CAT_CAT_DNA } from '../modules/nf-core/cat/cat/main' +include { CAT_CAT as CAT_CAT_AA } from '../modules/nf-core/cat/cat/main' // Database building (with specific auxiliary modules) -include { CENTRIFUGE_BUILD } from '../modules/nf-core/centrifuge/build/main' -include { DIAMOND_MAKEDB } from '../modules/nf-core/diamond/makedb/main' -include { GANON_BUILDCUSTOM } from '../modules/nf-core/ganon/buildcustom/main' -include { KAIJU_MKFMI } from '../modules/nf-core/kaiju/mkfmi/main' -include { KRAKENUNIQ_BUILD } from '../modules/nf-core/krakenuniq/build/main' -include { UNZIP } from '../modules/nf-core/unzip/main' -include { MALT_BUILD } from '../modules/nf-core/malt/build/main' +include { CENTRIFUGE_BUILD } from '../modules/nf-core/centrifuge/build/main' +include { DIAMOND_MAKEDB } from '../modules/nf-core/diamond/makedb/main' +include { GANON_BUILDCUSTOM } from '../modules/nf-core/ganon/buildcustom/main' +include { KAIJU_MKFMI } from '../modules/nf-core/kaiju/mkfmi/main' +include { KRAKENUNIQ_BUILD } from '../modules/nf-core/krakenuniq/build/main' +include { UNZIP } from '../modules/nf-core/unzip/main' +include { MALT_BUILD } from '../modules/nf-core/malt/build/main' -include { FASTA_BUILD_ADD_KRAKEN2_BRACKEN } from '../subworkflows/nf-core/fasta_build_add_kraken2_bracken/main' +include { FASTA_BUILD_ADD_KRAKEN2_BRACKEN } from '../subworkflows/nf-core/fasta_build_add_kraken2_bracken/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -54,10 +53,13 @@ workflow CREATETAXDB { DATA PREPARATION ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - // PREPARE: Prepare input for single file inputs modules + def malt_build_mode = null + if (params.build_malt) { + malt_build_mode = params.malt_build_params.contains('--sequenceType Protein') ? 
'protein' : 'nucleotide' + } - if ([params.build_malt, params.build_centrifuge, params.build_kraken2, params.build_bracken, params.build_krakenuniq, params.build_ganon].any()) { + if ([(params.build_malt && malt_build_mode == 'nucleotide'), params.build_centrifuge, params.build_kraken2, params.build_bracken, params.build_krakenuniq, params.build_ganon].any()) { // Pull just DNA sequences ch_dna_refs_for_singleref = ch_samplesheet @@ -66,14 +68,14 @@ workflow CREATETAXDB { fasta_dna } - ch_dna_for_unzipping = ch_dna_refs_for_singleref.branch { meta, fasta -> + ch_dna_for_unzipping = ch_dna_refs_for_singleref.branch { _meta, fasta -> zipped: fasta.extension == 'gz' unzipped: true } GUNZIP_DNA(ch_dna_for_unzipping.zipped) ch_prepped_dna_fastas_ungrouped = GUNZIP_DNA.out.gunzip.mix(ch_dna_for_unzipping.unzipped) - ch_prepped_dna_fastas = ch_prepped_dna_fastas_ungrouped.map { meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() + ch_prepped_dna_fastas = ch_prepped_dna_fastas_ungrouped.map { _meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() ch_versions = ch_versions.mix(GUNZIP_DNA.out.versions.first()) // Place in single file @@ -87,22 +89,23 @@ workflow CREATETAXDB { // docs: https://github.com/bioinformatics-centre/kaiju#custom-database // docs: https://github.com/nf-core/test-datasets/tree/taxprofiler#kaiju // idea: try just appending `_` to end of each sequence header using a local sed module... 
it might be sufficient - if ([params.build_kaiju, params.build_diamond].any()) { + if ([(params.build_malt && malt_build_mode == 'protein'), params.build_kaiju, params.build_diamond].any()) { ch_aa_refs_for_singleref = ch_samplesheet - .map { meta, fasta_dna, fasta_aa -> [[id: params.dbname], fasta_aa] } - .filter { meta, fasta_aa -> + .map { _meta, _fasta_dna, fasta_aa -> [[id: params.dbname], fasta_aa] } + .filter { _meta, fasta_aa -> fasta_aa } - ch_aa_for_zipping = ch_aa_refs_for_singleref.branch { meta, fasta -> + ch_aa_for_unzipping = ch_aa_refs_for_singleref.branch { _meta, fasta -> zipped: fasta.extension == 'gz' unzipped: true } - PIGZ_COMPRESS_AA(ch_aa_for_zipping.unzipped) - ch_prepped_aa_fastas = PIGZ_COMPRESS_AA.out.archive.mix(ch_aa_for_zipping.zipped).groupTuple() - //ch_versions = ch_versions.mix( PIGZ_COMPRESS_AA.versions.first() ) + GUNZIP_AA(ch_aa_for_unzipping.zipped) + ch_prepped_aa_fastas_ungrouped = GUNZIP_AA.out.gunzip.mix(ch_aa_for_unzipping.unzipped) + ch_prepped_aa_fastas = ch_prepped_aa_fastas_ungrouped.map { _meta, fasta -> [[id: params.dbname], fasta] }.groupTuple() + ch_versions = ch_versions.mix(GUNZIP_AA.out.versions.first()) CAT_CAT_AA(ch_prepped_aa_fastas) ch_singleref_for_aa = CAT_CAT_AA.out.file_out @@ -148,7 +151,7 @@ workflow CREATETAXDB { .map { it.join("\t") } .collectFile( name: "ganon_fasta_input.tsv", - newLine: true + newLine: true, ) .map { [[id: params.dbname], it] @@ -209,17 +212,17 @@ workflow CREATETAXDB { // The map DB file comes zipped (for some reason) from MEGAN6 website if (file(params.malt_mapdb).extension == 'zip') { - ch_malt_mapdb = UNZIP([[], params.malt_mapdb]).unzipped_archive.map { meta, file -> [file] } + ch_malt_mapdb = UNZIP([[], params.malt_mapdb]).unzipped_archive.map { _meta, file -> [file] } } else { ch_malt_mapdb = file(params.malt_mapdb) } - if (params.malt_build_params.contains('--sequenceType Protein')) { - ch_input_for_malt = ch_prepped_aa_fastas.map { meta, file -> file } + if 
(malt_build_mode == 'protein') { + ch_input_for_malt = ch_prepped_aa_fastas.map { _meta, file -> file } } else { - ch_input_for_malt = ch_prepped_dna_fastas.map { meta, file -> file } + ch_input_for_malt = ch_prepped_dna_fastas.map { _meta, file -> file } } MALT_BUILD(ch_input_for_malt, [], ch_malt_mapdb) @@ -236,11 +239,9 @@ workflow CREATETAXDB { softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", - - name: 'nf_core_' + 'createtaxdb_software_' + 'mqc_' + 'versions.yml', - + name: 'nf_core_' + 'createtaxdb_software_' + 'mqc_' + 'versions.yml', sort: true, - newLine: true + newLine: true, ) .set { ch_collated_versions } @@ -278,7 +279,7 @@ workflow CREATETAXDB { ch_multiqc_files = ch_multiqc_files.mix( ch_methods_description.collectFile( name: 'methods_description_mqc.yaml', - sort: true + sort: true, ) ) @@ -288,7 +289,7 @@ workflow CREATETAXDB { ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), [], - [] + [], ) emit: From b41c213533b38a0394424e61c8de93732d4b23ab Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 9 Jan 2025 16:43:33 +0100 Subject: [PATCH 2/4] Update tests --- tests/test.nf.test.snap | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap index c6205b7..f7387ac 100644 --- a/tests/test.nf.test.snap +++ b/tests/test.nf.test.snap @@ -12,7 +12,7 @@ ], "database.dmnd:md5,b2ea49ef5490c526e2c56cae19bcb462", "database.hibf:md5,af913cecda744b02751e2f5320c35c7c", - "database.tax:md5,e041b05ce29813656f529560dc8a19ae", + "database.tax:md5,85d15469eb6fd11ed2a60890cfdeae82", "database.fmi:md5,54fd89f5e4eab61af30175e8aa389598", "hash.k2d:md5,941118164b4bcc010593f7a7c7b30029", "opts.k2d", @@ -31,9 +31,9 @@ "taxonomy.tre:md5,f76fb2d5aa9b0d637234d48175841e0e" ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.0", + "nextflow": "24.10.2" }, - "timestamp": "2024-12-19T11:45:30.380109094" + "timestamp": "2025-01-09T16:42:06.335852855" } -} +} \ No newline at end of file From bff51b5ad8c657161cd204df04cc1f9965b66654 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 9 Jan 2025 16:51:07 +0100 Subject: [PATCH 3/4] Add alternatives test for e.g. 
malt protein build --- conf/test_alternatives.config | 56 ++++++++++++++++++++++++++++ nextflow.config | 3 ++ tests/test_alternatives.nf.test | 38 +++++++++++++++++++ tests/test_alternatives.nf.test.snap | 19 ++++++++++ 4 files changed, 116 insertions(+) create mode 100644 conf/test_alternatives.config create mode 100644 tests/test_alternatives.nf.test create mode 100644 tests/test_alternatives.nf.test.snap diff --git a/conf/test_alternatives.config b/conf/test_alternatives.config new file mode 100644 index 0000000..20117a8 --- /dev/null +++ b/conf/test_alternatives.config @@ -0,0 +1,56 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/createtaxdb -profile test_alternatives,<docker/singularity> --outdir <OUTDIR> + + This config is for testing mutually exclusive options to those in the default `test` + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h', + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = params.pipelines_testdata_base_path + 'createtaxdb/samplesheets/test.csv' + + dbname = "database" + + build_bracken = false + build_diamond = false + build_ganon = false + build_kaiju = false + build_malt = true + build_centrifuge = false + build_kraken2 = false + build_krakenuniq = false + + krakenuniq_build_params = "--work-on-disk --max-db-size 14 --kmer-len 15 --minimizer-len 13 --jellyfish-bin \"\$(which jellyfish)\"" + malt_build_params = "--sequenceType Protein" + + accession2taxid = params.pipelines_testdata_base_path + 
'createtaxdb/data/taxonomy/nucl_gb.accession2taxid' + nucl2taxid = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/nucl2tax.map' + prot2taxid = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/prot.accession2taxid.gz' + nodesdmp = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/nodes.dmp' + namesdmp = params.pipelines_testdata_base_path + 'createtaxdb/data/taxonomy/names.dmp' + malt_mapdb = 's3://ngi-igenomes/test-data/createtaxdb/taxonomy/megan-nucl-Feb2022.db.zip' +} + +process { + withName: KRAKENUNIQ_BUILD { + memory = { 12.GB * task.attempt } + } +} diff --git a/nextflow.config b/nextflow.config index fcf4b63..4d5b410 100644 --- a/nextflow.config +++ b/nextflow.config @@ -196,6 +196,9 @@ profiles { test_nothing { includeConfig 'conf/test_nothing.config' } + test_alternatives { + includeConfig 'conf/test_alternatives.config' + } } // Load nf-core custom profiles from different Institutions diff --git a/tests/test_alternatives.nf.test b/tests/test_alternatives.nf.test new file mode 100644 index 0000000..75b83c1 --- /dev/null +++ b/tests/test_alternatives.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_CREATETAXDB" + script "main.nf" + tag "pipeline" + tag "nfcore_createtaxdb" + tag "test_alternatives" + + test("test_alternatives_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/malt/malt-build.log").readLines().last().contains('Peak memory'), + path("$outputDir/malt/malt_index/index0.idx"), + path("$outputDir/malt/malt_index/ref.db"), + path("$outputDir/malt/malt_index/ref.idx"), + path("$outputDir/malt/malt_index/ref.inf"), + path("$outputDir/malt/malt_index/taxonomy.idx"), + path("$outputDir/malt/malt_index/taxonomy.map"), + path("$outputDir/malt/malt_index/taxonomy.tre") + ).match() + }, + { assert new 
File("$outputDir/pipeline_info/nf_core_createtaxdb_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert path("$outputDir/malt/malt_index/table0.db").exists() }, + { assert path("$outputDir/malt/malt_index/table0.idx").exists() }, + ) + } + } +} diff --git a/tests/test_alternatives.nf.test.snap b/tests/test_alternatives.nf.test.snap new file mode 100644 index 0000000..4e810bd --- /dev/null +++ b/tests/test_alternatives.nf.test.snap @@ -0,0 +1,19 @@ +{ + "test_alternatives_profile": { + "content": [ + true, + "index0.idx:md5,ef349ebc456d6446b75570b8a0e57c0d", + "ref.db:md5,9372b49e5990fb4b29ed986a1fc737c4", + "ref.idx:md5,b99c3dc612948a7e4e490c3c74498c80", + "ref.inf:md5,e7ae0e08f730a851abba085e6cc7b4ec", + "taxonomy.idx:md5,f6c520613e1154909658f976498dd4a8", + "taxonomy.map:md5,5bb3f2192e925bca2e61e4b54f1671e0", + "taxonomy.tre:md5,f76fb2d5aa9b0d637234d48175841e0e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2025-01-09T16:49:09.883808608" + } +} \ No newline at end of file From dc76850f81624d69c91ef8db45f49132fe9c2869 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 9 Jan 2025 16:53:39 +0100 Subject: [PATCH 4/4] Add alternative tests to CI --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a7abe4d..0fc0b5c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,6 +54,7 @@ jobs: - "singularity" test_name: - "test" + - "test_alternatives" isMaster: - ${{ github.base_ref == 'master' }} # Exclude conda and singularity on dev