Update tests

nf-core · Feb 15, 2024 · 83960dd · 83960dd
1 parent bc4c7ba
commit 83960dd
Show file tree

Hide file tree

Showing 8 changed files with 56 additions and 21 deletions.
diff --git a/conf/test.config b/conf/test.config
@@ -28,8 +28,9 @@ params {
     build_malt       = true
     build_centrifuge = true
 
+    nucl2taxid       = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/nucl2tax.map'
     prot2taxid       = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/prot.accession2taxid.gz'
-    nodesdmp         = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/prot_nodes.dmp'
-    namesdmp         = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/prot_names.dmp'
+    nodesdmp         = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/nodes.dmp'
+    namesdmp         = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/names.dmp'
     malt_mapdb       = 's3://ngi-igenomes/test-data/createtaxdb/taxonomy/megan-nucl-Feb2022.db.zip'
 }
diff --git a/conf/test_nothing.config b/conf/test_nothing.config
@@ -10,6 +10,8 @@
 ----------------------------------------------------------------------------------------
 */
 
+includeConfig 'test.config'
+
 params {
     config_profile_name        = 'Test profile'
     config_profile_description = 'Minimal test dataset to check pipeline function'
@@ -23,13 +25,9 @@ params {
 
     input         = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/samplesheets/test.csv'
 
-    build_diamond    = true
-    build_kaiju      = true
-    build_malt       = true
-    build_centrifuge = true
+    build_diamond    = false
+    build_kaiju      = false
+    build_malt       = false
+    build_centrifuge = false
 
-    prot2taxid       = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/prot.accession2taxid.gz'
-    nodesdmp         = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/prot_nodes.dmp'
-    namesdmp         = 'https://raw.githubusercontent.com/nf-core/test-datasets/createtaxdb/data/taxonomy/prot_names.dmp'
-    malt_mapdb       = 's3://ngi-igenomes/test-data/createtaxdb/taxonomy/megan-nucl-Feb2022.db.zip'
 }
diff --git a/docs/output.md b/docs/output.md
@@ -14,6 +14,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 
 - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
+- [Centrifuge](#centrifuge) - Database files for Centrifuge
 - [DIAMOND](#diamond) - Database files for DIAMOND
 - [Kaiju](#kaiju) - Database files for Kaiju
 - [MALT](#malt) - Database files for MALT
@@ -49,6 +50,20 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
 
 [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage.
 
+### Centrifuge
+
+[Centrifuge](https://github.com/DaehwanKimLab/centrifuge) is a very rapid and memory-efficient system for the classification of DNA sequences from microbial samples.
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `centrifuge/`
+  - `<database>.*.cf`: Centrifuge database files
+
+</details>
+
+The `cf` files can be passed to Centrifuge by giving their directory and shared basename, e.g. `centrifuge -x /<path>/<to>/<cf_files_basename>`.
+
 ### Diamond
 
 [DIAMOND](https://github.com/bbuchfink/diamond) is an accelerated BLAST compatible local sequence aligner particularly used for protein alignment.

diff --git a/nextflow.config b/nextflow.config
@@ -63,7 +63,7 @@ params {
     save_concatenated_fastas = false
 
     prot2taxid = null
-    nuc2taxid  = null
+    nucl2taxid  = null
     nodesdmp   = null
     namesdmp   = null
     malt_mapdb = null

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -53,7 +53,7 @@
                     "description": "Path to (NCBI-style) protein accession2taxid file.",
                     "help_text": "A two column file tab-separated file with `accession.version` and `taxid`. The first refers to an accession ID in each FASTA entry header. The second refers to the taxonomy ID of the organism the sequence belongs to, as listed in `nodes.dmp`."
                 },
-                "nuc2taxid": {
+                "nucl2taxid": {
                     "type": "string",
                     "fa_icon": "far fa-address-card",
                     "description": "Path to (NCBI-style) nucleotide accession2taxid file.",

diff --git a/tests/test.nf.test b/tests/test.nf.test
@@ -18,6 +18,7 @@ nextflow_pipeline {
             assertAll(
                 { assert workflow.success },
                 { assert snapshot(
+                        path("$outputDir/centrifuge/"),
                         path("$outputDir/diamond/database.dmnd"),
                         path("$outputDir/kaiju/database.fmi"),
                         path("$outputDir/malt/malt-build.log").readLines().last().contains('Peak memory'),

diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap
@@ -1,7 +1,13 @@
 {
     "test_profile": {
         "content": [
-            "database.dmnd:md5,9d57aa88cd1766adfda8360876fc0e4f",
+            [
+                "database.1.cf:md5,4218d7b46893c4ecfbc6919b2f072a6b",
+                "database.2.cf:md5,5b9565dc528314e931de5c03a70f3409",
+                "database.3.cf:md5,51f6eb8c75aa64cc60cd733967278866",
+                "database.4.cf:md5,954c85fd4df6ea5d37906d5027f18688"
+            ],
+            "database.dmnd:md5,b2ea49ef5490c526e2c56cae19bcb462",
             "database.fmi:md5,54fd89f5e4eab61af30175e8aa389598",
             true,
             "index0.idx:md5,876139dc930e68992cd2625e08bba48a",
@@ -16,6 +22,6 @@
             "nf-test": "0.8.4",
             "nextflow": "23.10.1"
         },
-        "timestamp": "2024-02-08T10:59:42.922218767"
+        "timestamp": "2024-02-15T11:47:20.945591503"
     }
 }
diff --git a/workflows/createtaxdb.nf b/workflows/createtaxdb.nf
@@ -18,7 +18,7 @@ WorkflowCreatetaxdb.initialise(params, log)
 // Validate input files parameters (from Sarek)
 def checkPathParamList = [
     params.prot2taxid,
-    params.nuc2taxid,
+    params.nucl2taxid,
     params.nodesdmp,
     params.namesdmp,
     params.malt_mapdb,
@@ -86,9 +86,9 @@ workflow CREATETAXDB {
     //
     ch_input = Channel.fromSamplesheet("input")
 
-    // Prepare input for single file inputs modules
+    // PREPARE: Prepare input for single file inputs modules
 
-    if ( [params.build_malt].any() ) {  // Pull just DNA sequences
+    if ( [params.build_malt, params.build_centrifuge].any() ) {  // Pull just DNA sequences
 
         ch_dna_refs_for_singleref = ch_input
                                         .map{meta, fasta_dna, fasta_aa  -> [[id: params.dbname], fasta_dna]}
@@ -147,6 +147,9 @@ workflow CREATETAXDB {
     if ( params.build_diamond  ) {
         DIAMOND_MAKEDB ( CAT_CAT_AA.out.file_out, params.prot2taxid, params.nodesdmp, params.namesdmp )
         ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions.first())
+        ch_diamond_output = DIAMOND_MAKEDB.out.db
+    } else {
+        ch_diamond_output = Channel.empty()
     }
 
     //
@@ -156,13 +159,19 @@ workflow CREATETAXDB {
     if ( params.build_kaiju ) {
         KAIJU_MKFMI ( CAT_CAT_AA.out.file_out )
         ch_versions = ch_versions.mix(KAIJU_MKFMI.out.versions.first())
+        ch_kaiju_output = KAIJU_MKFMI.out.fmi
+    } else {
+        ch_kaiju_output = Channel.empty()
     }
 
     // Module: Run CENTRIFUGE/BUILD
 
     if ( params.build_centrifuge ) {
-        CENTRIFUGE_BUILD ( CAT_CAT_DNA.out.file_out, params.nuc2taxid, params.nodesdmp, params.namesdmp, [] )
+        CENTRIFUGE_BUILD ( CAT_CAT_DNA.out.file_out, params.nucl2taxid, params.nodesdmp, params.namesdmp, [] )
         ch_versions = ch_versions.mix(CENTRIFUGE_BUILD.out.versions.first())
+        ch_centrifuge_output = CENTRIFUGE_BUILD.out.cf
+    } else {
+        ch_centrifuge_output = Channel.empty()
     }
 
     //
@@ -185,6 +194,10 @@ workflow CREATETAXDB {
         }
 
         MALT_BUILD (ch_input_for_malt, [], ch_malt_mapdb)
+        ch_versions = ch_versions.mix(MALT_BUILD.out.versions.first())
+        ch_malt_output = MALT_BUILD.out.index
+    } else {
+        ch_malt_output = Channel.empty()
     }
 
     CUSTOM_DUMPSOFTWAREVERSIONS (
@@ -216,9 +229,10 @@ workflow CREATETAXDB {
     emit:
     versions            = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions
     multiqc_report_html = MULTIQC.out.report
-    diamond_database    = DIAMOND_MAKEDB.out.db
-    kaiju_database      = KAIJU_MKFMI.out.fmi
-    malt_database       = MALT_BUILD.out.index
+    centrifuge_database = ch_centrifuge_output
+    diamond_database    = ch_diamond_output
+    kaiju_database      = ch_kaiju_output
+    malt_database       = ch_malt_output
 }
 
 /*