diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index a78bd61..7919df0 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,19 +1,18 @@
 ## PR checklist
 
-- [ ] PR to `nextflow_reboot` branch
-- [ ] `conda` and `container` directives.
-- [ ] Docker container + singularity container (optional)
+For Nextflow implementation,
+
+- [ ] `conda` and `container` directives are included for each process
+- [ ] Docker container + singularity container (optional) are included for each process
 - [ ] Flow `meta.id` with each data channel
 - [ ] Use nf-core resource labels such as `process_high`
 - [ ] Used nf-core module
diff --git a/.github/include.yaml b/.github/include.yaml
new file mode 100644
index 0000000..5d850e1
--- /dev/null
+++ b/.github/include.yaml
@@ -0,0 +1,10 @@
+".":
+  - ./.github/workflows/**
+  - ./nf-test.config
+tests:
+  - ./assets/*
+  - ./bin/*
+  - ./conf/*
+  - ./main.nf
+  - ./nextflow_schema.json
+  - ./nextflow.config
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6c27abd..20d4b62 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,14 +1,12 @@
 name: EDTA Nextflow CI
 on:
-  push:
-    branches:
-      - nextflow_reboot
   pull_request:
-    branches:
-      - nextflow_reboot
 
 env:
   NXF_ANSI_LOG: false
+  NFT_WORKDIR: "~"
+  NFT_DIFF: "pdiff"
+  NFT_DIFF_ARGS: "--line-numbers --expand-tabs=2"
   NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity
   NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity
 
@@ -17,62 +15,100 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  nf-test-changes:
+    name: Check for changes
+    runs-on: ubuntu-latest
+    outputs:
+      nf_test_files: ${{ steps.list.outputs.components }}
+    steps:
+      - uses: actions/checkout@v4.2.1
+        with:
+          fetch-depth: 0
+
+      - name: List nf-test files
+        id: list
+        uses: adamrtalbot/detect-nf-test-changes@v0.0.4
+        with:
+          head: ${{ github.sha }}
+          base: origin/${{ github.base_ref }}
+          include: .github/include.yaml
+
+      - name: print list of nf-test files
+        env:
+          NF_TEST_FILES: ${{ steps.list.outputs.components }}
+        run: |
+          echo "$NF_TEST_FILES"
+
   test:
-    name: Run pipeline with test data
-    # Only run on push if this is the jguhlin nextflow_reboot branch (merged PRs)
-    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'jguhlin/EDTA') }}"
+    name: ${{ matrix.nf_test_files }} ${{ matrix.profile }} NF-${{ matrix.NXF_VER }}
+    needs: [nf-test-changes]
+    if: needs.nf-test-changes.outputs.nf_test_files != '[]'
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
-        nextflow:
-          - '24.04.4'
-        profile:
-          - docker
-          - singularity
-          - conda
+        NXF_VER:
+          - "24.04.2"
+
+        nf_test_files: ["${{ fromJson(needs.nf-test-changes.outputs.nf_test_files) }}"]
+        profile: [conda, docker, singularity]
 
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
+        uses: actions/checkout@v4.2.1
 
       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v2
         with:
-          version: ${{ matrix.nextflow }}
+          version: "${{ matrix.NXF_VER }}"
 
-      # - name: Disk space cleanup
-      #   uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
-      # Will be needed with large data sets. Can take long to run
+      - uses: actions/setup-python@v5.2.0
+        with:
+          python-version: "3.11"
+          architecture: "x64"
 
+      - name: Install pdiff to see diff between nf-test snapshots
+        run: |
+          python -m pip install --upgrade pip
+          pip install pdiff
+
+      - uses: nf-core/setup-nf-test@v1.1.2
+        with:
+          version: 0.9.0
+
       - name: Setup apptainer
         if: matrix.profile == 'singularity'
         uses: eWaterCycle/setup-apptainer@main
-      
+
       - name: Set up Singularity
         if: matrix.profile == 'singularity'
         run: |
           mkdir -p $NXF_SINGULARITY_CACHEDIR
           mkdir -p $NXF_SINGULARITY_LIBRARYDIR
-
-      - name: Set up miniconda
-        if: matrix.profile == 'conda'
-        uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3
-        with:
-          miniconda-version: "latest"
-          auto-update-conda: true
-          channels: conda-forge,bioconda
 
-      - name: Conda setup
-        if: matrix.profile == 'conda'
+      - name: Run nf-test
+        env:
+          NF_TEST_FILES: ${{ matrix.nf_test_files }}
         run: |
-          conda clean -a
-          conda install -n base conda-libmamba-solver
-          conda config --set solver libmamba
-          echo $(realpath $CONDA)/condabin >> $GITHUB_PATH
-          echo $(realpath python) >> $GITHUB_PATH
+          nf-test test --verbose "$NF_TEST_FILES" --profile "+${{ matrix.profile }}"
+
+  confirm-pass:
+    runs-on: ubuntu-latest
+    needs: [test]
+    if: always()
+    steps:
+      - name: All tests ok
+        if: ${{ !contains(needs.*.result, 'failure') }}
+        run: exit 0
+      - name: One or more tests failed
+        if: ${{ contains(needs.*.result, 'failure') }}
+        run: exit 1
 
-      - name: Run pipeline with test data
+      - name: debug-print
+        if: always()
+        env:
+          NEEDS_JSON: ${{ toJSON(needs) }}
+          NEEDS_RESULTS: ${{ toJSON(needs.*.result) }}
         run: |
-          nextflow run \
-            ${GITHUB_WORKSPACE} \
-            -profile ${{ matrix.profile }},test
+          echo "toJSON(needs) = $NEEDS_JSON"
+          echo "toJSON(needs.*.result) = $NEEDS_RESULTS"
diff --git a/.gitignore b/.gitignore
index 9b1e821..f636970 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@ bin/TIR-Learner*/Module3/Maize_model.sav
 work/
 .nextflow.log*
 .nextflow/*
+results/
+.nf-test*
diff --git a/cleanNXF.sh b/cleanNXF.sh
new file mode 100755
index 0000000..81b8d85
--- /dev/null
+++ b/cleanNXF.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+rm -rf .nextflow*
+echo "Cleaned .nextflow..."
+rm -rf .nextflow.pid
+echo "Cleaned .nextflow.pid..."
+# Remove task directories but keep anything under work/ whose name contains
+# "conda". A glob is used instead of parsing `ls` output, so a missing work/
+# directory is not an error and names containing whitespace are handled.
+for i in work/*; do
+    [[ "${i##*/}" == *conda* ]] || rm -rf -- "$i"
+done
+echo "Cleaned work..."
+
+rm -f .nf-test.log
+rm -rf .nf-test
+echo "Cleaned nf-test..."
diff --git a/conf/modules.config b/conf/modules.config
new file mode 100644
index 0000000..39b1af8
--- /dev/null
+++ b/conf/modules.config
@@ -0,0 +1,42 @@
+process {
+    withName: 'EDTA:LTRHARVEST' {
+        ext.args = '-size 1000000 -time 300'
+        ext.prefix = { "${meta.id}_ltrharvest" }
+    }
+
+    withName: 'EDTA:LTRFINDER' {
+        ext.args = '-harvest_out -size 1000000 -time 300'
+    }
+
+    withName: 'EDTA:CAT_CAT' {
+        ext.prefix = { "${meta.id}_ltrharvest_ltrfinder.tabout" }
+    }
+
+    withName: 'EDTA:ANNOSINE' {
+        ext.args = '--num_alignments 50000 -rpm 0 --copy_number 3 --shift 100 -auto 1'
+    }
+
+    withName: 'EDTA:REPEATMODELER_REPEATMODELER' {
+        ext.args = '-engine ncbi'
+    }
+
+    withName: 'EDTA:FASTA_HELITRONSCANNER_SCAN_DRAW:HELITRONSCANNER_DRAW' {
+        ext.args = '-pure_helitron'
+    }
+
+    withName: 'EDTA:FASTA_HELITRONSCANNER_SCAN_DRAW:HELITRONSCANNER_SCAN_HEAD_RC' {
+        ext.prefix = { "${meta.id}.rc" }
+        ext.args = '--rc'
+    }
+
+    withName: 'EDTA:FASTA_HELITRONSCANNER_SCAN_DRAW:HELITRONSCANNER_SCAN_TAIL_RC' {
+        ext.prefix = { "${meta.id}.rc" }
+        ext.args = '--rc'
+    }
+
+    withName: 'EDTA:FASTA_HELITRONSCANNER_SCAN_DRAW:HELITRONSCANNER_DRAW_RC' {
+        ext.prefix = { "${meta.id}.rc" }
+        ext.args = '-pure_helitron'
+    }
+
+}
\ No newline at end of file
diff --git a/conf/test.config b/conf/test.config
new file mode 100644
index 0000000..e921133
--- /dev/null
+++ b/conf/test.config
@@ -0,0 +1,11 @@
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    genomes = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta'
+}
\ No newline at end of file
diff --git a/main.nf
b/main.nf new file mode 100755 index 0000000..fa74a8b --- /dev/null +++ b/main.nf @@ -0,0 +1,27 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +params.genomes = 'genomes/*' +params.species = 'others' +params.cds = '' +params.curatedlib = '' +params.rmlib = '' +params.sensitive = false +params.anno = false +params.rmout = '' +params.maxdiv = 40 +params.evaluate = true +params.exclude = '' +params.maxint = 5000 +params.outdir = 'results' + +include { EDTA } from './workflows/edta.nf' + +// Test run: +// ./main.nf -profile docker,test +// ./main.nf -profile conda,test + +workflow { + EDTA() +} diff --git a/modules.json b/modules.json new file mode 100644 index 0000000..5d34d4e --- /dev/null +++ b/modules.json @@ -0,0 +1,87 @@ +{ + "name": "", + "homePage": "", + "repos": { + "https://github.com/GallVp/nxf-components.git": { + "modules": { + "gallvp": { + "annosine": { + "branch": "main", + "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": "main", + "git_sha": "ae9714c21ede9199a3118e3c20b65484aa73e232", + "installed_by": ["modules"] + }, + "helitronscanner/draw": { + "branch": "main", + "git_sha": "929d59d82f2e90fe79eb6f93d1ae739f22a894e1", + "installed_by": ["fasta_helitronscanner_scan_draw"] + }, + "helitronscanner/scan": { + "branch": "main", + "git_sha": "929d59d82f2e90fe79eb6f93d1ae739f22a894e1", + "installed_by": ["fasta_helitronscanner_scan_draw"] + }, + "tirlearner": { + "branch": "main", + "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", + "installed_by": ["modules"] + } + } + }, + "subworkflows": { + "gallvp": { + "fasta_helitronscanner_scan_draw": { + "branch": "main", + "git_sha": "970e3af38229845dd38c13d29b0905651a8e61f0", + "installed_by": ["subworkflows"] + } + } + } + }, + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "cat/cat": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, 
+ "gunzip": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "ltrfinder": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "ltrharvest": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "ltrretriever/ltrretriever": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "repeatmodeler/builddatabase": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "repeatmodeler/repeatmodeler": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + } + } + } + } + } +} diff --git a/modules/gallvp/annosine/environment.yml b/modules/gallvp/annosine/environment.yml new file mode 100644 index 0000000..9aef5ec --- /dev/null +++ b/modules/gallvp/annosine/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - "bioconda::annosine2=2.0.7" diff --git a/modules/gallvp/annosine/main.nf b/modules/gallvp/annosine/main.nf new file mode 100644 index 0000000..6408b28 --- /dev/null +++ b/modules/gallvp/annosine/main.nf @@ -0,0 +1,58 @@ +process ANNOSINE { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/annosine2:2.0.7--pyh7cba7a3_0': + 'biocontainers/annosine2:2.0.7--pyh7cba7a3_0' }" + + input: + tuple val(meta), path(fasta) + val mode + + output: + tuple val(meta), path("*.fa") , emit: fa , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.0.7' // WARN: Manually update when changing Bioconda assets + if ( "$fasta" == "${prefix}.fa" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + AnnoSINE_v2 \\ + $args \\ + --threads $task.cpus \\ + $mode \\ + $fasta \\ + $prefix + + mv \\ + $prefix/Seed_SINE.fa \\ + ${prefix}.fa \\ + || echo 'AnnoSINE_v2 did not find SINE sequences. See log for details!' + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + annosine: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.0.7' // WARN: Manually update when changing Bioconda assets + if ( "$fasta" == "${prefix}.fa" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + touch ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + annosine: $VERSION + END_VERSIONS + """ +} diff --git a/modules/gallvp/annosine/meta.yml b/modules/gallvp/annosine/meta.yml new file mode 100644 index 0000000..ad205b5 --- /dev/null +++ b/modules/gallvp/annosine/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "annosine" +description: Accelerating de novo SINE annotation in plant and animal genomes +keywords: + - genomics + - SINE + - annotation + - plant +tools: + - "annosine": + description: "AnnoSINE_v2 - SINE Annotation Tool for Plant and Animal Genomes" + homepage: "https://github.com/liaoherui/AnnoSINE_v2" + documentation: "https://github.com/liaoherui/AnnoSINE_v2" + tool_dev_url: "https://github.com/liaoherui/AnnoSINE_v2" + doi: "10.1101/2024.03.01.582874" + licence: ["MIT"] + identifier: biotools:annosine + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: Input genome assembly + pattern: "*.{fasta,fa,fsa}" + - - mode: + type: integer + description: Run mode +output: + - fa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.fa": + type: file + description: Non-redundant SINE library with serial number + pattern: "*.fa" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/gallvp/annosine/tests/main.nf.test b/modules/gallvp/annosine/tests/main.nf.test new file mode 100644 index 0000000..3bafb5d --- /dev/null +++ b/modules/gallvp/annosine/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process ANNOSINE" + script "../main.nf" + process "ANNOSINE" + + tag "modules" + tag "modules_gallvp" + tag "annosine" + + test("actinidia_chinensis - fasta") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] + input[1] = 3 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("actinidia_chinensis - fasta - stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] + input[1] = 3 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/gallvp/annosine/tests/main.nf.test.snap b/modules/gallvp/annosine/tests/main.nf.test.snap new file mode 100644 index 0000000..4c87403 --- /dev/null +++ b/modules/gallvp/annosine/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "actinidia_chinensis - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,fc3bd39555133a1b50623bccc7d4cf9b" + ] 
+ ], + "1": [ + "versions.yml:md5,e4a37dd3eccd5ff39c2262542db40e98" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,fc3bd39555133a1b50623bccc7d4cf9b" + ] + ], + "versions": [ + "versions.yml:md5,e4a37dd3eccd5ff39c2262542db40e98" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-02T15:32:32.714269" + }, + "actinidia_chinensis - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e4a37dd3eccd5ff39c2262542db40e98" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e4a37dd3eccd5ff39c2262542db40e98" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-02T15:32:37.586287" + } +} \ No newline at end of file diff --git a/modules/gallvp/annosine/tests/nextflow.config b/modules/gallvp/annosine/tests/nextflow.config new file mode 100644 index 0000000..efdc449 --- /dev/null +++ b/modules/gallvp/annosine/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: ANNOSINE { + ext.args = '-a 2 -rpm 0 --copy_number 1 --shift 200' + } +} diff --git a/modules/gallvp/gunzip/environment.yml b/modules/gallvp/gunzip/environment.yml new file mode 100644 index 0000000..c779485 --- /dev/null +++ b/modules/gallvp/gunzip/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/gallvp/gunzip/main.nf b/modules/gallvp/gunzip/main.nf new file mode 100644 index 0000000..5e67e3b --- /dev/null +++ b/modules/gallvp/gunzip/main.nf @@ -0,0 +1,55 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/gallvp/gunzip/meta.yml b/modules/gallvp/gunzip/meta.yml new file mode 100644 index 0000000..9066c03 --- /dev/null +++ b/modules/gallvp/gunzip/meta.yml @@ -0,0 +1,47 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. 
+ documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - $gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/gallvp/gunzip/tests/main.nf.test b/modules/gallvp/gunzip/tests/main.nf.test new file mode 100644 index 0000000..f661057 --- /dev/null +++ b/modules/gallvp/gunzip/tests/main.nf.test @@ -0,0 +1,121 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_gallvp" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + 
+ test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/gallvp/gunzip/tests/main.nf.test.snap b/modules/gallvp/gunzip/tests/main.nf.test.snap new file mode 100644 index 0000000..069967e --- /dev/null +++ b/modules/gallvp/gunzip/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + 
"versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" + } +} \ No newline at end of file diff --git a/modules/gallvp/gunzip/tests/nextflow.config b/modules/gallvp/gunzip/tests/nextflow.config new file mode 100644 index 0000000..dec7764 --- /dev/null +++ b/modules/gallvp/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/gallvp/gunzip/tests/tags.yml b/modules/gallvp/gunzip/tests/tags.yml new file mode 100644 index 0000000..fd3f691 --- /dev/null +++ b/modules/gallvp/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/gallvp/helitronscanner/draw/environment.yml b/modules/gallvp/helitronscanner/draw/environment.yml new file mode 100644 index 0000000..479f61c --- /dev/null +++ 
b/modules/gallvp/helitronscanner/draw/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::helitronscanner=1.0" diff --git a/modules/gallvp/helitronscanner/draw/main.nf b/modules/gallvp/helitronscanner/draw/main.nf new file mode 100644 index 0000000..16ee59a --- /dev/null +++ b/modules/gallvp/helitronscanner/draw/main.nf @@ -0,0 +1,73 @@ +process HELITRONSCANNER_DRAW { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/helitronscanner:1.0--hdfd78af_0': + 'biocontainers/helitronscanner:1.0--hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(head) + tuple val(meta3), path(tail) + + output: + tuple val(meta), path("*.draw") , emit: draw + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ( !task.memory ) { error '[HELITRONSCANNER_DRAW] Available memory not known. Specify process memory requirements to fix this.' } + def avail_mem = (task.memory.giga*0.8).intValue() + """ + # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) + # Check for container variable initialisation script and source it. + if [ -f "/usr/local/env-activate.sh" ]; then + set +u # Otherwise, errors out because of various unbound variables + . 
"/usr/local/env-activate.sh" + set -u + fi + + HelitronScanner \\ + pairends \\ + -Xmx${avail_mem}g \\ + -head_score $head \\ + -tail_score $tail \\ + -output ${prefix}.pairends \\ + ${args2} + + HelitronScanner \\ + draw \\ + -Xmx${avail_mem}g \\ + -pscore ${prefix}.pairends \\ + -g $fasta \\ + -output ${prefix}.draw \\ + ${args} + + mv ${prefix}.draw.hel.fa \\ + ${prefix}.draw + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + helitronscanner: \$(HelitronScanner |& sed -n 's/HelitronScanner V\\(.*\\)/V\\1/p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.draw + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + helitronscanner: \$(HelitronScanner |& sed -n 's/HelitronScanner V\\(.*\\)/V\\1/p') + END_VERSIONS + """ +} diff --git a/modules/gallvp/helitronscanner/draw/meta.yml b/modules/gallvp/helitronscanner/draw/meta.yml new file mode 100644 index 0000000..96bd260 --- /dev/null +++ b/modules/gallvp/helitronscanner/draw/meta.yml @@ -0,0 +1,69 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "helitronscanner_draw" +description: HelitronScanner draw tool for Helitron transposons in genomes +keywords: + - genomics + - helitron + - scanner +tools: + - "helitronscanner": + description: "HelitronScanner uncovers a large overlooked cache of Helitron transposons + in many genomes" + homepage: "https://sourceforge.net/projects/helitronscanner" + documentation: "https://sourceforge.net/projects/helitronscanner" + tool_dev_url: "https://sourceforge.net/projects/helitronscanner" + doi: "10.1073/pnas.1410068111" + licence: ["GPL v3-or-later"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - fasta: + type: file + description: Genome data to scan for Helitrons + pattern: "*.{fa,fsa,fasta}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - head: + type: file + description: Output of the HelitronScanner head command + pattern: "*.head" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - tail: + type: file + description: Output of the HelitronScanner tail command + pattern: "*.tail" +output: + - draw: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + pattern: "*.draw" + - "*.draw": + type: map + description: | + The draw output from HelitronScanner + pattern: "*.draw" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" + - "@jguhlin" +maintainers: + - "@GallVp" diff --git a/modules/gallvp/helitronscanner/draw/tests/main.nf.test b/modules/gallvp/helitronscanner/draw/tests/main.nf.test new file mode 100644 index 0000000..331f563 --- /dev/null +++ b/modules/gallvp/helitronscanner/draw/tests/main.nf.test @@ -0,0 +1,159 @@ +nextflow_process { + + name "Test Process HELITRONSCANNER_DRAW" + script "../main.nf" + config "./nextflow.config" + process "HELITRONSCANNER_DRAW" + + tag "modules" + tag "modules_gallvp" + tag "helitronscanner" + tag "helitronscanner/draw" + tag "helitronscanner/scan" + tag "gunzip" + + setup { + run('GUNZIP') { + script "../../../gunzip/main.nf" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + run('HELITRONSCANNER_SCAN', alias: 'HELITRONSCANNER_SCANHEAD') { + script "../../scan/main.nf" + + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 
'head' + input[2] = [] + input[3] = 0 + """ + } + } + + run('HELITRONSCANNER_SCAN', alias: 'HELITRONSCANNER_SCANTAIL') { + script "../../scan/main.nf" + + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'tail' + input[2] = [] + input[3] = 0 + """ + } + } + } + + test("actinidia_chinensis-genome_1_fasta_gz-success") { + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = HELITRONSCANNER_SCANHEAD.out.scan + input[2] = HELITRONSCANNER_SCANTAIL.out.scan + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("sarscov2 - genome - success") { + + setup { + run('HELITRONSCANNER_SCAN', alias: 'HELITRONSCANNER_SCANHEAD_SARSCOV2') { + script "../../scan/main.nf" + + process { + """ + input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'head' + input[2] = [] + input[3] = 0 + """ + } + } + + run('HELITRONSCANNER_SCAN', alias: 'HELITRONSCANNER_SCANTAIL_SARSCOV2') { + script "../../scan/main.nf" + + process { + """ + input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'tail' + input[2] = [] + input[3] = 0 + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = HELITRONSCANNER_SCANHEAD_SARSCOV2.out.scan + input[2] = HELITRONSCANNER_SCANTAIL_SARSCOV2.out.scan + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.draw[0][1] != null }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = HELITRONSCANNER_SCANHEAD.out.scan + input[2] = HELITRONSCANNER_SCANTAIL.out.scan + """ + 
} + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/gallvp/helitronscanner/draw/tests/main.nf.test.snap b/modules/gallvp/helitronscanner/draw/tests/main.nf.test.snap new file mode 100644 index 0000000..208d14d --- /dev/null +++ b/modules/gallvp/helitronscanner/draw/tests/main.nf.test.snap @@ -0,0 +1,80 @@ +{ + "actinidia_chinensis-genome_1_fasta_gz-success": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.draw:md5,434aaaa70c294464e6bae93b2895b13c" + ] + ], + "1": [ + "versions.yml:md5,4b206968a702782fa04d2ba560c89728" + ], + "draw": [ + [ + { + "id": "test" + }, + "test.draw:md5,434aaaa70c294464e6bae93b2895b13c" + ] + ], + "versions": [ + "versions.yml:md5,4b206968a702782fa04d2ba560c89728" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T11:51:49.477212" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.draw:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,4b206968a702782fa04d2ba560c89728" + ], + "draw": [ + [ + { + "id": "test" + }, + "test.draw:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,4b206968a702782fa04d2ba560c89728" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T11:52:47.744899" + }, + "sarscov2 - genome - success": { + "content": [ + [ + "versions.yml:md5,4b206968a702782fa04d2ba560c89728" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T11:58:21.800568" + } +} \ No newline at end of file diff --git a/modules/gallvp/helitronscanner/draw/tests/nextflow.config b/modules/gallvp/helitronscanner/draw/tests/nextflow.config new file mode 100644 index 0000000..57f4900 --- /dev/null +++ b/modules/gallvp/helitronscanner/draw/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'HELITRONSCANNER_DRAW' { 
+ ext.args = '-pure_helitron' + } +} diff --git a/modules/gallvp/helitronscanner/scan/environment.yml b/modules/gallvp/helitronscanner/scan/environment.yml new file mode 100644 index 0000000..ea89d39 --- /dev/null +++ b/modules/gallvp/helitronscanner/scan/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::helitronscanner=1.0" diff --git a/modules/gallvp/helitronscanner/scan/main.nf b/modules/gallvp/helitronscanner/scan/main.nf new file mode 100644 index 0000000..0f748ab --- /dev/null +++ b/modules/gallvp/helitronscanner/scan/main.nf @@ -0,0 +1,72 @@ +process HELITRONSCANNER_SCAN { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/helitronscanner:1.0--hdfd78af_0': + 'biocontainers/helitronscanner:1.0--hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + val command + path lcv_filepath + val buffer_size + + output: + tuple val(meta), path("*.$command") , emit: scan + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + if ( command !in [ 'head', 'tail' ] ) error "[HELITRONSCANNER_SCAN] command argument should be 'head' or 'tail'" + if ( !task.memory ) { error '[HELITRONSCANNER_SCAN] Available memory not known. Specify process memory requirements to fix this.' } + + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def is_head = { command == 'head' }() + def subcommand = is_head ? 'scanHead' : 'scanTail' + def lcvs_file = is_head ? 'head.lcvs' : 'tail.lcvs' + def lcv_arg = lcv_filepath ? 
"-lcv_filepath $lcv_filepath" : "-lcv_filepath \$HELITRONSCANNER_TRAININGSET_PATH/$lcvs_file" + def avail_mem = (task.memory.giga*0.8).intValue() + """ + # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) + # Check for container variable initialisation script and source it. + if [ -f "/usr/local/env-activate.sh" ]; then + set +u # Otherwise, errors out because of various unbound variables + . "/usr/local/env-activate.sh" + set -u + fi + + HelitronScanner \\ + $subcommand \\ + -Xmx${avail_mem}g \\ + $lcv_arg \\ + -genome $fasta \\ + -buffer_size $buffer_size \\ + -threads_LCV $task.cpus \\ + $args \\ + -output ${prefix}.${command} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + helitronscanner: \$(HelitronScanner |& sed -n 's/HelitronScanner V\\(.*\\)/V\\1/p') + END_VERSIONS + """ + + stub: + if ( command !in [ 'head', 'tail' ] ) error "[HELITRONSCANNER_SCAN] command argument should be 'head' or 'tail'" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.${command} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + helitronscanner: \$(HelitronScanner |& sed -n 's/HelitronScanner V\\(.*\\)/V\\1/p') + END_VERSIONS + """ +} diff --git a/modules/gallvp/helitronscanner/scan/meta.yml b/modules/gallvp/helitronscanner/scan/meta.yml new file mode 100644 index 0000000..ed8f956 --- /dev/null +++ b/modules/gallvp/helitronscanner/scan/meta.yml @@ -0,0 +1,60 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "helitronscanner_scan" +description: HelitronScanner scanHead and scanTail tools for Helitron transposons + in genomes +keywords: + - genomics + - helitron + - scanner +tools: + - "helitronscanner": + description: "HelitronScanner uncovers a large overlooked cache of Helitron transposons + in many genomes" + homepage: 
"https://sourceforge.net/projects/helitronscanner" + documentation: "https://sourceforge.net/projects/helitronscanner" + tool_dev_url: "https://sourceforge.net/projects/helitronscanner" + doi: "10.1073/pnas.1410068111" + licence: ["GPL v3-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: Genome data to scan for Helitrons + pattern: "*.{fa,fsa,fasta}" + - - command: + type: string + description: Command to execute. One of [ 'head', 'tail' ] + - - lcv_filepath: + type: file + description: LCV file path. If not provided by setting it to [], the LCV file + packaged with the module will be used + pattern: "*.lcvs" + - - buffer_size: + type: integer + description: Genome slice size (use negative or zero for non-buffer, i.e. treat + every whole chromosome) +output: + - scan: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.$command": + type: file + description: Head or tail file depending on the command + pattern: "*.$command" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/gallvp/helitronscanner/scan/tests/main.nf.test b/modules/gallvp/helitronscanner/scan/tests/main.nf.test new file mode 100644 index 0000000..55bcfed --- /dev/null +++ b/modules/gallvp/helitronscanner/scan/tests/main.nf.test @@ -0,0 +1,148 @@ +nextflow_process { + + name "Test Process HELITRONSCANNER_SCAN" + script "../main.nf" + process "HELITRONSCANNER_SCAN" + + tag "modules" + tag "modules_gallvp" + tag "helitronscanner" + tag "helitronscanner/scan" + tag "gunzip" + + setup { + run('GUNZIP') { + script "../../../gunzip/main.nf" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + } + + test("actinidia_chinensis - genome_1_fasta_gz - head - success") { + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'head' + input[2] = [] + input[3] = 0 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.scan[0][1]).text.contains('2729827:1 2729980:1 2730005:1') }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("actinidia_chinensis - genome_1_fasta_gz - tail - success") { + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'tail' + input[2] = [] + input[3] = 0 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.scan[0][1]).text.contains('7265:1 7951:1 9264:2 9398:1') }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("sarscov2 - genome - head - success") { + + when { + process { + """ + input[0] = [ + [ id: 'test' 
], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'head' + input[2] = [] + input[3] = 0 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.scan[0][1]).text.contains('113:2 236:1 1158:1 1754:2 1771:1') }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'head' + input[2] = [] + input[3] = 0 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("stub - tail") { + + options "-stub" + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'tail' + input[2] = [] + input[3] = 0 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/gallvp/helitronscanner/scan/tests/main.nf.test.snap b/modules/gallvp/helitronscanner/scan/tests/main.nf.test.snap new file mode 100644 index 0000000..ac7ae5e --- /dev/null +++ b/modules/gallvp/helitronscanner/scan/tests/main.nf.test.snap @@ -0,0 +1,104 @@ +{ + "actinidia_chinensis - genome_1_fasta_gz - tail - success": { + "content": [ + [ + "versions.yml:md5,3c675f4be863f58c88db5a2936beaa88" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-03T11:51:54.403269" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.head:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,3c675f4be863f58c88db5a2936beaa88" + ], + "scan": [ + [ + { + "id": "test" + }, + "test.head:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3c675f4be863f58c88db5a2936beaa88" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-03T11:52:10.250343" + }, + "sarscov2 - 
genome - head - success": { + "content": [ + [ + "versions.yml:md5,3c675f4be863f58c88db5a2936beaa88" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-03T11:43:05.068944" + }, + "stub - tail": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tail:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,3c675f4be863f58c88db5a2936beaa88" + ], + "scan": [ + [ + { + "id": "test" + }, + "test.tail:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3c675f4be863f58c88db5a2936beaa88" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-03T11:39:42.225262" + }, + "actinidia_chinensis - genome_1_fasta_gz - head - success": { + "content": [ + [ + "versions.yml:md5,3c675f4be863f58c88db5a2936beaa88" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-03T11:51:18.942428" + } +} \ No newline at end of file diff --git a/modules/gallvp/tirlearner/environment.yml b/modules/gallvp/tirlearner/environment.yml new file mode 100644 index 0000000..d60c615 --- /dev/null +++ b/modules/gallvp/tirlearner/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::tir-learner=3.0.3" diff --git a/modules/gallvp/tirlearner/main.nf b/modules/gallvp/tirlearner/main.nf new file mode 100644 index 0000000..c253245 --- /dev/null +++ b/modules/gallvp/tirlearner/main.nf @@ -0,0 +1,75 @@ +process TIRLEARNER { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/tir-learner:3.0.3--hdfd78af_0': + 'biocontainers/tir-learner:3.0.3--hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + val species + + output: + tuple val(meta), path("${prefix}.log") , emit: log + tuple val(meta), path("${prefix}.fa") , emit: fasta , optional: true + tuple val(meta), path("${prefix}.gff3") , emit: gff , optional: true + tuple val(meta), path("${prefix}.filtered.fa") , emit: filtered_fasta , optional: true + tuple val(meta), path("${prefix}.filtered.gff3"), emit: filtered_gff , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def is_zipped = fasta.toString().endsWith('.gz') + def input_name = "$fasta" - ( is_zipped ? '.gz' : '' ) + def unzip_fasta = is_zipped ? "gzip -cdf $fasta > $input_name" : '' + + if ( "$input_name" == "${prefix}.fa" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + $unzip_fasta + + TIR-Learner \\ + -f $input_name \\ + -s $species \\ + -t $task.cpus \\ + -o $prefix \\ + $args \\ + &> >(tee "${prefix}.log" 2>&1) \\ + || echo "TIR-Learner failed! See ${prefix}.log" + + mv "${prefix}/TIR-Learner-Result/TIR-Learner_FinalAnn.fa" "${prefix}.fa" || echo "TIR-Learner failed to find TIRs. See ${prefix}.log" + mv "${prefix}/TIR-Learner-Result/TIR-Learner_FinalAnn.gff3" "${prefix}.gff3" || echo "TIR-Learner failed to find TIRs. See ${prefix}.log" + + mv "${prefix}/TIR-Learner-Result/TIR-Learner_FinalAnn_filter.fa" "${prefix}.filtered.fa" || echo "TIR-Learner failed to find TIRs. See ${prefix}.log" + mv "${prefix}/TIR-Learner-Result/TIR-Learner_FinalAnn_filter.gff3" "${prefix}.filtered.gff3" || echo "TIR-Learner failed to find TIRs. 
See ${prefix}.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + TIR-Learner: \$(TIR-Learner -v | head -n 1 | sed 's/TIR-Learner //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + if ( "$fasta" == "${prefix}.fa" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.log + + touch "${prefix}.fa" + touch "${prefix}.gff3" + + touch "${prefix}.filtered.fa" + touch "${prefix}.filtered.gff3" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + TIR-Learner: \$(TIR-Learner -v | head -n 1 | sed 's/TIR-Learner //') + END_VERSIONS + """ +} diff --git a/modules/gallvp/tirlearner/meta.yml b/modules/gallvp/tirlearner/meta.yml new file mode 100644 index 0000000..ff2c9c2 --- /dev/null +++ b/modules/gallvp/tirlearner/meta.yml @@ -0,0 +1,94 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "tirlearner" +description: | + Ensemble Method for TIR Transposable Element Annotation +keywords: + - genomics + - annotation + - repeat + - transposable +tools: + - "TIR-Learner": + description: TIR-Learner + homepage: "https://github.com/lutianyu2001/TIR-Learner" + documentation: "https://github.com/lutianyu2001/TIR-Learner" + tool_dev_url: "https://github.com/lutianyu2001/TIR-Learner" + doi: "10.1016/j.molp.2019.02.008" + licence: ["GPLv3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: Genome sequences in fasta format + pattern: "*.{fsa,fa,fasta}" + - - species: + type: string + description: | + "maize", "rice" or "others" +output: + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - ${prefix}.log: + type: file + description: Log from TIR-Learner + pattern: "*.log" + - fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.fa: + type: file + description: Putative TIRs in fasta format + pattern: "*.fa" + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.gff3: + type: file + description: Putative TIRs in GFF3 format + pattern: "*.gff3" + - filtered_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.filtered.fa: + type: file + description: Filtered TIRs in fasta format + pattern: "*.fa" + - filtered_gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.filtered.gff3: + type: file + description: Filtered TIRs in GFF3 format + pattern: "*.gff3" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jguhlin" + - "@GallVp" +maintainers: + - "@jguhlin" + - "@GallVp" diff --git a/modules/gallvp/tirlearner/tests/main.nf.test b/modules/gallvp/tirlearner/tests/main.nf.test new file mode 100644 index 0000000..034fbac --- /dev/null +++ b/modules/gallvp/tirlearner/tests/main.nf.test @@ -0,0 +1,91 @@ +nextflow_process { + + name "Test Process TIRLEARNER" + script "../main.nf" + process "TIRLEARNER" + + tag "modules" + tag "modules_gallvp" + tag "tirlearner" + + + test("actinidia_chinensis-genome_1_fasta_gz-success") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] + input[1] = 'others' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
file(process.out.log[0][1]).text.contains('Module 4 Begin') }, + { assert snapshot( + process.out.fasta, + process.out.gff, + process.out.filtered_fasta, + process.out.filtered_gff, + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2-genome") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'others' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.log[0][1]).text.contains('ValueError: All objects passed were None') }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] + input[1] = 'others' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/gallvp/tirlearner/tests/main.nf.test.snap b/modules/gallvp/tirlearner/tests/main.nf.test.snap new file mode 100644 index 0000000..dd1a6d0 --- /dev/null +++ b/modules/gallvp/tirlearner/tests/main.nf.test.snap @@ -0,0 +1,155 @@ +{ + "sarscov2-genome": { + "content": [ + [ + "versions.yml:md5,9db74013377c09d8452530315592acdf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-22T13:03:25.394783" + }, + "actinidia_chinensis-genome_1_fasta_gz-success": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.fa:md5,65987decb67d1e3f95b993f86d14c0d0" + ] + ], + [ + [ + { + "id": "test" + }, + "test.gff3:md5,90bf040e7b29763351237207297dc801" + ] + ], + [ + [ + { + "id": "test" + }, + "test.filtered.fa:md5,65987decb67d1e3f95b993f86d14c0d0" + ] + ], + [ + [ + { + "id": "test" + }, + "test.filtered.gff3:md5,90bf040e7b29763351237207297dc801" + ] + ], 
+ [ + "versions.yml:md5,9db74013377c09d8452530315592acdf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-22T13:21:08.136874" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.filtered.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.filtered.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,9db74013377c09d8452530315592acdf" + ], + "fasta": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtered_fasta": [ + [ + { + "id": "test" + }, + "test.filtered.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtered_gff": [ + [ + { + "id": "test" + }, + "test.filtered.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,9db74013377c09d8452530315592acdf" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-22T12:56:16.022839" + } +} \ No newline at end of file diff --git a/modules/local/sanitize/main.nf b/modules/local/sanitize/main.nf new file mode 100644 index 0000000..c1519af --- /dev/null +++ b/modules/local/sanitize/main.nf @@ -0,0 +1,38 @@ +// Rename FASTA headers (just makes everything easier later) + +process SANITIZE_HEADERS { + tag "$meta.id" + label 'process_single' + + // Eventually port fffx (pronounced f3x) to bioconda + // conda "${moduleDir}/environment.yml" + // container 
"docker.io/gallvp/edta-components:v0.1" + + container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' + ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' + : 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.sanitized.fasta'), emit: fasta + tuple val(meta), path('*.sanitized.translation_table.tsv'), emit: translation_table + eval('fffx --version'), topic: versions + + + when: + task.ext.when == null || task.ext.when + + script: + """ + fffx length-filter ${fasta} filtered.fa 1000 + fffx sanitize filtered.fa ${fasta.baseName}.sanitized + """ + + stub: + """ + touch ${fasta.baseName}.sanitized.fasta + touch ${fasta.baseName}.sanitized.translation_table.tsv + """ +} \ No newline at end of file diff --git a/modules/local/utils/main.nf b/modules/local/utils/main.nf new file mode 100644 index 0000000..fee91f3 --- /dev/null +++ b/modules/local/utils/main.nf @@ -0,0 +1,44 @@ +// +// Generate workflow version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 0000000..9b01c86 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 0000000..2862c64 --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,78 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." 
+ } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 0000000..81778a0 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,43 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - file_out: + - meta: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - ${prefix}: + type: file + description: Concatenated file. 
Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 0000000..9cb1617 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,191 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 0000000..b7623ee --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,147 @@ +{ + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + 
"MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ], + 78, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:46.802978" + }, + "test_cat_name_conflict": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:29.45394" + }, + "test_cat_one_file_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 374, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:52:02.774016" + }, + "test_cat_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute 
respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 375, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:57.581523" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 0000000..ec26b0f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 0000000..fbc7978 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 0000000..37b578f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 0000000..c779485 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + 
- conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 0000000..5e67e3b --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,55 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 0000000..9066c03 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,47 @@ +name: 
gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - $gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 0000000..776211a --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,121 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 0000000..069967e --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ 
+ + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 0000000..dec7764 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 0000000..fd3f691 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 
@@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/ltrfinder/environment.yml b/modules/nf-core/ltrfinder/environment.yml new file mode 100644 index 0000000..2932d5c --- /dev/null +++ b/modules/nf-core/ltrfinder/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::ltr_finder_parallel=1.1" diff --git a/modules/nf-core/ltrfinder/main.nf b/modules/nf-core/ltrfinder/main.nf new file mode 100644 index 0000000..3e59e3c --- /dev/null +++ b/modules/nf-core/ltrfinder/main.nf @@ -0,0 +1,53 @@ +process LTRFINDER { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ltr_finder_parallel:1.1--hdfd78af_0': + 'biocontainers/ltr_finder_parallel:1.1--hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.scn") , emit: scn + tuple val(meta), path("*.gff3") , emit: gff + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + LTR_FINDER_parallel \\ + -seq $fasta \\ + -threads $task.cpus \\ + $args + + mv "${fasta}.finder.combine.scn" "${prefix}.scn" + mv "${fasta}.finder.combine.gff3" "${prefix}.gff3" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + LTR_FINDER_parallel: \$(LTR_FINDER_parallel -h | grep 'Version:' | sed 's/Version: //') + ltr_finder: \$(ltr_finder -h 2>&1 | grep 'ltr_finder' | sed 's/ltr_finder //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.scn" + touch "${prefix}.gff3" + + cat <<-END_VERSIONS > versions.yml + 
"${task.process}": + LTR_FINDER_parallel: \$(LTR_FINDER_parallel -h | grep 'Version:' | sed 's/Version: //') + ltr_finder: \$(ltr_finder -h 2>&1 | grep 'ltr_finder' | sed 's/ltr_finder //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ltrfinder/meta.yml b/modules/nf-core/ltrfinder/meta.yml new file mode 100644 index 0000000..547fb67 --- /dev/null +++ b/modules/nf-core/ltrfinder/meta.yml @@ -0,0 +1,70 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ltrfinder" +description: | + Finds full-length LTR retrotranspsons in genome sequences using the + parallel version of LTR_Finder +keywords: + - genomics + - annotation + - parallel + - repeat + - long terminal retrotransposon + - retrotransposon +tools: + - "LTR_FINDER_parallel": + description: A Perl wrapper for LTR_FINDER + homepage: "https://github.com/oushujun/LTR_FINDER_parallel" + documentation: "https://github.com/oushujun/LTR_FINDER_parallel" + tool_dev_url: "https://github.com/oushujun/LTR_FINDER_parallel" + doi: "10.1186/s13100-019-0193-0" + licence: ["MIT"] + identifier: "" + - "LTR_Finder": + description: An efficient program for finding full-length LTR retrotranspsons + in genome sequences + homepage: "https://github.com/xzhub/LTR_Finder" + documentation: "https://github.com/xzhub/LTR_Finder" + tool_dev_url: "https://github.com/xzhub/LTR_Finder" + doi: "10.1093/nar/gkm286" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: Genome sequences in fasta format + pattern: "*.{fsa,fa,fasta}" +output: + - scn: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.scn": + type: file + description: Annotation in LTRharvest or LTR_FINDER format + pattern: "*.scn" + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.gff3": + type: file + description: Annotation in gff3 format + pattern: "*.gff3" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/ltrfinder/tests/main.nf.test b/modules/nf-core/ltrfinder/tests/main.nf.test new file mode 100644 index 0000000..dc8c803 --- /dev/null +++ b/modules/nf-core/ltrfinder/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process LTRFINDER" + script "../main.nf" + process "LTRFINDER" + + tag "modules" + tag "modules_nfcore" + tag "ltrfinder" + tag "gunzip" + + test("actinidia_chinensis-genome_21_fasta_gz-success") { + + setup { + run('GUNZIP') { + script "../../gunzip/main.nf" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2-genome_fasta-no_ltr") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', 
checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/ltrfinder/tests/main.nf.test.snap b/modules/nf-core/ltrfinder/tests/main.nf.test.snap new file mode 100644 index 0000000..0f1790f --- /dev/null +++ b/modules/nf-core/ltrfinder/tests/main.nf.test.snap @@ -0,0 +1,149 @@ +{ + "actinidia_chinensis-genome_21_fasta_gz-success": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.scn:md5,006193c9eaf3f552ccb0369f159e7660" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,96e5305163939e4381e1b94b660dc0a2" + ] + ], + "2": [ + "versions.yml:md5,7b24225b810fa88cfb2a887de11be333" + ], + "gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,96e5305163939e4381e1b94b660dc0a2" + ] + ], + "scn": [ + [ + { + "id": "test" + }, + "test.scn:md5,006193c9eaf3f552ccb0369f159e7660" + ] + ], + "versions": [ + "versions.yml:md5,7b24225b810fa88cfb2a887de11be333" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T09:14:38.509965" + }, + "sarscov2-genome_fasta-no_ltr": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.scn:md5,2ce449dff751e59dbc292b6888491954" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,bddeb04277af08b5850e64708e8af02a" + ] + ], + "2": [ + "versions.yml:md5,7b24225b810fa88cfb2a887de11be333" + ], + "gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,bddeb04277af08b5850e64708e8af02a" + ] + ], + "scn": [ + [ + { + "id": "test" + }, + "test.scn:md5,2ce449dff751e59dbc292b6888491954" + ] + ], + "versions": [ + "versions.yml:md5,7b24225b810fa88cfb2a887de11be333" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-16T13:03:03.505263" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.scn:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + 
}, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,7b24225b810fa88cfb2a887de11be333" + ], + "gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scn": [ + [ + { + "id": "test" + }, + "test.scn:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,7b24225b810fa88cfb2a887de11be333" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T09:14:43.054758" + } +} \ No newline at end of file diff --git a/modules/nf-core/ltrharvest/environment.yml b/modules/nf-core/ltrharvest/environment.yml new file mode 100644 index 0000000..c6cac36 --- /dev/null +++ b/modules/nf-core/ltrharvest/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::ltr_harvest_parallel=1.1" diff --git a/modules/nf-core/ltrharvest/main.nf b/modules/nf-core/ltrharvest/main.nf new file mode 100644 index 0000000..1e5e06d --- /dev/null +++ b/modules/nf-core/ltrharvest/main.nf @@ -0,0 +1,56 @@ +process LTRHARVEST { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ltr_harvest_parallel:1.1--hdfd78af_0': + 'biocontainers/ltr_harvest_parallel:1.1--hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.gff3") , emit: gff3 + tuple val(meta), path("*.scn") , emit: scn + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + LTR_HARVEST_parallel \\ + -seq $fasta \\ + $args \\ + -threads $task.cpus + + mv "${fasta}.harvest.combine.gff3" \\ + "${prefix}.gff3" + + mv "${fasta}.harvest.combine.scn" \\ + "${prefix}.scn" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + LTR_HARVEST_parallel: \$(LTR_HARVEST_parallel -h | sed -n '/Version/s/Version: //p') + genometools: \$(gt --version | sed '1!d ; s/gt (GenomeTools) //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.gff3" + touch "${prefix}.scn" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + LTR_HARVEST_parallel: \$(LTR_HARVEST_parallel -h | sed -n '/Version/s/Version: //p') + genometools: \$(gt --version | sed '1!d ; s/gt (GenomeTools) //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ltrharvest/meta.yml b/modules/nf-core/ltrharvest/meta.yml new file mode 100644 index 0000000..1806418 --- /dev/null +++ b/modules/nf-core/ltrharvest/meta.yml @@ -0,0 +1,68 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ltrharvest" +description: | + Predicts LTR retrotransposons using the parallel version of GenomeTools gt-ltrharvest + utility included in the EDTA toolchain +keywords: + - genomics + - genome + - annotation + - repeat + - transposons + - retrotransposons +tools: + - "LTR_HARVEST_parallel": + description: A Perl wrapper for LTR_harvest + homepage: 
"https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel" + documentation: "https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel" + tool_dev_url: "https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel" + licence: ["MIT"] + identifier: "" + - "gt": + description: "The GenomeTools genome analysis system" + homepage: "https://genometools.org/index.html" + documentation: "https://genometools.org/documentation.html" + tool_dev_url: "https://github.com/genometools/genometools" + doi: "10.1109/TCBB.2013.68" + licence: ["ISC"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: Input genome fasta + pattern: "*.{fsa,fa,fasta}" +output: + - gff3: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.gff3": + type: file + description: Predicted LTR candidates in gff3 format + pattern: "*.gff3" + - scn: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.scn": + type: file + description: Predicted LTR candidates in scn format + pattern: "*.scn" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/ltrharvest/tests/main.nf.test b/modules/nf-core/ltrharvest/tests/main.nf.test new file mode 100644 index 0000000..e200fde --- /dev/null +++ b/modules/nf-core/ltrharvest/tests/main.nf.test @@ -0,0 +1,89 @@ +nextflow_process { + + name "Test Process LTRHARVEST" + script "../main.nf" + process "LTRHARVEST" + + tag "modules" + tag "modules_nfcore" + tag "ltrharvest" + + test("homo_sapiens - genome_21_fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.gff3, + process.out.versions + ).match() + }, + { assert path(process.out.scn[0][1]).text.contains("46510803 46520182 9380 46510803 46510940 138 46520042 46520182 141 86.52 0 chr21") }, + ) + } + + } + + test("sarscov2 - genome_fasta - no_ltr") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.gff3, + process.out.versions + ).match() + }, + { assert path(process.out.scn[0][1]).text.contains("predictions are reported in the following way") }, + ) + } + + } + + test("homo_sapiens - genome_fasta - stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/ltrharvest/tests/main.nf.test.snap b/modules/nf-core/ltrharvest/tests/main.nf.test.snap new file mode 100644 index 0000000..f3a8da8 --- /dev/null +++ b/modules/nf-core/ltrharvest/tests/main.nf.test.snap @@ -0,0 +1,91 @@ +{ + "sarscov2 - genome_fasta - no_ltr": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gff3:md5,bddeb04277af08b5850e64708e8af02a" + ] + ], + [ + "versions.yml:md5,51e82185b713482d1d48b6f15abe7fcc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T10:40:36.380052" + }, + "homo_sapiens - genome_fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.scn:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,51e82185b713482d1d48b6f15abe7fcc" + ], + "gff3": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scn": [ + [ + { + "id": "test" + }, + "test.scn:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,51e82185b713482d1d48b6f15abe7fcc" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T10:40:40.967557" + }, + "homo_sapiens - genome_21_fasta": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gff3:md5,da13c4ba22e44ef944ddec38aa72c468" + ] + ], + [ + "versions.yml:md5,51e82185b713482d1d48b6f15abe7fcc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T10:40:30.946" + } +} \ No newline at end of file diff --git a/modules/nf-core/ltrretriever/ltrretriever/environment.yml b/modules/nf-core/ltrretriever/ltrretriever/environment.yml new file mode 100644 index 0000000..f1c392a --- /dev/null +++ b/modules/nf-core/ltrretriever/ltrretriever/environment.yml @@ -0,0 +1,7 @@ +--- 
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::LTR_retriever=2.9.9" diff --git a/modules/nf-core/ltrretriever/ltrretriever/main.nf b/modules/nf-core/ltrretriever/ltrretriever/main.nf new file mode 100644 index 0000000..8e1e2be --- /dev/null +++ b/modules/nf-core/ltrretriever/ltrretriever/main.nf @@ -0,0 +1,94 @@ +process LTRRETRIEVER_LTRRETRIEVER { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ltr_retriever:2.9.9--hdfd78af_0': + 'biocontainers/ltr_retriever:2.9.9--hdfd78af_0' }" + + input: + tuple val(meta), path(genome) + path(harvest) + path(finder) + path(mgescan) + path(non_tgca) + + output: + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("${prefix}.pass.list"), emit: pass_list , optional: true + tuple val(meta), path("*.pass.list.gff3") , emit: pass_list_gff , optional: true + tuple val(meta), path("*.LTRlib.fa") , emit: ltrlib , optional: true + tuple val(meta), path("${prefix}.out") , emit: annotation_out , optional: true + tuple val(meta), path("*.out.gff3") , emit: annotation_gff , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def inharvest = harvest ? "-inharvest $harvest" : '' + def infinder = finder ? "-infinder $finder" : '' + def inmgescan = mgescan ? "-inmgescan $mgescan" : '' + def non_tgca_file = non_tgca ? "-nonTGCA $non_tgca" : '' + def writable_genome = "${genome.baseName}.writable.${genome.extension}" + // writable_genome: + // This is needed to avoid LTR_retriever:2.9.9 failure when the input `genome` is + // readonly. 
LTR_retriever triggers a 'die' if the genome is readonly. + // See: https://github.com/oushujun/LTR_retriever/blob/4039eb7778fd9cbc60021e99a8693285e0fa2daf/LTR_retriever#L312 + // + // This copy with permissions logic can be removed once https://github.com/oushujun/LTR_retriever/issues/176 + // has been resolved. + """ + cp \\ + $genome \\ + $writable_genome + + chmod \\ + a+w \\ + $writable_genome + + LTR_retriever \\ + -genome $writable_genome \\ + $inharvest \\ + $infinder \\ + $inmgescan \\ + $non_tgca_file \\ + -threads $task.cpus \\ + $args \\ + &> >(tee "${prefix}.log" 2>&1) \\ + || echo "Errors from LTR_retriever printed to ${prefix}.log" + + mv "${writable_genome}.pass.list" "${prefix}.pass.list" || echo ".pass.list was not produced" + mv "${writable_genome}.pass.list.gff3" "${prefix}.pass.list.gff3" || echo ".pass.list.gff3 was not produced" + mv "${writable_genome}.LTRlib.fa" "${prefix}.LTRlib.fa" || echo ".LTRlib.fa was not produced" + mv "${writable_genome}.out" "${prefix}.out" || echo ".out was not produced" + mv "${writable_genome}.out.gff3" "${prefix}.out.gff3" || echo ".out.gff3 was not produced" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + LTR_retriever: \$(LTR_retriever -h 2>&1 | grep '### LTR_retriever' | sed 's/### LTR_retriever //; s/ ###//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def touch_out = args.contains('-noanno') ? '' : "touch ${prefix}.out" + def touch_out_gff = args.contains('-noanno') ? 
'' : "touch ${prefix}.out.gff3" + """ + touch "${prefix}.log" + touch "${prefix}.pass.list" + touch "${prefix}.pass.list.gff3" + touch "${prefix}.LTRlib.fa" + $touch_out + $touch_out_gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + LTR_retriever: \$(LTR_retriever -h 2>&1 | grep '### LTR_retriever' | sed 's/### LTR_retriever //; s/ ###//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ltrretriever/ltrretriever/meta.yml b/modules/nf-core/ltrretriever/ltrretriever/meta.yml new file mode 100644 index 0000000..9645de2 --- /dev/null +++ b/modules/nf-core/ltrretriever/ltrretriever/meta.yml @@ -0,0 +1,119 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ltrretriever_ltrretriever" +description: Identifies LTR retrotransposons using LTR_retriever +keywords: + - genomics + - annotation + - repeat + - long terminal repeat + - retrotransposon +tools: + - "LTR_retriever": + description: Sensitive and accurate identification of LTR retrotransposons + homepage: "https://github.com/oushujun/LTR_retriever" + documentation: "https://github.com/oushujun/LTR_retriever" + tool_dev_url: "https://github.com/oushujun/LTR_retriever" + doi: "10.1104/pp.17.01310" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - genome: + type: file + description: Genomic sequences in fasta format + pattern: "*.{fsa,fa,fasta}" + - - harvest: + type: file + description: LTR-RT candidates from GenomeTools ltrharvest in the old tabular + format + pattern: "*.tabout" + - - finder: + type: file + description: LTR-RT candidates from LTR_FINDER + pattern: "*.scn" + - - mgescan: + type: file + description: LTR-RT candidates from MGEScan_LTR + pattern: "*.out" + - - non_tgca: + type: file + description: Non-canonical LTR-RT candidates from GenomeTools ltrharvest in + the old tabular format + pattern: "*.tabout" +output: + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.log": + type: file + description: Output log from LTR_retriever + pattern: "*.log" + - pass_list: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.pass.list: + type: file + description: Intact LTR-RTs with coordinate and structural information in summary + table format + pattern: "*.pass.list" + - pass_list_gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.pass.list.gff3": + type: file + description: Intact LTR-RTs with coordinate and structural information in gff3 + format + pattern: "*.pass.list.gff3" + - ltrlib: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.LTRlib.fa": + type: file + description: All non-redundant LTR-RTs + pattern: "*.LTRlib.fa" + - annotation_out: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.out: + type: file + description: Whole-genome LTR-RT annotation by the non-redundant library + pattern: "*.out" + - annotation_gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.out.gff3": + type: file + description: Whole-genome LTR-RT annotation by the non-redundant library in + gff3 format + pattern: "*.out.gff3" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/ltrretriever/ltrretriever/tests/main.nf.test b/modules/nf-core/ltrretriever/ltrretriever/tests/main.nf.test new file mode 100644 index 0000000..4d512f3 --- /dev/null +++ b/modules/nf-core/ltrretriever/ltrretriever/tests/main.nf.test @@ -0,0 +1,186 @@ +nextflow_process { + + name "Test Process LTRRETRIEVER_LTRRETRIEVER" + script "../main.nf" + process "LTRRETRIEVER_LTRRETRIEVER" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "ltrretriever" + tag "ltrretriever/ltrretriever" + tag "gunzip" + tag "ltrharvest" + tag "ltrfinder" + tag "cat/cat" + + test("sarscov2-genome-no_ltr") { + + setup { + + run("LTRHARVEST") { + script "../../../ltrharvest/main.nf" + + process { + """ + input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + run("LTRFINDER") { + script "../../../ltrfinder/main.nf" + + process { + """ + input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + run("CAT_CAT") { + script "../../../cat/cat/main.nf" + + process { + """ + input[0] = LTRHARVEST.out.scn.mix(LTRFINDER.out.scn).groupTuple() + """ + } + } + } + + when { + process { + """ + input[0] = input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = CAT_CAT.out.file_out.map { meta, tabout -> tabout } + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { 
assert path(process.out.log[0][1]).text.contains("ERROR: No candidate is found in the file(s) you specified.") }, + { assert snapshot(process.out.versions).match("versions_no_ltr") } + ) + } + + } + + test("actinidia_chinensis-genome_21_fasta_gz-success") { + + setup { + + run('GUNZIP') { + script "../../../gunzip/main.nf" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + run("LTRHARVEST") { + script "../../../ltrharvest/main.nf" + + process { + """ + input[0] = GUNZIP.out.gunzip + """ + } + } + + run("LTRFINDER") { + script "../../../ltrfinder/main.nf" + + process { + """ + input[0] = GUNZIP.out.gunzip + """ + } + } + + run("CAT_CAT") { + script "../../../cat/cat/main.nf" + + process { + """ + input[0] = LTRHARVEST.out.scn.mix(LTRFINDER.out.scn).groupTuple() + """ + } + } + } + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = CAT_CAT.out.file_out.map { meta, tabout -> tabout } + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.log[0][1]).text.contains("####### Result files #########") }, + { assert path(process.out.pass_list[0][1]).text.contains("Copia\tLTR") }, + { assert path(process.out.pass_list_gff[0][1]).text.contains("chr1\tLTR_retriever\ttarget_site_duplication") }, + { assert path(process.out.ltrlib[0][1]).text.contains("LTR#LTR/Copia") }, + { assert snapshot(process.out.annotation_out).match("annotation_out") }, + { assert path(process.out.annotation_gff[0][1]).text.contains("Classification=LTR/Copia") }, + { assert snapshot(path(process.out.versions[0]).text).match("versions") } + ) + } + + } + + test("stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 
'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/ltrretriever/ltrretriever/tests/main.nf.test.snap b/modules/nf-core/ltrretriever/ltrretriever/tests/main.nf.test.snap new file mode 100644 index 0000000..825b1e5 --- /dev/null +++ b/modules/nf-core/ltrretriever/ltrretriever/tests/main.nf.test.snap @@ -0,0 +1,154 @@ +{ + "versions": { + "content": [ + "\"LTRRETRIEVER_LTRRETRIEVER\":\n LTR_retriever: v2.9.9\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T11:17:50.208819" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.pass.list:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.pass.list.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.LTRlib.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.out.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,3ab159acaee06b342b56e2d35e5e669b" + ], + "annotation_gff": [ + [ + { + "id": "test" + }, + "test.out.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "annotation_out": [ + [ + { + "id": "test" + }, + "test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ltrlib": [ + [ + { + "id": "test" + }, + "test.LTRlib.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pass_list": [ + [ + { + "id": "test" + }, + 
"test.pass.list:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pass_list_gff": [ + [ + { + "id": "test" + }, + "test.pass.list.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3ab159acaee06b342b56e2d35e5e669b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T11:04:15.954424" + }, + "annotation_out": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.out:md5,33d89bea9031f25de8f0d3591ab94d87" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T14:18:02.458476" + }, + "versions_no_ltr": { + "content": [ + [ + "versions.yml:md5,3ab159acaee06b342b56e2d35e5e669b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T14:03:52.324194" + } +} diff --git a/modules/nf-core/ltrretriever/ltrretriever/tests/nextflow.config b/modules/nf-core/ltrretriever/ltrretriever/tests/nextflow.config new file mode 100644 index 0000000..7f67556 --- /dev/null +++ b/modules/nf-core/ltrretriever/ltrretriever/tests/nextflow.config @@ -0,0 +1,15 @@ +process { + + withName: LTRFINDER { + ext.args = '-harvest_out' + // LTRRETRIEVER_LTRRETRIEVER requires -harvest_out + } + + withName: LTRHARVEST { + ext.prefix = { "${meta.id}_ltrharvest" } + } + + withName: CAT_CAT { + ext.prefix = { "${meta.id}_ltrharvest_ltrfinder.tabout" } + } +} diff --git a/modules/nf-core/repeatmodeler/builddatabase/environment.yml b/modules/nf-core/repeatmodeler/builddatabase/environment.yml new file mode 100644 index 0000000..5314307 --- /dev/null +++ b/modules/nf-core/repeatmodeler/builddatabase/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::repeatmodeler=2.0.5" diff --git a/modules/nf-core/repeatmodeler/builddatabase/main.nf 
b/modules/nf-core/repeatmodeler/builddatabase/main.nf new file mode 100644 index 0000000..6fe244b --- /dev/null +++ b/modules/nf-core/repeatmodeler/builddatabase/main.nf @@ -0,0 +1,50 @@ +process REPEATMODELER_BUILDDATABASE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/repeatmodeler:2.0.5--pl5321hdfd78af_0': + 'biocontainers/repeatmodeler:2.0.5--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("${prefix}.*") , emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + """ + BuildDatabase \\ + -name $prefix \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + repeatmodeler: \$(RepeatModeler --version | sed 's/RepeatModeler version //') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.nhr + touch ${prefix}.nin + touch ${prefix}.njs + touch ${prefix}.nnd + touch ${prefix}.nni + touch ${prefix}.nog + touch ${prefix}.nsq + touch ${prefix}.translation + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + repeatmodeler: \$(RepeatModeler --version | sed 's/RepeatModeler version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/repeatmodeler/builddatabase/meta.yml b/modules/nf-core/repeatmodeler/builddatabase/meta.yml new file mode 100644 index 0000000..cc78cf0 --- /dev/null +++ b/modules/nf-core/repeatmodeler/builddatabase/meta.yml @@ -0,0 +1,47 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "repeatmodeler_builddatabase" +description: Create a database for RepeatModeler +keywords: + - genomics + - fasta + - repeat +tools: + - "repeatmodeler": + description: "RepeatModeler is a 
de-novo repeat family identification and modeling + package." + homepage: "https://github.com/Dfam-consortium/RepeatModeler" + documentation: "https://github.com/Dfam-consortium/RepeatModeler" + tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler" + licence: ["Open Software License v2.1"] + identifier: biotools:repeatmodeler + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: Fasta file + pattern: "*.{fasta,fsa,fa}" +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.*: + type: file + description: Database files for repeatmodeler + pattern: "`${prefix}.*`" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test b/modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test new file mode 100644 index 0000000..78b78a6 --- /dev/null +++ b/modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process REPEATMODELER_BUILDDATABASE" + script "../main.nf" + process "REPEATMODELER_BUILDDATABASE" + + tag "modules" + tag "modules_nfcore" + tag "repeatmodeler" + tag "repeatmodeler/builddatabase" + + test("sarscov2-genome_fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert snapshot(process.out.db[0][1].collect { file(it).name }.sort().toString()).match("db") }, + { assert snapshot(process.out.db[0][1].findAll { ! 
( "$it"[-3..-1] in [ 'nin', 'njs' ] ) } ).match("stable_md5") } + ) + } + + } + + test("sarscov2-genome_fasta-stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test.snap b/modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test.snap new file mode 100644 index 0000000..1f1a551 --- /dev/null +++ b/modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test.snap @@ -0,0 +1,92 @@ +{ + "sarscov2-genome_fasta-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nnd:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nni:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nog:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.translation:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,7944637266bc3e2726899eaad5e46c87" + ], + "db": [ + [ + { + "id": "test" + }, + [ + "test.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nnd:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nni:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nog:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.translation:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,7944637266bc3e2726899eaad5e46c87" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-04-02T12:06:44.261566" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,7944637266bc3e2726899eaad5e46c87" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-09T15:14:48.807063" + }, + "stable_md5": { + "content": [ + [ + "test.nhr:md5,1a41cb6d0b00c28f62ad60e75ae2f6fc", + "test.nnd:md5,2002e13acf59079a1a5782c918894579", + "test.nni:md5,26a954ba0fd80983b550d8f6b8b35ff8", + "test.nog:md5,30896f123998e926ea2237b89091e7fe", + "test.nsq:md5,982cbc7d9e38743b9b1037588862b9da", + "test.translation:md5,ccbb119522c09daa976a9015ba999329" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T10:03:41.669433" + }, + "db": { + "content": [ + "[test.nhr, test.nin, test.njs, test.nnd, test.nni, test.nog, test.nsq, test.translation]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-02T12:08:36.94713" + } +} \ No newline at end of file diff --git a/modules/nf-core/repeatmodeler/builddatabase/tests/tags.yml b/modules/nf-core/repeatmodeler/builddatabase/tests/tags.yml new file mode 100644 index 0000000..c524294 --- /dev/null +++ b/modules/nf-core/repeatmodeler/builddatabase/tests/tags.yml @@ -0,0 +1,2 @@ +repeatmodeler/builddatabase: + - "modules/nf-core/repeatmodeler/builddatabase/**" diff --git a/modules/nf-core/repeatmodeler/repeatmodeler/environment.yml b/modules/nf-core/repeatmodeler/repeatmodeler/environment.yml new file mode 100644 index 0000000..5314307 --- /dev/null +++ b/modules/nf-core/repeatmodeler/repeatmodeler/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::repeatmodeler=2.0.5" diff --git a/modules/nf-core/repeatmodeler/repeatmodeler/main.nf b/modules/nf-core/repeatmodeler/repeatmodeler/main.nf new file mode 100644 index 0000000..9d0449f --- 
/dev/null +++ b/modules/nf-core/repeatmodeler/repeatmodeler/main.nf @@ -0,0 +1,54 @@ +process REPEATMODELER_REPEATMODELER { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/repeatmodeler:2.0.5--pl5321hdfd78af_0': + 'biocontainers/repeatmodeler:2.0.5--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(db) + + output: + tuple val(meta), path("*.fa") , emit: fasta + tuple val(meta), path("*.stk"), emit: stk + tuple val(meta), path("*.log"), emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def db_name = file(db[0]).getBaseName() + """ + RepeatModeler \\ + -database $db_name \\ + $args \\ + -threads $task.cpus + + mv ${db_name}-families.fa ${prefix}.fa + mv ${db_name}-families.stk ${prefix}.stk + mv ${db_name}-rmod.log ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + repeatmodeler: \$(RepeatModeler --version | sed 's/RepeatModeler version //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fa + touch ${prefix}.stk + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + repeatmodeler: \$(RepeatModeler --version | sed 's/RepeatModeler version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/repeatmodeler/repeatmodeler/meta.yml b/modules/nf-core/repeatmodeler/repeatmodeler/meta.yml new file mode 100644 index 0000000..6693ae9 --- /dev/null +++ b/modules/nf-core/repeatmodeler/repeatmodeler/meta.yml @@ -0,0 +1,68 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "repeatmodeler_repeatmodeler" +description: Performs de novo transposable element (TE) family 
identification with + RepeatModeler +keywords: + - genomics + - fasta + - repeat + - transposable element +tools: + - "repeatmodeler": + description: "RepeatModeler is a de-novo repeat family identification and modeling + package." + homepage: "https://github.com/Dfam-consortium/RepeatModeler" + documentation: "https://github.com/Dfam-consortium/RepeatModeler" + tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler" + licence: ["Open Software License v2.1"] + identifier: biotools:repeatmodeler +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - db: + type: file + description: RepeatModeler database files generated with REPEATMODELER_BUILDDATABASE + pattern: "*" +output: + - fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.fa": + type: file + description: Consensus repeat sequences + pattern: "*.fa" + - stk: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.stk": + type: file + description: Seed alignments + pattern: "*.stk" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.log": + type: file + description: A summarized log of the run + pattern: "*.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test b/modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test new file mode 100644 index 0000000..829e222 --- /dev/null +++ b/modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test @@ -0,0 +1,74 @@ +nextflow_process { + + name "Test Process REPEATMODELER_REPEATMODELER" + script "../main.nf" + process "REPEATMODELER_REPEATMODELER" + + tag "modules" + tag "modules_nfcore" + tag "repeatmodeler" + tag "repeatmodeler/repeatmodeler" + tag "repeatmodeler/builddatabase" + + test("homo_sapiens-genome_fasta") { + + setup { + run("REPEATMODELER_BUILDDATABASE") { + script "../../../../nf-core/repeatmodeler/builddatabase" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = REPEATMODELER_BUILDDATABASE.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fasta).match("fasta") }, + { assert snapshot(process.out.stk).match("stk") }, + { assert file(process.out.log[0][1]).text.contains('1 families discovered.') }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-genome_fasta-stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file 
diff --git a/modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test.snap b/modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test.snap new file mode 100644 index 0000000..e923952 --- /dev/null +++ b/modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test.snap @@ -0,0 +1,113 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,1bb6846ecf1304c262eaef4d3de60cf9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-09T15:06:55.753492" + }, + "homo_sapiens-genome_fasta-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.stk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,1bb6846ecf1304c262eaef4d3de60cf9" + ], + "fasta": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stk": [ + [ + { + "id": "test" + }, + "test.stk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1bb6846ecf1304c262eaef4d3de60cf9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:16:41.45166" + }, + "stk": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.stk:md5,acd01ad35763c11315e2297a4f051d57" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-09T15:06:55.740963" + }, + "fasta": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.fa:md5,e25326771341204e1f8054d9529411e5" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-09T15:06:55.737658" + } +} \ No newline at end of file diff --git a/modules/nf-core/repeatmodeler/repeatmodeler/tests/tags.yml 
b/modules/nf-core/repeatmodeler/repeatmodeler/tests/tags.yml new file mode 100644 index 0000000..df65110 --- /dev/null +++ b/modules/nf-core/repeatmodeler/repeatmodeler/tests/tags.yml @@ -0,0 +1,2 @@ +repeatmodeler/repeatmodeler: + - "modules/nf-core/repeatmodeler/repeatmodeler/**" diff --git a/nextflow.config b/nextflow.config new file mode 100644 index 0000000..ccf5850 --- /dev/null +++ b/nextflow.config @@ -0,0 +1,101 @@ +process { + + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + + withLabel:process_single { + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_low { + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_medium { + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } + } + withLabel:process_high { + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 16.h * task.attempt } + } + withLabel:process_long { + time = { 20.h * task.attempt } + } + withLabel:process_high_memory { + memory = { 200.GB * task.attempt } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } +} + +// Max resources +process { + resourceLimits = [ + cpus: 12, + memory: '16.GB', + time: '1.hour' + ] +} + +profiles { + singularity { + singularity.enabled = true + singularity.autoMounts = true + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + } + conda { + conda.enabled = true + } + mamba { + conda.enabled = true + conda.useMamba = true + } + podman { + podman.enabled = true + podman.userEmulation = true + podman.runOptions = "--runtime crun --platform linux/x86_64 --systemd=always" + } + docker { + docker.enabled = true + 
docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } + test { includeConfig './conf/test.config' } +} + +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +apptainer.registry = 'quay.io' + +// Increase time available to build Conda environment +conda { + createTimeout = "120 min" +} + +manifest { + name = 'jguhlin/EDTA' + description = 'Extensive de-novo TE Annotator on Nextflow' + author = 'Usman Rashid, Joseph Guhlin & Shujun Ou' + version = '0.1.0dev' + nextflowVersion = '!>=23.04.0' +} + +includeConfig 'conf/modules.config' diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..c3a9a1e --- /dev/null +++ b/nf-test.config @@ -0,0 +1,10 @@ +config { + testsDir "." + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" + configFile "test/nf-test/nextflow.config" + + plugins { + load "nft-bam@0.4.0" + load "nft-utils@0.0.3" + } +} diff --git a/subworkflows/gallvp/fasta_helitronscanner_scan_draw/main.nf b/subworkflows/gallvp/fasta_helitronscanner_scan_draw/main.nf new file mode 100644 index 0000000..40c2541 --- /dev/null +++ b/subworkflows/gallvp/fasta_helitronscanner_scan_draw/main.nf @@ -0,0 +1,86 @@ +include { HELITRONSCANNER_SCAN as HELITRONSCANNER_SCAN_HEAD } from '../../../modules/gallvp/helitronscanner/scan/main.nf' +include { HELITRONSCANNER_SCAN as HELITRONSCANNER_SCAN_TAIL } from '../../../modules/gallvp/helitronscanner/scan/main.nf' +include { HELITRONSCANNER_DRAW } from '../../../modules/gallvp/helitronscanner/draw/main.nf' + +include { HELITRONSCANNER_SCAN as HELITRONSCANNER_SCAN_HEAD_RC } from '../../../modules/gallvp/helitronscanner/scan/main.nf' +include { HELITRONSCANNER_SCAN as HELITRONSCANNER_SCAN_TAIL_RC } from '../../../modules/gallvp/helitronscanner/scan/main.nf' +include { HELITRONSCANNER_DRAW as HELITRONSCANNER_DRAW_RC } from '../../../modules/gallvp/helitronscanner/draw/main.nf' + +workflow FASTA_HELITRONSCANNER_SCAN_DRAW { + + take: + ch_fasta // channel: [ 
val(meta), fasta ] + + main: + + ch_versions = Channel.empty() + + // MODULE: HELITRONSCANNER_SCAN as HELITRONSCANNER_SCAN_HEAD + HELITRONSCANNER_SCAN_HEAD ( + ch_fasta, + 'head', // command + [], // lcv_filepath + 0 // buffer_size + ) + + ch_helitronscanner_scan_head = HELITRONSCANNER_SCAN_HEAD.out.scan + ch_versions = ch_versions.mix(HELITRONSCANNER_SCAN_HEAD.out.versions) + + // MODULE: HELITRONSCANNER_SCAN as HELITRONSCANNER_SCAN_TAIL + HELITRONSCANNER_SCAN_TAIL ( + ch_fasta, + 'tail', // command + [], // lcv_filepath + 0 // buffer_size + ) + + ch_helitronscanner_scan_tail = HELITRONSCANNER_SCAN_TAIL.out.scan + ch_versions = ch_versions.mix(HELITRONSCANNER_SCAN_TAIL.out.versions) + + // MODULE: HELITRONSCANNER_DRAW + HELITRONSCANNER_DRAW ( + ch_fasta, + ch_helitronscanner_scan_head, + ch_helitronscanner_scan_tail + ) + + ch_helitronscanner_draw = HELITRONSCANNER_DRAW.out.draw + ch_versions = ch_versions.mix(HELITRONSCANNER_DRAW.out.versions) + + // MODULE: HELITRONSCANNER_SCAN as HELITRONSCANNER_SCAN_HEAD_RC + HELITRONSCANNER_SCAN_HEAD_RC ( + ch_fasta, + 'head', // command + [], // lcv_filepath + 0 // buffer_size + ) + + ch_helitronscanner_scan_head_rc = HELITRONSCANNER_SCAN_HEAD_RC.out.scan + ch_versions = ch_versions.mix(HELITRONSCANNER_SCAN_HEAD_RC.out.versions) + + // MODULE: HELITRONSCANNER_SCAN as HELITRONSCANNER_SCAN_TAIL_RC + HELITRONSCANNER_SCAN_TAIL_RC ( + ch_fasta, + 'tail', // command + [], // lcv_filepath + 0 // buffer_size + ) + + ch_helitronscanner_scan_tail_rc = HELITRONSCANNER_SCAN_TAIL_RC.out.scan + ch_versions = ch_versions.mix(HELITRONSCANNER_SCAN_TAIL_RC.out.versions) + + // MODULE: HELITRONSCANNER_DRAW as HELITRONSCANNER_DRAW_RC + HELITRONSCANNER_DRAW_RC ( + ch_fasta, + ch_helitronscanner_scan_head_rc, + ch_helitronscanner_scan_tail_rc + ) + + ch_helitronscanner_draw_rc = HELITRONSCANNER_DRAW_RC.out.draw + ch_versions = ch_versions.mix(HELITRONSCANNER_DRAW_RC.out.versions) + + emit: + helitronscanner_draw = ch_helitronscanner_draw // 
channel: [ val(meta), draw ] + helitronscanner_draw_rc = ch_helitronscanner_draw_rc // channel: [ val(meta), rc.draw ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/gallvp/fasta_helitronscanner_scan_draw/meta.yml b/subworkflows/gallvp/fasta_helitronscanner_scan_draw/meta.yml new file mode 100644 index 0000000..62c4b39 --- /dev/null +++ b/subworkflows/gallvp/fasta_helitronscanner_scan_draw/meta.yml @@ -0,0 +1,45 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fasta_helitronscanner_scan_draw" +description: Find helitrons using Helitronscanner scan, pairends and draw subcommands +keywords: + - helitron + - scan + - draw + - pairends + - repeat + - genomics + - fasta +components: + - helitronscanner/scan + - helitronscanner/draw +input: + - ch_fasta: + type: file + description: | + Genome fasta + Structure: [ val(meta), fasta ] + pattern: "*.fasta" +output: + - helitronscanner_draw: + type: file + description: | + Helitronscanner draw file + Structure: [ val(meta), draw ] + pattern: "*.draw" + - helitronscanner_draw_rc: + type: file + description: | + Helitronscanner reverse complement draw file + Structure: [ val(meta), rc.draw ] + pattern: "*.rc.draw" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" + - "@jguhlin" diff --git a/subworkflows/gallvp/fasta_helitronscanner_scan_draw/tests/main.nf.test b/subworkflows/gallvp/fasta_helitronscanner_scan_draw/tests/main.nf.test new file mode 100644 index 0000000..7a9bd05 --- /dev/null +++ b/subworkflows/gallvp/fasta_helitronscanner_scan_draw/tests/main.nf.test @@ -0,0 +1,97 @@ +nextflow_workflow { + + name "Test Subworkflow FASTA_HELITRONSCANNER_SCAN_DRAW" + script "../main.nf" + workflow "FASTA_HELITRONSCANNER_SCAN_DRAW" + config "./nextflow.config" + + tag 
"subworkflows" + tag "subworkflows_gallvp" + tag "subworkflows/fasta_helitronscanner_scan_draw" + + tag "helitronscanner/draw" + tag "helitronscanner/scan" + tag "gunzip" + + setup { + run('GUNZIP') { + script "../../../../modules/gallvp/gunzip/main.nf" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + } + + test("actinidia_chinensis - genome_1_fasta_gz") { + + when { + workflow { + """ + input[0] = GUNZIP.out.gunzip + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + } + + test("sarscov2 - genome") { + + when { + workflow { + """ + input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.helitronscanner_draw }, // empty + { assert workflow.out.helitronscanner_draw_rc }, // empty + { assert snapshot( + workflow.out.versions + ).match() + } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = GUNZIP.out.gunzip + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + } +} diff --git a/subworkflows/gallvp/fasta_helitronscanner_scan_draw/tests/main.nf.test.snap b/subworkflows/gallvp/fasta_helitronscanner_scan_draw/tests/main.nf.test.snap new file mode 100644 index 0000000..f6b8077 --- /dev/null +++ b/subworkflows/gallvp/fasta_helitronscanner_scan_draw/tests/main.nf.test.snap @@ -0,0 +1,137 @@ +{ + "actinidia_chinensis - genome_1_fasta_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.draw:md5,434aaaa70c294464e6bae93b2895b13c" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.rc.draw:md5,7aeed0fdf50f8cb2516b2e92f456835e" + ] + 
], + "2": [ + "versions.yml:md5,42ab90da7ef86125b9dec685f4f08e84", + "versions.yml:md5,9b6a0f2f56d89451cf16702e2f358520", + "versions.yml:md5,a928ae7c94a23e5c62c27915dada660c", + "versions.yml:md5,c417497b1caea1cd51f8440df92daa40", + "versions.yml:md5,d786d80e914a86ce0361cfc0bc62cda9", + "versions.yml:md5,f3b6aa20e81f04fd731fad64f7ea3c14" + ], + "helitronscanner_draw": [ + [ + { + "id": "test" + }, + "test.draw:md5,434aaaa70c294464e6bae93b2895b13c" + ] + ], + "helitronscanner_draw_rc": [ + [ + { + "id": "test" + }, + "test.rc.draw:md5,7aeed0fdf50f8cb2516b2e92f456835e" + ] + ], + "versions": [ + "versions.yml:md5,42ab90da7ef86125b9dec685f4f08e84", + "versions.yml:md5,9b6a0f2f56d89451cf16702e2f358520", + "versions.yml:md5,a928ae7c94a23e5c62c27915dada660c", + "versions.yml:md5,c417497b1caea1cd51f8440df92daa40", + "versions.yml:md5,d786d80e914a86ce0361cfc0bc62cda9", + "versions.yml:md5,f3b6aa20e81f04fd731fad64f7ea3c14" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-21T11:53:51.717511" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.draw:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.rc.draw:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,42ab90da7ef86125b9dec685f4f08e84", + "versions.yml:md5,9b6a0f2f56d89451cf16702e2f358520", + "versions.yml:md5,a928ae7c94a23e5c62c27915dada660c", + "versions.yml:md5,c417497b1caea1cd51f8440df92daa40", + "versions.yml:md5,d786d80e914a86ce0361cfc0bc62cda9", + "versions.yml:md5,f3b6aa20e81f04fd731fad64f7ea3c14" + ], + "helitronscanner_draw": [ + [ + { + "id": "test" + }, + "test.draw:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "helitronscanner_draw_rc": [ + [ + { + "id": "test" + }, + "test.rc.draw:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,42ab90da7ef86125b9dec685f4f08e84", + "versions.yml:md5,9b6a0f2f56d89451cf16702e2f358520", + 
"versions.yml:md5,a928ae7c94a23e5c62c27915dada660c", + "versions.yml:md5,c417497b1caea1cd51f8440df92daa40", + "versions.yml:md5,d786d80e914a86ce0361cfc0bc62cda9", + "versions.yml:md5,f3b6aa20e81f04fd731fad64f7ea3c14" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-21T11:54:17.824589" + }, + "sarscov2 - genome": { + "content": [ + [ + "versions.yml:md5,42ab90da7ef86125b9dec685f4f08e84", + "versions.yml:md5,9b6a0f2f56d89451cf16702e2f358520", + "versions.yml:md5,a928ae7c94a23e5c62c27915dada660c", + "versions.yml:md5,c417497b1caea1cd51f8440df92daa40", + "versions.yml:md5,d786d80e914a86ce0361cfc0bc62cda9", + "versions.yml:md5,f3b6aa20e81f04fd731fad64f7ea3c14" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-21T12:03:43.135817" + } +} \ No newline at end of file diff --git a/subworkflows/gallvp/fasta_helitronscanner_scan_draw/tests/nextflow.config b/subworkflows/gallvp/fasta_helitronscanner_scan_draw/tests/nextflow.config new file mode 100644 index 0000000..b98d370 --- /dev/null +++ b/subworkflows/gallvp/fasta_helitronscanner_scan_draw/tests/nextflow.config @@ -0,0 +1,20 @@ +process { + withName: 'HELITRONSCANNER_DRAW' { + ext.args = '-pure_helitron' + } + + withName: 'HELITRONSCANNER_SCAN_HEAD_RC' { + ext.prefix = { "${meta.id}.rc" } + ext.args = '--rc' + } + + withName: 'HELITRONSCANNER_SCAN_TAIL_RC' { + ext.prefix = { "${meta.id}.rc" } + ext.args = '--rc' + } + + withName: 'HELITRONSCANNER_DRAW_RC' { + ext.prefix = { "${meta.id}.rc" } + ext.args = '-pure_helitron' + } +} diff --git a/test/nf-test/nextflow.config b/test/nf-test/nextflow.config new file mode 100644 index 0000000..ed1a805 --- /dev/null +++ b/test/nf-test/nextflow.config @@ -0,0 +1,22 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== 
+*/ + +params { + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' +} + +timeline { enabled = false } +report { enabled = false } +trace { enabled = false } +dag { enabled = false } + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} diff --git a/test/nf-test/small/main.nf.test b/test/nf-test/small/main.nf.test new file mode 100644 index 0000000..7d40a02 --- /dev/null +++ b/test/nf-test/small/main.nf.test @@ -0,0 +1,32 @@ +nextflow_pipeline { + + name "Test with a small genome" + script "main.nf" + + test("small genome") { + + when { + params { + genomes = "https://raw.githubusercontent.com/jguhlin/EDTA/a2cd9a0777e4ac6e39545bacc3e752f94eb2f389/test/genome.fa" + outdir = "$outputDir" + } + } + + then { + def stable_path = getAllFilesFromDir(params.outdir, false, ['pipeline_info/*.{html,json,txt,yml}'], null, ['**']) + + assertAll( + { assert workflow.success}, + { assert snapshot( + [ + 'successful tasks': workflow.trace.succeeded().size(), + 'versions': removeNextflowVersion("$outputDir/pipeline_info/software_versions.yml"), + 'stable paths': stable_path + ] + ).match() } + ) + } + + } + +} diff --git a/test/nf-test/small/main.nf.test.snap b/test/nf-test/small/main.nf.test.snap new file mode 100644 index 0000000..9154059 --- /dev/null +++ b/test/nf-test/small/main.nf.test.snap @@ -0,0 +1,66 @@ +{ + "small genome": { + "content": [ + { + "successful tasks": 15, + "versions": { + "ANNOSINE": { + "annosine": "2.0.7" + }, + "CAT_CAT": { + "pigz": "2.3.4" + }, + "HELITRONSCANNER_DRAW": { + "helitronscanner": "V1.1" + }, + "HELITRONSCANNER_DRAW_RC": { + "helitronscanner": "V1.1" + }, + "HELITRONSCANNER_SCAN_HEAD": { + "helitronscanner": "V1.1" + }, + "HELITRONSCANNER_SCAN_HEAD_RC": { + "helitronscanner": "V1.1" + }, + "HELITRONSCANNER_SCAN_TAIL": { + "helitronscanner": "V1.1" + }, + "HELITRONSCANNER_SCAN_TAIL_RC": { + "helitronscanner": "V1.1" + }, + "LTRFINDER": { + 
"LTR_FINDER_parallel": "v1.1", + "ltr_finder": "v1.07" + }, + "LTRHARVEST": { + "LTR_HARVEST_parallel": "v1.1", + "genometools": "1.6.5" + }, + "LTRRETRIEVER_LTRRETRIEVER": { + "LTR_retriever": "v2.9.9" + }, + "REPEATMODELER_BUILDDATABASE": { + "repeatmodeler": "2.0.5" + }, + "REPEATMODELER_REPEATMODELER": { + "repeatmodeler": "2.0.5" + }, + "TIRLEARNER": { + "TIR-Learner": "v3.0.2 by Tianyu (Sky) Lu (tlu83@wisc.edu) published under GPLv3" + }, + "Workflow": { + "jguhlin/EDTA": "v0.1.0dev" + } + }, + "stable paths": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T22:38:20.031297" + } +} \ No newline at end of file diff --git a/test/nf-test/tiny/main.nf.test b/test/nf-test/tiny/main.nf.test new file mode 100644 index 0000000..5022c49 --- /dev/null +++ b/test/nf-test/tiny/main.nf.test @@ -0,0 +1,32 @@ +nextflow_pipeline { + + name "Test with a tiny genome" + script "main.nf" + + test("tiny genome") { + + when { + params { + genomes = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta" + outdir = "$outputDir" + } + } + + then { + def stable_path = getAllFilesFromDir(params.outdir, false, ['pipeline_info/*.{html,json,txt,yml}'], null, ['**']) + + assertAll( + { assert workflow.success}, + { assert snapshot( + [ + 'successful tasks': workflow.trace.succeeded().size(), + 'versions': removeNextflowVersion("$outputDir/pipeline_info/software_versions.yml"), + 'stable paths': stable_path + ] + ).match() } + ) + } + + } + +} diff --git a/test/nf-test/tiny/main.nf.test.snap b/test/nf-test/tiny/main.nf.test.snap new file mode 100644 index 0000000..946ed5d --- /dev/null +++ b/test/nf-test/tiny/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "tiny genome": { + "content": [ + { + "successful tasks": 13, + "versions": { + "ANNOSINE": { + "annosine": "2.0.7" + }, + "CAT_CAT": { + "pigz": "2.3.4" + }, + "HELITRONSCANNER_DRAW": { + "helitronscanner": "V1.1" + }, + 
"HELITRONSCANNER_DRAW_RC": { + "helitronscanner": "V1.1" + }, + "HELITRONSCANNER_SCAN_HEAD": { + "helitronscanner": "V1.1" + }, + "HELITRONSCANNER_SCAN_HEAD_RC": { + "helitronscanner": "V1.1" + }, + "HELITRONSCANNER_SCAN_TAIL": { + "helitronscanner": "V1.1" + }, + "HELITRONSCANNER_SCAN_TAIL_RC": { + "helitronscanner": "V1.1" + }, + "LTRFINDER": { + "LTR_FINDER_parallel": "v1.1", + "ltr_finder": "v1.07" + }, + "LTRHARVEST": { + "LTR_HARVEST_parallel": "v1.1", + "genometools": "1.6.5" + }, + "LTRRETRIEVER_LTRRETRIEVER": { + "LTR_retriever": "v2.9.9" + }, + "TIRLEARNER": { + "TIR-Learner": "v3.0.2 by Tianyu (Sky) Lu (tlu83@wisc.edu) published under GPLv3" + }, + "Workflow": { + "jguhlin/EDTA": "v0.1.0dev" + } + }, + "stable paths": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T22:35:05.489004" + } +} \ No newline at end of file diff --git a/workflows/edta.nf b/workflows/edta.nf new file mode 100644 index 0000000..cf99f9f --- /dev/null +++ b/workflows/edta.nf @@ -0,0 +1,152 @@ +include { SANITIZE_HEADERS } from '../modules/local/sanitize/main.nf' +include { LTRHARVEST } from '../modules/nf-core/ltrharvest/main.nf' +include { LTRFINDER } from '../modules/nf-core/ltrfinder/main.nf' +include { CAT_CAT } from '../modules/nf-core/cat/cat/main.nf' +include { LTRRETRIEVER_LTRRETRIEVER } from '../modules/nf-core/ltrretriever/ltrretriever/main.nf' +include { TIRLEARNER } from '../modules/gallvp/tirlearner/main.nf' +include { ANNOSINE } from '../modules/gallvp/annosine/main.nf' +include { REPEATMODELER_BUILDDATABASE } from '../modules/nf-core/repeatmodeler/builddatabase/main.nf' +include { REPEATMODELER_REPEATMODELER } from '../modules/nf-core/repeatmodeler/repeatmodeler/main.nf' +include { FASTA_HELITRONSCANNER_SCAN_DRAW } from '../subworkflows/gallvp/fasta_helitronscanner_scan_draw/main.nf' + +include { softwareVersionsToYAML } from '../modules/local/utils/main.nf' + +workflow EDTA { + + // Versions channel + 
ch_versions = Channel.empty() + + + ch_genome = Channel.fromPath(params.genomes) + + // Create a meta object for each genome + ch_meta_genome = ch_genome.map { genome -> + def meta = [:] + meta.id = genome.baseName + + [ meta, genome ] + } + + // MODULE: SANITIZE_HEADERS + SANITIZE_HEADERS ( ch_meta_genome ) + + ch_sanitized_fasta = SANITIZE_HEADERS.out.fasta + + // MODULE: LTRHARVEST + LTRHARVEST ( ch_sanitized_fasta ) + + ch_ltrharvest_gff3 = LTRHARVEST.out.gff3 + ch_ltrharvest_scn = LTRHARVEST.out.scn + + ch_versions = ch_versions.mix(LTRHARVEST.out.versions) + + // MODULE: LTRFINDER + LTRFINDER { ch_sanitized_fasta } + + ch_ltrfinder_gff3 = LTRFINDER.out.gff + ch_ltrfinder_scn = LTRFINDER.out.scn + + ch_versions = ch_versions.mix(LTRFINDER.out.versions) + + // MODULE: CAT_CAT + ch_cat_cat_inputs = ch_ltrharvest_scn + | join(ch_ltrfinder_scn) + | map { meta, harvested, found -> [ meta, [ harvested, found ] ] } + CAT_CAT ( ch_cat_cat_inputs ) + + ch_ltr_candidates = CAT_CAT.out.file_out + ch_versions = ch_versions.mix(CAT_CAT.out.versions.first()) + + // MODULE: LTRRETRIEVER_LTRRETRIEVER + ch_ltrretriever_inputs = ch_sanitized_fasta.join(ch_ltr_candidates) + + LTRRETRIEVER_LTRRETRIEVER ( + ch_ltrretriever_inputs.map { meta, fasta, ltr -> [ meta, fasta ] }, + ch_ltrretriever_inputs.map { meta, fasta, ltr -> ltr }, + [], + [], + [] + ) + + ch_ltrretriever_log = LTRRETRIEVER_LTRRETRIEVER.out.log + ch_pass_list = LTRRETRIEVER_LTRRETRIEVER.out.pass_list + ch_annotation_out = LTRRETRIEVER_LTRRETRIEVER.out.annotation_out + ch_annotation_gff = LTRRETRIEVER_LTRRETRIEVER.out.annotation_gff + ch_ltrlib = LTRRETRIEVER_LTRRETRIEVER.out.ltrlib + ch_versions = ch_versions.mix(LTRRETRIEVER_LTRRETRIEVER.out.versions.first()) + + // MODULE: TIRLEARNER + TIRLEARNER ( + ch_sanitized_fasta, + params.species + ) + + ch_tirlearner_filtered_gff = TIRLEARNER.out.filtered_gff + ch_versions = ch_versions.mix(TIRLEARNER.out.versions.first()) + + // These can also run in parallel + // 
MODULE: ANNOSINE + ANNOSINE ( + ch_sanitized_fasta, + 3 // mode + ) + + // Currently it's a topic, so need to fix that + ch_versions = ch_versions.mix(ANNOSINE.out.versions) + cb_annosine_seed_sine = ANNOSINE.out.fa + + // MODULE: REPEATMODELER_BUILDDATABASE + ch_repeatmodeler_inputs = ch_sanitized_fasta + | map { meta, fasta -> + def size = fasta.size() + def size_threshold = 100_000 // bytes -> bp + + // TODO: Not the best way to set a size threshold + // but it is simple + // This is needed to avoid, + // Error: Database genome is not large enough ( minimum 40000 bp ) to process with RepeatModeler. + if ( size < size_threshold ) { + log.warn "RepeatModeler is skipped for genome '${meta.id}' as it is smaller than ${size_threshold} bytes" + return null + } + + return [ meta, fasta ] + } + | filter { it } + + REPEATMODELER_BUILDDATABASE ( ch_repeatmodeler_inputs ) + + ch_repeatmodeler_db = REPEATMODELER_BUILDDATABASE.out.db + ch_versions = ch_versions.mix(REPEATMODELER_BUILDDATABASE.out.versions.first()) + + // MODULE: REPEATMODELER_REPEATMODELER + REPEATMODELER_REPEATMODELER ( ch_repeatmodeler_db ) + + ch_repeatmodeler_fasta = REPEATMODELER_REPEATMODELER.out.fasta + ch_versions = ch_versions.mix(REPEATMODELER_REPEATMODELER.out.versions.first()) + + // MODULE: FASTA_HELITRONSCANNER_SCAN_DRAW + FASTA_HELITRONSCANNER_SCAN_DRAW ( ch_sanitized_fasta ) + + ch_helitronscanner_draw = FASTA_HELITRONSCANNER_SCAN_DRAW.out.helitronscanner_draw + ch_helitronscanner_draw_rc = FASTA_HELITRONSCANNER_SCAN_DRAW.out.helitronscanner_draw_rc + ch_versions = ch_versions.mix(FASTA_HELITRONSCANNER_SCAN_DRAW.out.versions) + + + // Function: Save versions + ch_versions = ch_versions + | unique + | map { yml -> + if ( yml ) { yml } + } + + ch_versions_yml = softwareVersionsToYAML(ch_versions) + | collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'software_versions.yml', + sort: true, + newLine: true, + cache: false + ) + +} \ No newline at end of file