Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented MSISENSORPRO tumor-only analysis #1737

Open
wants to merge 2 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions conf/modules/msisensorpro.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,14 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'MSISENSORPRO_MSITUMORONLY' {
ext.args = { params.wes ? '-c 20' : '-c 15' } // default values by MSIsensorpro
ext.prefix = { "${meta.id}.tumor_only" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/msisensorpro/${meta.id}/" },
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}
17 changes: 15 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nf
include { PREPARE_GENOME } from './subworkflows/local/prepare_genome'
include { PREPARE_INTERVALS } from './subworkflows/local/prepare_intervals'
include { PREPARE_REFERENCE_CNVKIT } from './subworkflows/local/prepare_reference_cnvkit'
include { MSISENSORPRO_SCAN } from './modules/nf-core/msisensorpro/scan/main'

// Initialize fasta file with meta map:
fasta = params.fasta ? Channel.fromPath(params.fasta).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty()
Expand All @@ -90,6 +91,7 @@ germline_resource = params.germline_resource ? Channel.fromPath(para
known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([])
known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([])
mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([])
msisensorpro_baseline = params.msisensorpro_baseline ? Channel.fromPath(params.msisensorpro_baseline).collect() : Channel.empty()
pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended)
sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([])

Expand Down Expand Up @@ -165,8 +167,18 @@ workflow NFCORE_SAREK {
aligner == "bwa-mem2" ? bwamem2 :
dragmap

// TODO: add a params for msisensorpro_scan
msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan
// Reference msi list for MSIsensorpro
if (params.tools && params.tools.split(',').contains('msisensorpro')) {
if (params.msisensorpro_scan) {
msisensorpro_scan = Channel.fromPath(params.msisensorpro_scan).collect()
} else {
MSISENSORPRO_SCAN(fasta)
msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] }
versions = versions.mix(MSISENSORPRO_SCAN.out.versions)
Comment on lines +175 to +177
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this something we can put in the PREPARE* subworkflows together with other reference computations for tools?

Copy link
Contributor Author

@bounlu bounlu Nov 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did that first, but it was being generated unnecessarily when MSI analysis was not requested. I couldn't find a way to avoid that in the PREPARE* subworkflows, so I left it like this.

}
} else {
msisensorpro_scan = Channel.empty()
}

// For ASCAT, extracted from zip or tar.gz files
allele_files = PREPARE_GENOME.out.allele_files
Expand Down Expand Up @@ -294,6 +306,7 @@ workflow NFCORE_SAREK {
loci_files,
mappability,
msisensorpro_scan,
msisensorpro_baseline,
ngscheckmate_bed,
pon,
pon_tbi,
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/msisensorpro/msisomatic/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions modules/nf-core/msisensorpro/msitumoronly/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 46 additions & 0 deletions modules/nf-core/msisensorpro/msitumoronly/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

68 changes: 68 additions & 0 deletions modules/nf-core/msisensorpro/msitumoronly/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ params {
ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2
joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected
joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling
msisensorpro_scan = null // by default the reference is built from the fasta file
msisensorpro_baseline = null // by default tumor-only mode is not used in MSIsensorpro
only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired sample
sentieon_dnascope_emit_mode = 'variant' // default value for Sentieon dnascope
sentieon_dnascope_pcr_indel_model = 'CONSERVATIVE'
Expand Down
10 changes: 10 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,16 @@
"description": "Path to Control-FREEC mappability file.",
"help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
},
"msisensorpro_scan": {
"type": "string",
"fa_icon": "fas fa-file-alt",
"description": "Path to MSIsensorpro reference genome microsatellites information file."
},
"msisensorpro_baseline": {
"type": "string",
"fa_icon": "fas fa-file-alt",
"description": "Path to MSIsensorpro custom baseline file for tumor-only analysis."
},
"ngscheckmate_bed": {
"type": "string",
"fa_icon": "fas fa-file",
Expand Down
9 changes: 3 additions & 6 deletions subworkflows/local/bam_variant_calling_somatic_all/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,12 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
wes // boolean: [mandatory] [default: false] whether targeted data is processed

main:
versions = Channel.empty()
versions = Channel.empty()

//TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config
vcf_freebayes = Channel.empty()
vcf_manta = Channel.empty()
vcf_strelka = Channel.empty()
out_msisensorpro = Channel.empty()
vcf_mutect2 = Channel.empty()
vcf_tiddit = Channel.empty()
out_indexcov = Channel.empty()
Expand Down Expand Up @@ -191,11 +190,10 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
}

// MSISENSOR
if (tools.split(',').contains('msisensorpro')) {
if (tools.split(',').contains('msisensorpro') && msisensorpro_scan) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should not be strictly necessary since the empty channel should skip the tool

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember I tried without it but it failed for some reason that I can't remember now so I think it was necessary.

MSISENSORPRO_MSISOMATIC(cram.combine(intervals_bed_combined), fasta.map{ meta, fasta -> [ fasta ] }, msisensorpro_scan)

versions = versions.mix(MSISENSORPRO_MSISOMATIC.out.versions)
out_msisensorpro = out_msisensorpro.mix(MSISENSORPRO_MSISOMATIC.out.output_report)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any reason you are not emitting the output of the workflow anymore?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not used anywhere else, just published in the output directory. MultiQC does not support msisensorpro yet.

}

// MUTECT2
Expand All @@ -206,7 +204,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
// joint_mutect2 mode needs different meta.map than regular mode
cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai ->
joint_mutect2 ?
//we need to keep all fields and then remove on a per-tool-basis to ensure proper joining at the filtering step
// we need to keep all fields and then remove on a per-tool-basis to ensure proper joining at the filtering step
[ meta + [ id:meta.patient ], [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] :
[ meta, [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ]
},
Expand Down Expand Up @@ -249,7 +247,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {

emit:
out_indexcov
out_msisensorpro
vcf_all
vcf_freebayes
vcf_manta
Expand Down
13 changes: 11 additions & 2 deletions subworkflows/local/bam_variant_calling_tumor_only_all/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ include { BAM_VARIANT_CALLING_TUMOR_ONLY_CONTROLFREEC } from '../bam_variant_cal
include { BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA } from '../bam_variant_calling_tumor_only_manta/main'
include { BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 } from '../bam_variant_calling_tumor_only_mutect2/main'
include { BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ } from '../bam_variant_calling_tumor_only_lofreq/main'
include { MSISENSORPRO_MSITUMORONLY } from '../../../modules/nf-core/msisensorpro/msitumoronly/main'

workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
take:
Expand All @@ -32,15 +33,16 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped
intervals_bed_gz_tbi_combined // channel: [mandatory] intervals/target regions in one file zipped
mappability
msisensorpro_baseline // channel: [optional] msisensorpro_baseline
panel_of_normals // channel: [optional] panel_of_normals
panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi
joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode
wes // boolean: [mandatory] [default: false] whether targeted data is processed

main:
versions = Channel.empty()
versions = Channel.empty()

//TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config
// TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config
vcf_freebayes = Channel.empty()
vcf_manta = Channel.empty()
vcf_mpileup = Channel.empty()
Expand Down Expand Up @@ -109,6 +111,13 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
versions = versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions)
}

// MSISENSOR
if (tools.split(',').contains('msisensorpro') && msisensorpro_baseline) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar here: if the baseline is required, then the tool shouldn't run on an empty channel.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember I tried without it but it failed for some reason that I can't remember now so I think it was necessary.

MSISENSORPRO_MSITUMORONLY(cram.combine(intervals_bed_combined), fasta.map{ meta, fasta -> [ fasta ] }, msisensorpro_baseline)

versions = versions.mix(MSISENSORPRO_MSITUMORONLY.out.versions)
}

// MUTECT2
if (tools.split(',').contains('mutect2')) {
BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2(
Expand Down
4 changes: 0 additions & 4 deletions subworkflows/local/prepare_genome/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ include { BWA_INDEX as BWAMEM1_INDEX } from '../../../modules/nf-
include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index/main'
include { DRAGMAP_HASHTABLE } from '../../../modules/nf-core/dragmap/hashtable/main'
include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main'
include { MSISENSORPRO_SCAN } from '../../../modules/nf-core/msisensorpro/scan/main'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is the reason to move this out?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved it to the main workflow file main.nf because it was being generated unnecessarily when msisensorpro was not requested, now that I have made msisensorpro_scan and msisensorpro_baseline optional params.

include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main'
include { TABIX_TABIX as TABIX_BCFTOOLS_ANNOTATIONS } from '../../../modules/nf-core/tabix/tabix/main'
include { TABIX_TABIX as TABIX_DBSNP } from '../../../modules/nf-core/tabix/tabix/main'
Expand Down Expand Up @@ -50,7 +49,6 @@ workflow PREPARE_GENOME {
DRAGMAP_HASHTABLE(fasta) // If aligner is dragmap

GATK4_CREATESEQUENCEDICTIONARY(fasta)
MSISENSORPRO_SCAN(fasta)
SAMTOOLS_FAIDX(fasta, [ [ id:'no_fai' ], [] ] )

// the following are flattened and mapped in case the user supplies more than one value for the param
Expand Down Expand Up @@ -105,7 +103,6 @@ workflow PREPARE_GENOME {
versions = versions.mix(BWAMEM2_INDEX.out.versions)
versions = versions.mix(DRAGMAP_HASHTABLE.out.versions)
versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions)
versions = versions.mix(MSISENSORPRO_SCAN.out.versions)
versions = versions.mix(SAMTOOLS_FAIDX.out.versions)
versions = versions.mix(TABIX_BCFTOOLS_ANNOTATIONS.out.versions)
versions = versions.mix(TABIX_DBSNP.out.versions)
Expand All @@ -125,7 +122,6 @@ workflow PREPARE_GENOME {
germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: germline_resource.vcf.gz.tbi
known_snps_tbi = TABIX_KNOWN_SNPS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi
known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi
msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } // path: genome_msi.list
pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi

allele_files // path: allele_files
Expand Down
4 changes: 2 additions & 2 deletions subworkflows/local/samplesheet_to_channel/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ workflow SAMPLESHEET_TO_CHANNEL{
}

input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples
def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro']
def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller']
def requested_tools_requiring_normal_samples = []
tools_requiring_normal_samples.each{ tool_requiring_normal_samples ->
if (tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples)
Expand All @@ -166,7 +166,7 @@ workflow SAMPLESHEET_TO_CHANNEL{
}
}

// Fails when wrongfull extension for intervals file
// Fails when wrongful extension for intervals file
if (wes && !step == 'annotate') {
if (intervals && !intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data")
else log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.")
Expand Down
2 changes: 2 additions & 0 deletions subworkflows/local/utils_nfcore_sarek_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ workflow PIPELINE_INITIALISATION {
params.known_snps,
params.known_snps_tbi,
params.mappability,
params.msisensorpro_scan,
params.msisensorpro_baseline,
params.multiqc_config,
params.ngscheckmate_bed,
params.pon,
Expand Down
2 changes: 2 additions & 0 deletions workflows/sarek/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ workflow SAREK {
loci_files
mappability
msisensorpro_scan
msisensorpro_baseline
ngscheckmate_bed
pon
pon_tbi
Expand Down Expand Up @@ -740,6 +741,7 @@ workflow SAREK {
intervals_bed_combined,
intervals_bed_gz_tbi_combined, // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi
mappability,
msisensorpro_baseline,
pon,
pon_tbi,
params.joint_mutect2,
Expand Down
Loading