diff --git a/README.md b/README.md
index 38ae4c5..42841ca 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,42 @@
-# Quality control workflows collection for Roddy
+# Alignment and Quality Control Plugin for Roddy
-This plugins contains a series of alignment and quality control related workflows:
-- PanCancer alignment workflow for whole genome and exome
-- Postmerge QC workflow for whole genome and exome
-- Bisulfite core workflow
+This plugins contains alignment and quality control related [Roddy](https://github.com/TheRoddyWMS/Roddy) workflows.
+This is the branch for 1.2.73 maintenance branch with only minimal documentation. Please see the master branch for details.
+## Read-Reordering with Unidirectional WGBS Data
+
+By setting `reorderUnidirectionalWGBSReadPairs` the read-reordering script will be run that decides based on the relative frequencies of TC and AG dinucleotides in both reads, what is the most likely correct orientations of the reads, and may swap the two reads.
+
+Note that after the swapping, the read-numbers are reversed. What was R1 in the input FASTQ will be R2 in the output BAM, and vice versa.
+
+Furthermore, not all reads can be unambiguously classified. These unclassified reads are currently dropped.
+
+The original script with a documentation of the underlying ideas can be found [here](https://github.com/cimbusch/TWGBS.git).
+
+## Change Logs
+
+* 1.2.73-3 (branch-specific change)
+ - Updated unidirectional WGBS read-reordering script from [here](https://github.com/cimbusch/TWGBS.git)
+ - Actually include the WGBS read-reordering script
+
+* 1.2.73-2 (branch-specific changes)
+ - Improved error checking and reporting for BWA and surrounding pipe
+
+* 1.2.73-1 (branch-specific changes)
+ - Lifted to Roddy 3.0 release (official LSF-capable release)
+ - Bugfix with wrong Bash function export
+
+* 1.2.73
+ - Lifted 1.1.73 to Roddy 2.4 (development-only release)
+ - Fingerprinting support also for WGBS
+ - sambamba 0.5.9 for sorting and viewing BAMS
+ - BAM termination sequence check
+
+* 1.1.73
+ - Bugfix mergeOnly step WGBS
+ - Substituted sambamba-based compression by samtools compression for improved stability, time, and memory consumption
+ - Tuning (tee -> mbuffer)
+ - Node-local scratch by default
+ - Fingerprinting (not for WGBS, yet)
+ - Bugfix affecting CLIP_INDEX in configuration
diff --git a/resources/analysisTools/bisulfiteWorkflow/WGBS_read_pair_reconstruction.py b/resources/analysisTools/bisulfiteWorkflow/WGBS_read_pair_reconstruction.py
index cc942e9..f557825 100755
--- a/resources/analysisTools/bisulfiteWorkflow/WGBS_read_pair_reconstruction.py
+++ b/resources/analysisTools/bisulfiteWorkflow/WGBS_read_pair_reconstruction.py
@@ -1,4 +1,10 @@
-#!/usr/bin/python
+#!/usr/bin/env python
+#
+# Copyright (c) 2018 German Cancer Research Center (DKFZ).
+#
+# Distributed under the MIT License (license terms are at https://github.com/cimbusch/TWGBS).
+# Commit: a202ae5d9b19c46b348c58523a17a4a7c66d116f
+#
"""
Code to reconstruct correct R1-R2 reads relation from base ratio:
@@ -8,22 +14,21 @@
read pairs which don't fall into these categories are ignored
Charles Imbusch (c.imbusch@dkfz.de), G200
-
"""
import gzip
import sys
import argparse
-parser = argparse.ArgumentParser(description='Program trying to reconstruct correct R1-R2 reads relation from base ratio in WGBS data.')
+parser = argparse.ArgumentParser(description='Alignment-free program to reconstruct correct R1-R2 reads relation from T/C and A/G base ratio in TWGBS data.')
parser.add_argument('--R1_in', help='R1 input file', required=True)
parser.add_argument('--R2_in', help='R2 input file', required=True)
-parser.add_argument('--gzip_input', help='Set if input fastq files are compressed', action='store_true', required=False, default=False)
+parser.add_argument('--input_ascii', help='Set if input fastq files are *not* compressed', action='store_true', required=False, default=False)
parser.add_argument('--R1_out', help='R1 output file', required=True)
parser.add_argument('--R2_out', help='R2 output file', required=True)
parser.add_argument('--R1_unassigned', help='Filename R1 for unassigned read pairs', required=True)
parser.add_argument('--R2_unassigned', help='Filename R2 for unassigned read pairs', required=True)
-parser.add_argument('--gzip_output', help='Set if output fastq files should be compressed', action='store_true', required=False, default=False)
+parser.add_argument('--output_ascii', help='Set if output fastq files should be *not* compressed', action='store_true', required=False, default=False)
parser.add_argument('--log', help='Write basic statistics to this file', required=False, default=False)
parser.add_argument('--debug', help='Debug and write more output to console', action='store_true', required=False, default=False)
@@ -42,15 +47,15 @@
R2_unassigned = None
# file handlers for input files
-if args_dict['gzip_input'] == True:
- f = open(args_dict['R1_in'], 'r')
- f2 = open(args_dict['R2_in'], 'r')
-else:
+if args_dict['input_ascii'] == False:
f = gzip.open(args_dict['R1_in'], 'r')
f2 = gzip.open(args_dict['R2_in'], 'r')
+else:
+ f = open(args_dict['R1_in'], 'r')
+ f2 = open(args_dict['R2_in'], 'r')
# file handlers for output files
-if args_dict['gzip_output'] == True:
+if args_dict['output_ascii'] == False:
f_R1_out = gzip.open(args_dict['R1_out'], 'wb')
f_R2_out = gzip.open(args_dict['R2_out'], 'wb')
R1_unassigned = gzip.open(args_dict['R1_unassigned'], 'wb')
@@ -71,22 +76,11 @@
### count A/T/C/G content in a given sequence
def getContent(DNAsequence):
- A = 0
- T = 0
- C = 0
- G = 0
- N = 0
- for c in DNAsequence:
- if c == 'A':
- A+=1
- if c == 'T':
- T+=1
- if c == 'C':
- C+=1
- if c == 'G':
- G+=1
- if c == 'N':
- N+=1
+ A = DNAsequence.count("A")
+ T = DNAsequence.count("T")
+ C = DNAsequence.count("C")
+ G = DNAsequence.count("G")
+ N = DNAsequence.count("N")
return (A, T, C, G, N)
### count base content masking CG/TG content
@@ -98,9 +92,9 @@ def getContentR1(DNAsequence):
### count base content masking GC/GT content
def getContentR2(DNAsequence):
- DNAsequence = DNAsequence.replace("GC", "")
- DNAsequence = DNAsequence.replace("GT", "")
- return getContent(DNAsequence)
+ DNAsequence = DNAsequence.replace("GC", "")
+ DNAsequence = DNAsequence.replace("GT", "")
+ return getContent(DNAsequence)
if args_dict['debug'] == True:
@@ -134,15 +128,8 @@ def getContentR2(DNAsequence):
print "=="
print "R1:R2\t" + str(R1_content) + "\t" + str(R2_content) + "\tR1:T:C|R1:A:G\t" + str(R1_T_C) + "\t" +str(R1_A_G)
print "R2:R1\t" + str(R2_content) + "\t" + str(R1_content) + "\tR2:T:C|R2:A:G\t" + str(R2_T_C) + "\t" +str(R2_A_G)
- # third idea
- #R1R2_layout = ( (R1_T_C > R2_T_C) or (R1_T_C > R1_A_G) ) and ( (R2_A_G > R1_A_G) or (R2_A_G > R2_T_C) )
- #R2R1_layout = ( (R1_T_C < R2_T_C) or (R1_T_C < R1_A_G) ) and ( (R2_A_G < R1_A_G) or (R2_A_G < R2_T_C) )
- # second idea
R1R2_layout = (R1_T_C > R2_T_C) and (R2_A_G > R1_A_G)
R2R1_layout = (R1_T_C < R2_T_C) and (R2_A_G < R1_A_G)
- # first idea
- #R1R2_layout = (R1_T_C > R1_A_G) and (R2_A_G > R2_T_C)
- #R2R1_layout = (R1_T_C < R1_A_G) and (R2_A_G < R2_T_C)
if R1R2_layout:
R1_output = header1 + "\n" + sequence1 + "\n" + spacer1 + "\n" + qual_scores1 + "\n"
R2_output = header2 + "\n" + sequence2 + "\n" + spacer2 + "\n" + qual_scores2 + "\n"
diff --git a/resources/analysisTools/bisulfiteWorkflow/bwaMemSortSlimWithReadConversionForBisulfiteData.sh b/resources/analysisTools/bisulfiteWorkflow/bwaMemSortSlimWithReadConversionForBisulfiteData.sh
index 9be2fe1..4f253a5 100755
--- a/resources/analysisTools/bisulfiteWorkflow/bwaMemSortSlimWithReadConversionForBisulfiteData.sh
+++ b/resources/analysisTools/bisulfiteWorkflow/bwaMemSortSlimWithReadConversionForBisulfiteData.sh
@@ -1,21 +1,26 @@
#!/bin/bash
+#
+# Copyright (c) 2019 German Cancer Research Center (DKFZ).
+#
+# Distributed under the MIT License (license terms are at https://github.com/DKFZ-ODCF/AlignmentAndQCWorkflows).
+#
-source "$TOOL_WORKFLOW_LIB"
+set -o pipefail
+set -ue
-printInfo
+source "$TOOL_WORKFLOW_LIB"
-set -o pipefail
+setUp_BashSucksVersion
+trap cleanUp_BashSucksVersion EXIT
-ID=${RUN}_${LANE}
-SM=sample_${SAMPLE}_${PID}
+ID="${RUN}_${LANE}"
+SM="sample_${SAMPLE}_${PID}"
# RODDY_SCRATCH is used here. Is for PBS $PBS_SCRATCH_DIR/$PBS_JOBID, for SGE /tmp/roddyScratch/jobid
RODDY_BIG_SCRATCH=$(getBigScratchDirectory "${FILENAME_SORTED_BAM}_TEMP")
mkdir -p "$RODDY_BIG_SCRATCH"
# pipes via local scratch dir
-FNPIPE1=${RODDY_SCRATCH}/NAMED_PIPE1
-FNPIPE2=${RODDY_SCRATCH}/NAMED_PIPE2
NP_READBINS_IN=${RODDY_SCRATCH}/np_readbins_in
NP_COVERAGEQC_IN=${RODDY_SCRATCH}/np_coverageqc_in
NP_COMBINEDANALYSIS_IN=${RODDY_SCRATCH}/np_combinedanalysis_in
@@ -24,102 +29,111 @@ NP_SAMTOOLS_INDEX_IN=${RODDY_SCRATCH}/np_samtools_index_in
NP_BWA_OUT=${RODDY_SCRATCH}/np_bwa_out
NP_BCONV_OUT=${RODDY_SCRATCH}/np_bconv_out
-MBUF_100M="${MBUFFER_BINARY} -m 100m -q -l /dev/null"
-MBUF_2G="${MBUFFER_BINARY} -m 2g -q -l /dev/null"
+MBUF_SMALL="${MBUFFER_BINARY} -m ${MBUFFER_SIZE_SMALL} -q -l /dev/null"
+MBUF_LARGE="${MBUFFER_BINARY} -m ${MBUFFER_SIZE_LARGE} -q -l /dev/null"
mkfifo ${NP_READBINS_IN} ${NP_COVERAGEQC_IN} ${NP_COMBINEDANALYSIS_IN} ${NP_FLAGSTATS} ${NP_BWA_OUT} ${NP_BCONV_OUT}
bamname=`basename ${FILENAME_SORTED_BAM}`
-INDEX_FILE=${FILENAME_SORTED_BAM}.bai
-tempSortedBamFile=${FILENAME_SORTED_BAM}.tmp
-tempFileForSort=${RODDY_BIG_SCRATCH}/${bamname}_forsorting
-tempBamIndexFile=${FILENAME_SORTED_BAM}.tmp.bai
-tempFlagstatsFile=${FILENAME_FLAGSTATS}.tmp
+INDEX_FILE="$FILENAME_SORTED_BAM.bai"
+tempSortedBamFile="$FILENAME_SORTED_BAM.tmp"
+tempFileForSort="$RODDY_BIG_SCRATCH/${bamname}_forsorting"
+tempBamIndexFile="$FILENAME_SORTED_BAM.tmp.bai"
+tempFlagstatsFile="$FILENAME_FLAGSTATS.tmp"
# samtools sort may complain about truncated temp files and for each line outputs
# the error message. This happens when the same files are written at the same time,
# see http://sourceforge.net/p/samtools/mailman/samtools-help/thread/BAA90EF6FE3B4D45A7B2F6E0EC5A8366DA3AB5@USTLMLLYC102.rf.lilly.com/
-NP_SORT_ERRLOG=${RODDY_SCRATCH}/NP_SORT_ERRLOG
-FILENAME_SORT_LOG=${DIR_TEMP}/${bamname}_errlog_sort
+NP_SORT_ERRLOG="$RODDY_SCRATCH/NP_SORT_ERRLOG"
+FILENAME_SORT_LOG="$DIR_TEMP/${bamname}_errlog_sort"
-RAW_SEQ=${RAW_SEQ_1}
-source ${TOOL_COMMON_ALIGNMENT_SETTINGS_SCRIPT}
-
-NP_BAMSORT=${RODDY_SCRATCH}/NAMED_PIPE_BAMSORT
-mkfifo ${NP_BAMSORT}
+NP_BAMSORT="$RODDY_SCRATCH/NAMED_PIPE_BAMSORT"
+mkfifo "$NP_BAMSORT"
# Create the following variable for error checking issues
-TMP_FILE=${tempSortedBamFile}
+TMP_FILE="$tempSortedBamFile"
# error tracking
-FILENAME_BWA_LOG=${DIR_TEMP}/${bamname}_errlog_bwamem
+FILENAME_BWA_LOG="$DIR_TEMP/${bamname}_errlog_bwamem"
+FILENAME_BWA_EC="$DIR_TEMP/${bamname}_ec"
+
+if [[ -n "$RAW_SEQ_1" ]]; then
+ source "$TOOL_DEFAULT_PLUGIN_LIB"
+ setCompressionToolsBasedOnFileCompression "$RAW_SEQ_1"
+fi
bamFileExists=false
-# in case the BAM already exists, but QC files are missing, create these only
-if [[ -f ${FILENAME_SORTED_BAM} ]] && [[ -s ${FILENAME_SORTED_BAM} ]]
-then
+# In case the BAM already exists, but QC files are missing, create these only
+# TODO: This logic currently is not directly/simply reflected. Instead there are multiple if branches. Consider refactoring.
+if [[ -f ${FILENAME_SORTED_BAM} ]] && [[ -s ${FILENAME_SORTED_BAM} ]]; then
checkBamIsComplete "$FILENAME_SORTED_BAM"
bamFileExists=true
fi
-# test if one of the fastq files is a fake fastq file to simulate paired end sequencing in PIDs with mixed sequencing (single and paired end)
-LENGTH_SEQ_1=`${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} 2>/dev/null | head | wc -l`
-LENGTH_SEQ_2=`${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} 2>/dev/null | head | wc -l`
-[[ ${LENGTH_SEQ_1} == 0 || ${LENGTH_SEQ_2} == 0 ]] && useSingleEndProcessing=true
-
-# make biobambam sort default
-useBioBamBamSort=${useBioBamBamSort-true}
-
-if [[ ${bamFileExists} == false ]]; then # we have to make the BAM
- mkfifo ${FNPIPE1} ${FNPIPE2}
- if [[ $useAdaptorTrimming == true ]]; then # [[ "$ADAPTOR_TRIMMING_TOOL" == *.jar ]]
- if [ "${qualityScore}" = "illumina" ]; then
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - > $i1" &
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - > $i2" &
- else
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} > $i1" &
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} > $i2" &
- fi
- # done in the sourced script designed for bwa aln:
- # i1=$DIR_SCRATCH/at_i1
- # i2=$DIR_SCRATCH/at_i2
- # o1=$DIR_SCRATCH/at_o1
- # o2=/dev/null
- # u1=/dev/null
- # u2=/dev/null
- # mkfifo $i1 $i2 $o1
-
- # But o2 now has to contain read2 here:
- o2=${RODDY_SCRATCH}/at_o2
- mkfifo $o2
- eval "$JAVA_BINARY -jar ${TOOL_ADAPTOR_TRIMMING} $ADAPTOR_TRIMMING_OPTIONS_0 $i1 $i2 $o1 $u1 $o2 $u2 $ADAPTOR_TRIMMING_OPTIONS_1" & procTrim=$!
-
- # trimming with fastx does not work in combination with Trimmomatic!
- # besides, bwa mem automagically reverts mate pair data
- cat $o1 | ${PYTHON_BINARY} ${TOOL_METHYL_C_TOOLS} fqconv -1 - - > $FNPIPE1 & procID_fqconv_r1=$!
- cat $o2 | ${PYTHON_BINARY} ${TOOL_METHYL_C_TOOLS} fqconv -2 - - > $FNPIPE2 & procID_fqconv_r2=$!
- elif [ "${qualityScore}" = "illumina" ]; then # bwa mem has no possibility to convert Illumina 1.3 scores
- true & procTrim=$! # dummy process id
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - | \
- ${PYTHON_BINARY} ${TOOL_METHYL_C_TOOLS} fqconv -1 - - > $FNPIPE1" & procID_fqconv_r1=$!
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - | \
- ${PYTHON_BINARY} ${TOOL_METHYL_C_TOOLS} fqconf -2 - - > $FNPIPE2" & procID_fqconv_r2=$!
- else
- true & procTrim=$! # dummy process id
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} | ${PYTHON_BINARY} ${TOOL_METHYL_C_TOOLS} fqconv -1 - - > $FNPIPE1" & procID_fqconv_r1=$!
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} | ${PYTHON_BINARY} ${TOOL_METHYL_C_TOOLS} fqconv -2 - - > $FNPIPE2" & procID_fqconv_r2=$!
- fi
+# Test whether one of the fastq files is a fake fastq file to simulate paired-end sequencing in PIDs with mixed sequencing (single- and paired-end)
+declare -i LENGTH_SEQ_1=$(getFastqAsciiStream "$RAW_SEQ_1" | head | wc -l)
+declare -i LENGTH_SEQ_2=$(getFastqAsciiStream "$RAW_SEQ_2" | head | wc -l)
+if [[ $LENGTH_SEQ_1 -eq 0 ]] && [[ $LENGTH_SEQ_2 -eq 0 ]]; then
+ throw 1 "Both input files are empty: '$RAW_SEQ_1' and '$RAW_SEQ_2'"
+elif [[ $LENGTH_SEQ_1 -eq 0 ]] || [[ $LENGTH_SEQ_2 -eq 0 ]]; then
+ useSingleEndProcessing=true
+fi
- INPUT_PIPES=""
- [[ ${LENGTH_SEQ_1} != 0 ]] && INPUT_PIPES="$FNPIPE1"
- [[ ${LENGTH_SEQ_2} != 0 ]] && INPUT_PIPES="${INPUT_PIPES} $FNPIPE2"
- [[ ${LENGTH_SEQ_1} == 0 ]] && cat $FNPIPE1 >/dev/null
- [[ ${LENGTH_SEQ_2} == 0 ]] && cat $FNPIPE2 >/dev/null
+
+# Determine the quality score
+set +e # The following few lines fail with exit 141 due to unknown reasons if `set -e` is set.
+if [[ $LENGTH_SEQ_1 -ne 0 ]]; then
+ qualityScore=$(getFastqAsciiStream "$RAW_SEQ_1" | "$PERL_BINARY" "$TOOL_SEQUENCER_DETECTION")
else
- true & procTrim=$!
+ qualityScore=$(getFastqAsciiStream "$RAW_SEQ_2" | "$PERL_BINARY" "$TOOL_SEQUENCER_DETECTION")
+fi
+set -e
+
+# Make biobambam sort default
+useBioBamBamSort="${useBioBamBamSort:-true}"
+
+# Default: Dummy process IDs to simplify downstream logic.
+# TODO Remove after completely switching to PID registry system.
+true & procUnpack1=$!
+true & procUnpack2=$!
+
+fqName="fastq"
+mkPipePairSource "$fqName"
+if [[ "$bamFileExists" == "false" ]]; then # we have to make the BAM
+ getFastqAsciiStream "$RAW_SEQ_1" > $(getPairedPipeEndPath 1 "$fqName") & procUnpack1=$!
+ getFastqAsciiStream "$RAW_SEQ_2" > $(getPairedPipeEndPath 2 "$fqName") & procUnpack2=$!
+
+ if [[ "$qualityScore" == "illumina" ]]; then
+ extendPipe $(mkPairedPipeName 1 "$fqName") "qScore" -- toIlluminaScore
+ extendPipe $(mkPairedPipeName 2 "$fqName") "qScore" -- toIlluminaScore
+ fi
+
+ if [[ "${useAdaptorTrimming:-false}" == "true" ]]; then
+ extendPipePair "$fqName" "trimmomatic" -- trimmomatic
+ fi
+
+ if [[ "${reorderUndirectionalWGBSReadPairs:-false}" == "true" ]]; then
+ extendPipePair "$fqName" "reorder" -- reorderUndirectionalReads
+ fi
+
+ extendPipe $(mkPairedPipeName 1 "$fqName") "fqconv" -- methylCfqconv 1
+ extendPipe $(mkPairedPipeName 2 "$fqName") "fqconv" -- methylCfqconv 2
+
+ INPUT_PIPES=""
+ if [[ ${LENGTH_SEQ_1} == 0 ]]; then
+ cat "$(getPairedPipeEndPath 1 $fqName)" > /dev/null
+ else
+ INPUT_PIPES="$(getPairedPipeEndPath 1 $fqName)"
+ fi
+
+ if [[ ${LENGTH_SEQ_2} == 0 ]]; then
+ cat "$(getPairedPipeEndPath 2 $fqName)" > /dev/null
+ else
+ INPUT_PIPES="${INPUT_PIPES} $(getPairedPipeEndPath 2 $fqName)"
+ fi
fi
# Try to read from pipes BEFORE they are filled.
-# in all cases:
+# In all cases:
# SAM output is piped to perl script that calculates various QC measures
(${PERL_BINARY} ${TOOL_COMBINED_BAM_ANALYSIS} -i ${NP_COMBINEDANALYSIS_IN} -a ${FILENAME_DIFFCHROM_MATRIX}.tmp -c ${CHROM_SIZES_FILE} -b ${FILENAME_ISIZES_MATRIX}.tmp -f ${FILENAME_EXTENDED_FLAGSTATS}.tmp -m ${FILENAME_ISIZES_STATISTICS}.tmp -o ${FILENAME_DIFFCHROM_STATISTICS}.tmp -p ${INSERT_SIZE_LIMIT} ) & procIDCBA=$!
@@ -127,7 +141,7 @@ fi
(${TOOL_COVERAGE_QC_D_IMPL} --alignmentFile=${NP_COVERAGEQC_IN} --outputFile=${FILENAME_GENOME_COVERAGE}.tmp --processors=1 --basequalCutoff=${BASE_QUALITY_CUTOFF} --ungappedSizes=${CHROM_SIZES_FILE}) & procIDGenomeCoverage=$!
# read bins
-(set -o pipefail; ${TOOL_GENOME_COVERAGE_D_IMPL} --alignmentFile=${NP_READBINS_IN} --outputFile=/dev/stdout --processors=4 --mode=countReads --windowSize=${WINDOW_SIZE} | $MBUF_100M | ${PERL_BINARY} ${TOOL_FILTER_READ_BINS} - ${CHROM_SIZES_FILE} > ${FILENAME_READBINS_COVERAGE}.tmp) & procIDReadbinsCoverage=$!
+(set -o pipefail; ${TOOL_GENOME_COVERAGE_D_IMPL} --alignmentFile=${NP_READBINS_IN} --outputFile=/dev/stdout --processors=4 --mode=countReads --windowSize=${WINDOW_SIZE} | $MBUF_SMALL | ${PERL_BINARY} ${TOOL_FILTER_READ_BINS} - ${CHROM_SIZES_FILE} > ${FILENAME_READBINS_COVERAGE}.tmp) & procIDReadbinsCoverage=$!
# use sambamba for flagstats
(set -o pipefail; cat ${NP_FLAGSTATS} | ${SAMBAMBA_FLAGSTATS_BINARY} flagstat -t 1 /dev/stdin > $tempFlagstatsFile) & procIDFlagstat=$!
@@ -135,7 +149,7 @@ fi
if [[ ${bamFileExists} == true ]]; then
echo "the BAM file already exists, re-creating other output files."
# make all the pipes
- (cat ${FILENAME_SORTED_BAM} | ${MBUF_2G} | tee ${NP_COVERAGEQC_IN} ${NP_READBINS_IN} ${NP_FLAGSTATS} | ${SAMBAMBA_BINARY} view /dev/stdin | ${MBUF_2G} > $NP_COMBINEDANALYSIS_IN) & procIDOutPipe=$!
+ (cat ${FILENAME_SORTED_BAM} | ${MBUF_LARGE} | tee ${NP_COVERAGEQC_IN} ${NP_READBINS_IN} ${NP_FLAGSTATS} | ${SAMBAMBA_BINARY} view /dev/stdin | ${MBUF_LARGE} > $NP_COMBINEDANALYSIS_IN) & procIDOutPipe=$!
else
if [[ ${useBioBamBamSort} == false ]]; then
# we use samtools for making the index
@@ -162,7 +176,7 @@ else
${SAMTOOLS_BINARY} view -h ${NP_BCONV_OUT} | \
tee ${NP_COMBINEDANALYSIS_IN} | \
${SAMTOOLS_BINARY} view -uSbh -F 2304 - | \
- $MBUF_2G | \
+ $MBUF_LARGE | \
${SAMTOOLS_BINARY} sort -@ 8 \
-m ${SAMPESORT_MEMSIZE} \
-o - ${tempFileForSort} 2>$NP_SORT_ERRLOG | \
@@ -170,7 +184,7 @@ else
${NP_READBINS_IN} \
${NP_FLAGSTATS} \
${NP_SAMTOOLS_INDEX_IN} > ${tempSortedBamFile}; \
- echo $? > ${DIR_TEMP}/${bamname}_ec) & procID_MEMSORT=$!
+ echo "$? (Pipe Exit Codes: ${PIPESTATUS[@]})" > ${DIR_TEMP}/${bamname}_ec) & procID_MEMSORT=$!
# filter samtools error log
(cat $NP_SORT_ERRLOG | uniq > $FILENAME_SORT_LOG) & procID_logwrite=$!
@@ -189,31 +203,24 @@ fi
if [[ ${bamFileExists} == true ]]; then
wait $procIDOutPipe || throw 13 "Error from sambamba view pipe"
else # make sure to rename BAM file when it has been produced correctly
- [[ -p $i1 ]] && rm $i1 $i2 $o1 $o2 2> /dev/null
- rm $FNPIPE1
- rm $FNPIPE2
- errorString="There was a non-zero exit code in the bwa mem - sort pipeline; exiting..."
- source ${TOOL_BWA_ERROR_CHECKING_SCRIPT}
+ errorString="There was a non-zero exit code in the bwa mem - sort pipeline; exiting..."
+ source "$TOOL_BWA_ERROR_CHECKING_SCRIPT"
checkBamIsComplete "$tempSortedBamFile"
mv ${tempSortedBamFile} ${FILENAME_SORTED_BAM} || throw 36 "Could not move file"
# index is only created by samtools or biobambam when producing the BAM, it may be older than the BAM, so update time stamp
if [[ -f ${tempBamIndexFile} ]]; then
mv ${tempBamIndexFile} ${INDEX_FILE} && touch ${INDEX_FILE} || throw 37 "Could not move file"
fi
- # clean up adapter trimming pipes if they exist
- [[ -p $i1 ]] && rm $i1 $i2 $o1 $o2 2> /dev/null
-
fi
-wait $procTrim || throw 38 "Error from trimming"
+waitForRegisteredPids_BashSucksVersion
+wait $procUnpack1 || throw 39 "Error from reading FASTQ 1"
+wait $procUnpack1 || throw 40 "Error from reading FASTQ 2"
+
wait $procIDFlagstat || throw 14 "Error from sambamba flagstats"
wait $procIDReadbinsCoverage || throw 15 "Error from genomeCoverage read bins"
wait $procIDGenomeCoverage || throw 16 "Error from coverageQCD"
wait $procIDCBA || throw 17 "Error from combined QC perl script"
-if [[ ${bamFileExists} == false ]]; then
- wait $procID_fqconv_r1 || throw 18 "Error in FQCONV (read 1)"
- wait $procID_fqconv_r2 || throw 19 "Error in FQCONV (read 2)"
-fi
# rename QC files
mv ${FILENAME_DIFFCHROM_MATRIX}.tmp ${FILENAME_DIFFCHROM_MATRIX} || throw 28 "Could not move file"
@@ -225,24 +232,35 @@ mv ${FILENAME_READBINS_COVERAGE}.tmp ${FILENAME_READBINS_COVERAGE} || throw 34 "
mv ${FILENAME_GENOME_COVERAGE}.tmp ${FILENAME_GENOME_COVERAGE} || throw 35 "Could not move file"
mv ${tempFlagstatsFile} ${FILENAME_FLAGSTATS} || throw 33 "Could not move file"
+
runFingerprinting "${FILENAME_SORTED_BAM}" "${FILENAME_FINGERPRINTS}"
-if [[ "$RODDY_BIG_SCRATCH" != "$RODDY_SCRATCH" ]]; then # $RODDY_SCRATCH is also deleted by the wrapper.
- rm -rf "$RODDY_BIG_SCRATCH" 2> /dev/null # Clean-up big-file scratch directory. Only called if no error in wait or mv before.
-fi
+removeRoddyBigScratch
# QC summary
# remove old warnings file if it exists (due to errors in run such as wrong chromsizes file)
[[ -f ${FILENAME_QCSUMMARY}_WARNINGS.txt ]] && rm ${FILENAME_QCSUMMARY}_WARNINGS.txt
-(${PERL_BINARY} $TOOL_WRITE_QC_SUMMARY -p $PID -s $SAMPLE -r $RUN -l $LANE -w ${FILENAME_QCSUMMARY}_WARNINGS.txt -f $FILENAME_FLAGSTATS -d $FILENAME_DIFFCHROM_STATISTICS -i $FILENAME_ISIZES_STATISTICS -c $FILENAME_GENOME_COVERAGE > ${FILENAME_QCSUMMARY}_temp && mv ${FILENAME_QCSUMMARY}_temp $FILENAME_QCSUMMARY) || ( echo "Error from writeQCsummary.pl" && exit 12)
+${PERL_BINARY} $TOOL_WRITE_QC_SUMMARY \
+ -p $PID \
+ -s $SAMPLE \
+ -r $RUN \
+ -l $LANE \
+ -w ${FILENAME_QCSUMMARY}_WARNINGS.txt \
+ -f $FILENAME_FLAGSTATS \
+ -d $FILENAME_DIFFCHROM_STATISTICS \
+ -i $FILENAME_ISIZES_STATISTICS \
+ -c $FILENAME_GENOME_COVERAGE \
+ > ${FILENAME_QCSUMMARY}_temp \
+ && mv ${FILENAME_QCSUMMARY}_temp $FILENAME_QCSUMMARY \
+ || throw 12 "Error from writeQCsummary.pl"
# Produced qualitycontrol.json for OTP.
${PERL_BINARY} ${TOOL_QC_JSON} \
- ${FILENAME_GENOME_COVERAGE} \
- ${FILENAME_ISIZES_STATISTICS} \
- ${FILENAME_FLAGSTATS} \
- ${FILENAME_DIFFCHROM_STATISTICS} \
- > ${FILENAME_QCJSON}.tmp \
- || throw 26 "Error when compiling qualitycontrol.json for ${FILENAME}, stopping here"
+ ${FILENAME_GENOME_COVERAGE} \
+ ${FILENAME_ISIZES_STATISTICS} \
+ ${FILENAME_FLAGSTATS} \
+ ${FILENAME_DIFFCHROM_STATISTICS} \
+ > ${FILENAME_QCJSON}.tmp \
+ || throw 26 "Error when compiling qualitycontrol.json for ${FILENAME_QCJSON}, stopping here"
mv ${FILENAME_QCJSON}.tmp ${FILENAME_QCJSON} || throw 27 "Could not move file"
# plots are only made for paired end and not on convey
@@ -251,3 +269,5 @@ mv ${FILENAME_QCJSON}.tmp ${FILENAME_QCJSON} || throw 27 "Could not move file"
${RSCRIPT_BINARY} ${TOOL_INSERT_SIZE_PLOT_SCRIPT} ${FILENAME_ISIZES_MATRIX} ${FILENAME_ISIZES_STATISTICS} ${FILENAME_ISIZES_PLOT}_temp "PE insertsize of ${bamname}" && mv ${FILENAME_ISIZES_PLOT}_temp ${FILENAME_ISIZES_PLOT} || ( echo "Error from insert sizes plotter" && exit 22)
${RSCRIPT_BINARY} ${TOOL_PLOT_DIFFCHROM} -i "$FILENAME_DIFFCHROM_MATRIX" -s "$FILENAME_DIFFCHROM_STATISTICS" -o "${FILENAME_DIFFCHROM_PLOT}_temp" && mv ${FILENAME_DIFFCHROM_PLOT}_temp ${FILENAME_DIFFCHROM_PLOT} || ( echo "Error from chrom_diff.r" && exit 23)
+
+
diff --git a/resources/analysisTools/qcPipeline/PhredOrIllumina.pl b/resources/analysisTools/qcPipeline/PhredOrIllumina.pl
index c23c923..3e43843 100755
--- a/resources/analysisTools/qcPipeline/PhredOrIllumina.pl
+++ b/resources/analysisTools/qcPipeline/PhredOrIllumina.pl
@@ -28,4 +28,4 @@
# gunzip or awk always complain about broken pipe, so maybe that helps?
# "close" Closes the file or pipe associated with the file handle.
# close (STDIN);
-exit;
+exit 0;
diff --git a/resources/analysisTools/qcPipeline/bashLib.sh b/resources/analysisTools/qcPipeline/bashLib.sh
index 244fe75..1850fc1 100755
--- a/resources/analysisTools/qcPipeline/bashLib.sh
+++ b/resources/analysisTools/qcPipeline/bashLib.sh
@@ -1,8 +1,14 @@
+#
+# Copyright (c) 2019 German Cancer Research Center (DKFZ).
+#
+# Distributed under the MIT License (license terms are at https://github.com/DKFZ-ODCF/AlignmentAndQCWorkflows).
+#
# Library of BASH function. Please import using
#
# source "$TOOL_BASH_LIB"
+BASHLIB___ERREXIT=$(if [[ $SHELLOPTS =~ "errexit" ]]; then echo "errexit"; fi)
BASHLIB___SHELL_OPTIONS=$(set +o)
set +o verbose
set +o xtrace
@@ -124,6 +130,232 @@ stringJoin () {
echo "$result"
}
+normalizeBoolean() {
+ if [[ "${1:-false}" == "true" ]]; then
+ echo "true"
+ else
+ echo "false"
+ fi
+}
+
+isDebugSet() {
+ normalizeBoolean "${debug:-false}"
+}
+
+
+#####################################################################
+## Handling processes and tempfiles (original code from BamToFastqPlugin)
+#####################################################################
+
+# TODO: Make this based on associative array to have more information (some tag!) about the failed process. Every tag only once (warn and save with extension!)
+registerPid() {
+ local pid="${1:-$!}"
+ declare -gax pids=(${pids[@]} $pid)
+}
+
+listPids() {
+ for pid in "${pids[@]}"; do
+ if [[ "$pid" != "$ARRAY_ELEMENT_DUMMY" ]]; then
+ echo "$pid"
+ fi
+ done
+}
+
+registerTmpFile() {
+ local tmpFile="${1:?No temporary file name to register}"
+ # Note that the array is build in reversed order, which simplifies the deletion of nested directories.
+ declare -gax tmpFiles=("$tmpFile" "${tmpFiles[@]}")
+}
+
+reverseArray() {
+ local c=""
+ for b in "$@"; do
+ c="$b $c"
+ done
+ echo $c
+}
+
+# Bash sucks. An empty array does not exist! So if there are no tempfiles/pids, then there is no array and set -u will result an error!
+# Therefore we put a dummy value into the arrays and have to take care to remove the dummy before the processing.
+# The dummy contains a random string to avoid collision with possible filenames (the filename 'dummy' is quite likely).
+ARRAY_ELEMENT_DUMMY=$(mktemp -u "_dummy_XXXXX")
+
+waitForRegisteredPids_BashSucksVersion() {
+ jobs
+ declare -a realPids=$(listPids)
+ if [[ -v realPids && ${#realPids[@]} -gt 0 ]]; then
+ # TODO Make this a loop and report the exact pid that failed (or the key, after switching from array to dictionary).
+ wait ${realPids[@]}
+ declare EXIT_CODE=$?
+ if [[ ${EXIT_CODE} -ne 0 ]]; then
+ throw ${EXIT_CODE} "One of the following processes ended with exit code ${EXIT_CODE}: ${realPids[@]}"
+ fi
+ fi
+ pids=("$ARRAY_ELEMENT_DUMMY")
+}
+setUp_BashSucksVersion() {
+ declare -g -a -x tmpFiles=("$ARRAY_ELEMENT_DUMMY")
+ declare -g -a -x pids=("$ARRAY_ELEMENT_DUMMY")
+ initPipeEnds
+
+ # Remove all registered temporary files upon exit
+ trap cleanUp_BashSucksVersion EXIT
+}
+cleanUp_BashSucksVersion() {
+ if [[ $(isDebugSet) == "false" && -v tmpFiles && ${#tmpFiles[@]} -gt 1 ]]; then
+ for f in ${tmpFiles[@]}; do
+ if [[ "$f" == "$ARRAY_ELEMENT_DUMMY" ]]; then
+ continue
+ elif [[ -d "$f" ]]; then
+ rmdir "$f"
+ elif [[ -e "$f" ]]; then
+ rm -f "$f"
+ fi
+ done
+ tmpFiles=("$ARRAY_ELEMENT_DUMMY")
+ fi
+}
+
+# These versions only works with Bash >4.4. Prior version do not really declare the array variables with empty values and set -u results in error message.
+waitForRegisteredPids() {
+ jobs
+ wait ${pids[@]}
+ declare EXIT_CODE=$?
+ if [[ ${EXIT_CODE} -ne 0 ]]; then
+ throw ${EXIT_CODE} "One of the following processes ended with exit code ${EXIT_CODE}: ${pids[@]}"
+ fi
+ pids=()
+}
+setUp() {
+ declare -g -a -x tmpFiles=()
+ declare -g -a -x pids=()
+ initPipeEnds
+}
+cleanUp() {
+ if [[ $(isDebugSet) == "false" && -v tmpFiles && ${#tmpFiles[@]} -gt 0 ]]; then
+ for f in "${tmpFiles[@]}"; do
+ if [[ -d "$f" ]]; then
+ rmdir "$f"
+ elif [[ -e "$f" ]]; then
+ rm "$f"
+ fi
+ done
+ tmpFiles=()
+ fi
+}
-eval "$BASHLIB___SHELL_OPTIONS"
+
+#####################################################################
+## Linear pipe management
+#####################################################################
+# The pipe-extension API follows this pattern:
+#
+# * pipeExtenderFunction pipeBaseName pipeStepExtension -- command
+#
+# * The pipeBaseName is the name under which the pipe (or derived from this: pipe pair) is registered.
+# * The pipeStepExtension is used to tag the actual pipe used in the specific step. Use a name describing the content flowing through the pipe,
+# i.e. the output of the extension step you are declaring.
+# * Current pipe extender functions are extendPipe (for single pipes) and extendPipePair (for r1/r2 pipes)
+# * To work with paired pipes (r1/r2) mkPairedPipeName is used to calculate the r1_ or r2_ pipeBaseName
+
+_pipePath="$RODDY_SCRATCH"
+
+# Maintain a mapping of pipe-basenames to current pipe-ends.
+# Please read https://www.artificialworlds.net/blog/2012/10/17/bash-associative-array-examples/ to understand the associative array syntax below.
+initPipeEnds() {
+ unset _pipeEnds
+ declare -Ag _pipeEnds=()
+}
+
+listPipeEnds() {
+ for i in "${!_pipeEnds[@]}"; do
+ echo "'$i' => '${_pipeEnds[$i]}'"
+ done
+}
+
+# Make a path without registering it.
+mkPipePath() {
+ local pipeName="${1:?Named-pipe base name}"
+ echo "$_pipePath/$pipeName"
+}
+
+# Just get the path from the registry.
+# Please read https://www.artificialworlds.net/blog/2012/10/17/bash-associative-array-examples/ to understand the associative array syntax below.
+getPipeEndPath() {
+ local pipeName="${1:?No pipename}"
+ if [[ ! ${_pipeEnds[$pipeName]+_} ]]; then
+ throw 152 "Could not find pipe by name '$pipeName'"
+ fi
+ local pipePath="${_pipeEnds[$pipeName]}"
+ echo "$pipePath"
+}
+
+# Register a path in the registry under the key "pipeName".
+setPipeEndPath() {
+ local pipeName="${1:?No pipename}"
+ local pipePath="${2:?No pipe path}"
+ if [[ -z "$pipeName" ]]; then
+ throw 153 "Cannot set empty pipename"
+ fi
+ _pipeEnds[$pipeName]="$pipePath"
+}
+
+# Create a path from the name and the tag and register it as new pipe-end in the registry.
+# Note that the pipe is registered as tempfile and will be deleted upon cleanUp.
+updatePipeEndPath() {
+ local pipeName="${1:?Named-pipe base name}"
+ local tag="${2:?Pipe tag}"
+ local pipePath=$(mkPipePath "${pipeName}_$tag")
+ mkfifo "$pipePath" || throw 150 "Could not create named pipe at '$pipePath'"
+ registerTmpFile "$pipePath"
+ setPipeEndPath "$pipeName" "$pipePath"
+}
+
+# Create a pipe and register it as new pipe-end in the registry. It will be the source of pipe.
+# Please read https://www.artificialworlds.net/blog/2012/10/17/bash-associative-array-examples/ to understand the associative array syntax below.
+mkPipeSource() {
+ local pipeName="${1:?No pipename}"
+ if [[ ${_pipeEnds[$pipeName]+_} ]]; then
+ throw 154 "Cannot create new pipe source. Pipe named '$pipeName' -- already exists with value '${_pipeEnds[$pipeName]}'"
+ fi
+ updatePipeEndPath "$pipeName" "source"
+}
+
+# Linear extension of a pipeline.
+# For a command with the interface "command infile outfile @rest" take the pipe with the given basename
+# and use it as input file. Create a new output pipe and set that pipe as output file.
+# Set the new end of the linear pipeline to the output pipe. Call like this:
+#
+# extendPipe $pipeName $tag -- commandName @restArgs
+#
+# The "--" is optional.
+extendPipe() {
+ local pipeName="${1:?No pipe basename}"
+ local tag="${2:?No tag}"
+ shift 2
+ if [[ "$1" == "--" ]]; then
+ shift
+ fi
+ local command="${1:?No command/function}"
+ shift
+ declare -a rest=("$@")
+
+ local inpipe=$(getPipeEndPath "$pipeName")
+ updatePipeEndPath "$pipeName" "$tag"
+ local outpipe=$(getPipeEndPath "$pipeName")
+
+ # Bash SUCKS! Empty arrays do give an error with set -u with Bash < 4.4, but -v varName still succeeds! Therefore test the content, here.
+ if [[ "${rest:-}" != "" ]]; then
+ "$command" "$inpipe" "$outpipe" "${rest[@]}" & registerPid
+ else
+ "$command" "$inpipe" "$outpipe" & registerPid
+ fi
+
+}
+
+
+eval "$BASHLIB___SHELL_OPTIONS"
+if [[ "$BASHLIB___ERREXIT" == "errexit" ]]; then
+ set -e
+fi
diff --git a/resources/analysisTools/qcPipeline/bwaErrorChecking.sh b/resources/analysisTools/qcPipeline/bwaErrorChecking.sh
index 14b9dd3..c008a18 100755
--- a/resources/analysisTools/qcPipeline/bwaErrorChecking.sh
+++ b/resources/analysisTools/qcPipeline/bwaErrorChecking.sh
@@ -21,20 +21,20 @@ then
fi
# TODO Check that?
- [[ "$?" != "0" ]] && echo $errorString && exit 32
+ [[ $? -ne 0 ]] && echo $errorString && exit 32
fi
# test for success before renaming!
-success=`grep " fault" ${FILENAME_BWA_LOG}`
+success=$(grep " fault" ${FILENAME_BWA_LOG} || true)
[[ ! -z "$success" ]] && echo found segfault $success in bwa logfile! && exit 31
# Barbara Aug 10 2015: I can't remember what bwa aln and sampe reported as "error".
# bluebee bwa has "error_count" in bwa-0.7.8-r2.05; and new in bwa-0.7.8-r2.06: "WARNING:top_bs_ke_be_hw: dummy be execution, only setting error."
# these are not errors that would lead to fail, in contrast to "ERROR: Bus error"
-success=`grep -i "error" ${FILENAME_BWA_LOG} | grep -v "error_count" | grep -v "dummy be execution"`
+success=$(grep -i "error" ${FILENAME_BWA_LOG} | grep -v "error_count" | grep -v "dummy be execution" || true)
[[ ! -z "$success" ]] && echo found error $success in bwa logfile! && exit 36
-success=`grep "Abort. Sorry." ${FILENAME_BWA_LOG}`
+success=$(grep "Abort. Sorry." ${FILENAME_BWA_LOG} || true)
[[ ! -z "$success" ]] && echo found error $success in bwa logfile! && exit 37
@@ -46,7 +46,7 @@ success=`grep "Abort. Sorry." ${FILENAME_BWA_LOG}`
echo "testing $FILENAME_SORT_LOG"
if [ ! -z $FILENAME_SORT_LOG ] && [ -f $FILENAME_SORT_LOG ]
then
- success=`grep "is truncated. Continue anyway." $FILENAME_SORT_LOG`
+ success=$(grep "is truncated. Continue anyway." "$FILENAME_SORT_LOG" || true)
[[ ! -z "$success" ]] && echo found error $success in samtools sorting logfile! && exit 38
else
echo "there is no samtools sort log file"
diff --git a/resources/analysisTools/qcPipeline/bwaMemSortSlim.sh b/resources/analysisTools/qcPipeline/bwaMemSortSlim.sh
index 6a190fc..47e3608 100755
--- a/resources/analysisTools/qcPipeline/bwaMemSortSlim.sh
+++ b/resources/analysisTools/qcPipeline/bwaMemSortSlim.sh
@@ -23,8 +23,8 @@ NP_COMBINEDANALYSIS_IN=${RODDY_SCRATCH}/np_combinedanalysis_in
NP_FLAGSTATS=${RODDY_SCRATCH}/np_flagstats_in
NP_SAMTOOLS_INDEX_IN=${RODDY_SCRATCH}/np_samtools_index_in
-MBUF_100M="${MBUFFER_BINARY} -m 100m -q -l /dev/null"
-MBUF_2G="${MBUFFER_BINARY} -m 2g -q -l /dev/null"
+MBUF_SMALL="${MBUFFER_BINARY} -m ${MBUFFER_SIZE_SMALL} -q -l /dev/null"
+MBUF_LARGE="${MBUFFER_BINARY} -m ${MBUFFER_SIZE_LARGE} -q -l /dev/null"
mkfifo ${NP_READBINS_IN} ${NP_COVERAGEQC_IN} ${NP_COMBINEDANALYSIS_IN} ${NP_FLAGSTATS}
@@ -76,11 +76,11 @@ then
then
if [ "${qualityScore}" = "illumina" ]
then
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - | $MBUF_2G > $i1" &
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - | $MBUF_2G > $i2" &
+ eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - | $MBUF_LARGE > $i1" &
+ eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - | $MBUF_LARGE > $i2" &
else
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} | $MBUF_2G > $i1" &
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} | $MBUF_2G > $i2" &
+ eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} | $MBUF_LARGE > $i1" &
+ eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} | $MBUF_LARGE > $i2" &
fi
# done in the sourced script designed for bwa aln:
# i1=$DIR_SCRATCH/at_i1
@@ -98,17 +98,17 @@ then
# besides, bwa mem automagically reverts mate pair data
#cat $o1 ${TRIM_STEP} ${REVERSE_STEP} | $MBUF_2G > $FNPIPE1 &
#cat $o2 ${TRIM_STEP} ${REVERSE_STEP} | $MBUF_2G > $FNPIPE2 &
- cat $o1 | $MBUF_2G > $FNPIPE1 &
- cat $o2 | $MBUF_2G > $FNPIPE2 &
+ cat $o1 | $MBUF_LARGE > $FNPIPE1 &
+ cat $o2 | $MBUF_LARGE > $FNPIPE2 &
elif [ "${qualityScore}" = "illumina" ] # bwa mem has no possibility to convert Illumina 1.3 scores
then
true & procTrim=$! # dummy process id
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - | $MBUF_2G > $FNPIPE1" &
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - | $MBUF_2G > $FNPIPE2" &
+ eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - | $MBUF_LARGE > $FNPIPE1" &
+ eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} | ${PERL_BINARY} ${TOOL_CONVERT_ILLUMINA_SCORES} - | $MBUF_LARGE > $FNPIPE2" &
else
true & procTrim=$! # dummy process id
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} | $MBUF_2G > $FNPIPE1" &
- eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} | $MBUF_2G > $FNPIPE2" &
+ eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_1} | $MBUF_LARGE > $FNPIPE1" &
+ eval "${UNZIPTOOL} ${UNZIPTOOL_OPTIONS} ${RAW_SEQ_2} | $MBUF_LARGE > $FNPIPE2" &
fi
INPUT_PIPES=""
@@ -129,7 +129,7 @@ fi
(${TOOL_COVERAGE_QC_D_IMPL} --alignmentFile=${NP_COVERAGEQC_IN} --outputFile=${FILENAME_GENOME_COVERAGE}.tmp --processors=1 --basequalCutoff=${BASE_QUALITY_CUTOFF} --ungappedSizes=${CHROM_SIZES_FILE}) & procIDGenomeCoverage=$!
# read bins
-(set -o pipefail; ${TOOL_GENOME_COVERAGE_D_IMPL} --alignmentFile=${NP_READBINS_IN} --outputFile=/dev/stdout --processors=4 --mode=countReads --windowSize=${WINDOW_SIZE} | $MBUF_100M | ${PERL_BINARY} ${TOOL_FILTER_READ_BINS} - ${CHROM_SIZES_FILE} > ${FILENAME_READBINS_COVERAGE}.tmp) & procIDReadbinsCoverage=$!
+(set -o pipefail; ${TOOL_GENOME_COVERAGE_D_IMPL} --alignmentFile=${NP_READBINS_IN} --outputFile=/dev/stdout --processors=4 --mode=countReads --windowSize=${WINDOW_SIZE} | $MBUF_SMALL | ${PERL_BINARY} ${TOOL_FILTER_READ_BINS} - ${CHROM_SIZES_FILE} > ${FILENAME_READBINS_COVERAGE}.tmp) & procIDReadbinsCoverage=$!
# use sambamba for flagstats
${SAMBAMBA_FLAGSTATS_BINARY} flagstat -t 1 "$NP_FLAGSTATS" > "$tempFlagstatsFile" & procIDFlagstat=$!
@@ -138,14 +138,14 @@ if [[ ${bamFileExists} == true ]]
then
echo "the BAM file already exists, re-creating other output files."
# make all the pipes
- (cat ${FILENAME_SORTED_BAM} | ${MBUF_2G} | tee ${NP_COVERAGEQC_IN} ${NP_READBINS_IN} ${NP_FLAGSTATS} | ${SAMBAMBA_BINARY} view /dev/stdin | ${MBUF_2G} > $NP_COMBINEDANALYSIS_IN) & procIDOutPipe=$!
+ (cat ${FILENAME_SORTED_BAM} | ${MBUF_LARGE} | tee ${NP_COVERAGEQC_IN} ${NP_READBINS_IN} ${NP_FLAGSTATS} | ${SAMBAMBA_BINARY} view /dev/stdin | ${MBUF_LARGE} > $NP_COMBINEDANALYSIS_IN) & procIDOutPipe=$!
else
if [[ "$ON_CONVEY" == true ]]
then # we have to use sambamba and cannot make an index (because sambamba does not work with a pipe)
# Here, we use always use the local scratch (${RODDY_SCRATCH}) for sorting!
useBioBamBamSort=false;
- (set -o pipefail; ${BWA_ACCELERATED_BINARY} mem ${BWA_MEM_CONVEY_ADDITIONAL_OPTIONS} -R "@RG\tID:${ID}\tSM:${SM}\tLB:${LB}\tPL:ILLUMINA" $BWA_MEM_OPTIONS ${INDEX_PREFIX} ${INPUT_PIPES} 2> $FILENAME_BWA_LOG | $MBUF_2G | tee $NP_COMBINEDANALYSIS_IN | \
- ${SAMBAMBA_BINARY} view -f bam -S -l 0 -t 8 /dev/stdin | $MBUF_2G | \
+ (set -o pipefail; ${BWA_ACCELERATED_BINARY} mem ${BWA_MEM_CONVEY_ADDITIONAL_OPTIONS} -R "@RG\tID:${ID}\tSM:${SM}\tLB:${LB}\tPL:ILLUMINA" $BWA_MEM_OPTIONS ${INDEX_PREFIX} ${INPUT_PIPES} 2> $FILENAME_BWA_LOG | $MBUF_LARGE | tee $NP_COMBINEDANALYSIS_IN | \
+ ${SAMBAMBA_BINARY} view -f bam -S -l 0 -t 8 /dev/stdin | $MBUF_LARGE | \
tee ${NP_FLAGSTATS} | \
${SAMBAMBA_BINARY} sort --tmpdir=${RODDY_SCRATCH} -l 9 -t ${CONVEY_SAMBAMBA_SAMSORT_THREADS} -m ${CONVEY_SAMBAMBA_SAMSORT_MEMSIZE} /dev/stdin -o /dev/stdout 2> $FILENAME_SORT_LOG | \
tee ${NP_COVERAGEQC_IN} ${NP_READBINS_IN} > $tempSortedBamFile; echo $? > ${DIR_TEMP}/${bamname}_ec) & procID_MEMSORT=$!
@@ -157,8 +157,8 @@ else
NP_SORT_ERRLOG="$RODDY_SCRATCH/NP_SORT_ERRLOG"
mkfifo $NP_SORT_ERRLOG ${NP_SAMTOOLS_INDEX_IN}
${SAMTOOLS_BINARY} index ${NP_SAMTOOLS_INDEX_IN} ${tempBamIndexFile} & procID_IDX=$!
- (set -o pipefail; ${BWA_BINARY} mem -t ${BWA_MEM_THREADS} -R "@RG\tID:${ID}\tSM:${SM}\tLB:${LB}\tPL:ILLUMINA" $BWA_MEM_OPTIONS ${INDEX_PREFIX} ${INPUT_PIPES} 2> $FILENAME_BWA_LOG | $MBUF_2G | tee $NP_COMBINEDANALYSIS_IN | \
- ${SAMTOOLS_BINARY} view -uSbh - | $MBUF_2G | \
+ (set -o pipefail; ${BWA_BINARY} mem -t ${BWA_MEM_THREADS} -R "@RG\tID:${ID}\tSM:${SM}\tLB:${LB}\tPL:ILLUMINA" $BWA_MEM_OPTIONS ${INDEX_PREFIX} ${INPUT_PIPES} 2> $FILENAME_BWA_LOG | $MBUF_LARGE | tee $NP_COMBINEDANALYSIS_IN | \
+ ${SAMTOOLS_BINARY} view -uSbh - | $MBUF_LARGE | \
${SAMTOOLS_BINARY} sort -@ 8 -m ${SAMPESORT_MEMSIZE} -o - ${tempFileForSort} 2>$NP_SORT_ERRLOG | \
tee ${NP_COVERAGEQC_IN} ${NP_READBINS_IN} ${NP_FLAGSTATS} ${NP_SAMTOOLS_INDEX_IN} > ${tempSortedBamFile}; echo $? > ${DIR_TEMP}/${bamname}_ec) & procID_MEMSORT=$!
# filter samtools error log
@@ -171,8 +171,8 @@ else
(cat ${NP_BAMSORT} | tee ${NP_COVERAGEQC_IN} ${NP_READBINS_IN} ${NP_FLAGSTATS} > ${tempSortedBamFile}) & procIDview=$!
# Output sam to separate named pipe
# Rewrite to a bamfile
- (set -o pipefail; ${BWA_BINARY} mem -t ${BWA_MEM_THREADS} -R "@RG\tID:${ID}\tSM:${SM}\tLB:${LB}\tPL:ILLUMINA" $BWA_MEM_OPTIONS ${INDEX_PREFIX} ${INPUT_PIPES} 2> $FILENAME_BWA_LOG | $MBUF_2G | tee $NP_COMBINEDANALYSIS_IN | \
- ${SAMTOOLS_BINARY} view -uSbh - | $MBUF_2G | \
+ (set -o pipefail; ${BWA_BINARY} mem -t ${BWA_MEM_THREADS} -R "@RG\tID:${ID}\tSM:${SM}\tLB:${LB}\tPL:ILLUMINA" $BWA_MEM_OPTIONS ${INDEX_PREFIX} ${INPUT_PIPES} 2> $FILENAME_BWA_LOG | $MBUF_LARGE | tee $NP_COMBINEDANALYSIS_IN | \
+ ${SAMTOOLS_BINARY} view -uSbh - | $MBUF_LARGE | \
${BAMSORT_BINARY} O=${NP_BAMSORT} level=1 inputthreads=2 outputthreads=2 \
index=1 indexfilename=${tempBamIndexFile} calmdnm=1 calmdnmrecompindetonly=1 calmdnmreference=${INDEX_PREFIX} \
tmpfile=${tempFileForSort} 2> $FILENAME_SORT_LOG; echo $? > ${DIR_TEMP}/ec_bbam) & procIDBamsort=$!
diff --git a/resources/analysisTools/qcPipeline/workflowLib.sh b/resources/analysisTools/qcPipeline/workflowLib.sh
index 16edfd5..9897d24 100755
--- a/resources/analysisTools/qcPipeline/workflowLib.sh
+++ b/resources/analysisTools/qcPipeline/workflowLib.sh
@@ -1,3 +1,8 @@
+#
+# Copyright (c) 2019 German Cancer Research Center (DKFZ).
+#
+# Distributed under the MIT License (license terms are at https://github.com/DKFZ-ODCF/AlignmentAndQCWorkflows).
+#
##############################################################################
## Domain-specific code (maybe put this into a dedicated library file)
##############################################################################
@@ -7,6 +12,7 @@
source "$TOOL_BASH_LIB"
+WORKFLOWLIB___ERREXIT=$(if [[ $SHELLOPTS =~ "errexit" ]]; then echo "errexit"; fi)
WORKFLOWLIB___SHELL_OPTIONS=$(set +o)
set +o verbose
set +o xtrace
@@ -33,13 +39,13 @@ mbuf () {
local bufferSize="$1"
shift
assertNonEmpty "$bufferSize" "No buffer size defined for mbuf()" || return $?
- "$MBUFFER_BINARY" -m "$bufferSize" -q -l /dev/null ${@}
+ "$MBUFFER_BINARY" -m "$bufferSize" -q -l /dev/null "$@"
}
## The return the directory, to which big temporary files should be written, e.g. for sorting.
getBigScratchDirectory () {
- local suggestedLocation="${1}"
+ local suggestedLocation="${1:-}"
local scratchDir
if [[ "${useRoddyScratchAsBigFileScratch:-false}" == true ]]; then
scratchDir="${RODDY_SCRATCH}"
@@ -135,7 +141,7 @@ removeRoddyBigScratch () {
checkBamIsComplete () {
local bamFile="${1:?No BAM file given}"
- local result
+ local result # AFAIR the assigment and declaration together did not work; presumably because of a Bash bug!
result=$("$TOOL_BAM_IS_COMPLETE" "$bamFile")
if [[ $? ]]; then
echo "BAM is terminated! $bamFile" >> /dev/stderr
@@ -144,4 +150,126 @@ checkBamIsComplete () {
fi
}
-eval "$WORKFLOWLIB___SHELL_OPTIONS"
\ No newline at end of file
+
+
+
+#############################################################################
+# Linear pipe management
+#############################################################################
+
+# Make a name for a read and a given pipe-name.
+mkPairedPipeName() {
+ local readNo="${1:?No 1/2 as read identifier}"
+ local pipeName="${2:?No pipe basename}"
+ if [[ $readNo -ne 1 && $readNo -ne 2 ]]; then
+ throw 151 "Read number must be 1 or 2. Got '$readNo'"
+ fi
+ echo "r${readNo}_${pipeName}"
+}
+
+# Make and register a pair of named pipes from the pipe-name.
+mkPipePairSource() {
+ local pipeName="${1:?Named-pipe basename}"
+ mkPipeSource $(mkPairedPipeName 1 "$pipeName")
+ mkPipeSource $(mkPairedPipeName 2 "$pipeName")
+}
+
+# Get the path to the pipe named pipename for read 1 or 2.
+getPairedPipeEndPath() {
+ local readNo="${1:?No read-number}"
+ local pipeName="${2:?Named-pipe basename}"
+ getPipeEndPath $(mkPairedPipeName "$readNo" "$pipeName")
+}
+
+# Extend a pair of pipes by running a command with the interface "command inpipe1 inpipe2 outpipe1 outpipe2 @rest".
+# The pipeline end is automatically progressed. Call like this:
+#
+# extendPipePair $pipeName $tag -- commandName @restArgs
+#
+# The "--" is optional.
+extendPipePair() {
+ local pipeName="${1:?Named-pipe base path}"
+ local tag="${2:?No processing step tag}"
+ shift 2
+ if [[ "$1" == "--" ]]; then
+ shift
+ fi
+ local command="${1:?No command/function}"
+ shift
+ declare -a rest=("$@")
+
+ local pipe1Name=$(mkPairedPipeName 1 "$pipeName")
+ local pipe2Name=$(mkPairedPipeName 2 "$pipeName")
+
+ local r1_inpipe=$(getPipeEndPath "$pipe1Name")
+ updatePipeEndPath "$pipe1Name" "$tag"
+ local r1_outpipe=$(getPipeEndPath "$pipe1Name")
+
+ local r2_inpipe=$(getPipeEndPath "$pipe2Name")
+ updatePipeEndPath "$pipe2Name" "$tag"
+ local r2_outpipe=$(getPipeEndPath "$pipe2Name")
+
+ # Bash SUCKS! Empty arrays do give an error with set -u with Bash < 4.4, but -v varName still succeeds! Therefore test the content, here.
+ if [[ "${rest:-}" != "" ]]; then
+ "$command" "$r1_inpipe" "$r2_inpipe" "$r1_outpipe" "$r2_outpipe" "${rest[@]}" & registerPid
+ else
+ "$command" "$r1_inpipe" "$r2_inpipe" "$r1_outpipe" "$r2_outpipe" & registerPid
+ fi
+}
+
+
+
+#############################################################################
+
+getFastqAsciiStream() {
+ local name="${1:?No FASTQ filename given}"
+ $UNZIPTOOL $UNZIPTOOL_OPTIONS "$name"
+}
+
+reorderUndirectionalReads() {
+ local r1Input="${1:?No input R1}"
+ local r2Input="${2:?No input R2}"
+ local r1Output="${3:?No output R1}"
+ local r2Output="${4:?No output R2}"
+ "$PYTHON_BINARY" "$TOOL_UNIDIRECTIONAL_WGBS_READ_REORDERING" \
+ --input_ascii \
+ --R1_in "$r1Input" \
+ --R2_in "$r2Input" \
+ --output_ascii \
+ --R1_out "$r1Output" \
+ --R2_out "$r2Output" \
+ --R1_unassigned /dev/null \
+ --R2_unassigned /dev/null
+}
+
+toIlluminaScore() {
+ local inFile="${1:?No input file}"
+ local outFile="${2:?No output file}"
+ "$PERL_BINARY" "$TOOL_CONVERT_ILLUMINA_SCORES" "$inFile" > "$outFile"
+}
+
+methylCfqconv() {
+ local inFile="${1:?No input file}"
+ local outFile="${2:?No output file}"
+ local readNo="${3:?No read number}"
+ "$PYTHON_BINARY" "$TOOL_METHYL_C_TOOLS" fqconv "-$readNo" "$inFile" "$outFile"
+}
+
+trimmomatic() {
+ local input1="${1:?No R1 input}"
+ local input2="${2:?No R2 input}"
+ local output1="${3:?No R1 output}"
+ local output2="${4:?No R2 output}"
+
+ local u1=/dev/null
+ local u2=/dev/null
+
+ "$JAVA_BINARY" -jar "$TOOL_ADAPTOR_TRIMMING" $ADAPTOR_TRIMMING_OPTIONS_0 "$input1" "$input2" "$output1" "$u1" "$output2" "$u2" $ADAPTOR_TRIMMING_OPTIONS_1
+}
+
+
+
+eval "$WORKFLOWLIB___SHELL_OPTIONS"
+if [[ "$WORKFLOWLIB___ERREXIT" == "errexit" ]]; then
+ set -e
+fi
diff --git a/resources/configurationFiles/analysisBisulfiteCore.xml b/resources/configurationFiles/analysisBisulfiteCore.xml
index 74afbea..a2f72f6 100644
--- a/resources/configurationFiles/analysisBisulfiteCore.xml
+++ b/resources/configurationFiles/analysisBisulfiteCore.xml
@@ -22,6 +22,8 @@
description="true: Trim Adaptor sequences before alignment. If true, then CLIP_INDEX has to be set; false: Don't trim"/>
+
+
+
diff --git a/resources/configurationFiles/analysisQC.xml b/resources/configurationFiles/analysisQC.xml
index 74489a3..2f80077 100644
--- a/resources/configurationFiles/analysisQC.xml
+++ b/resources/configurationFiles/analysisQC.xml
@@ -81,6 +81,8 @@
+
+
diff --git a/resources/tests/analysisTools/qcPipeline/bashLib.sh b/resources/tests/analysisTools/qcPipeline/bashLib.sh
new file mode 100644
index 0000000..2c5308d
--- /dev/null
+++ b/resources/tests/analysisTools/qcPipeline/bashLib.sh
@@ -0,0 +1,100 @@
+#
+# Copyright (c) 2018 German Cancer Research Center (DKFZ).
+#
+# Distributed under the MIT License (license terms are at https://github.com/DKFZ-ODCF/AlignmentAndQCWorkflows).
+#
+
+source ${TOOL_BASH_LIB:?No TOOL_BASHLIB}
+
+testShellIsInteractive() {
+ export -f shellIsInteractive
+ assertEquals false "$(bash -c shellIsInteractive)"
+ assertEquals true "$(bash -i -c shellIsInteractive)"
+}
+
+testPrintErrout() {
+ assertEquals "Error(1): hola" "$(2>&1 errout 1 'hola')"
+}
+
+testAssertNonEmpty() {
+ assertEquals "" "$(bash -i -c 'assertNonEmpty 2> /dev/null')"
+ assertEquals "" "$(assertNonEmpty 'hola')"
+}
+
+testStringJoin() {
+ assertEquals "" "$(stringJoin ',')"
+ assertEquals "A:b:c" "$(stringJoin ':' A b c)"
+ assertEquals "a,B" "$(stringJoin ',' a B)"
+}
+
+
+
+testMkPipePath() {
+ assertEquals "$_pipePath/test" "$(mkPipePath 'test')"
+}
+
+setupPipePath() {
+ initPipeEnds
+ setUp_BashSucksVersion
+ _pipePath=$(mktemp -d /tmp/bashlibTest_XXXXXX)
+}
+
+teardownPipePath() {
+ rm -rf "$_pipePath"
+ unset _pipePath
+ unset _pipeEnds
+ cleanUp_BashSucksVersion
+}
+
+testSetGetPipeEndPath() {
+ setupPipePath
+ test getPipeEndPath 'doesnotexist'
+ assertFalse "Error exit on non-existent pipe-name access" $?
+ setPipeEndPath "test" "/a/b/c"
+ assertEquals "/a/b/c" "$(getPipeEndPath 'test')"
+ teardownPipePath
+}
+
+testUpdatePipeEndPath() {
+ setupPipePath
+ updatePipeEndPath "test" "tag1"
+ local sourcePath1=$(getPipeEndPath 'test')
+ updatePipeEndPath "test" "tag2"
+ local sourcePath2=$(getPipeEndPath 'test')
+ assertNotEquals "$sourcePath1" "$sourcePath2"
+ test -p "$sourcePath1"
+ assertTrue "$sourcePath1 exists" $?
+ test -p "$sourcePath2"
+ assertTrue "$sourcePath2 exists" $?
+ assertEquals "$sourcePath2" "$(getPipeEndPath 'test')"
+ teardownPipePath
+}
+
+testMkPipeSource() {
+ setupPipePath
+ mkPipeSource "test"
+ test -p $(getPipeEndPath 'test')
+ assertTrue "source pipe end exists" $?
+ teardownPipePath
+}
+
+echoInput() {
+ cat "$1" > "$2"
+}
+
+
+testExtendPipe() {
+ setupPipePath
+ mkPipeSource "test"
+ local sourcePipe=$(getPipeEndPath "test")
+ extendPipe "test" "step1" -- echoInput
+ extendPipe "test" "step2" echoInput
+ echo "hallo" > "$sourcePipe" &
+ local result=$(cat $(getPipeEndPath "test"))
+
+ assertEquals "hallo" "$result"
+ teardownPipePath
+}
+
+
+source ${SHUNIT2:?Oops}
\ No newline at end of file
diff --git a/resources/tests/analysisTools/qcPipeline/workflowLib.sh b/resources/tests/analysisTools/qcPipeline/workflowLib.sh
new file mode 100644
index 0000000..8023504
--- /dev/null
+++ b/resources/tests/analysisTools/qcPipeline/workflowLib.sh
@@ -0,0 +1,121 @@
+#
+# Copyright (c) 2018 German Cancer Research Center (DKFZ).
+#
+# Distributed under the MIT License (license terms are at https://github.com/DKFZ-ODCF/AlignmentAndQCWorkflows).
+#
+
+source ${TOOL_WORKFLOW_LIB:?No TOOL_WORKFLOW_LIB}
+
+
+testMarkWithPicard() {
+ assertFalse 1 markWithPicard
+
+ local markDuplicatesVariant=picard
+ assertTrue 2 markWithPicard
+ local markDuplicatesVariant=sambamba
+ assertFalse 3 markWithPicard
+
+ unset markDuplicatesVariant
+ local useBioBamBamMarkDuplicates=false
+ assertTrue 4 markWithPicard
+}
+
+testMarkWithSambamba() {
+ assertFalse markWithSambamba
+ local markDuplicatesVariant=sambamba
+ assertTrue markWithSambamba
+}
+
+testMarkWithBiobambam() {
+ assertFalse markWithBiobambam
+
+ local markDuplicatesVariant=biobambam
+ assertTrue markWithBiobambam
+ local useBiobambamMarkDuplicates=false
+ assertTrue markWithBiobambam
+}
+
+
+testGetBigScratchDirectory() {
+ local outputAnalysisBaseDirectory=/tmp/blub
+ local RODDY_SCRATCH=/tmp/blabla
+
+ assertEquals $outputAnalysisBaseDirectory/tmp "$(getBigScratchDirectory)"
+ assertEquals /tmp/alt "$(getBigScratchDirectory /tmp/alt)"
+
+ local useRoddyScratchAsBigFileScratch=true
+ assertEquals $RODDY_SCRATCH "$(getBigScratchDirectory)"
+}
+
+testAnalysisType() {
+ assertEquals genome "$(analysisType)"
+ local runExomeAnalysis=true
+ assertEquals exome "$(analysisType)"
+}
+
+
+testToIEqualsList() {
+ assertEquals "I=a I=b I=c " "$(toIEqualsList a b c)"
+}
+
+
+setupPipePath() {
+ initPipeEnds
+ _pipePath=$(mktemp -d /tmp/bashlibTest_XXXXXX)
+}
+
+teardownPipePath() {
+ rm -rf "$_pipePath"
+ unset _pipePath
+ unset _pipeEnds
+}
+
+
+testMkPairedPipeName() {
+ local name
+
+ name=$(mkPairedPipeName 1 tag)
+ assertEquals "r1_tag" "$name"
+
+ name=$(mkPairedPipeName 2 tag)
+ assertEquals "r2_tag" "$name"
+}
+
+testMkPipePair() {
+ setupPipePath
+ mkPipePairSource "test"
+ test -p $(getPairedPipeEndPath 1 "test")
+ assertTrue "First pipe in pair created" $?
+ test -p $(getPairedPipeEndPath 2 "test")
+ assertTrue "Second pipe in pair created" $?
+ teardownPipePath
+}
+
+reorder() {
+ cat "$1" <(echo "_was1") > "$4" &
+ cat "$2" <(echo "_was2") > "$3" &
+ wait
+}
+
+testExtendPipePair() {
+ setupPipePath
+ mkPipePairSource "test"
+
+ local source1=$(getPairedPipeEndPath 1 "test")
+ echo -n "hallo1" > "$source1" &
+
+ local source2=$(getPairedPipeEndPath 2 "test")
+ echo -n "hallo2" > "$source2" &
+
+ extendPipePair "test" "step1" -- reorder
+
+ local result1=$(cat $(getPairedPipeEndPath 1 "test"))
+ local result2=$(cat $(getPairedPipeEndPath 2 "test"))
+
+ assertEquals "hallo1_was1" "$result2"
+ assertEquals "hallo2_was2" "$result1"
+
+ teardownPipePath
+}
+
+source ${SHUNIT2:?Oops}
\ No newline at end of file
diff --git a/resources/tests/runTests.sh b/resources/tests/runTests.sh
new file mode 100755
index 0000000..b5fab02
--- /dev/null
+++ b/resources/tests/runTests.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) 2018 German Cancer Research Center (DKFZ).
+#
+# Distributed under the MIT License (license terms are at https://github.com/DKFZ-ODCF/AlignmentAndQCWorkflows).
+#
+
+currentDir=$(dirname $(readlink -f "$0"))
+runner=${SHUNIT2:?No SHUNIT2 variable. Point it to the shunit2 of https://github.com/kward/shunit2.git.}
+
+export SRC_ROOT=$(readlink -f $currentDir/../)
+export TOOL_BASH_LIB=$SRC_ROOT/analysisTools/qcPipeline/bashLib.sh
+export TOOL_WORKFLOW_LIB=$SRC_ROOT/analysisTools/qcPipeline/workflowLib.sh
+
+for t in $(find $currentDir/ -mindepth 2 -type f -name "*.sh"); do
+ 2>&1 echo "Running tests in '$t' ..."
+ bash $t
+done
+
+