Skip to content

Commit

Permalink
Got (most of) the BWA methyl-seq code to run with set -e to improve e…
Browse files Browse the repository at this point in the history
…rror robustness and handling.
  • Loading branch information
vinjana committed Feb 18, 2019
1 parent 87b8cba commit adacdb4
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 46 deletions.
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
#!/bin/bash
#
# Copyright (c) 2018 German Cancer Research Center (DKFZ).
# Copyright (c) 2019 German Cancer Research Center (DKFZ).
#
# Distributed under the MIT License (license terms are at https://github.com/DKFZ-ODCF/AlignmentAndQCWorkflows).
#

source "$TOOL_WORKFLOW_LIB"

set -o pipefail
set -ue

source "$TOOL_WORKFLOW_LIB"

setUp_BashSucksVersion
trap cleanUp_BashSucksVersion EXIT

ID=${RUN}_${LANE}
SM=sample_${SAMPLE}_${PID}
ID="${RUN}_${LANE}"
SM="sample_${SAMPLE}_${PID}"

# RODDY_SCRATCH is used here. Is for PBS $PBS_SCRATCH_DIR/$PBS_JOBID, for SGE /tmp/roddyScratch/jobid
RODDY_BIG_SCRATCH=$(getBigScratchDirectory "${FILENAME_SORTED_BAM}_TEMP")
Expand All @@ -34,59 +35,61 @@ MBUF_LARGE="${MBUFFER_BINARY} -m ${MBUFFER_SIZE_LARGE} -q -l /dev/null"
mkfifo ${NP_READBINS_IN} ${NP_COVERAGEQC_IN} ${NP_COMBINEDANALYSIS_IN} ${NP_FLAGSTATS} ${NP_BWA_OUT} ${NP_BCONV_OUT}

bamname=`basename ${FILENAME_SORTED_BAM}`
INDEX_FILE=${FILENAME_SORTED_BAM}.bai
tempSortedBamFile=${FILENAME_SORTED_BAM}.tmp
tempFileForSort=${RODDY_BIG_SCRATCH}/${bamname}_forsorting
tempBamIndexFile=${FILENAME_SORTED_BAM}.tmp.bai
tempFlagstatsFile=${FILENAME_FLAGSTATS}.tmp
INDEX_FILE="$FILENAME_SORTED_BAM.bai"
tempSortedBamFile="$FILENAME_SORTED_BAM.tmp"
tempFileForSort="$RODDY_BIG_SCRATCH/${bamname}_forsorting"
tempBamIndexFile="$FILENAME_SORTED_BAM.tmp.bai"
tempFlagstatsFile="$FILENAME_FLAGSTATS.tmp"

# samtools sort may complain about truncated temp files and for each line outputs
# the error message. This happens when the same files are written at the same time,
# see http://sourceforge.net/p/samtools/mailman/samtools-help/thread/[email protected]/
NP_SORT_ERRLOG=${RODDY_SCRATCH}/NP_SORT_ERRLOG
FILENAME_SORT_LOG=${DIR_TEMP}/${bamname}_errlog_sort
NP_SORT_ERRLOG="$RODDY_SCRATCH/NP_SORT_ERRLOG"
FILENAME_SORT_LOG="$DIR_TEMP/${bamname}_errlog_sort"

NP_BAMSORT=${RODDY_SCRATCH}/NAMED_PIPE_BAMSORT
mkfifo ${NP_BAMSORT}
NP_BAMSORT="$RODDY_SCRATCH/NAMED_PIPE_BAMSORT"
mkfifo "$NP_BAMSORT"

# Create the following variable for error checking issues
TMP_FILE=${tempSortedBamFile}
TMP_FILE="$tempSortedBamFile"
# error tracking
FILENAME_BWA_LOG=${DIR_TEMP}/${bamname}_errlog_bwamem
FILENAME_BWA_EC=${DIR_TEMP}/${bamname}_ec
FILENAME_BWA_LOG="$DIR_TEMP/${bamname}_errlog_bwamem"
FILENAME_BWA_EC="$DIR_TEMP/${bamname}_ec"

if [[ -n "$RAW_SEQ_1" ]]; then
source "$TOOL_DEFAULT_PLUGIN_LIB"
setCompressionToolsBasedOnFileCompression "$RAW_SEQ_1"
fi

bamFileExists=false
# in case the BAM already exists, but QC files are missing, create these only
# In case the BAM already exists, but QC files are missing, create these only
# TODO: This logic currently is not directly/simply reflected. Instead there are multiple if branches. Consider refactoring.
if [[ -f ${FILENAME_SORTED_BAM} ]] && [[ -s ${FILENAME_SORTED_BAM} ]]
then
if [[ -f ${FILENAME_SORTED_BAM} ]] && [[ -s ${FILENAME_SORTED_BAM} ]]; then
checkBamIsComplete "$FILENAME_SORTED_BAM"
bamFileExists=true
fi

# test if one of the fastq files is a fake fastq file to simulate paired end sequencing in PIDs with mixed sequencing (single and paired end)
LENGTH_SEQ_1=$(getFastqAsciiStream "$RAW_SEQ_1" | head | wc -l)
LENGTH_SEQ_2=$(getFastqAsciiStream "$RAW_SEQ_2" | head | wc -l)
if [[ $LENGTH_SEQ_1 == 0 || $LENGTH_SEQ_2 == 0 ]]; then
useSingleEndProcessing=true
elif [[ $LENGTH_SEQ_1 == 0 && $LENGTH_SEQ_2 == 0 ]]; then
# Test whether one of the fastq files is a fake fastq file to simulate paired-end sequencing in PIDs with mixed sequencing (single- and paired-end)
declare -i LENGTH_SEQ_1=$(getFastqAsciiStream "$RAW_SEQ_1" | head | wc -l)
declare -i LENGTH_SEQ_2=$(getFastqAsciiStream "$RAW_SEQ_2" | head | wc -l)
if [[ $LENGTH_SEQ_1 -eq 0 ]] && [[ $LENGTH_SEQ_2 -eq 0 ]]; then
throw 1 "Both input files are empty: '$RAW_SEQ_1' and '$RAW_SEQ_2'"
elif [[ $LENGTH_SEQ_1 -eq 0 ]] || [[ $LENGTH_SEQ_2 -eq 0 ]]; then
useSingleEndProcessing=true
fi


# Determine the quality score
if [[ $LENGTH_SEQ_1 != 0 ]]; then
set +e # The following few lines fail with exit 141 due to unknown reasons is `set -e` is set.
if [[ $LENGTH_SEQ_1 -ne 0 ]]; then
qualityScore=$(getFastqAsciiStream "$RAW_SEQ_1" | "$PERL_BINARY" "$TOOL_SEQUENCER_DETECTION")
else
qualityScore=$(getFastqAsciiStream "$RAW_SEQ_2" | "$PERL_BINARY" "$TOOL_SEQUENCER_DETECTION")
fi
set -e

# make biobambam sort default
useBioBamBamSort=${useBioBamBamSort-true}
# Make biobambam sort default
useBioBamBamSort="${useBioBamBamSort:-true}"

# Default: Dummy process IDs to simplify downstream logic.
# TODO Remove after completely switching to PID registry system.
Expand Down Expand Up @@ -130,7 +133,7 @@ if [[ "$bamFileExists" == "false" ]]; then # we have to make the BAM
fi

# Try to read from pipes BEFORE they are filled.
# in all cases:
# In all cases:
# SAM output is piped to perl script that calculates various QC measures
(${PERL_BINARY} ${TOOL_COMBINED_BAM_ANALYSIS} -i ${NP_COMBINEDANALYSIS_IN} -a ${FILENAME_DIFFCHROM_MATRIX}.tmp -c ${CHROM_SIZES_FILE} -b ${FILENAME_ISIZES_MATRIX}.tmp -f ${FILENAME_EXTENDED_FLAGSTATS}.tmp -m ${FILENAME_ISIZES_STATISTICS}.tmp -o ${FILENAME_DIFFCHROM_STATISTICS}.tmp -p ${INSERT_SIZE_LIMIT} ) & procIDCBA=$!

Expand Down Expand Up @@ -200,7 +203,8 @@ fi
if [[ ${bamFileExists} == true ]]; then
wait $procIDOutPipe || throw 13 "Error from sambamba view pipe"
else # make sure to rename BAM file when it has been produced correctly
checkBwaLog "$TMP_FILE" "$FILENAME_BWA_LOG" "$FILENAME_SORT_LOG"
errorString="There was a non-zero exit code in the bwa mem - sort pipeline; exiting..."
source "$TOOL_BWA_ERROR_CHECKING_SCRIPT"
checkBamIsComplete "$tempSortedBamFile"
mv ${tempSortedBamFile} ${FILENAME_SORTED_BAM} || throw 36 "Could not move file"
# index is only created by samtools or biobambam when producing the BAM, it may be older than the BAM, so update time stamp
Expand Down
2 changes: 1 addition & 1 deletion resources/analysisTools/qcPipeline/PhredOrIllumina.pl
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@
# gunzip or awk always complain about broken pipe, so maybe that helps?
# "close" Closes the file or pipe associated with the file handle.
# close (STDIN);
exit;
exit 0;
14 changes: 11 additions & 3 deletions resources/analysisTools/qcPipeline/bashLib.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
#
# Copyright (c) 2019 German Cancer Research Center (DKFZ).
#
# Distributed under the MIT License (license terms are at https://github.com/DKFZ-ODCF/AlignmentAndQCWorkflows).
#
# Library of BASH function. Please import using
#
# source "$TOOL_BASH_LIB"


BASHLIB___ERREXIT=$(if [[ $SHELLOPTS =~ "errexit" ]]; then echo "errexit"; fi)
BASHLIB___SHELL_OPTIONS=$(set +o)
set +o verbose
set +o xtrace
Expand Down Expand Up @@ -339,8 +345,8 @@ extendPipe() {
updatePipeEndPath "$pipeName" "$tag"
local outpipe=$(getPipeEndPath "$pipeName")

# This &#"@! is because Bash SUCKS! Empty arrays do not exist Bash < 4.4
if [[ -v rest ]]; then
# Bash SUCKS! Empty arrays do give an error with set -u with Bash < 4.4, but -v varName still succeeds! Therefore test the content, here.
if [[ "${rest:-}" != "" ]]; then
"$command" "$inpipe" "$outpipe" "${rest[@]}" & registerPid
else
"$command" "$inpipe" "$outpipe" & registerPid
Expand All @@ -350,4 +356,6 @@ extendPipe() {


eval "$BASHLIB___SHELL_OPTIONS"

if [[ "$BASHLIB___ERREXIT" == "errexit" ]]; then
set -e
fi
10 changes: 5 additions & 5 deletions resources/analysisTools/qcPipeline/bwaErrorChecking.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,20 @@ then
fi

# TODO Check that?
[[ "$?" != "0" ]] && echo $errorString && exit 32
[[ $? -ne 0 ]] && echo $errorString && exit 32
fi

# test for success before renaming!
success=`grep " fault" ${FILENAME_BWA_LOG}`
success=$(grep " fault" ${FILENAME_BWA_LOG} || true)
[[ ! -z "$success" ]] && echo found segfault $success in bwa logfile! && exit 31

# Barbara Aug 10 2015: I can't remember what bwa aln and sampe reported as "error".
# bluebee bwa has "error_count" in bwa-0.7.8-r2.05; and new in bwa-0.7.8-r2.06: "WARNING:top_bs_ke_be_hw: dummy be execution, only setting error."
# these are not errors that would lead to fail, in contrast to "ERROR: Bus error"
success=`grep -i "error" ${FILENAME_BWA_LOG} | grep -v "error_count" | grep -v "dummy be execution"`
success=$(grep -i "error" ${FILENAME_BWA_LOG} | grep -v "error_count" | grep -v "dummy be execution" || true)
[[ ! -z "$success" ]] && echo found error $success in bwa logfile! && exit 36

success=`grep "Abort. Sorry." ${FILENAME_BWA_LOG}`
success=$(grep "Abort. Sorry." ${FILENAME_BWA_LOG} || true)
[[ ! -z "$success" ]] && echo found error $success in bwa logfile! && exit 37


Expand All @@ -46,7 +46,7 @@ success=`grep "Abort. Sorry." ${FILENAME_BWA_LOG}`
echo "testing $FILENAME_SORT_LOG"
if [ ! -z $FILENAME_SORT_LOG ] && [ -f $FILENAME_SORT_LOG ]
then
success=`grep "is truncated. Continue anyway." $FILENAME_SORT_LOG`
success=$(grep "is truncated. Continue anyway." "$FILENAME_SORT_LOG" || true)
[[ ! -z "$success" ]] && echo found error $success in samtools sorting logfile! && exit 38
else
echo "there is no samtools sort log file"
Expand Down
21 changes: 15 additions & 6 deletions resources/analysisTools/qcPipeline/workflowLib.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#
# Copyright (c) 2019 German Cancer Research Center (DKFZ).
#
# Distributed under the MIT License (license terms are at https://github.com/DKFZ-ODCF/AlignmentAndQCWorkflows).
#
##############################################################################
## Domain-specific code (maybe put this into a dedicated library file)
##############################################################################
Expand All @@ -7,6 +12,7 @@

source "$TOOL_BASH_LIB"

WORKFLOWLIB___ERREXIT=$(if [[ $SHELLOPTS =~ "errexit" ]]; then echo "errexit"; fi)
WORKFLOWLIB___SHELL_OPTIONS=$(set +o)
set +o verbose
set +o xtrace
Expand Down Expand Up @@ -135,7 +141,7 @@ removeRoddyBigScratch () {

checkBamIsComplete () {
local bamFile="${1:?No BAM file given}"
local result
local result # AFAIR the assigment and declaration together did not work; presumably because of a Bash bug!
result=$("$TOOL_BAM_IS_COMPLETE" "$bamFile")
if [[ $? ]]; then
echo "BAM is terminated! $bamFile" >> /dev/stderr
Expand Down Expand Up @@ -203,8 +209,8 @@ extendPipePair() {
updatePipeEndPath "$pipe2Name" "$tag"
local r2_outpipe=$(getPipeEndPath "$pipe2Name")

# This &#"@! is because Bash SUCKS! Empty arrays do not exist Bash < 4.4
if [[ -v rest ]]; then
# Bash SUCKS! Empty arrays do give an error with set -u with Bash < 4.4, but -v varName still succeeds! Therefore test the content, here.
if [[ "${rest:-}" != "" ]]; then
"$command" "$r1_inpipe" "$r2_inpipe" "$r1_outpipe" "$r2_outpipe" "${rest[@]}" & registerPid
else
"$command" "$r1_inpipe" "$r2_inpipe" "$r1_outpipe" "$r2_outpipe" & registerPid
Expand All @@ -225,7 +231,7 @@ reorderUndirectionalReads() {
local r2Input="${2:?No input R2}"
local r1Output="${3:?No output R1}"
local r2Output="${4:?No output R2}"
"$TOOL_UNIDIRECTIONAL_WGBS_READ_REORDERING" \
"$PYTHON_BINARY" "$TOOL_UNIDIRECTIONAL_WGBS_READ_REORDERING" \
--input_ascii \
--R1_in "$r1Input" \
--R2_in "$r2Input" \
Expand Down Expand Up @@ -258,9 +264,12 @@ trimmomatic() {
local u1=/dev/null
local u2=/dev/null

"$TRIMMOMATIC_BINARY" $ADAPTOR_TRIMMING_OPTIONS_0 "$input1" "$input2" "$output1" "$u1" "$output2" "$u2" $ADAPTOR_TRIMMING_OPTIONS_1
"$JAVA_BINARY" -jar "$TOOL_ADAPTOR_TRIMMING" $ADAPTOR_TRIMMING_OPTIONS_0 "$input1" "$input2" "$output1" "$u1" "$output2" "$u2" $ADAPTOR_TRIMMING_OPTIONS_1
}



eval "$WORKFLOWLIB___SHELL_OPTIONS"
eval "$WORKFLOWLIB___SHELL_OPTIONS"
if [[ "$WORKFLOWLIB___ERREXIT" == "errexit" ]]; then
set -e
fi

0 comments on commit adacdb4

Please sign in to comment.