Skip to content
Taejoon Kwon edited this page Jan 22, 2015 · 12 revisions

Table of Contents

Mapping single-end reads with 'bwa mem'

#!/bin/bash
BWA="$HOME/src.HTseq/bwa/0.7.10/bwa"
SAM2HIT="$HOME/git/HTseq-toolbox/sam/sam-to-sam_hit.py"

THREADS=12

DB="$WORK/PSEAE.db/bwadb/PSEAE_PA14_genome"
DBNAME=$(basename $DB)
DBNAME=${DBNAME/.fa/}

for FQ in $(ls ../fastq.tx/*fastq)
do
  SAM=$(basename $FQ)
  SAM=${SAM/.called.fastq/_called}
  SAM=$SAM"."$DBNAME".bwa_mem.sam"

  $BWA mem -t $THREADS $DB $FQ > $SAM
  $SAM2HIT $SAM
done

Filter best hits

#!/bin/bash
SAM2BEST="$HOME/git/HTseq-toolbox/sam/sam-to-sam_best.py"

for SAM in $(ls *sam_hit)
do
  $SAM2BEST $SAM
done

Make t_base_log

#!/bin/bash
TLOG="$HOME/git/HTseq-toolbox/sam/sam-to-t_base_log.py"

for SAM in $(ls *sam_hit_best)
do
  $TLOG $SAM
done
  • Output t_base_log file format
Target	Pos	Count+	Count-	Quadruple	Border	A  T  G  C
#Target: PSEAE_PA14_NC_008463
PSEAE_PA14|NC_008463	0	2	5	7	0	0  7  0  0
PSEAE_PA14|NC_008463	1	2	5	7	0	0  7  0  0
PSEAE_PA14|NC_008463	2	2	5	7	0	0  7  0  0

Calculate gene_tpm with t_base_log and GFF

#!/bin/bash
GENE_TPM="$HOME/git/HTseq-toolbox/align/t_base_log+gff-to-gene_tpm.py"
GFF = '/work/PSEAE_net/genome/PSEAE_PA14_genome.gff3.gz'

for LOG in $(ls *log.gz)
do
  OUT=${LOG/.t_base_log.gz/}".gene_tpm"
  echo $OUT
  $GENE_TPM $LOG $GFF > $OUT
done
Clone this wiki locally