Skip to content

Commit

Permalink
Make suitable for Brew packaging
Browse files Browse the repository at this point in the history
  • Loading branch information
tseemann committed Apr 4, 2015
1 parent 7ea9fd6 commit 0603fb2
Show file tree
Hide file tree
Showing 12 changed files with 355 additions and 180 deletions.
74 changes: 74 additions & 0 deletions bin/afa-pairwise.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Bio::SeqIO;

my(@Options, $verbose, $sep);
setOptions();

my %seq;
my $afa = Bio::SeqIO->new(-fh=>\*ARGV, -format=>'fasta');
while (my $seq = $afa->next_seq) {
$seq{ $seq->id } = $seq->seq;
}

my @id = sort keys %seq;

print join($sep, 'ID', @id),"\n";
for my $i (0 .. $#id) {
my @row = ($id[$i]);
for my $j (0 .. $#id) {
# my $d = $i==$j ? 0 : distance( $seq{ $id[$i] }, $seq{ $id[$j] } );
# push @row, $d;
# push @row, distance( $seq{ $id[$i] }, $seq{ $id[$j] } );
my $d = distance( $seq{ $id[$i] }, $seq{ $id[$j] } );
push @row, $d;
}
print join($sep, @row), "\n";
}

sub distance {
my($s, $t) = @_;
my $L = length($s);
die "Strings not same length!" if $L != length($t);
my $diff = 0;
for my $i (0 .. $L-1) {
$diff++ if substr($s,$i,1) ne substr($t,$i,1);
}
return $diff;
}

#----------------------------------------------------------------------
# Option setting routines

sub setOptions {
use Getopt::Long;

@Options = (
{OPT=>"help", VAR=>\&usage, DESC=>"This help"},
{OPT=>"verbose!", VAR=>\$verbose, DEFAULT=>0, DESC=>"Verbose output"},
{OPT=>"sep=s", VAR=>\$sep, DEFAULT=>"\t", DESC=>"Output separator char"},
);

(!@ARGV) && (usage());

&GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage();

# Now setup default values.
foreach (@Options) {
if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) {
${$_->{VAR}} = $_->{DEFAULT};
}
}
}

sub usage {
print "Usage: $0 [options] <snps.aln>\n";
foreach (@Options) {
printf " --%-13s %s%s.\n",$_->{OPT},$_->{DESC},
defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : "";
}
exit(1);
}

#----------------------------------------------------------------------
124 changes: 124 additions & 0 deletions bin/fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env perl

use warnings;
use strict;

use List::Util qw(min max sum);
use List::MoreUtils qw(pairwise);
use Data::Dumper;

use constant SAMPLE_RATE => 1_000;

my(@Options, $quiet, $ref, $hist);
setOptions();

my $N=0;
my %len;
my $qv = '';
my $GC=0;

### http://en.wikipedia.org/wiki/FASTQ_format

@ARGV or die "Please provide some FASTQ files!";

for my $file (@ARGV) {
print STDERR "Opening: $file\n" unless $quiet;
open my $fh, "gzip -c -d -f \Q$file\E |";
while (<$fh>) {
my $seq = scalar(<$fh>);
chomp $seq;
$len{ length($seq) }++;
$GC += ($seq =~ tr/[GgCc]/[GgCc]/);
$N++;
scalar(<$fh>);
my $qual = scalar(<$fh>);
if ($N % SAMPLE_RATE == 0) {
chomp $qual;
$qv .= $qual;
print STDERR "\rProcessed: $N" unless $quiet;
}
}
print STDERR "\n" unless $quiet;
close $fh;
}

my @len = sort { $a <=> $b } map { $len{$_} ? $_ : () } keys %len;

my @k = keys %len;
my @v = values %len;
my $yield = sum( pairwise { $a * $b } @k, @v );

my %qv;
map { $qv{ord($_)}++ } (split m//, $qv);
#my $offset = max(keys %qv) > 73 ? 64 : 33;
my $quality = sum(map { $_ * $qv{$_} } keys %qv) / sum(values %qv);
#print STDERR Dumper(\%qv);
my $offset = $quality-64 < 20 ? 33 : 64;
$quality -= $offset;

my $maxcount = max(values %len);
my($mode) = grep { $len{$_}==$maxcount } (keys %len);

### Print out statistics

#printf "Nfiles\t%d\n", scalar(@ARGV);
printf "Files\t@ARGV\n";
printf "Reads\t%d\n", $N;
printf "Yield\t%d\n", $yield;
printf "GeeCee\t%.1f\n", (100*$GC/$yield);
printf "MinLen\t%d\n", $len[0];
printf "AvgLen\t%d\n", $yield/$N;
printf "MaxLen\t%d\n", $len[-1];
printf "ModeLen\t%d\n", $mode;
printf "Phred\t%s\n", $offset;
printf "AvgQual\t%.1f\n", $quality;

if ($ref) {
my $size = ($ref =~ m/^(\d+)$/ ? $1 : (-s $ref));
print STDERR "Calculating depth, using size $size\n";
printf "Depth\t%dx\n", $yield/$size;
}

if ($hist) {
for my $i ($len[0] .. $len[-1]) {
my $count = $len{$i} || 0;
my $bar = '#'x(int(($count*50/$maxcount)+1));
printf "Len-%d\t%d\t%s\n", $i, $count, $bar;
}
}

#----------------------------------------------------------------------
# Option setting routines

sub setOptions {
use Getopt::Long;

@Options = (
{OPT=>"help", VAR=>\&usage, DESC=>"This help"},
{OPT=>"quiet!", VAR=>\$quiet, DEFAULT=>0, DESC=>"Quiet mode: no progress output"},
{OPT=>"ref=s", VAR=>\$ref, DEFAULT=>'', DESC=>"Reference FASTA file OR size in bp"},
{OPT=>"hist", VAR=>\$hist, DEFAULT=>0, DESC=>"Length histogram"},
);

(!@ARGV) && (usage());

&GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage();

# Now setup default values.
foreach (@Options) {
if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) {
${$_->{VAR}} = $_->{DEFAULT};
}
}
}

sub usage {
print "Usage: $0 [options] <file.fq | file.fq.gz ...>\n";
foreach (@Options) {
printf " --%-13s %s%s.\n",$_->{OPT},$_->{DESC},
defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : "";
}
exit(1);
}

#----------------------------------------------------------------------
6 changes: 3 additions & 3 deletions bin/nullarbor.pl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# local modules

use FindBin;
use lib "$FindBin::RealBin/../lib";
use lib "$FindBin::RealBin/../perl5";
use Nullarbor::IsolateSet;
use Nullarbor::Logger qw(msg err);
use Nullarbor::Report;
Expand All @@ -24,7 +24,7 @@
# constants

my $EXE = "$FindBin::RealScript";
my $VERSION = '0.3';
my $VERSION = '0.4';
my $AUTHOR = 'Torsten Seemann <[email protected]>';

#-------------------------------------------------------------------
Expand Down Expand Up @@ -69,7 +69,7 @@
msg("This is $EXE $VERSION");
msg("Send complaints to $AUTHOR");

require_exe( qw'kraken snippy mlst fq abricate nw_display trimal FastTree convert pandoc afa-pairwise.pl' );
require_exe( qw'kraken snippy mlst fq abricate megahit nw_reroot nw_display trimal FastTree convert pandoc afa-pairwise.pl' );
require_perlmod( qw'Data::Dumper Moo Spreadsheet::Read SVG::Graph Bio::SeqIO File::Copy Time::Piece' );

if ($report) {
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion lib/Nullarbor/Report.pm → perl5/Nullarbor/Report.pm
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ sub generate {

msg("Generating $name report in: $outdir");
open my $fh, '>', "$outdir/index.md";
copy("$FindBin::Bin/../etc/nullarbor.css", "$outdir/");
copy("$FindBin::Bin/../conf/nullarbor.css", "$outdir/");

#...........................................................................................
# Load isolate list
Expand Down
File renamed without changes.
1 change: 1 addition & 0 deletions test/data/data.tab
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ genome01 data/genome01_R1.fastq data/genome01_R2.fastq
genome02 data/genome02_R1.fastq data/genome02_R2.fastq
genome03 data/genome03_R1.fastq data/genome03_R2.fastq
genome04 data/genome04_R1.fastq data/genome04_R2.fastq
ref data/ref_R1.fastq data/ref_R2.fastq
Loading

0 comments on commit 0603fb2

Please sign in to comment.