-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
355 additions
and
180 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#!/usr/bin/env perl | ||
use warnings; | ||
use strict; | ||
use Bio::SeqIO; | ||
|
||
my(@Options, $verbose, $sep); | ||
setOptions(); | ||
|
||
my %seq; | ||
my $afa = Bio::SeqIO->new(-fh=>\*ARGV, -format=>'fasta'); | ||
while (my $seq = $afa->next_seq) { | ||
$seq{ $seq->id } = $seq->seq; | ||
} | ||
|
||
my @id = sort keys %seq; | ||
|
||
print join($sep, 'ID', @id),"\n"; | ||
for my $i (0 .. $#id) { | ||
my @row = ($id[$i]); | ||
for my $j (0 .. $#id) { | ||
# my $d = $i==$j ? 0 : distance( $seq{ $id[$i] }, $seq{ $id[$j] } ); | ||
# push @row, $d; | ||
# push @row, distance( $seq{ $id[$i] }, $seq{ $id[$j] } ); | ||
my $d = distance( $seq{ $id[$i] }, $seq{ $id[$j] } ); | ||
push @row, $d; | ||
} | ||
print join($sep, @row), "\n"; | ||
} | ||
|
||
sub distance { | ||
my($s, $t) = @_; | ||
my $L = length($s); | ||
die "Strings not same length!" if $L != length($t); | ||
my $diff = 0; | ||
for my $i (0 .. $L-1) { | ||
$diff++ if substr($s,$i,1) ne substr($t,$i,1); | ||
} | ||
return $diff; | ||
} | ||
|
||
#---------------------------------------------------------------------- | ||
# Option setting routines | ||
|
||
sub setOptions { | ||
use Getopt::Long; | ||
|
||
@Options = ( | ||
{OPT=>"help", VAR=>\&usage, DESC=>"This help"}, | ||
{OPT=>"verbose!", VAR=>\$verbose, DEFAULT=>0, DESC=>"Verbose output"}, | ||
{OPT=>"sep=s", VAR=>\$sep, DEFAULT=>"\t", DESC=>"Output separator char"}, | ||
); | ||
|
||
(!@ARGV) && (usage()); | ||
|
||
&GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage(); | ||
|
||
# Now setup default values. | ||
foreach (@Options) { | ||
if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) { | ||
${$_->{VAR}} = $_->{DEFAULT}; | ||
} | ||
} | ||
} | ||
|
||
sub usage { | ||
print "Usage: $0 [options] <snps.aln>\n"; | ||
foreach (@Options) { | ||
printf " --%-13s %s%s.\n",$_->{OPT},$_->{DESC}, | ||
defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : ""; | ||
} | ||
exit(1); | ||
} | ||
|
||
#---------------------------------------------------------------------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
#!/usr/bin/env perl | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use List::Util qw(min max sum); | ||
use List::MoreUtils qw(pairwise); | ||
use Data::Dumper; | ||
|
||
use constant SAMPLE_RATE => 1_000; | ||
|
||
my(@Options, $quiet, $ref, $hist); | ||
setOptions(); | ||
|
||
my $N=0; | ||
my %len; | ||
my $qv = ''; | ||
my $GC=0; | ||
|
||
### http://en.wikipedia.org/wiki/FASTQ_format | ||
|
||
@ARGV or die "Please provide some FASTQ files!"; | ||
|
||
for my $file (@ARGV) { | ||
print STDERR "Opening: $file\n" unless $quiet; | ||
open my $fh, "gzip -c -d -f \Q$file\E |"; | ||
while (<$fh>) { | ||
my $seq = scalar(<$fh>); | ||
chomp $seq; | ||
$len{ length($seq) }++; | ||
$GC += ($seq =~ tr/[GgCc]/[GgCc]/); | ||
$N++; | ||
scalar(<$fh>); | ||
my $qual = scalar(<$fh>); | ||
if ($N % SAMPLE_RATE == 0) { | ||
chomp $qual; | ||
$qv .= $qual; | ||
print STDERR "\rProcessed: $N" unless $quiet; | ||
} | ||
} | ||
print STDERR "\n" unless $quiet; | ||
close $fh; | ||
} | ||
|
||
my @len = sort { $a <=> $b } map { $len{$_} ? $_ : () } keys %len; | ||
|
||
my @k = keys %len; | ||
my @v = values %len; | ||
my $yield = sum( pairwise { $a * $b } @k, @v ); | ||
|
||
my %qv; | ||
map { $qv{ord($_)}++ } (split m//, $qv); | ||
#my $offset = max(keys %qv) > 73 ? 64 : 33; | ||
my $quality = sum(map { $_ * $qv{$_} } keys %qv) / sum(values %qv); | ||
#print STDERR Dumper(\%qv); | ||
my $offset = $quality-64 < 20 ? 33 : 64; | ||
$quality -= $offset; | ||
|
||
my $maxcount = max(values %len); | ||
my($mode) = grep { $len{$_}==$maxcount } (keys %len); | ||
|
||
### Print out statistics | ||
|
||
#printf "Nfiles\t%d\n", scalar(@ARGV); | ||
printf "Files\t@ARGV\n"; | ||
printf "Reads\t%d\n", $N; | ||
printf "Yield\t%d\n", $yield; | ||
printf "GeeCee\t%.1f\n", (100*$GC/$yield); | ||
printf "MinLen\t%d\n", $len[0]; | ||
printf "AvgLen\t%d\n", $yield/$N; | ||
printf "MaxLen\t%d\n", $len[-1]; | ||
printf "ModeLen\t%d\n", $mode; | ||
printf "Phred\t%s\n", $offset; | ||
printf "AvgQual\t%.1f\n", $quality; | ||
|
||
if ($ref) { | ||
my $size = ($ref =~ m/^(\d+)$/ ? $1 : (-s $ref)); | ||
print STDERR "Calculating depth, using size $size\n"; | ||
printf "Depth\t%dx\n", $yield/$size; | ||
} | ||
|
||
if ($hist) { | ||
for my $i ($len[0] .. $len[-1]) { | ||
my $count = $len{$i} || 0; | ||
my $bar = '#'x(int(($count*50/$maxcount)+1)); | ||
printf "Len-%d\t%d\t%s\n", $i, $count, $bar; | ||
} | ||
} | ||
|
||
#---------------------------------------------------------------------- | ||
# Option setting routines | ||
|
||
sub setOptions { | ||
use Getopt::Long; | ||
|
||
@Options = ( | ||
{OPT=>"help", VAR=>\&usage, DESC=>"This help"}, | ||
{OPT=>"quiet!", VAR=>\$quiet, DEFAULT=>0, DESC=>"Quiet mode: no progress output"}, | ||
{OPT=>"ref=s", VAR=>\$ref, DEFAULT=>'', DESC=>"Reference FASTA file OR size in bp"}, | ||
{OPT=>"hist", VAR=>\$hist, DEFAULT=>0, DESC=>"Length histogram"}, | ||
); | ||
|
||
(!@ARGV) && (usage()); | ||
|
||
&GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage(); | ||
|
||
# Now setup default values. | ||
foreach (@Options) { | ||
if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) { | ||
${$_->{VAR}} = $_->{DEFAULT}; | ||
} | ||
} | ||
} | ||
|
||
sub usage { | ||
print "Usage: $0 [options] <file.fq | file.fq.gz ...>\n"; | ||
foreach (@Options) { | ||
printf " --%-13s %s%s.\n",$_->{OPT},$_->{DESC}, | ||
defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : ""; | ||
} | ||
exit(1); | ||
} | ||
|
||
#---------------------------------------------------------------------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ | |
# local modules | ||
|
||
use FindBin; | ||
use lib "$FindBin::RealBin/../lib"; | ||
use lib "$FindBin::RealBin/../perl5"; | ||
use Nullarbor::IsolateSet; | ||
use Nullarbor::Logger qw(msg err); | ||
use Nullarbor::Report; | ||
|
@@ -24,7 +24,7 @@ | |
# constants | ||
|
||
my $EXE = "$FindBin::RealScript"; | ||
my $VERSION = '0.3'; | ||
my $VERSION = '0.4'; | ||
my $AUTHOR = 'Torsten Seemann <[email protected]>'; | ||
|
||
#------------------------------------------------------------------- | ||
|
@@ -69,7 +69,7 @@ | |
msg("This is $EXE $VERSION"); | ||
msg("Send complaints to $AUTHOR"); | ||
|
||
require_exe( qw'kraken snippy mlst fq abricate nw_display trimal FastTree convert pandoc afa-pairwise.pl' ); | ||
require_exe( qw'kraken snippy mlst fq abricate megahit nw_reroot nw_display trimal FastTree convert pandoc afa-pairwise.pl' ); | ||
require_perlmod( qw'Data::Dumper Moo Spreadsheet::Read SVG::Graph Bio::SeqIO File::Copy Time::Piece' ); | ||
|
||
if ($report) { | ||
|
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.