-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_stats_from_flagstat.pl
executable file
·50 lines (42 loc) · 2.02 KB
/
get_stats_from_flagstat.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/perl -w
use strict;
use Number::Format;
# Collect every *_dedup.flagstat file below the current working directory,
# echo its contents, and tally the read counts across all of them.
#
# For each file the lines "in total", "paired in", "properly paired",
# "duplicates" and "mapped (" are recognised; the first number on the line
# (the count before the "+") is added to the running totals. After each file
# its duplication percentage (duplicates / mapped) is printed, and once all
# files are processed a summary of totals and per-library averages follows.
#
# Dies if no flagstat file is found or if one cannot be opened.

# Numbers are printed with '.' as thousands separator and ',' as decimal point.
my $fn = Number::Format->new(
    -thousands_sep => '.',
    -decimal_point => ',',
);

# $PWD is expanded by the shell (hence the backslash); it is quoted so that a
# working directory containing spaces is still passed to find as one argument.
my @files = `find "\$PWD" -iname "*_dedup.flagstat"`;
chomp @files;
print scalar @files, " flagstats found\n";

# Without input every average below would divide by zero.
die "No *_dedup.flagstat files found; nothing to summarise\n" unless @files;

# Counter name => pattern, tried in this order; the first match wins for a
# line. The second number on each line (after the "+") may have any number
# of digits, so it is matched with \d+ rather than a single \d.
my @counters = (
    [ total   => qr/(\d+) \+ \d+ in total/ ],
    [ paired  => qr/(\d+) \+ \d+ paired in/ ],
    [ ppaired => qr/(\d+) \+ \d+ properly paired/ ],
    [ dups    => qr/(\d+) \+ \d+ duplicates/ ],
    [ mapped  => qr/(\d+) \+ \d+ mapped \(/ ],
);

# Start every total at 0 so the summary never touches an undefined value.
my %totals = map { $_->[0] => 0 } @counters;

foreach my $file (@files) {
    print "working on $file...\n";
    open my $in, '<', $file or die "Cannot open $file: $!\n";

    # Counts seen in this file only; used for the per-file duplication rate.
    my %count = (mapped => 0, dups => 0);

    while (my $line = <$in>) {
        print "\t$line";
        foreach my $counter (@counters) {
            my ($key, $pattern) = @{$counter};
            next unless $line =~ $pattern;
            $count{$key} = $1;
            $totals{$key} += $1;
            last;
        }
    }
    close $in;

    # A file without mapped reads has no meaningful duplication rate.
    my $dup_pct = $count{mapped}
        ? 100 * $count{dups} / $count{mapped}
        : 'n/a';
    print "\n\t";
    print $dup_pct;
    print " %duplication\n\n";
}

my $n_libs = scalar @files;

# Throughput is the read count multiplied by a read length of 75, 100 and 150.
print "Total raw reads: ".$fn->format_number($totals{'total'})," reads (Total throughput, 75bp=", $fn->format_number(75*$totals{'total'})," bp, 100bp=", $fn->format_number(100*$totals{'total'})," bp, 150bp=", $fn->format_number(150*$totals{'total'})," bp)\n";
print "Total mapped reads: ".$fn->format_number($totals{'mapped'})," reads (Total throughput, 75bp=", $fn->format_number(75*$totals{'mapped'})," bp, 100bp=", $fn->format_number(100*$totals{'mapped'})," bp,150bp=", $fn->format_number(150*$totals{'mapped'})," bp)\n";

# The label says "mapped", so the mapped total is averaged here (the total
# read count was used previously, which did not match the label).
print "Average mapped per lib: ".$fn->format_number(int($totals{'mapped'}/$n_libs))," reads\n";
print "Average dups per lib: ".$fn->format_number(int($totals{'dups'}/$n_libs))," reads\n";

# This is the pooled rate (all duplicates / all mapped reads), not the mean
# of the per-file percentages printed above.
my $overall_dup_pct = $totals{'mapped'}
    ? $fn->format_number(100 * ($totals{'dups'} / $totals{'mapped'}))
    : 'n/a';
print "Average dups % per lib: ",$overall_dup_pct," %\n";