-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbam_exclude_reads_by_windows_size_and_numpairs.evaluation.pl
executable file
·129 lines (115 loc) · 3.05 KB
/
bam_exclude_reads_by_windows_size_and_numpairs.evaluation.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper qw /Dumper/;
# Usage banner shown on a wrong argument count or an explicit help request.
# $0 is interpolated once, when the constant is defined.
use constant USAGE => <<"EOH";
usage: $0 input order output
v20161205
EOH

# Exactly three positional arguments are required ...
die USAGE unless (@ARGV == 3);
# ... and a help switch in first position prints the banner as well.
die USAGE if ($ARGV[0] eq '-h');
die USAGE if ($ARGV[0] eq '--help');

# Positional arguments: links file, order table, destination file.
my ($input, $order, $output) = @ARGV;
# Load the order table into %idhash, keyed by the ID found in column 1.
# Three tab-separated fields are kept per record:
#   beg => column 5, end => column 6, str => column 8
# (str is later compared against '+' to pick an orientation token).
#
# Dies when the file cannot be opened or when an ID occurs twice.
# Blank lines are ignored, and records too short to carry columns 5 and 6
# are skipped with a warning instead of being stored with undefined
# coordinates.  A file name of '-' still means STDIN, as it did with the
# former two-argument open.
my %idhash = ();
my $order_fh;
if ($order eq '-') {
    $order_fh = \*STDIN;
}
else {
    open($order_fh, '<', $order)
        or die "Error: can not open order '$order': $!\n";
}
while (my $line = <$order_fh>) {
    # Strip LF as well as CRLF so a trailing strand column is exactly '+'.
    $line =~ s/\r?\n\z//;
    next unless (length $line);

    # Limit -1 keeps trailing empty fields, so an empty strand column stays
    # an empty string rather than becoming undef.
    my @arr = split(/\t/, $line, -1);
    if (scalar(@arr) < 6) {
        warn "Warning: order line $. has fewer than 6 columns, skipped\n";
        next;
    }
    die "Error: repeated ID:$arr[0]\n" if (exists $idhash{$arr[0]});

    $idhash{$arr[0]} = {
        'beg' => $arr[4],
        'end' => $arr[5],
        'str' => $arr[7],
    };
}
close $order_fh unless ($order eq '-');
#print Dumper \%idhash;exit 0; ### For test ###
# Annotate every link record with the orientation of, and the distance
# between, the two IDs it names (columns 1 and 3), looked up in %idhash.
#
# Columns appended to each input line, tab-separated:
#   1    two orientation tokens, one per ID (see _strand_token)
#   2    signed distance (see _interval_distance); 'NaN' unless both IDs
#        are present in %idhash
#   3-5  beg, end, str of the first ID   (NaN NaN NaN when unknown)
#   6-8  beg, end, str of the second ID  (NaN NaN NaN when unknown)
#
# A file name of '-' still means STDIN / STDOUT, as it did with the former
# two-argument open.  Dies when a file cannot be opened or written.

# Orientation token for one %idhash record: '+>' when its strand is '+',
# '<-' for any other strand value, 'XX' when there is no record.
sub _strand_token {
    my ($record) = @_;
    return 'XX' unless (defined $record);
    return ($record->{'str'} eq '+') ? '+>' : '<-';
}

# Signed distance between two intervals whose coordinates are treated as
# inclusive (the overlap length is computed as end - beg + 1):
#   >= 0  number of positions lying strictly between disjoint intervals
#         (0 when they are directly adjacent)
#   <  0  minus the number of positions the two intervals share
# The result does not depend on the order of the two intervals.
sub _interval_distance {
    my ($beg_a, $end_a, $beg_b, $end_b) = @_;
    my $distance = 0;
    if ($beg_a > $end_b or $beg_b > $end_a) {
        # Disjoint.  Both directions subtract 1; the former code omitted it
        # when the first interval was on the right, which made the value
        # depend on argument order.
        if ($beg_a > $end_b) {
            $distance = $beg_a - $end_b - 1;
        }
        if ($beg_b > $end_a) {
            $distance = $beg_b - $end_a - 1;
        }
    }
    elsif ($beg_a <= $end_b and $beg_a >= $beg_b) {
        # The first interval starts inside the second one.
        $distance = ($end_a > $end_b)
            ? -($end_b - $beg_a + 1)     # partial overlap
            : -($end_a - $beg_a + 1);    # first fully contained
    }
    elsif ($beg_b <= $end_a and $beg_b >= $beg_a) {
        # The second interval starts inside the first one.
        $distance = ($end_b > $end_a)
            ? -($end_a - $beg_b + 1)     # partial overlap
            : -($end_b - $beg_b + 1);    # second fully contained
    }
    return $distance;
}

my ($link_in, $link_out);
if ($input eq '-') {
    $link_in = \*STDIN;
}
else {
    open($link_in, '<', $input)
        or die "Error: can not open links input '$input': $!\n";
}
if ($output eq '-') {
    $link_out = \*STDOUT;
}
else {
    open($link_out, '>', $output)
        or die "Error: can not write output '$output': $!\n";
}

while (my $line = <$link_in>) {
    # Strip LF as well as CRLF so a trailing ID column can match %idhash.
    $line =~ s/\r?\n\z//;
    my @arr = split(/\t/, $line);

    # A missing column is looked up as the empty string, which is what an
    # undef hash key resolves to, but without the warning.
    my $id_a = defined $arr[0] ? $arr[0] : '';
    my $id_b = defined $arr[2] ? $arr[2] : '';
    my $rec_a = exists $idhash{$id_a} ? $idhash{$id_a} : undef;
    my $rec_b = exists $idhash{$id_b} ? $idhash{$id_b} : undef;

    my $strandol = _strand_token($rec_a) . _strand_token($rec_b);
    my $distance = 'NaN';
    if (defined $rec_a and defined $rec_b) {
        $distance = _interval_distance(
            $rec_a->{'beg'}, $rec_a->{'end'},
            $rec_b->{'beg'}, $rec_b->{'end'},
        );
    }

    $line = $line . "\t$strandol\t$distance";
    foreach my $record ($rec_a, $rec_b) {
        if (defined $record) {
            $line = $line . "\t" . $record->{'beg'} . "\t" . $record->{'end'} . "\t" . $record->{'str'};
        }
        else {
            $line = $line . "\tNaN\tNaN\tNaN";
        }
    }
    print {$link_out} "$line\n"
        or die "Error: can not write output '$output': $!\n";
}

close $link_in unless ($input eq '-');
# Buffered write errors only surface when the handle is closed.
close $link_out
    or die "Error: can not finish writing output '$output': $!\n";