-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathbagel4_GeneTable_2_json.pl
executable file
·181 lines (148 loc) · 6.27 KB
/
bagel4_GeneTable_2_json.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env perl
# Anne de Jong, November 2017
# Convert a region GeneTable to json
# New version: from one queryname.GeneTables file, create a separate JSON file for each AOI_name (queryname.AOI_x)
use strict ;
use warnings ;
use lib "/data/bagel4/lib" ;
use bagel4_functions ;
use lib "/data/molgentools/lib";
use anne_files ;
use anne_genomics ;
use File::Basename;
# ---------------------------------------------------------- parameters -------------------------------------------------------------------------
# Directory holding the input tables; overridden by -s. The JSON output is written here too.
my $sessiondir = '/usr/bagel4/test';
# Directory of this script (not referenced anywhere else in this file).
my $program_dir = dirname($0) ;
# File name of the GeneTables input, relative to $sessiondir; set by -table (required).
my $GeneTables ;
# Help text printed by parseparam() on -h/--help or when -table is missing.
# Only -s and -table are parsed; -json is listed because older callers still pass it.
my $usage = "option:
-s sessiondir [default=$sessiondir]
-table table input file (file name relative to sessiondir)
-json ignored, accepted for backward compatibility; output is always written to sessiondir/AOI_name.GeneTable.json
input table columns:
AOI_name name of the dna sequence
region_size
gene_name name of the genes
gene_start
gene_end
gene_strand
gene_color
motifs
function
annotation
reads the annotation file generated by bagel4_annotation_of_AOI.pl and converts each AOI to a JSON file
e.g. /usr/bagel4/bagel4_gene_table_2_json.pl -s /usr/bagel4 -table gene_table_example.txt
e.g. /usr/bagel4/bagel4_gene_table_2_json.pl -s /usr/bagel4/test -table NC_0085331.GeneTables
" ;
# Fill $sessiondir / $GeneTables from @ARGV (dies with $usage on bad input).
parseparam() ;
# ------------------------------------------------------------------------------------ main -------------------------------------------------------------------
# Drawing constants; $gene_hight and $gene_point are also read by add_gene_polygon().
my $gene_hight = 2 ;    # y extent of a gene polygon
my $gene_point = 0.4 ;  # x length of the arrow tip of a gene polygon
my $line_ypos = 10 ;    # y position of the DNA base line

# Escape a value so it can be placed between double quotes in JSON.
# Undefined values become the empty string (the same text the unescaped
# interpolation produced, but without an "uninitialized" warning).
my $json_escape = sub {
    my $text = shift ;
    $text = '' if !defined $text ;
    $text =~ s/(["\\])/\\$1/g ;     # quote and backslash must be backslash-escaped
    $text =~ s/\n/\\n/g ;
    $text =~ s/\r/\\r/g ;
    $text =~ s/\t/\\t/g ;
    $text =~ s/([\x00-\x1f])/sprintf('\\u%04x', ord($1))/ge ;  # remaining control characters
    return $text ;
};

# 1. Get the AOIs of the query from the GeneTables
my %tmp_hash = anne_files::read_table_to_hash("$sessiondir/$GeneTables"); # read the AOI_names from the original GeneTable
my %all_contigs = anne_files::read_table_to_hash("$sessiondir/00.all_contigs.table"); # read by get_filename()
my @AOI_names = keys %tmp_hash ;
my %table ; # rows of the AOI currently being processed; also read by add_gene_polygon()

# 2. Make for each AOI a GeneTable JSON file
foreach my $AOI_name (sort @AOI_names) {
    print "GeneTable to JSON: $AOI_name\n";
    my $filename = get_filename($AOI_name);
    %table = anne_files::Table2hash_v2("$sessiondir/$AOI_name.GeneTable", 'true');

    # data for the dna base line
    my @json = "{\n\"AOIs\": [ \n";
    my $aoi_name_json = $json_escape->($AOI_name) ;
    my $filename_json = $json_escape->($filename) ;
    my @aoi_element = (
        '{',
        "\"name\": \"$aoi_name_json\",",
        "\"filename\": \"$filename_json\",",
        "\"color\": \"gray\",",
        "\"yline\": $line_ypos,",
        "\"xstart\": 5,",
        "\"points\": [ {\"x\":0,\"y\":$line_ypos}, {\"x\":800,\"y\":$line_ypos} ]",
        '}',
    ) ;
    my @elements = ( (join "\n", @aoi_element)."\n" ) ;
    push @json, (join ',', @elements) ;
    push @json, '],' ;

    # data and coords for the genes, ordered by start position
    push @json,'"Genes": [';
    my @genes ;
    foreach my $ID (sort {$table{$a}{gene_start} <=> $table{$b}{gene_start}} keys %table) {
        # Placeholder texts for missing columns (spelling kept as-is: consumers may match on it).
        $table{$ID}{motifs} = "no motifs found" if (!defined($table{$ID}{motifs})) ;
        $table{$ID}{annotation} = "unkown function" if (!defined($table{$ID}{annotation})) ;

        my $gennamePos = $table{$ID}{gene_start} + abs($table{$ID}{gene_start} - $table{$ID}{gene_end})/2 ; # genename will be placed in the center
        my $xtext = 100 * $gennamePos / $table{$ID}{region_size} ; # rescale to 100

        # All free-text columns go through the escaper so the output stays valid JSON.
        my %str ;
        foreach my $field (qw(gene_name orf gene_strand real_strand gene_color motifs function annotation protein dna)) {
            $str{$field} = $json_escape->($table{$ID}{$field}) ;
        }

        # "name" is emitted once only; the original wrote the same key twice per object.
        my @element = (
            '{',
            "\"name\": \"$str{gene_name}\",",
            "\"orfname\": \"$str{orf}\",",
            "\"gene_start\": $table{$ID}{gene_start},",
            "\"gene_end\": $table{$ID}{gene_end},",
            "\"gene_strand\": \"$str{gene_strand}\",",
            "\"real_start\": $table{$ID}{real_start},",
            "\"real_end\": $table{$ID}{real_end} ,",
            "\"real_strand\": \"$str{real_strand}\",",
            "\"color\": \"$str{gene_color}\",",
            "\"yline\": $line_ypos,",
            "\"angle\": -60,",
            "\"xtext\": $xtext,",
            "\"motifs\": \"$str{motifs}\",",
            "\"function\": \"$str{function}\",",
            "\"annotation\":\"$str{annotation}\",",
            "\"protein\":\"$str{protein}\",",
            "\"dna\":\"$str{dna}\",",
            "\"points\": [".add_gene_polygon($ID)."]}",
        ) ;
        push @genes, (join "\n", @element)."\n" ;
    }
    push @json, (join ',', @genes) ;
    push @json, ']}' ;
    anne_files::write_lines("$sessiondir/$AOI_name.GeneTable.json", @json) ;
}
# ----------------------------------------------------------------------------- functions ----------------------------------------------------------
# Look up the source file name that belongs to an AOI.
#   $AOI_name : AOI identifier; everything before the last ".AOI" is taken as the query name
# Returns the 'filename' field of the %all_contigs entry whose 'queryname'
# equals that query name, or '' when the name has no ".AOI" part or no entry
# (with a defined filename) matches.
sub get_filename {
    my $AOI_name = shift ;
    my $result = '' ;
    if (defined $AOI_name and $AOI_name =~ /(.*)\.AOI/) {
        my $queryname = $1;
        # List-context assignment: take the first matching KEY. A plain scalar
        # assignment would store grep's match COUNT instead of the key.
        # Keys are sorted so the choice is stable if several entries match.
        my ($key) = grep {
            defined $all_contigs{$_}{queryname}
                and $all_contigs{$_}{queryname} eq $queryname
        } sort keys %all_contigs;
        if (defined $key and defined $all_contigs{$key}{filename}) {
            $result = $all_contigs{$key}{filename} ;
        }
    }
    return $result ;
}
# Format one polygon corner as a JSON object literal: {"x":<x>,"y":<y>}.
# Both values are inserted as given, without quoting.
sub json_coord {
    my ($x_value, $y_value) = @_ ;
    return sprintf '{"x":%s,"y":%s}', $x_value, $y_value ;
}
# Build the five corner points of the arrow-shaped polygon for one gene.
#   $ID : key of the gene in the file-level %table
# gene_start and gene_end are rescaled to a 0-100 axis using region_size.
# A '+' strand gene gets its tip at the end coordinate; any other strand
# value gets its tip at the start coordinate.
# Returns the corners as a comma-separated string of json_coord() objects.
sub add_gene_polygon {
    my $ID = shift ;
    my $left  = 100 * $table{$ID}{gene_start} / $table{$ID}{region_size} ;
    my $right = 100 * $table{$ID}{gene_end} / $table{$ID}{region_size} ;
    my $half_height = $gene_hight / 2 ;

    my @corners ;
    if ($table{$ID}{gene_strand} eq '+') {
        @corners = (
            [ $left,                0            ],
            [ $left,                $gene_hight  ],
            [ $right - $gene_point, $gene_hight  ],
            [ $right,               $half_height ],
            [ $right - $gene_point, 0            ],
        ) ;
    }
    else {
        @corners = (
            [ $left,               $half_height ],
            [ $left + $gene_point, $gene_hight  ],
            [ $right,              $gene_hight  ],
            [ $right,              0            ],
            [ $left + $gene_point, 0            ],
        ) ;
    }
    return join ',', map { json_coord($_->[0], $_->[1]) } @corners ;
}
# Read the command line into the file-level settings.
#   -h / --help  : print $usage and stop
#   -s <dir>     : sets $sessiondir
#   -table <file>: sets $GeneTables (required)
# Any other argument is skipped. Dies with $usage when no table was given.
sub parseparam {
    # Options that take a value, mapped to the variable that receives it.
    my %target_of = (
        '-s'     => \$sessiondir,
        '-table' => \$GeneTables,
    ) ;
    my @pending = @ARGV ;   # work on a copy so @ARGV itself stays intact
    while (@pending) {
        my $flag = shift @pending ;
        die $usage if ($flag eq '-h' or $flag eq '--help') ;
        if (exists $target_of{$flag}) {
            ${ $target_of{$flag} } = shift @pending ;
        }
    }
    die $usage if (!$GeneTables) ;
}