-
Notifications
You must be signed in to change notification settings - Fork 60
/
Copy pathtabix.cpp
160 lines (142 loc) · 4.49 KB
/
tabix.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#include "tabix.hpp"
Tabix::Tabix(void) { }
Tabix::Tabix(string& file) {
has_jumped = false;
filename = file;
str.l = 0;
str.m = 0;
str.s = NULL;
const char* cfilename = file.c_str();
struct stat stat_tbi,stat_vcf;
char *fnidx = (char*) calloc(strlen(cfilename) + 5, 1);
strcat(strcpy(fnidx, cfilename), ".tbi");
hFILE *fp;
if (!(fp = hopen(cfilename, "r"))) {
cerr << "can't open " << cfilename;
return;
}
htsFormat fmt;
if ( hts_detect_format(fp,&fmt) < 0 )
{
cerr << "[tabix++] was bgzip used to compress this file? " << file << endl;
free(fnidx);
exit(1);
}
if (hclose(fp) != 0) {
cerr << "can't close " << cfilename;
return;
}
// Common source of errors: new VCF is used with an old index
stat(fnidx, &stat_tbi);
stat(cfilename, &stat_vcf);
if ( stat_vcf.st_mtime > stat_tbi.st_mtime )
{
cerr << "[tabix++] the index file is older than the vcf file. Please use '-f' to overwrite or reindex." << endl;
free(fnidx);
exit(1);
}
free(fnidx);
if ((fn = hts_open(cfilename, "r")) == 0) {
cerr << "[tabix++] fail to open the data file." << endl;
exit(1);
}
if ((tbx = tbx_index_load(cfilename)) == NULL) {
cerr << "[tabix++] failed to load the index file." << endl;
exit(1);
}
int nseq;
const char** seq = tbx_seqnames(tbx, &nseq);
for (int i=0; i<nseq; i++) {
chroms.push_back(seq[i]);
}
free(seq);
idxconf = &tbx_conf_vcf;
// set up the iterator, defaults to the beginning
current_chrom = chroms.begin();
iter = tbx_itr_querys(tbx, (current_chrom != chroms.end() ? current_chrom->c_str() : ""));
}
Tabix::~Tabix(void) {
tbx_itr_destroy(iter);
tbx_destroy(tbx);
free(str.s);
}
const kstring_t * Tabix::getKstringPtr(){
return &str;
}
void Tabix::getHeader(string& header) {
header.clear();
while ( hts_getline(fn, KS_SEP_LINE, &str) >= 0 ) {
if ( !str.l || str.s[0]!=tbx->conf.meta_char ) {
break;
} else {
header += string(str.s);
header += "\n";
}
}
// set back to start
current_chrom = chroms.begin();
if (iter) tbx_itr_destroy(iter);
iter = tbx_itr_querys(tbx, (current_chrom != chroms.end() ? current_chrom->c_str() : ""));
}
bool Tabix::setRegion(string& region) {
tbx_itr_destroy(iter);
iter = tbx_itr_querys(tbx, region.c_str());
has_jumped = true;
return true;
}
bool Tabix::getNextLine(string& line) {
if (has_jumped) {
if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
line = string(str.s);
return true;
} else return false;
} else { // step through all sequences in the file
// we've never jumped, so read everything
if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
line = string(str.s);
return true;
} else {
// While we aren't at the end, advance. While we're still not at the end...
while (current_chrom != chroms.end() && ++current_chrom != chroms.end()) {
tbx_itr_destroy(iter);
iter = tbx_itr_querys(tbx, current_chrom->c_str());
if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
line = string(str.s);
return true;
} else {
++current_chrom;
}
}
return false;
}
}
}
bool Tabix::getNextLineKS() {
if (has_jumped) {
if (iter &&
tbx_itr_next(fn, tbx, iter, &str) >= 0) {
//line = &str;
return true;
} else
return false;
} else { // step through all sequences in the file
// we've never jumped, so read everything
if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
//line = &str;
return true;
} else {
// While we aren't at the end, advance. While we're still not at the end...
while (current_chrom != chroms.end() && ++current_chrom != chroms.end()) {
tbx_itr_destroy(iter);
iter = tbx_itr_querys(tbx, current_chrom->c_str());
if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
//line = &str;
return true;
} else {
++current_chrom;
}
}
return false;
}
}
}