-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFastaIndex.h
90 lines (80 loc) · 2.04 KB
/
FastaIndex.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#ifndef FAI_H_
#define FAI_H_
#include <map>
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
#include <assert.h>
using namespace std;
class GFFastaIndex {
public:
map<string, vector<long> > fai;
ifstream fastaIn;
void Initialize(string name) {
fastaIn.open(name.c_str());
string faiName = name + ".fai";
ifstream faiIn(faiName.c_str());
string contig;
long length, start, lineSeq, lineLen;
while ((faiIn >> contig >> length >> start >> lineSeq >> lineLen)) {
fai[contig] = vector<long>(4);
fai[contig][0] = length;
fai[contig][1] = start;
fai[contig][2] = lineSeq;
fai[contig][3] = lineLen;
}
}
void Print() {
return;
map<string, vector<long> >::iterator faiIt;
for (faiIt = fai.begin(); faiIt != fai.end(); ++faiIt) {
int i;
std::cout << (*faiIt).first << " ";
for (i = 0; i < 5; i++) {
cout << (*faiIt).second[i] << " ";
}
cout << endl;
}
}
void GetSeq(string &seq, string &contig, int start=0, int end=0) {
if (fai.find(contig) == fai.end()) {
start=0;
end = 0;
seq="";
return;
}
this->Print();
vector<long> &v = fai[contig];
long lineLen = fai[contig][2];
long fileLineLength = fai[contig][3];
long chrStart = fai[contig][1];
long seqLength;
if (start == end and start == 0) {
start = 0;
end = fai[contig][0];
}
long startLine = start / lineLen;
long endLine = end / lineLen;
long startLinePos = start % lineLen;
long endLinePos = end % lineLen;
long startFilePos = chrStart + startLine * fileLineLength + startLinePos;
long endFilePos = chrStart + endLine * fileLineLength + endLinePos;
long fileReadLength = endFilePos - startFilePos;
fastaIn.seekg(startFilePos);
string fileBuf;
fileBuf.resize(endFilePos - startFilePos);
fastaIn.get(&fileBuf[0], endFilePos - startFilePos, '\0');
seq.resize(end - start);
int i,j;
j = 0;
for (i = 0; i < fileBuf.size(); i++) {
if (fileBuf[i] != '\n' and fileBuf[i] != '\0') {
assert(j < seq.size());
seq[j] = fileBuf[i];
j++;
}
}
}
};
#endif