forked from simongog/sdsl-lite
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext-statistics.cpp
41 lines (36 loc) · 1.09 KB
/
text-statistics.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#include <sdsl/suffix_trees.hpp>
#include <iostream>
using namespace std;
using namespace sdsl;
// Compressed suffix tree type used by this program (cst_sct3 with its default template arguments).
using cst_t = cst_sct3<>;
// Character type exposed by the chosen suffix tree; used when reading BWT symbols.
using char_type = cst_t::char_type;
int main(int argc, char* argv[])
{
if (argc < 2) {
cout << "Usage: "<< argv[0] << " file" << endl;
cout << "(1) Generates the CST of file." << endl;
cout << "(2) Calculates the avg LCP value and the runs in the BWT." << endl;
return 1;
}
cst_t cst;
construct(cst, argv[1], 1);
long double runs = 1;
long double avg_lcp = 0;
if (cst.csa.size()) {
char_type prev_bwt = cst.csa.bwt[0];
for (uint64_t i=1; i<cst.csa.size(); ++i) {
char_type bwt = cst.csa.bwt[i];
if (prev_bwt != bwt) {
runs += 1.0;
}
prev_bwt = bwt;
avg_lcp += cst.lcp[i];
}
avg_lcp /= cst.csa.size();
for (size_t k=0; k<=5; k++) {
cout << "H_" << k << ": " << Hk(cst,k).first << endl;
}
cout << "avg LCP: " << avg_lcp << endl;
cout << "runs in BWT: " << runs << endl;
}
}