-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathkmap.hpp
134 lines (119 loc) · 3.83 KB
/
kmap.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/**
* MALVA - genotyping by Mapping-free ALternate-allele detection of known VAriants
* Copyright (C) 2019 Giulia Bernardini, Luca Denti, Marco Previtali
*
* This file is part of MALVA.
*
* MALVA is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MALVA is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with MALVA; see the file LICENSE. If not, see
* <https://www.gnu.org/licenses/>.
**/
#ifndef _KMAP_HPP_
#define _KMAP_HPP_
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <istream>
#include <ostream>
#include <string>
#include <unordered_map>
using namespace std;
// static const char RCN[128] = {
// 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
// 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
// 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20
// 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 30
// 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40
// 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50
// 0, 0, 0, 0, 0, 'T', 0, 'G', 0, 0, // 60
// 0, 'C', 0, 0, 0, 0, 0, 0, 'N', 0, // 70
// 0, 0, 0, 0, 'A', 0, 0, 0, 0, 0, // 80
// 0, 0, 0, 0, 0, 0, 0, 'T', 0, 'G', // 90
// 0, 0, 0, 'G', 0, 0, 0, 0, 0, 0, // 100
// 'N', 0, 0, 0, 0, 0, 'A', 0, 0, 0, // 110
// 0, 0, 0, 0, 0, 0, 0, 0 // 120
// };
/**
 * Counter table keyed by canonical k-mers.
 *
 * Every method that takes a k-mer first maps it to its canonical form (see
 * canonical()) and then operates on `kmers` with that key.
 *
 * NOTE: complementing relies on the lookup table `RCN`, which is NOT defined
 * in this header (only a commented-out copy is present); it must be declared
 * before this header is included.
 */
struct KMAP
{
    /// Canonical k-mer -> counter.
    std::unordered_map<std::string, int> kmers;

    KMAP() {}

    /**
     * Serializes the table to `stream` in a raw binary layout:
     * the entry count (size_t), then for each entry the key length
     * (string::size_type), the key bytes and the counter (int).
     * All integers are written as their in-memory bytes, so the result can
     * only be read back by a build with the same type sizes and byte order.
     *
     * @param stream output stream, expected to be opened in binary mode.
     * @return the same stream, so the caller can test its state.
     */
    std::ostream &operator>>(std::ostream &stream) const
    {
        const std::size_t size = kmers.size();
        stream.write(reinterpret_cast<const char *>(&size), sizeof(std::size_t));
        for (const auto &entry : kmers)
        {
            const std::string::size_type l = entry.first.length();
            stream.write(reinterpret_cast<const char *>(&l), sizeof(std::string::size_type));
            stream.write(entry.first.data(), static_cast<std::streamsize>(l));
            stream.write(reinterpret_cast<const char *>(&entry.second), sizeof(int));
        }
        return stream;
    }

    /**
     * Loads entries written by operator>> and stores them into `kmers`
     * (existing entries are kept; entries with the same key are overwritten).
     *
     * Reading stops at the first failed or short read, so a truncated stream
     * never causes uninitialized values to be inserted. Entries read before
     * the failure are kept.
     *
     * @param stream input stream, expected to be opened in binary mode.
     * @return the same stream; its fail state tells the caller whether the
     *         whole table was read.
     */
    std::istream &operator<<(std::istream &stream)
    {
        std::size_t size = 0;
        if (!stream.read(reinterpret_cast<char *>(&size), sizeof(std::size_t)))
            return stream;

        for (std::size_t i = 0; i < size; ++i)
        {
            std::string::size_type l = 0;
            if (!stream.read(reinterpret_cast<char *>(&l), sizeof(std::string::size_type)))
                break;

            std::string k(l, '\0');
            // &k[0] is valid even for an empty string and avoids casting away
            // the const of data().
            if (!stream.read(&k[0], static_cast<std::streamsize>(l)))
                break;

            int v = 0;
            if (!stream.read(reinterpret_cast<char *>(&v), sizeof(int)))
                break;

            kmers[k] = v;
        }
        return stream;
    }

    /**
     * Returns the complement of `c` as given by the `RCN` table.
     *
     * The character is converted to unsigned before indexing so that bytes
     * >= 0x80 cannot produce a negative index on platforms where `char` is
     * signed. Such bytes yield 0.
     * NOTE(review): the bound of 128 assumes `RCN` has 128 entries, as in the
     * commented-out copy of the table in this header - confirm against the
     * real definition.
     */
    static const char _compl(const char &c)
    {
        const unsigned char idx = static_cast<unsigned char>(c);
        return idx < 128 ? RCN[idx] : '\0';
    }

    /**
     * Returns the canonical form of `kmer`: the lexicographically smaller of
     * the k-mer itself and its reverse complement.
     *
     * @param kmer NUL-terminated k-mer.
     * @return the canonical k-mer as a string.
     */
    std::string canonical(const char *kmer) const
    {
        const std::size_t k = std::strlen(kmer);

        // Heap-backed buffer instead of a variable-length array (VLAs are a
        // compiler extension, not standard C++).
        std::string rc(kmer, k);
        std::transform(rc.begin(), rc.end(), rc.begin(), _compl);
        std::reverse(rc.begin(), rc.end());

        if (std::strcmp(kmer, rc.c_str()) < 0)
            return std::string(kmer, k);

        // Built from c_str() on purpose: a character without a complement
        // maps to 0, and the result is cut at the first such byte, exactly as
        // a C string would be.
        return std::string(rc.c_str());
    }

    /**
     * Tells whether the canonical form of `kmer` is a key of the table.
     */
    bool test_key(const char *kmer) const
    {
        return kmers.find(canonical(kmer)) != kmers.end();
    }

    /**
     * Inserts the canonical form of `kmer` with counter 0. If the key is
     * already present its counter is reset to 0.
     */
    void add_key(const char *kmer)
    {
        kmers[canonical(kmer)] = 0;
    }

    /**
     * Adds `counter` to the counter of the canonical form of `kmer`.
     * Does nothing when the key is not in the table.
     */
    void increment(const char *kmer, int counter)
    {
        const auto it = kmers.find(canonical(kmer));
        if (it != kmers.end())
            it->second += counter;
    }

    /**
     * Returns the counter of the canonical form of `kmer`, or 0 when the key
     * is not in the table.
     */
    int get_count(const char *kmer) const
    {
        const auto it = kmers.find(canonical(kmer));
        return it != kmers.end() ? it->second : 0;
    }
};
#endif