forked from mourisl/T1K
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathKmerCode.hpp
150 lines (126 loc) · 2.86 KB
/
KmerCode.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#ifndef _MOURISL_KMERCODE_HEADER
#define _MOURISL_KMERCODE_HEADER
#include <stdio.h>
#include "defs.h"
extern char nucToNum[26] ;
extern char numToNuc[4] ;
class KmerCode
{
private:
int kmerLength ;
int invalidPos ; // The position contains characters other than A,C,G,T in the code
uint64_t code ;
uint64_t mask ;
public:
KmerCode()
{
}
KmerCode( int kl )
{
int i ;
kmerLength = kl ;
code = 0 ;
invalidPos = -1 ;
mask = 0 ;
for ( i = 0 ; i < kmerLength ; ++i )
{
mask = mask << 2 ;
mask = mask | 3 ;
}
}
KmerCode( const KmerCode& in )
{
kmerLength = in.kmerLength ;
invalidPos = in.invalidPos ;
mask = in.mask ;
code = in.code ;
}
void Restart() { code = 0ull ; invalidPos = -1 ; }
uint64_t GetCode() { return code ; }
void SetCode( uint64_t in )
{
code = in ;
invalidPos = -1 ;
}
uint64_t GetCanonicalKmerCode()
{
int i ;
uint64_t crCode = 0ull ; // complementary code
for ( i = 0 ; i < kmerLength ; ++i )
{
//uint64_t tmp = ( code >> ( 2ull * (k - i - 1) ) ) & 3ull ;
//crCode = ( crCode << 2ull ) | ( 3 - tmp ) ;
uint64_t tmp = ( code >> ( 2ull * i ) ) & 3ull ;
crCode = ( crCode << 2ull ) | ( 3ull - tmp ) ;
}
return crCode < code ? crCode : code ;
}
uint64_t GetReverseComplementCode()
{
uint64_t crCode = 0ull ; // complementary code
for ( int i = 0 ; i < kmerLength ; ++i )
{
//uint64_t tmp = ( code >> ( 2ull * (k - i - 1) ) ) & 3ull ;
//crCode = ( crCode << 2ull ) | ( 3 - tmp ) ;
uint64_t tmp = ( code >> ( 2ull * i ) ) & 3ull ;
crCode = ( crCode << 2ull ) | ( 3ull - tmp ) ;
}
return crCode ;
}
int GetKmerLength() { return kmerLength ; }
bool IsValid()
{
/*if ( invalidPos != -1 )
{
printf( "hi\n") ;
}*/
return ( invalidPos == -1 ) ;
}
void Append( char c )
{
if ( invalidPos != -1 )
++invalidPos ;
code = ( ( code << 2ull ) & mask ) |
( (uint64_t)( nucToNum[ c - 'A' ] & 3 ) ) ;
//if ( nucToNum[c - 'A'] == -1 )
if ( c == 'N' )
{
invalidPos = 0 ;
}
if ( invalidPos >= kmerLength )
invalidPos = -1 ;
}
void Prepend( char c )
{
ShiftRight( 1 ) ;
if ( nucToNum[c-'A'] == -1 )
{
invalidPos = kmerLength - 1 ;
}
code = ( code | ( (uint64_t)( nucToNum[c - 'A'] & 3 ) << ( 2ull * ( kmerLength - 1 ) ) ) ) & mask ;
}
inline void ShiftRight( int k )
{
if ( invalidPos != -1 )
invalidPos -= k ;
code = ( code >> ( 2ull * k ) ) & ( mask >> ( 2ull * k ) ) ;
if ( invalidPos < 0 )
invalidPos = -1 ;
}
bool IsEqual( const KmerCode &in )
{
if ( code == in.code )
return true ;
else
return false ;
}
KmerCode& operator=( const KmerCode& in )
{
kmerLength = in.kmerLength ;
invalidPos = in.invalidPos ;
mask = in.mask ;
code = in.code ;
return *this ;
}
} ;
#endif