-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcyclichhm.cpp
160 lines (140 loc) · 4.25 KB
/
cyclichhm.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/*********************************************
* Cyclic hashed history model
*
* This is a memory efficient way of tracking the probability history of arbitrarily large values in O(n) space and O(1) time.
* ModDensity figures out common multiples of all offsets so we can utilize it for more accurate parsing.
*
* Update(Word); // Update cyclic stack with new symbol, remove oldest symbol, update histogram
* GetHistory(Word); // Access histogram
* FindPeaks(Word, limit); // Compare word to a limited level of skewness
**********************************************/
#include "cyclichhm.hpp"
/**
* Create a circular buffer of a certain width (width determines how many elements we can store before overwriting old elements)
*/
CyclicHashHistory::CyclicHashHistory(int size)
{
if(size < 0)
Error("Cannot initialize a cyclic hash history model of a negative size!");
SizeOfCB = size;
PosInCB = 0;
PreviousValue = 0;
CircularBuffer = (unsigned short*)calloc(SizeOfCB, sizeof(unsigned short));
History = (unsigned int*)calloc(HashSize, sizeof(unsigned int));
ModDensity = (unsigned int*)calloc(ModSize, sizeof(unsigned int));
if(History == NULL || CircularBuffer == NULL || ModDensity == NULL)
Error("Failed to allocate cyclic model buffers!");
}
CyclicHashHistory::~CyclicHashHistory()
{
free(CircularBuffer);
free(History);
free(ModDensity);
}
inline unsigned int CyclicHashHistory::Hash(Index value)
{
return (value * 0x9E3779B1) >> (32 - HashBits);
}
/**
* Stack the value into the next position
*/
void CyclicHashHistory::Update(Index value)
{
unsigned int h = Hash(value);
unsigned int oldh = CircularBuffer[PosInCB % SizeOfCB];
// Add element to buffer and increment the hash histogram
CircularBuffer[PosInCB % SizeOfCB] = h;
History[h]++;
// Remove the overwritten element from the hash histogram
if(PosInCB >= SizeOfCB)
History[oldh]--;
// Compute a histogram of common multiples
unsigned int diff = PreviousValue ^ value;
ModDensity[diff % ModSize]++;
PosInCB++;
}
/**
* Return the histogram associated with the current value's statistics within the circular buffer
*/
unsigned int CyclicHashHistory::GetHistory(Index value)
{
return History[Hash(value)];
}
/**
* Make sure nothing fucked up
*/
void CyclicHashHistory::Assert()
{
if(PosInCB >= SizeOfCB)
{
int total = 0;
for(int i = 0; i < HashSize; i++)
total += History[i];
if(total != SizeOfCB)
{
printf("Hist size: %i, expected: %i\n", total, SizeOfCB);
Error("Invalid history table!");
}
}
}
/**
* Reduce value until we find peaks within the history buffer of direct subsets.
* Return true if the value within histogram is greater than the limit, else return false.
*/
bool CyclicHashHistory::FindPeaks(Index value)
{
// Decompose value into subsets, if the structure is highly skewed return true, else if the history of the current value is common; return true.
int k = value;
int reduce = (StructureWidth <= 1) ? 2 : StructureWidth;
while(k)
{
//int MaxDensity = ModDensity[StructureWidth];
int div = (AverageDensity == 0) ? 1 : AverageDensity;
if(ModDensity[k % ModSize] > (UniqueDensities / (div * div))) // A simple hueristic for anti-context parsing, generally very accurate (low noise)
return true;
k /= reduce;
}
return false;
}
/**
* TODO: AverageDensity is actually pointless, the best thing to do is create an inverse sorted map for ranks.
* eg: if(rank[value % size] < 128)
*/
void CyclicHashHistory::BuildModel()
{
// Find the most dense region (this helps us find underlying structures)
AverageDensity = 0;
int zeros = 0;
for (int j = 0; j < ModSize; j++)
{
AverageDensity += ModDensity[j];
if(ModDensity[j] == 0) zeros++;
}
if(ModSize > zeros)
AverageDensity /= (ModSize - zeros);
UniqueDensities = ModSize - zeros;
int max = ModDensity[0];
int bsym = 0;
for(int i = 1; i < ModSize; i++)
{
int cur = ModDensity[i];
if(cur > max)
{
bsym = i;
max = cur;
}
}
if(bsym == 0)
StructureWidth = 1;
else
StructureWidth = bsym;
}
void CyclicHashHistory::CleanModel()
{
AverageDensity = 0;
for(int i = 0; i < ModSize; i++)
{
ModDensity[i] = 0;
}
StructureWidth = 1;
}