-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxor-benchmark.cpp
128 lines (97 loc) · 3.67 KB
/
xor-benchmark.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <iostream>
#include <immintrin.h>
#include <memory.h>
#include <windows.h>
#include <time.h>
#include <array>
#include <fstream>
#include "workbench.hpp"
#define XOR_BLOCK_SIZE ((256)/(8)) // 32
// totalxor XORes all the bytes inside the buffer to all the other bytes
std::array<uint8_t, XOR_BLOCK_SIZE> totalxor(const uint8_t* data) {
std::array<uint8_t, XOR_BLOCK_SIZE> result;
memcpy(result.data(), data, XOR_BLOCK_SIZE);
for (size_t m = 0; m < XOR_BLOCK_SIZE; m++) {
for (size_t n = 0; n < XOR_BLOCK_SIZE; n++) {
result[m] ^= data[n];
}
}
return result;
}
std::array<uint8_t, XOR_BLOCK_SIZE> totalxor_avx2(const uint8_t* data) {
auto vect_data = _mm256_loadu_si256((const __m256i_u*)data);
for (size_t i = 0; i < XOR_BLOCK_SIZE; i++) {
auto vect_xor = _mm256_set1_epi8(data[i]);
vect_data = _mm256_xor_si256(vect_data, vect_xor);
}
std::array<uint8_t, XOR_BLOCK_SIZE> result;
_mm256_storeu_si256((__m256i_u*)result.data(), vect_data);
return result;
}
std::array<uint8_t, XOR_BLOCK_SIZE> totalxor_sse(const uint8_t* data) {
__m128i_u vect_data[2] = {
_mm_loadu_si128((const __m128i_u*)data),
_mm_loadu_si128((const __m128i_u*)(data + (XOR_BLOCK_SIZE / 2)))
};
for (size_t m = 0; m < XOR_BLOCK_SIZE; m++) {
auto vect_xor = _mm_set1_epi8(data[m]);
vect_data[0] = _mm_xor_si128(vect_data[0], vect_xor);
vect_data[1] = _mm_xor_si128(vect_data[1], vect_xor);
}
std::array<uint8_t, XOR_BLOCK_SIZE> result;
_mm_storeu_si128((__m128i_u*)result.data(), vect_data[0]);
_mm_storeu_si128((__m128i_u*)(result.data() + (XOR_BLOCK_SIZE / 2)), vect_data[1]);
return result;
}
int main() {
std::cout << "This test is gonna perform " << TEST_RUNS << " runs of " << TEST_OPS << " XOR operations on 256-bit buffers\r\n";
std::cout << "Test has been started...\r\n\r\n";
const std::array<uint8_t, XOR_BLOCK_SIZE> dataBlock = {156,252,14,198,96,30,193,195,143,159,237,175,168,57,210,42,10,6,55,236,246,92,66,62,139,123,5,203,47,172,194,93};
time_t timer;
// XOR benchmark
std::array<time_t, TEST_RUNS> test1_ctrl;
std::array<time_t, TEST_RUNS> test1_sse;
std::array<time_t, TEST_RUNS> test1_avx2;
{
// test without any simd
std::cout << "XOR test control run...";
for (size_t m = 0; m < TEST_RUNS; m++) {
timer = timeGetTime();
for (size_t n = 0; n < TEST_OPS; n++) {
auto result = totalxor(dataBlock.data());
}
test1_ctrl[m] = timeGetTime() - timer;
}
std::cout << " AVG: " << avgtime(test1_ctrl.data(), TEST_RUNS) << "ms/" << TEST_OPS <<"ops\r\n";
// test with sse
std::cout << "XOR teset SSE run...";
for (size_t m = 0; m < TEST_RUNS; m++) {
timer = timeGetTime();
for (size_t n = 0; n < TEST_OPS; n++) {
auto result = totalxor_sse(dataBlock.data());
}
test1_sse[m] = timeGetTime() - timer;
}
std::cout << " AVG: " << avgtime(test1_sse.data(), TEST_RUNS) << "ms/" << TEST_OPS <<"ops\r\n";
// test with avx2
std::cout << "XOR teset AVX2 run...";
for (size_t m = 0; m < TEST_RUNS; m++) {
timer = timeGetTime();
for (size_t n = 0; n < TEST_OPS; n++) {
auto result = totalxor_avx2(dataBlock.data());
}
test1_avx2[m] = timeGetTime() - timer;
}
std::cout << " AVG: " << avgtime(test1_avx2.data(), TEST_RUNS) << "ms/" << TEST_OPS <<"ops\r\n";
}
// save test data
std::cout << "\r\nWriting test data to .csv...\r\n";
std::string filename = std::string("benchmarks-data/") + "benchmark_xor_" + std::to_string(time(nullptr)) + ".csv";
std::ofstream output(filename, std::ios::out);
output << "Control,SSE,AVX2,Unit" << "\n";
for (size_t i = 0; i < TEST_RUNS; i++){
output << test1_ctrl[i] << "," << test1_sse[i] << "," << test1_avx2[i] << ",ms/" << TEST_OPS << " ops\n";
}
output.close();
return 0;
}