-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathanalyzer_oneapi.cc
116 lines (99 loc) · 3.86 KB
/
analyzer_oneapi.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>

#include <CL/sycl.hpp>

#include "input.h"
#include "loops.h"
#include "modules.h"
#include "output.h"
#include "rawtodigi_oneapi.h"
namespace oneapi {
void exception_handler(cl::sycl::exception_list exceptions) {
for (auto const &exc_ptr : exceptions) {
try {
std::rethrow_exception(exc_ptr);
} catch (cl::sycl::exception const &e) {
std::cerr << "Caught asynchronous SYCL exception:\n" << e.what() << std::endl;
}
}
}
void analyze(cl::sycl::device device, Input const &input, Output &output, double &totaltime) {
#if __SYCL_COMPILER_VERSION <= 20200118
// Intel oneAPI beta 4
cl::sycl::ordered_queue queue{device, exception_handler};
#else
// Intel SYCL branch
cl::sycl::queue queue{device, exception_handler, cl::sycl::property::queue::in_order()};
#endif
totaltime = 0;
for (int i = 0; i <= NLOOPS; ++i) {
output = Output{};
#if __SYCL_COMPILER_VERSION <= 20200118
auto input_d = (Input *)cl::sycl::malloc_device(sizeof(Input), queue.get_device(), queue.get_context());
#else
auto input_d = (Input *)cl::sycl::malloc_device(sizeof(Input), queue);
#endif
if (input_d == nullptr) {
std::cerr << "oneAPI runtime failed to allocate " << sizeof(Input) << " bytes of device memory" << std::endl;
exit(1);
}
#if __SYCL_COMPILER_VERSION <= 20200118
auto input_h = (Input *)cl::sycl::malloc_host(sizeof(Input), queue.get_context());
#else
auto input_h = (Input *)cl::sycl::malloc_host(sizeof(Input), queue);
#endif
if (input_h == nullptr) {
std::cerr << "oneAPI runtime failed to allocate " << sizeof(Input) << " bytes of host memory" << std::endl;
exit(1);
}
std::memcpy(input_h, &input, sizeof(Input));
#if __SYCL_COMPILER_VERSION <= 20200118
auto output_d = (Output *)cl::sycl::malloc_device(sizeof(Output), queue.get_device(), queue.get_context());
#else
auto output_d = (Output *)cl::sycl::malloc_device(sizeof(Output), queue);
#endif
if (output_d == nullptr) {
std::cerr << "oneAPI runtime failed to allocate " << sizeof(Output) << " bytes of device memory" << std::endl;
exit(1);
}
#if __SYCL_COMPILER_VERSION <= 20200118
auto output_h = (Output *)cl::sycl::malloc_host(sizeof(Output), queue.get_context());
#else
auto output_h = (Output *)cl::sycl::malloc_host(sizeof(Output), queue);
#endif
if (output_h == nullptr) {
std::cerr << "oneAPI runtime failed to allocate " << sizeof(Output) << " bytes of host memory" << std::endl;
exit(1);
}
output_h->err.construct(pixelgpudetails::MAX_FED_WORDS, output_d->err_d);
auto start = std::chrono::high_resolution_clock::now();
queue.memcpy(input_d, input_h, sizeof(Input));
queue.memcpy(output_d, output_h, sizeof(Output));
rawtodigi(input_d, output_d, input.wordCounter, true, true, i == 0, queue);
queue.memcpy(output_h, output_d, sizeof(Output));
queue.wait_and_throw();
auto stop = std::chrono::high_resolution_clock::now();
output_h->err.set_data(output_h->err_d);
std::memcpy(&output, output_h, sizeof(Output));
output.err.set_data(output.err_d);
#if __SYCL_COMPILER_VERSION <= 20200118
cl::sycl::free(output_d, queue.get_context());
cl::sycl::free(input_d, queue.get_context());
cl::sycl::free(output_h, queue.get_context());
cl::sycl::free(input_h, queue.get_context());
#else
cl::sycl::free(output_d, queue);
cl::sycl::free(input_d, queue);
cl::sycl::free(output_h, queue);
cl::sycl::free(input_h, queue);
#endif
auto diff = stop - start;
auto time = std::chrono::duration_cast<std::chrono::microseconds>(diff).count();
if (i != 0) {
totaltime += time;
}
}
totaltime /= NLOOPS;
}
} // namespace oneapi