-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlinearCombination_aot_run.cpp
394 lines (319 loc) · 14.8 KB
/
linearCombination_aot_run.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
//To compile and run:
//Look in linearCombination_aot_compile.cpp
//#define USE_LSST
#ifdef USE_LSST
#include "lsst/afw/image.h"
namespace afwImage = lsst::afw::image;
namespace afwMath = lsst::afw::math;
#endif
#include <stdio.h>
#include <bitset>
#include "clock.h"
#include <iostream>
using namespace std;
// This is the header file generated by the ahead-of-time compilation
// process. We just include it here like any other C header.
#include "lincombo_aot.h"
/*
* This file serves as an example of how to use an "ahead of time"
* compiled Halide pipeline in an application. The compilation
* process generated a .o (linked against in this application's build
* process) as well as header file (lincombo_aot.h) that is included
* above.
*
*/
// Entry point. Builds the input, parameter and output buffers for the
// ahead-of-time compiled Halide pipeline `lincombo_aot`, runs it once to
// check for errors, benchmarks it over repeated runs and, when USE_LSST is
// defined, writes the result to FITS files.
//
// argc/argv are unused.
// Returns 0 on success, -1 if the Halide pipeline reports an error.
int main(int argc, char *argv[]) {
    //the precompiled Halide pipeline we are using expects 5 kernels
    //and 5 polynomials (each 3rd degree) with 10 coefficients
    const int num_kernels = 5;
    const int num_poly_coeff = 10;
    const int num_kernel_params = 3;
#ifdef USE_LSST
    auto im = afwImage::MaskedImage<float>("./images/calexp-004207-g3-0123.fits");
    int width = im.getWidth(), height = im.getHeight();
#else
    int width = 2048, height = 1489;
    printf("[no load]");
#endif
    printf("Loaded: %d x %d\n", width, height);

    // Pixel count computed in size_t so the byte counts below cannot
    // overflow int for large images.
    const size_t num_pixels = static_cast<size_t>(width) * static_cast<size_t>(height);

    // Input planes stored as raw bytes: 4 bytes (float) per image/variance
    // pixel and 2 bytes (uint16_t) per mask pixel. The trailing () value-
    // initializes the arrays to zero so that, without USE_LSST, the pipeline
    // does not read indeterminate memory.
    uint8_t *image = new uint8_t[num_pixels*4]();
    uint8_t *variance = new uint8_t[num_pixels*4]();
    uint8_t *mask = new uint8_t[num_pixels*2]();

#ifdef USE_LSST
    //Read image, converting all three planes to uint8_t arrays
    //for passing to the aot compiled Halide
    float curImage;
    float curVariance;
    uint16_t curMask;
    uint8_t *curImageUInt8Array;
    uint8_t *curVarianceUInt8Array;
    uint8_t *curMaskUInt8Array;
    for (int y = 0; y < height; y++) {
        afwImage::MaskedImage<float, lsst::afw::image::MaskPixel, lsst::afw::image::VariancePixel>::x_iterator inPtr = im.x_at(0, y);
        for (int x = 0; x < width; x++){
            curImage = (*inPtr).image();
            curVariance = (*inPtr).variance();
            curMask = (*inPtr).mask();
            inPtr++;

            // View each value as its underlying bytes and copy them out.
            curImageUInt8Array = reinterpret_cast<uint8_t*>(&curImage);
            curVarianceUInt8Array = reinterpret_cast<uint8_t*>(&curVariance);
            curMaskUInt8Array = reinterpret_cast<uint8_t*>(&curMask);
            for(int i = 0; i < 4; i++){
                image[(y*width + x)*4 + i] = curImageUInt8Array[i];
                variance[(y*width + x)*4 + i] = curVarianceUInt8Array[i];
            }
            for(int i = 0; i < 2; i++){
                mask[(y*width + x)*2 + i] = curMaskUInt8Array[i];
            }
        }
    }
#endif

    // Have a look in the header file included above (it is produced by the
    // ahead-of-time compilation step).
    // It starts with a definition of a buffer_t:
    //
    // typedef struct buffer_t {
    //     uint64_t dev;
    //     uint8_t* host;
    //     int32_t extent[4];
    //     int32_t stride[4];
    //     int32_t min[4];
    //     int32_t elem_size;
    //     bool host_dirty;
    //     bool dev_dirty;
    // } buffer_t;
    //
    // This is how Halide represents input and output images in
    // pre-compiled pipelines. There's a 'host' pointer that points to the
    // start of the image data, some fields that describe how to access
    // pixels, and some fields related to using the GPU that we'll ignore
    // for now (dev, host_dirty, dev_dirty).

    //Let's allocate the memory where we want to write our output:
    //for every pixel we need 4 image bytes, 4 variance bytes, and 2 mask bytes
    uint8_t *image_output = new uint8_t[num_pixels*4];
    uint8_t *variance_output = new uint8_t[num_pixels*4];
    uint8_t *mask_output = new uint8_t[num_pixels*2];

    //And the memory to store our parameters:
    //We need num_kernels*num_poly_coeff floats for the polynomial coefficients
    uint8_t *polynomial_coefficients = new uint8_t[num_kernels*num_poly_coeff*4];
    //We need num_kernels*num_kernel_params kernel parameters (2 standard deviations
    //and a rotation per kernel in this case)
    uint8_t *ker_params = new uint8_t[num_kernels*num_kernel_params*4];

    // Single cleanup routine shared by the success path and the error paths,
    // so an early return does not skip the deallocations.
    auto release_buffers = [&]() {
        delete[] image;
        delete[] variance;
        delete[] mask;
        delete[] polynomial_coefficients;
        delete[] ker_params;
        delete[] image_output;
        delete[] variance_output;
        delete[] mask_output;
    };

    // In AOT-compiled mode, Halide doesn't manage this memory for
    // you. You should use whatever image data type makes sense for
    // your application. Halide just needs pointers to it.

    // Now we make a buffer_t to represent our input and output. It's
    // important to zero-initialize them so you don't end up with
    // garbage fields that confuse Halide.
    buffer_t image_buf = {0};
    buffer_t variance_buf = {0};
    buffer_t mask_buf = {0};
    buffer_t poly_coef_buf = {0};
    buffer_t ker_params_buf = {0};
    buffer_t image_output_buf = {0};
    buffer_t variance_output_buf = {0};
    buffer_t mask_output_buf = {0};

    // The host pointers point to the start of the image data:
    image_buf.host = &image[0];
    variance_buf.host = &variance[0];
    mask_buf.host = &mask[0];
    poly_coef_buf.host = &polynomial_coefficients[0];
    ker_params_buf.host = &ker_params[0];
    image_output_buf.host = &image_output[0];
    variance_output_buf.host = &variance_output[0];
    mask_output_buf.host = &mask_output[0];

    // To access pixel (x, y) in a two-dimensional buffer_t, Halide
    // looks at memory address:
    // host + elem_size * ((x - min[0])*stride[0] + (y - min[1])*stride[1])
    // The stride in a dimension represents the number of elements in
    // memory between adjacent entries in that dimension. We have a
    // grayscale image stored in scanline order, so stride[0] is 1,
    // because pixels that are adjacent in x are next to each other in
    // memory.
    image_buf.stride[0] = variance_buf.stride[0] = mask_buf.stride[0] = 1;
    image_output_buf.stride[0] = variance_output_buf.stride[0] = 1;
    mask_output_buf.stride[0] = 1;
    poly_coef_buf.stride[0] = ker_params_buf.stride[0] = 1;

    // stride[1] is the width of the image, because pixels that are
    // adjacent in y are separated by a scanline's worth of pixels in
    // memory.
    image_buf.stride[1] = variance_buf.stride[1] = mask_buf.stride[1] = width;
    image_output_buf.stride[1] = variance_output_buf.stride[1] = width;
    mask_output_buf.stride[1] = width;
    //we are storing polynomial coefficients as poly_coef_buf(coef#, kernel#)
    poly_coef_buf.stride[1] = num_poly_coeff;
    //we are storing kernel parameters as ker_params_buf(param#, kernel#)
    ker_params_buf.stride[1] = num_kernel_params;

    // The extent tells us how large the image is in each dimension.
    image_buf.extent[0] = variance_buf.extent[0] = mask_buf.extent[0] = width;
    image_output_buf.extent[0] = variance_output_buf.extent[0] = width;
    mask_output_buf.extent[0] = width;
    poly_coef_buf.extent[0] = num_poly_coeff;
    ker_params_buf.extent[0] = num_kernel_params;

    image_buf.extent[1] = variance_buf.extent[1] = mask_buf.extent[1] = height;
    image_output_buf.extent[1] = variance_output_buf.extent[1] = height;
    mask_output_buf.extent[1] = height;
    poly_coef_buf.extent[1] = num_kernels;
    ker_params_buf.extent[1] = num_kernels;

    // We'll leave the mins as zero. This is what they typically
    // are. The host pointer points to the memory location of the min
    // coordinate (not the origin!). See lesson 6 for more detail
    // about the mins.

    // The elem_size field tells us how many bytes each element
    // uses. This is 4 for floats and 2 for type uint16_t
    image_buf.elem_size = variance_buf.elem_size = 4;
    mask_buf.elem_size = 2;
    image_output_buf.elem_size = variance_output_buf.elem_size = 4;
    mask_output_buf.elem_size = 2;
    poly_coef_buf.elem_size = ker_params_buf.elem_size = 4;

    // To avoid repeating all the boilerplate above, We recommend you
    // make a helper function that populates a buffer_t given whatever
    // image type you're using.

    //Now we set the polynomial coefficients
    //we are storing polynomial coefficients as poly_coef_buf(coef#, kernel#)
    // The stored values are 1-based (kernel k, coefficient c -> k + c/1000),
    // but the array offsets must be 0-based to stay inside the
    // num_kernels*num_poly_coeff*4 byte allocation.
    for (int kernel = 0; kernel < num_kernels; kernel++) {
        for (int coef = 0; coef < num_poly_coeff; coef++){
            float curCoef = static_cast<float>(kernel + 1) +
                static_cast<float>(coef + 1)/1000.0f;
            const uint8_t *curCoefUInt8Array = reinterpret_cast<const uint8_t*>(&curCoef);
            for(int i = 0; i < 4; i++){
                polynomial_coefficients[(kernel*num_poly_coeff + coef)*4 + i] =
                    curCoefUInt8Array[i];
            }
        }
    }

    //Now we set the kernel parameters
    //we are storing kernel parameters as ker_params_buf(param#, kernel#)
    // Same convention as above: 1-based values, 0-based offsets.
    for (int kernel = 0; kernel < num_kernels; kernel++) {
        for (int param = 0; param < num_kernel_params; param++){
            float curParam = static_cast<float>(kernel + 1) +
                static_cast<float>(param + 1)/1000.0f;
            const uint8_t *curParamUInt8Array = reinterpret_cast<const uint8_t*>(&curParam);
            for(int i = 0; i < 4; i++){
                ker_params[(kernel*num_kernel_params + param)*4 + i] = curParamUInt8Array[i];
            }
        }
    }

    // Now that we've setup all input and output buffers, it is now
    // time to call the main entrypoint function for the Halide
    // pipeline. Looking in the header file, its signature is:
    // int lincombo_aot(buffer_t *_image_buffer, buffer_t *_variance_buffer,
    //     buffer_t *_mask_buffer, buffer_t *_polynomialCoefficients_buffer,
    //     buffer_t *_kerParams_buffer, buffer_t *_combined_output_0_buffer,
    //     buffer_t *_combined_output_1_buffer, buffer_t *_combined_output_2_buffer);
    // The return value is an error code. It's zero on success.
    int error = lincombo_aot(&image_buf, &variance_buf, &mask_buf, &poly_coef_buf,
        &ker_params_buf, &image_output_buf, &variance_output_buf, &mask_output_buf);
    if (error) {
        printf("Halide returned an error: %d\n", error);
        release_buffers();
        return -1;
    }

    // The following code is only for benchmarking. It invokes the
    // Halide pipeline a number of times.
    double mean = 0;
    double min = 0;
    double max = 0;
    int numberOfRuns = 100;
    for (int i = 0; i < numberOfRuns; i++) {
        double t1 = current_time();
        error = lincombo_aot(&image_buf, &variance_buf, &mask_buf,
            &poly_coef_buf, &ker_params_buf,
            &image_output_buf, &variance_output_buf, &mask_output_buf);
        double t2 = current_time();

        // A failed run would make both the timings and the output buffers
        // meaningless, so stop instead of silently continuing.
        if (error) {
            printf("Halide returned an error: %d\n", error);
            release_buffers();
            return -1;
        }

        double curTime = (t2-t1);
        mean += curTime;
        if(i == 0){
            min = curTime;
            max = curTime;
        }
        else{
            if(curTime < min)
                min = curTime;
            if(curTime > max)
                max = curTime;
        }
    }
    mean = mean/numberOfRuns;
    std::cout << "Mean Time: " << mean << ", Min = " <<
        min << ", Max = " << max << ", with " << numberOfRuns <<
        " runs" << '\n';

#ifdef USE_LSST
    bool writePlanesSeparately = false;
    if(!writePlanesSeparately){
        //write image out
        auto imOut = afwImage::MaskedImage<float, lsst::afw::image::MaskPixel,
            lsst::afw::image::VariancePixel>(im.getWidth(), im.getHeight());
        for (int y = 0; y < imOut.getHeight(); y++) {
            afwImage::MaskedImage<float, lsst::afw::image::MaskPixel,
                lsst::afw::image::VariancePixel>::x_iterator inPtr = imOut.x_at(0, y);
            for (int x = 0; x < imOut.getWidth(); x++){
                // Reinterpret the raw output bytes as the pixel types.
                curImageUInt8Array = image_output + 4*(y*width + x);
                curVarianceUInt8Array = variance_output + 4*(y*width + x);
                curMaskUInt8Array = mask_output + 2*(y*width + x);
                curImage = *(reinterpret_cast<float*>(curImageUInt8Array));
                curVariance = *(reinterpret_cast<float*>(curVarianceUInt8Array));
                curMask = *(reinterpret_cast<uint16_t*>(curMaskUInt8Array));
                afwImage::pixel::SinglePixel<float, lsst::afw::image::MaskPixel,
                    lsst::afw::image::VariancePixel> curPixel(curImage, curMask, curVariance);
                (*inPtr) = curPixel;
                inPtr++;
            }
        }
        imOut.writeFits("./halideCleanLinearCombination5x5.fits");
    }
    else{
        //write three planes separately
        // Each plane is stored in the image slot of its own MaskedImage,
        // with the mask and variance slots set to 0.
        auto imOut = afwImage::MaskedImage<float, lsst::afw::image::MaskPixel, lsst::afw::image::VariancePixel>(im.getWidth(), im.getHeight());
        for (int y = 0; y < imOut.getHeight(); y++) {
            afwImage::MaskedImage<float, lsst::afw::image::MaskPixel, lsst::afw::image::VariancePixel>::x_iterator inPtr = imOut.x_at(0, y);
            for (int x = 0; x < imOut.getWidth(); x++){
                curImageUInt8Array = image_output + 4*(y*width + x);
                curImage = *(reinterpret_cast<float*>(curImageUInt8Array));
                afwImage::pixel::SinglePixel<float, lsst::afw::image::MaskPixel,
                    lsst::afw::image::VariancePixel> curPixel(curImage, 0, 0);
                (*inPtr) = curPixel;
                inPtr++;
            }
        }
        auto varOut = afwImage::MaskedImage<float, lsst::afw::image::MaskPixel, lsst::afw::image::VariancePixel>(im.getWidth(), im.getHeight());
        for (int y = 0; y < imOut.getHeight(); y++) {
            afwImage::MaskedImage<float, lsst::afw::image::MaskPixel, lsst::afw::image::VariancePixel>::x_iterator inPtr = varOut.x_at(0, y);
            for (int x = 0; x < imOut.getWidth(); x++){
                curVarianceUInt8Array = variance_output + 4*(y*width + x);
                curVariance = *(reinterpret_cast<float*>(curVarianceUInt8Array));
                afwImage::pixel::SinglePixel<float, lsst::afw::image::MaskPixel,
                    lsst::afw::image::VariancePixel> curPixel(curVariance, 0, 0);
                (*inPtr) = curPixel;
                inPtr++;
            }
        }
        auto maskOutPlane = afwImage::MaskedImage<float, lsst::afw::image::MaskPixel, lsst::afw::image::VariancePixel>(im.getWidth(), im.getHeight());
        for (int y = 0; y < imOut.getHeight(); y++) {
            afwImage::MaskedImage<float, lsst::afw::image::MaskPixel, lsst::afw::image::VariancePixel>::x_iterator inPtr = maskOutPlane.x_at(0, y);
            for (int x = 0; x < imOut.getWidth(); x++){
                curMaskUInt8Array = mask_output + 2*(y*width + x);
                curMask = *(reinterpret_cast<uint16_t*>(curMaskUInt8Array));
                afwImage::pixel::SinglePixel<float, lsst::afw::image::MaskPixel,
                    lsst::afw::image::VariancePixel> curPixel(curMask, 0, 0);
                (*inPtr) = curPixel;
                inPtr++;
            }
        }
        imOut.writeFits("./halideLinComboImage5x5.fits");
        varOut.writeFits("./halideLinComboVar5x5.fits");
        maskOutPlane.writeFits("./halideLinComboMask5x5.fits");
    }
#endif

    release_buffers();
    return 0;
}