forked from cms-patatrack/pixeltrack-standalone
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgpuSortByPt2.h
73 lines (58 loc) · 1.93 KB
/
gpuSortByPt2.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#ifndef RecoPixelVertexing_PixelVertexFinding_src_gpuSortByPt2_h
#define RecoPixelVertexing_PixelVertexFinding_src_gpuSortByPt2_h
#include <algorithm>
#include <cmath>
#include <cstdint>
#include "CUDACore/HistoContainer.h"
#include "CUDACore/cuda_assert.h"
#ifdef __CUDA_ARCH__
#include "CUDACore/radixSort.h"
#endif
#include "gpuVertexFinder.h"
namespace gpuVertexFinder {
// Accumulates a per-vertex weight and produces an ordering of the vertices
// by that weight.
//
// Indexing uses only threadIdx.x and blockDim.x, and the barriers are
// __syncthreads(), so the work is shared among the threads of one block and
// is not divided across blocks.
//
// Steps:
//   1. data.idv[ws.itrk[t]] receives ws.iv[t] for each of the ws.ntrks tracks.
//   2. data.ptv2[v] is zeroed for every v < data.nvFinal; each track then adds
//      its ws.ptt2[t] to data.ptv2[ws.iv[t]]. Tracks with ws.iv[t] > 9990 are
//      skipped (presumably a "not attached to a vertex" marker - confirm
//      where iv is filled).
//   3. data.sortInd is filled with vertex indices ordered by data.ptv2:
//      through radixSort in device code, through std::sort (lowest ptv2
//      first) otherwise.
//
// Returns immediately, writing nothing, when data.nvFinal is 0.
//
// NOTE(review): the device path hands radixSort a 1024-entry shared buffer,
// so it presumably relies on nvFinal <= 1024 - confirm against radixSort.
__device__ __forceinline__ void sortByPt2(ZVertices* pdata, WorkSpace* pws) {
  auto& __restrict__ vertices = *pdata;
  auto& __restrict__ workspace = *pws;

  // per-track inputs
  auto nTracks = workspace.ntrks;
  float const* __restrict__ trackPt2 = workspace.ptt2;
  int32_t const* __restrict__ trackVertex = workspace.iv;

  // per-vertex outputs
  uint32_t const& nVertices = vertices.nvFinal;
  float* __restrict__ vertexPt2 = vertices.ptv2;
  uint16_t* __restrict__ order = vertices.sortInd;

  // debugging aid:
  // if (threadIdx.x == 0)
  //   printf("sorting %d vertices\n", nVertices);

  // nothing to do without vertices; the value is the same for every thread,
  // so the whole block leaves together, before any barrier
  if (0 == nVertices)
    return;

  // publish the vertex index of each track
  for (auto t = threadIdx.x; t < nTracks; t += blockDim.x)
    vertices.idv[workspace.itrk[t]] = trackVertex[t];

  // reset the accumulators (could be done asynchronously at the end of the
  // previous event)
  for (auto v = threadIdx.x; v < nVertices; v += blockDim.x)
    vertexPt2[v] = 0.f;
  // every accumulator must be zero before any thread starts adding to it
  __syncthreads();

  // several tracks may point to the same vertex, hence the atomic add
  for (auto t = threadIdx.x; t < nTracks; t += blockDim.x) {
    if (trackVertex[t] <= 9990)
      atomicAdd(&vertexPt2[trackVertex[t]], trackPt2[t]);
  }
  // all sums must be complete before they are used as sort keys
  __syncthreads();

  // a single vertex needs no sorting
  if (1 == nVertices) {
    if (0 == threadIdx.x)
      order[0] = 0;
    return;
  }

#ifdef __CUDA_ARCH__
  // scratch index buffer for the block-wide radix sort
  __shared__ uint16_t sws[1024];
  // sort using only 16 bits
  radixSort<float, 2>(vertexPt2, order, sws, nVertices);
#else
  // host fallback: start from the identity permutation, then sort the indices
  for (uint16_t v = 0; v < nVertices; ++v)
    order[v] = v;
  auto const lowerPt2First = [&](auto lhs, auto rhs) { return vertexPt2[lhs] < vertexPt2[rhs]; };
  std::sort(order, order + nVertices, lowerPt2First);
#endif
}
// Kernel entry point: forwards both pointers unchanged to sortByPt2.
// sortByPt2 indexes with threadIdx.x / blockDim.x only, so every launched
// block would repeat the same work - presumably meant to be launched with a
// single block (confirm at the launch site).
__global__ void sortByPt2Kernel(ZVertices* pdata, WorkSpace* pws) {
  sortByPt2(pdata, pws);
}
} // namespace gpuVertexFinder
#endif // RecoPixelVertexing_PixelVertexFinding_src_gpuSortByPt2_h