forked from cms-patatrack/pixeltrack-standalone
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgpuFishbone.h
93 lines (83 loc) · 3.17 KB
/
gpuFishbone.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#ifndef RecoLocalTracker_SiPixelRecHits_plugins_gpuFishbone_h
#define RecoLocalTracker_SiPixelRecHits_plugins_gpuFishbone_h
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>
#include "DataFormats/approx_atan2.h"
#include "Geometry/phase1PixelTopology.h"
#include "CUDACore/VecArray.h"
#include "CUDACore/cuda_assert.h"
#include "GPUCACell.h"
namespace gpuPixelDoublets {
// __device__
// __forceinline__
__global__ void fishbone(GPUCACell::Hits const* __restrict__ hhp,
GPUCACell* cells,
uint32_t const* __restrict__ nCells,
GPUCACell::OuterHitOfCell const* __restrict__ isOuterHitOfCell,
uint32_t nHits,
bool checkTrack) {
constexpr auto maxCellsPerHit = GPUCACell::maxCellsPerHit;
auto const& hh = *hhp;
// auto layer = [&](uint16_t id) { return hh.cpeParams().layer(id); };
// x run faster...
auto firstY = threadIdx.y + blockIdx.y * blockDim.y;
auto firstX = threadIdx.x;
float x[maxCellsPerHit], y[maxCellsPerHit], z[maxCellsPerHit], n[maxCellsPerHit];
uint16_t d[maxCellsPerHit]; // uint8_t l[maxCellsPerHit];
uint32_t cc[maxCellsPerHit];
for (int idy = firstY, nt = nHits; idy < nt; idy += gridDim.y * blockDim.y) {
auto const& vc = isOuterHitOfCell[idy];
auto s = vc.size();
if (s < 2)
continue;
// if alligned kill one of the two.
// in principle one could try to relax the cut (only in r-z?) for jumping-doublets
auto const& c0 = cells[vc[0]];
auto xo = c0.get_outer_x(hh);
auto yo = c0.get_outer_y(hh);
auto zo = c0.get_outer_z(hh);
auto sg = 0;
for (int32_t ic = 0; ic < s; ++ic) {
auto& ci = cells[vc[ic]];
if (0 == ci.theUsed)
continue; // for triplets equivalent to next
if (checkTrack && ci.tracks().empty())
continue;
cc[sg] = vc[ic];
d[sg] = ci.get_inner_detIndex(hh);
// l[sg] = layer(d[sg]);
x[sg] = ci.get_inner_x(hh) - xo;
y[sg] = ci.get_inner_y(hh) - yo;
z[sg] = ci.get_inner_z(hh) - zo;
n[sg] = x[sg] * x[sg] + y[sg] * y[sg] + z[sg] * z[sg];
++sg;
}
if (sg < 2)
continue;
// here we parallelize
for (int32_t ic = firstX; ic < sg - 1; ic += blockDim.x) {
auto& ci = cells[cc[ic]];
for (auto jc = ic + 1; jc < sg; ++jc) {
auto& cj = cells[cc[jc]];
// must be different detectors (in the same layer)
// if (d[ic]==d[jc]) continue;
// || l[ic]!=l[jc]) continue;
auto cos12 = x[ic] * x[jc] + y[ic] * y[jc] + z[ic] * z[jc];
if (d[ic] != d[jc] && cos12 * cos12 >= 0.99999f * n[ic] * n[jc]) {
// alligned: kill farthest (prefer consecutive layers)
if (n[ic] > n[jc]) {
ci.theDoubletId = -1;
break;
} else {
cj.theDoubletId = -1;
}
}
} //cj
} // ci
} // hits
}
} // namespace gpuPixelDoublets
#endif // RecoLocalTracker_SiPixelRecHits_plugins_gpuFishbone_h