-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Speed up Patatrack CA #34250
Speed up Patatrack CA #34250
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -218,12 +218,13 @@ namespace gpuClustering { | |
auto l = nn[k][kk]; | ||
auto m = l + firstPixel; | ||
assert(m != i); | ||
auto old = atomicMin(&clusterId[m], clusterId[i]); | ||
auto old = atomicMin_block(&clusterId[m], clusterId[i]); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. clusterId is in global memory, but "m" (and "i") below refer to digis on the module that is reconstructed in this block (one module per block, one block per module), so block-scoped atomics are sufficient here. |
||
// do we need memory fence? | ||
if (old != clusterId[i]) { | ||
// end the loop only if no changes were applied | ||
more = true; | ||
} | ||
atomicMin(&clusterId[i], old); | ||
atomicMin_block(&clusterId[i], old); | ||
} // nnloop | ||
} // pixel loop | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -350,7 +350,9 @@ __global__ void kernel_find_ntuplets(GPUCACell::Hits const *__restrict__ hhp, | |
auto const &thisCell = cells[idx]; | ||
if (thisCell.isKilled()) | ||
continue; // cut by earlyFishbone | ||
|
||
// we require at least three hits... | ||
if (thisCell.outerNeighbors().empty()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This simply avoids the whole lot below (in particular the function call which, being recursive, cannot be inlined). |
||
continue; | ||
auto pid = thisCell.layerPairId(); | ||
auto doit = minHitsPerNtuplet > 3 ? pid < 3 : pid < 8 || pid > 12; | ||
if (doit) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,8 +63,8 @@ namespace gpuVertexFinder { | |
assert(iv[i] >= 0); | ||
assert(iv[i] < int(foundClusters)); | ||
auto w = 1.f / ezt2[i]; | ||
atomicAdd(&zv[iv[i]], zt[i] * w); | ||
atomicAdd(&wv[iv[i]], w); | ||
atomicAdd_block(&zv[iv[i]], zt[i] * w); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Vertex reconstruction is performed in a single block, so block-scoped atomics are sufficient. |
||
atomicAdd_block(&wv[iv[i]], w); | ||
} | ||
|
||
__syncthreads(); | ||
|
@@ -87,8 +87,8 @@ namespace gpuVertexFinder { | |
iv[i] = 9999; | ||
continue; | ||
} | ||
atomicAdd(&chi2[iv[i]], c2); | ||
atomicAdd(&nn[iv[i]], 1); | ||
atomicAdd_block(&chi2[iv[i]], c2); | ||
atomicAdd_block(&nn[iv[i]], 1); | ||
} | ||
__syncthreads(); | ||
for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,7 +46,7 @@ namespace gpuVertexFinder { | |
for (auto i = threadIdx.x; i < nt; i += blockDim.x) { | ||
if (iv[i] > 9990) | ||
continue; | ||
atomicAdd(&ptv2[iv[i]], ptt2[i]); | ||
atomicAdd_block(&ptv2[iv[i]], ptt2[i]); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Vertex sorting is performed in a single block, so block-scoped atomics are sufficient. |
||
} | ||
__syncthreads(); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For my education:
what do this and the other
`_block`
methods do in this PR, especially in the context of speeding things up?