Skip to content

Commit

Permalink
Use alpaka methods instead of old alpakatools (cms-patatrack#79)
Browse files Browse the repository at this point in the history
  • Loading branch information
AuroraPerego authored Jan 24, 2025
1 parent 6691bae commit ad459ad
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 226 deletions.
209 changes: 0 additions & 209 deletions include/CLUEstering/AlpakaCore/alpakaWorkDiv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,213 +209,4 @@ namespace clue {
acc, maxNumberOfElements, elementIdxShift, dimIndex);
}

/*********************************************
* LOOP ON ALL ELEMENTS
********************************************/

/*
 * Calls `func(elementIdx)` once for every element index in the range returned by
 * element_index_range_in_block_truncated(acc, maxNumberOfElements, elementIdxShift, dimIndex).
 * The loop over elements is only meaningful in the CPU case; in the GPU case,
 * elementIdx = firstElementIdx = threadIdx + shift.
 * Indices are local to the BLOCK.
 */
template <typename TAcc, typename Func>
ALPAKA_FN_ACC void for_each_element_in_block(const TAcc& acc,
                                             const Idx maxNumberOfElements,
                                             const Idx elementIdxShift,
                                             const Func func,
                                             const unsigned int dimIndex = 0) {
  // Half-open [rangeBegin, rangeEnd) interval of element indices to visit.
  const auto& [rangeBegin, rangeEnd] = element_index_range_in_block_truncated(
      acc, maxNumberOfElements, elementIdxShift, dimIndex);

  Idx current = rangeBegin;
  while (current < rangeEnd) {
    func(current);
    ++current;
  }
}

/*
 * Convenience overload: same as the shifted version of for_each_element_in_block,
 * called with an element index shift of 0.
 */
template <typename TAcc, typename Func>
ALPAKA_FN_ACC void for_each_element_in_block(const TAcc& acc,
                                             const Idx maxNumberOfElements,
                                             const Func func,
                                             const unsigned int dimIndex = 0) {
  for_each_element_in_block(acc, maxNumberOfElements, static_cast<Idx>(0), func, dimIndex);
}

/*
 * Calls `func(elementIdx)` for every element index handled by the caller, with the
 * indices expressed in the GRID frame of reference.
 * The offset (block index in grid) * (elements per block) along `dimIndex` is added
 * to `elementIdxShift`, and the work is then delegated to for_each_element_in_block.
 * The loop over elements is only meaningful in the CPU case; in the GPU case,
 * elementIdx = firstElementIdx = threadIdx + shift.
 */
template <typename TAcc, typename Func>
ALPAKA_FN_ACC void for_each_element_in_grid(const TAcc& acc,
                                            const Idx maxNumberOfElements,
                                            Idx elementIdxShift,
                                            const Func func,
                                            const unsigned int dimIndex = 0) {
  // Position of this block within the grid, along the requested dimension.
  const Idx blockPosition(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[dimIndex]);
  // Number of elements covered by one block, along the requested dimension.
  const Idx elementsPerBlock(
      alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[dimIndex]);

  // Shift that moves block-local indices into the grid frame of reference.
  const Idx shiftInGrid = elementIdxShift + blockPosition * elementsPerBlock;

  for_each_element_in_block(acc, maxNumberOfElements, shiftInGrid, func, dimIndex);
}

/*
 * Convenience overload: same as the shifted version of for_each_element_in_grid,
 * called with an element index shift of 0.
 */
template <typename TAcc, typename Func>
ALPAKA_FN_ACC void for_each_element_in_grid(const TAcc& acc,
                                            const Idx maxNumberOfElements,
                                            const Func func,
                                            const unsigned int dimIndex = 0) {
  for_each_element_in_grid(acc, maxNumberOfElements, static_cast<Idx>(0), func, dimIndex);
}

/**************************************************************
* LOOP ON ALL ELEMENTS, WITH STRIDED ACCESS
**************************************************************/

/*
 * (CPU) loop over elements combined with (CPU/GPU) strided access.
 * Starting from the range returned by element_index_range_in_block, the range is
 * advanced by the BLOCK size (elements per block along `dimIndex`) until its start
 * reaches `maxNumberOfElements`; `func(i)` is called for every index of each range,
 * with the range end clamped to `maxNumberOfElements`.
 * The loop over elements is only meaningful in the CPU case; in the GPU case,
 * elementIdx = firstElementIdx = threadIdx + shift.
 * Indices are local to the BLOCK.
 */
template <typename TAcc, typename Func>
ALPAKA_FN_ACC void for_each_element_in_block_strided(const TAcc& acc,
                                                     const Idx maxNumberOfElements,
                                                     const Idx elementIdxShift,
                                                     const Func func,
                                                     const unsigned int dimIndex = 0) {
  // Initial (un-strided) element range in the block.
  const auto& [initialBegin, initialEnd] =
      element_index_range_in_block(acc, elementIdxShift, dimIndex);

  // The stride is the number of elements in one block.
  const Idx stride(alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[dimIndex]);

  Idx chunkBegin = initialBegin;
  Idx chunkEnd = initialEnd;
  while (chunkBegin < maxNumberOfElements) {
    // Never run past the problem size.
    chunkEnd = (chunkEnd > maxNumberOfElements) ? maxNumberOfElements : chunkEnd;

    // (CPU) visit each element of the current chunk.
    for (Idx element = chunkBegin; element < chunkEnd; ++element) {
      func(element);
    }

    chunkBegin += stride;
    chunkEnd += stride;
  }
}

/*
 * Convenience overload: same as the shifted version of
 * for_each_element_in_block_strided, called with an element index shift of 0.
 */
template <typename TAcc, typename Func>
ALPAKA_FN_ACC void for_each_element_in_block_strided(const TAcc& acc,
                                                     const Idx maxNumberOfElements,
                                                     const Func func,
                                                     const unsigned int dimIndex = 0) {
  for_each_element_in_block_strided(
      acc, maxNumberOfElements, static_cast<Idx>(0), func, dimIndex);
}

/*
 * (CPU) loop over elements combined with (CPU/GPU) strided access.
 * Starting from the range returned by element_index_range_in_grid, the range is
 * advanced by the GRID size (elements per grid along `dimIndex`) until its start
 * reaches `maxNumberOfElements`; `func(i)` is called for every index of each range,
 * with the range end clamped to `maxNumberOfElements`.
 * The loop over elements is only meaningful in the CPU case; in the GPU case,
 * elementIdx = firstElementIdx = threadIdx + shift.
 * Indices are local to the GRID.
 */
template <typename TAcc, typename Func>
ALPAKA_FN_ACC void for_each_element_in_grid_strided(const TAcc& acc,
                                                    const Idx maxNumberOfElements,
                                                    const Idx elementIdxShift,
                                                    const Func func,
                                                    const unsigned int dimIndex = 0) {
  // Initial (un-strided) element range in the grid.
  const auto& [initialBegin, initialEnd] =
      element_index_range_in_grid(acc, elementIdxShift, dimIndex);

  // The stride is the number of elements in the whole grid.
  const Idx stride(alpaka::getWorkDiv<alpaka::Grid, alpaka::Elems>(acc)[dimIndex]);

  Idx chunkBegin = initialBegin;
  Idx chunkEnd = initialEnd;
  while (chunkBegin < maxNumberOfElements) {
    // Never run past the problem size.
    chunkEnd = (chunkEnd > maxNumberOfElements) ? maxNumberOfElements : chunkEnd;

    // (CPU) visit each element of the current chunk.
    for (Idx element = chunkBegin; element < chunkEnd; ++element) {
      func(element);
    }

    chunkBegin += stride;
    chunkEnd += stride;
  }
}

/* once_per_grid
 *
 * `once_per_grid(acc)` evaluates to true for exactly one thread of the kernel
 * execution grid: the one whose thread index in the grid equals the all-zero vector.
 *
 * In practice this is usually block 0 / thread 0, but callers should not depend on
 * which specific indices satisfy the condition.
 */
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC inline constexpr bool once_per_grid(TAcc const& acc) {
  // Index vector type with the same dimensionality as the accelerator.
  using GridIndex = Vec<alpaka::Dim<TAcc>>;
  return alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) == GridIndex::zeros();
}

/*
 * Convenience overload: same as the shifted version of
 * for_each_element_in_grid_strided, called with an element index shift of 0.
 */
template <typename TAcc, typename Func>
ALPAKA_FN_ACC void for_each_element_in_grid_strided(const TAcc& acc,
                                                    const Idx maxNumberOfElements,
                                                    const Func func,
                                                    const unsigned int dimIndex = 0) {
  for_each_element_in_grid_strided(
      acc, maxNumberOfElements, static_cast<Idx>(0), func, dimIndex);
}

/**************************************************************
* LOOP ON ALL ELEMENTS WITH ONE LOOP
**************************************************************/

/*
 * Advances a single-loop strided iteration to its next chunk when needed.
 *
 * - If `i` has not reached `endElementIdx`, nothing is modified and true is returned.
 * - Otherwise `firstElementIdx` and `endElementIdx` are both advanced by `stride`,
 *   `i` is reset to the new `firstElementIdx`, and the function returns whether the
 *   new `i` is still below `maxNumberOfElements`.
 *
 * NB 1: This helper is a trick to keep a single loop (as in legacy code) instead of
 *       the two nested loops used by the other Alpaka helpers (for example
 *       'for_each_element_in_block_strided', which needs an additional loop over
 *       elements in the Alpaka model). It allows 'continue' and 'break' statements
 *       from legacy code to be kept as-is, avoiding a lot of code reshuffling.
 * NB 2: May modify i, firstElementIdx and endElementIdx.
 */
ALPAKA_FN_ACC ALPAKA_FN_INLINE bool next_valid_element_index_strided(
    Idx& i,
    Idx& firstElementIdx,
    Idx& endElementIdx,
    const Idx stride,
    const Idx maxNumberOfElements) {
  // Still inside the current chunk: the index is valid as it is.
  if (i != endElementIdx) {
    return true;
  }

  // End of chunk reached: jump to the next strided chunk.
  firstElementIdx += stride;
  endElementIdx += stride;
  i = firstElementIdx;

  // The new chunk is only usable if it starts inside the problem size.
  return i < maxNumberOfElements;
}

} // namespace clue
34 changes: 17 additions & 17 deletions include/CLUEstering/CLUE/CLUEAlpakaKernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
TilesAlpaka<Ndim>* tiles,
uint32_t nTiles,
uint32_t nPerDim) const {
if (clue::once_per_grid(acc)) {
if (alpaka::oncePerGrid(acc)) {
tiles->resizeTiles(nTiles, nPerDim);
}
clue::for_each_element_in_grid(
acc, nTiles, [&](uint32_t i) -> void { tiles->clear(i); });
for (auto index : alpaka::uniformElements(acc, nTiles))
tiles->clear(index);
}
};

Expand All @@ -47,8 +47,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
ALPAKA_FN_ACC void operator()(const TAcc& acc,
VecArray<int, max_followers>* d_followers,
uint32_t n_points) const {
clue::for_each_element_in_grid(
acc, n_points, [&](uint32_t i) { d_followers[i].reset(); });
for (auto index : alpaka::uniformElements(acc, n_points))
d_followers[index].reset();
}
};

Expand All @@ -58,11 +58,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
PointsView<Ndim>* points,
TilesAlpaka<Ndim>* tiles,
uint32_t n_points) const {
clue::for_each_element_in_grid(acc, n_points, [&](uint32_t i) {
for (auto index : alpaka::uniformElements(acc, n_points)) {
float coords[Ndim];
getCoords<Ndim>(coords, points, i);
tiles->fill(acc, coords, i);
});
getCoords<Ndim>(coords, points, index);
tiles->fill(acc, coords, index);
}
}
};

Expand Down Expand Up @@ -133,7 +133,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
/* const VecArray<VecArray<float, 2>, Ndim>& domains, */
float dc,
uint32_t n_points) const {
clue::for_each_element_in_grid(acc, n_points, [&](uint32_t i) {
for (auto i : alpaka::uniformElements(acc, n_points)) {
float rho_i{0.f};
float coords_i[Ndim];
getCoords<Ndim>(coords_i, dev_points, i);
Expand Down Expand Up @@ -165,7 +165,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
i);

dev_points->rho[i] = rho_i;
});
}
}
};

Expand Down Expand Up @@ -248,7 +248,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
float,
uint32_t n_points) const {
float dm_squared{dm * dm};
clue::for_each_element_in_grid(acc, n_points, [&](uint32_t i) {
for (auto i : alpaka::uniformElements(acc, n_points)) {
float delta_i{std::numeric_limits<float>::max()};
int nh_i{-1};
float coords_i[Ndim];
Expand Down Expand Up @@ -284,7 +284,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {

dev_points->delta[i] = alpaka::math::sqrt(acc, delta_i);
dev_points->nearest_higher[i] = nh_i;
});
}
}
};

Expand All @@ -299,7 +299,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
float d_c,
float rho_c,
uint32_t n_points) const {
clue::for_each_element_in_grid(acc, n_points, [&](uint32_t i) {
for (auto i : alpaka::uniformElements(acc, n_points)) {
// initialize cluster_index
dev_points->cluster_index[i] = -1;

Expand All @@ -319,7 +319,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
}
dev_points->is_seed[i] = 0;
}
});
}
}
};

Expand All @@ -332,7 +332,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
PointsView<Ndim>* dev_points) const {
const auto& seeds_0{*seeds};
const auto n_seeds{seeds_0.size()};
clue::for_each_element_in_grid(acc, n_seeds, [&](uint32_t idx_cls) {
for (auto idx_cls : alpaka::uniformElements(acc, n_seeds)) {
int local_stack[256] = {-1};
int local_stack_size{};

Expand Down Expand Up @@ -361,7 +361,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
++local_stack_size;
}
}
});
};
}
};
} // namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE

0 comments on commit ad459ad

Please sign in to comment.