Skip to content

Commit

Permalink
Merge pull request #81 from AuroraPerego/getClusters
Browse files Browse the repository at this point in the history
Add getClusters methods and enhance general interface
  • Loading branch information
sbaldu authored Jan 28, 2025
2 parents ad459ad + a7b3d21 commit d48e818
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 38 deletions.
19 changes: 8 additions & 11 deletions include/CLUEstering/CLUE/CLUEAlpakaKernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
constexpr int32_t reserve{1000000};

template <uint8_t Ndim>
using PointsView = typename PointsAlpaka<Ndim>::PointsAlpakaView;

template <uint8_t Ndim>
ALPAKA_FN_ACC void getCoords(float* coords, PointsView<Ndim>* d_points, uint32_t i) {
ALPAKA_FN_ACC void getCoords(float* coords, PointsAlpakaView* d_points, uint32_t i) {
for (auto dim = 0; dim < Ndim; ++dim) {
coords[dim] = d_points->coords[i + dim * d_points->n];
}
Expand Down Expand Up @@ -55,7 +52,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
struct KernelFillTiles {
template <typename TAcc, uint8_t Ndim>
ALPAKA_FN_ACC void operator()(const TAcc& acc,
PointsView<Ndim>* points,
PointsAlpakaView* points,
TilesAlpaka<Ndim>* tiles,
uint32_t n_points) const {
for (auto index : alpaka::uniformElements(acc, n_points)) {
Expand All @@ -72,7 +69,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
VecArray<uint32_t, Ndim>& base_vec,
const VecArray<VecArray<uint32_t, 2>, Ndim>& search_box,
TilesAlpaka<Ndim>* tiles,
PointsView<Ndim>* dev_points,
PointsAlpakaView* dev_points,
const KernelType& kernel,
/* const VecArray<VecArray<float, 2>, Ndim>& domains, */
const float* coords_i,
Expand Down Expand Up @@ -128,7 +125,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
template <typename TAcc, uint8_t Ndim, typename KernelType>
ALPAKA_FN_ACC void operator()(const TAcc& acc,
TilesAlpaka<Ndim>* dev_tiles,
PointsView<Ndim>* dev_points,
PointsAlpakaView* dev_points,
const KernelType& kernel,
/* const VecArray<VecArray<float, 2>, Ndim>& domains, */
float dc,
Expand Down Expand Up @@ -175,7 +172,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
VecArray<uint32_t, Ndim>& base_vec,
const VecArray<VecArray<uint32_t, 2>, Ndim>& s_box,
TilesAlpaka<Ndim>* tiles,
PointsView<Ndim>* dev_points,
PointsAlpakaView* dev_points,
/* const VecArray<VecArray<float, 2>, Ndim>& domains, */
const float* coords_i,
float rho_i,
Expand Down Expand Up @@ -242,7 +239,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
template <typename TAcc, uint8_t Ndim>
ALPAKA_FN_ACC void operator()(const TAcc& acc,
TilesAlpaka<Ndim>* dev_tiles,
PointsView<Ndim>* dev_points,
PointsAlpakaView* dev_points,
/* const VecArray<VecArray<float, 2>, Ndim>& domains, */
float dm,
float,
Expand Down Expand Up @@ -294,7 +291,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
ALPAKA_FN_ACC void operator()(const TAcc& acc,
VecArray<int32_t, reserve>* seeds,
VecArray<int32_t, max_followers>* followers,
PointsView<Ndim>* dev_points,
PointsAlpakaView* dev_points,
float dm,
float d_c,
float rho_c,
Expand Down Expand Up @@ -329,7 +326,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
ALPAKA_FN_ACC void operator()(const TAcc& acc,
VecArray<int32_t, reserve>* seeds,
VecArray<int, max_followers>* followers,
PointsView<Ndim>* dev_points) const {
PointsAlpakaView* dev_points) const {
const auto& seeds_0{*seeds};
const auto n_seeds{seeds_0.size()};
for (auto idx_cls : alpaka::uniformElements(acc, n_seeds)) {
Expand Down
62 changes: 47 additions & 15 deletions include/CLUEstering/CLUEstering.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {

template <typename KernelType>
void make_clusters(PointsSoA<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
const KernelType& kernel,
Queue queue_,
std::size_t block_size);

template <typename KernelType>
void make_clusters(PointsSoA<Ndim>& h_points,
PointsAlpaka<Ndim>& dev_points,
const KernelType& kernel,
Queue queue_,
std::size_t block_size);

std::map<int, std::vector<int>> getClusters(const PointsSoA<Ndim>& h_points);

private:
float dc_;
float rhoc_;
Expand All @@ -54,11 +62,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
std::optional<clue::device_buffer<Device, VecArray<int32_t, reserve>>> d_seeds;
std::optional<clue::device_buffer<Device, clue::VecArray<int32_t, max_followers>[]>>
d_followers;
std::optional<PointsAlpaka<Ndim>> d_points;

// Private methods
void init_device(Queue queue_);
void setup(const PointsSoA<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
PointsAlpaka<Ndim>& dev_points,
Queue queue_,
std::size_t block_size);

Expand Down Expand Up @@ -105,7 +114,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {

template <uint8_t Ndim>
void CLUEAlgoAlpaka<Ndim>::setup(const PointsSoA<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
PointsAlpaka<Ndim>& dev_points,
Queue queue_,
std::size_t block_size) {
// calculate the number of tiles and their size
Expand Down Expand Up @@ -133,7 +142,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {

const auto copyExtent = (Ndim + 1) * h_points.nPoints();
alpaka::memcpy(queue_,
d_points.input_buffer,
dev_points.input_buffer,
clue::make_host_view(h_points.coords(), copyExtent),
copyExtent);
alpaka::memset(queue_, *d_seeds, 0x00);
Expand All @@ -151,11 +160,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
template <uint8_t Ndim>
template <typename KernelType>
void CLUEAlgoAlpaka<Ndim>::make_clusters(PointsSoA<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
const KernelType& kernel,
Queue queue_,
std::size_t block_size) {
setup(h_points, d_points, queue_, block_size);
d_points = PointsAlpaka<Ndim>(queue_, h_points.nPoints());
auto& dev_points = *d_points;
make_clusters(h_points, dev_points, kernel, queue_, block_size);
}

template <uint8_t Ndim>
template <typename KernelType>
void CLUEAlgoAlpaka<Ndim>::make_clusters(PointsSoA<Ndim>& h_points,
PointsAlpaka<Ndim>& dev_points,
const KernelType& kernel,
Queue queue_,
std::size_t block_size) {
setup(h_points, dev_points, queue_, block_size);

const auto nPoints = h_points.nPoints();

Expand All @@ -164,13 +184,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
alpaka::enqueue(
queue_,
alpaka::createTaskKernel<Acc1D>(
working_div, KernelFillTiles{}, d_points.view(), m_tiles, nPoints));
working_div, KernelFillTiles{}, dev_points.view(), m_tiles, nPoints));

alpaka::enqueue(queue_,
alpaka::createTaskKernel<Acc1D>(working_div,
KernelCalculateLocalDensity{},
m_tiles,
d_points.view(),
dev_points.view(),
kernel,
/* m_domains.data(), */
dc_,
Expand All @@ -179,7 +199,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
alpaka::createTaskKernel<Acc1D>(working_div,
KernelCalculateNearestHigher{},
m_tiles,
d_points.view(),
dev_points.view(),
/* m_domains.data(), */
dm_,
dc_,
Expand All @@ -189,7 +209,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
KernelFindClusters<Ndim>{},
m_seeds,
m_followers,
d_points.view(),
dev_points.view(),
dm_,
dc_,
rhoc_,
Expand All @@ -204,7 +224,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
KernelAssignClusters<Ndim>{},
m_seeds,
m_followers,
d_points.view()));
dev_points.view()));

// Wait for all the operations in the queue to finish
alpaka::wait(queue_);
Expand All @@ -213,23 +233,35 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
#ifdef DEBUG
alpaka::memcpy(queue_,
clue::make_host_view(h_points.debugInfo().rho.data(), nPoints),
clue::make_device_view(device, d_points.view()->rho, nPoints));
clue::make_device_view(device, dev_points.view()->rho, nPoints));
alpaka::memcpy(queue_,
clue::make_host_view(h_points.debugInfo().rho.data(), nPoints),
clue::make_device_view(device, d_points.view()->delta, nPoints));
clue::make_device_view(device, dev_points.view()->delta, nPoints));
alpaka::memcpy(
queue_,
clue::make_host_view(h_points.debugInfo().nearestHigher.data(), nPoints),
clue::make_device_view(device, d_points.view()->nearest_higher, nPoints));
clue::make_device_view(device, dev_points.view()->nearest_higher, nPoints));
#endif

alpaka::memcpy(queue_,
clue::make_host_view(h_points.clusterIndexes(), 2 * nPoints),
clue::make_device_view(
device, d_points.result_buffer.data() + nPoints, 2 * nPoints),
device, dev_points.result_buffer.data() + nPoints, 2 * nPoints),
2 * nPoints);

// Wait for all the operations in the queue to finish
alpaka::wait(queue_);
}

// Groups point indices by the cluster id stored for each point.
//
// h_points: host-side points; clusterIndexes()[i] is read for every
//           i in [0, nPoints()).
// Returns a map from cluster id to the indices of the points carrying that
// id. Points are visited in increasing index order, so every list is sorted
// in ascending order.
template <uint8_t Ndim>
std::map<int, std::vector<int>> CLUEAlgoAlpaka<Ndim>::getClusters(
    const PointsSoA<Ndim>& h_points) {
  std::map<int, std::vector<int>> clusters;

  // The number of points does not change while grouping: read it once.
  const auto n_points = static_cast<std::size_t>(h_points.nPoints());
  for (std::size_t i = 0; i < n_points; ++i) {
    // The result stores point indices as int, so narrow the loop index
    // explicitly instead of relying on an implicit size_t -> int conversion.
    clusters[h_points.clusterIndexes()[i]].push_back(static_cast<int>(i));
  }
  return clusters;
}

} // namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE
24 changes: 12 additions & 12 deletions include/CLUEstering/DataFormats/alpaka/PointsAlpaka.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@

namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {

// Bundle of raw pointers plus a count describing one set of points.
// The clustering kernels take a PointsAlpakaView* and access the per-point
// arrays through it; PointsAlpaka::view() hands out the pointer.
// NOTE(review): the members carry no initializers, so a default-constructed
// view holds indeterminate pointers until it is filled in.
class PointsAlpakaView {
public:
// Point coordinates. getCoords() reads dimension `dim` of point `i` at
// coords[i + dim * n], i.e. all values of one dimension are stored
// contiguously, followed by the next dimension.
float* coords;
// Per-point weight — presumably one entry per point; TODO confirm.
float* weight;
// Per-point values named rho/delta; the DEBUG path of make_clusters copies
// nPoints elements out of each of them.
float* rho;
float* delta;
// Per-point integer array; the DEBUG path of make_clusters copies nPoints
// elements out of it into the host debug info.
int* nearest_higher;
// Presumably the cluster id assigned to each point — TODO confirm.
int* cluster_index;
// Presumably a per-point seed flag — TODO confirm.
int* is_seed;
// Stride between consecutive dimensions in `coords` (see getCoords());
// presumably the number of points — TODO confirm.
int n;
};

template <uint8_t Ndim>
class PointsAlpaka {
public:
Expand Down Expand Up @@ -40,18 +52,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
clue::device_buffer<Device, float[]> input_buffer;
clue::device_buffer<Device, int[]> result_buffer;

class PointsAlpakaView {
public:
float* coords;
float* weight;
float* rho;
float* delta;
int* nearest_higher;
int* cluster_index;
int* is_seed;
int n;
};

PointsAlpakaView* view() { return view_dev.data(); }

private:
Expand Down

0 comments on commit d48e818

Please sign in to comment.