Skip to content

Commit

Permalink
Rewrite Point data structures as generic SoAs (cms-patatrack#74)
Browse files Browse the repository at this point in the history
* Rework points as SoA

Update backend code

Update binding modules

Update python API

Add tests for host-side point SoA

Update run_clue for CUDA and HIP

Update benchmarking scripts

Move test folder

Fix device memory access in memcpy

* Feature clusterer import method (cms-patatrack#76)

* Add `import_clusterer` method

* Add test for new clusterer import

* Add docstring

* Fix "if main" of test file

* Addition to gitignore

* Small fix in getGlobalBin (cms-patatrack#75)

* Small fix in getGlobalBin

* Update version

* Formatting

Fix after merge

* Separate alpaka input and result buffers

Fix partial dimensional clustering

Update package version
  • Loading branch information
sbaldu committed Jan 15, 2025
1 parent 52fec2f commit cf67ee7
Show file tree
Hide file tree
Showing 19 changed files with 959 additions and 498 deletions.
24 changes: 12 additions & 12 deletions CLUEstering/BindingModules/Run.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,22 @@
namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {

// Entry point that builds a CLUEAlgoAlpaka<Ndim>, the host/device point
// containers, and calls make_clusters on them.
//
// NOTE(review): this listing is a rendered diff ("12 additions & 12 deletions")
// whose +/- markers were lost, so two variants of the signature and of several
// body statements appear back to back. Presumably the first of each pair is
// the removed line and the second the added one -- confirm against the
// repository before relying on either.
template <uint8_t Ndim, typename Kernel>
// Variant 1: takes the coordinates and weights as vectors by const reference
// and returns a std::vector<std::vector<int>>.
std::vector<std::vector<int>> run(float dc,
float rhoc,
float dm,
int pPBin,
const std::vector<std::vector<float>>& coordinates,
const std::vector<float>& weight,
const Kernel& kernel,
Queue queue_,
size_t block_size) {
// Variant 2: returns nothing; takes a tuple of raw float*/int* pointers by
// rvalue reference plus a PointShape<Ndim> describing the data.
// NOTE(review): the pointers are non-owning as far as this function shows;
// presumably the caller keeps the underlying buffers alive -- confirm.
void run(float dc,
float rhoc,
float dm,
int pPBin,
std::tuple<float*, int*>&& pData,
const PointShape<Ndim>& shape,
const Kernel& kernel,
Queue queue_,
size_t block_size) {
// The algorithm object receives the four tuning parameters and the queue.
CLUEAlgoAlpaka<Ndim> algo(dc, rhoc, dm, pPBin, queue_);

// Create the host and device points
// Variant 1: host points built from the vectors; device points sized by
// weight.size().
Points<Ndim> h_points(coordinates, weight);
PointsAlpaka<Ndim> d_points(queue_, weight.size());
// Variant 2: host points built from the two raw pointers and the shape;
// device points sized by shape.nPoints.
PointsSoA<Ndim> h_points(std::get<0>(pData), std::get<1>(pData), shape);
PointsAlpaka<Ndim> d_points(queue_, shape.nPoints);

// Variant 1 forwards the value of make_clusters to the caller; variant 2
// calls it with the same arguments and discards any return value.
return algo.make_clusters(h_points, d_points, kernel, queue_, block_size);
algo.make_clusters(h_points, d_points, kernel, queue_, block_size);
}

}; // namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE
166 changes: 129 additions & 37 deletions CLUEstering/BindingModules/binding_cpu.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@

#include <alpaka/alpaka.hpp>
#include <tuple>
#include <vector>

#include "Run.hpp"

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/functional.h>
#include <pybind11/numpy.h>

namespace py = pybind11;

namespace alpaka_serial_sync {
void listDevices(const std::string& backend) {
Expand All @@ -24,16 +28,22 @@ namespace alpaka_serial_sync {
}

// Dispatches a runtime dimension count (Ndim, 1 to 10) to the matching
// run<N, Kernel> instantiation on the device selected by device_id.
//
// NOTE(review): this listing is a rendered diff whose +/- markers were lost,
// so two variants of the signature and of every switch case appear back to
// back. Presumably the first of each pair is the removed line(s) and the
// second the added one(s) -- confirm against the repository.
template <typename Kernel>
// Variant 1: takes coordinate/weight vectors and returns a
// std::vector<std::vector<int>>.
std::vector<std::vector<int>> mainRun(float dc,
float rhoc,
float dm,
int pPBin,
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const Kernel& kernel,
int Ndim,
size_t block_size,
size_t device_id) {
// Variant 2: returns nothing; takes two NumPy arrays by value plus an explicit
// point count.
void mainRun(float dc,
float rhoc,
float dm,
int pPBin,
py::array_t<float> data,
py::array_t<int> results,
const Kernel& kernel,
int Ndim,
uint32_t n_points,
size_t block_size,
size_t device_id) {
// request() yields the buffer descriptors; their .ptr members are cast to
// typed raw pointers that are later handed to run<N, Kernel>.
// NOTE(review): nothing visible here checks that either array actually holds
// enough elements for n_points, nor that pybind11 did not convert the
// incoming arrays into temporaries -- confirm the Python caller guarantees
// matching dtype, layout and size.
auto rData = data.request();
float* pData = static_cast<float*>(rData.ptr);
auto rResults = results.request();
int* pResults = static_cast<int*>(rResults.ptr);

// Pick device number device_id of the Acc1D platform.
const auto dev_acc = alpaka::getDevByIdx(alpaka::Platform<Acc1D>{}, device_id);

// Create the queue
// NOTE(review): the hunk marker on the next line hides source lines; queue_,
// used in every case below, is presumably declared there -- confirm.
Expand All @@ -42,38 +52,117 @@ namespace alpaka_serial_sync {
// Running the clustering algorithm //
// Each case first shows the variant-1 call (a two-line `return run<N, ...>`
// with coords/weights) and then the variant-2 call, which packs the two raw
// pointers into a tuple, wraps n_points in a PointShape<N>, and returns
// without a value.
switch (Ndim) {
[[unlikely]] case (1):
return run<1, Kernel>(
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
run<1, Kernel>(dc,
rhoc,
dm,
pPBin,
std::make_tuple(pData, pResults),
PointShape<1>{n_points},
kernel,
queue_,
block_size);
return;
[[likely]] case (2):
return run<2, Kernel>(
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
run<2, Kernel>(dc,
rhoc,
dm,
pPBin,
std::make_tuple(pData, pResults),
PointShape<2>{n_points},
kernel,
queue_,
block_size);
return;
[[likely]] case (3):
return run<3, Kernel>(
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
run<3, Kernel>(dc,
rhoc,
dm,
pPBin,
std::make_tuple(pData, pResults),
PointShape<3>{n_points},
kernel,
queue_,
block_size);
return;
[[unlikely]] case (4):
return run<4, Kernel>(
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
run<4, Kernel>(dc,
rhoc,
dm,
pPBin,
std::make_tuple(pData, pResults),
PointShape<4>{n_points},
kernel,
queue_,
block_size);
return;
[[unlikely]] case (5):
return run<5, Kernel>(
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
run<5, Kernel>(dc,
rhoc,
dm,
pPBin,
std::make_tuple(pData, pResults),
PointShape<5>{n_points},
kernel,
queue_,
block_size);
return;
[[unlikely]] case (6):
return run<6, Kernel>(
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
run<6, Kernel>(dc,
rhoc,
dm,
pPBin,
std::make_tuple(pData, pResults),
PointShape<6>{n_points},
kernel,
queue_,
block_size);
return;
[[unlikely]] case (7):
return run<7, Kernel>(
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
run<7, Kernel>(dc,
rhoc,
dm,
pPBin,
std::make_tuple(pData, pResults),
PointShape<7>{n_points},
kernel,
queue_,
block_size);
return;
[[unlikely]] case (8):
return run<8, Kernel>(
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
run<8, Kernel>(dc,
rhoc,
dm,
pPBin,
std::make_tuple(pData, pResults),
PointShape<8>{n_points},
kernel,
queue_,
block_size);
return;
[[unlikely]] case (9):
return run<9, Kernel>(
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
run<9, Kernel>(dc,
rhoc,
dm,
pPBin,
std::make_tuple(pData, pResults),
PointShape<9>{n_points},
kernel,
queue_,
block_size);
return;
[[unlikely]] case (10):
return run<10, Kernel>(
dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
run<10, Kernel>(dc,
rhoc,
dm,
pPBin,
std::make_tuple(pData, pResults),
PointShape<10>{n_points},
kernel,
queue_,
block_size);
return;
// Any other dimension count: report the limit on stdout and return.
// NOTE(review): `return {};` only fits the vector-returning variant; it is
// not valid in a function returning void, so confirm whether this line was
// removed or replaced in the void variant.
[[unlikely]] default:
std::cout << "This library only works up to 10 dimensions\n";
return {};
}
}

Expand All @@ -88,10 +177,11 @@ namespace alpaka_serial_sync {
float,
float,
int,
const std::vector<std::vector<float>>&,
const std::vector<float>&,
py::array_t<float>,
py::array_t<int>,
const FlatKernel&,
int,
uint32_t,
size_t,
size_t>(&mainRun<FlatKernel>),
"mainRun");
Expand All @@ -100,10 +190,11 @@ namespace alpaka_serial_sync {
float,
float,
int,
const std::vector<std::vector<float>>&,
const std::vector<float>&,
py::array_t<float>,
py::array_t<int>,
const ExponentialKernel&,
int,
uint32_t,
size_t,
size_t>(&mainRun<ExponentialKernel>),
"mainRun");
Expand All @@ -112,10 +203,11 @@ namespace alpaka_serial_sync {
float,
float,
int,
const std::vector<std::vector<float>>&,
const std::vector<float>&,
py::array_t<float>,
py::array_t<int>,
const GaussianKernel&,
int,
uint32_t,
size_t,
size_t>(&mainRun<GaussianKernel>),
"mainRun");
Expand Down
Loading

0 comments on commit cf67ee7

Please sign in to comment.