Rewrite Point data structures as generic SoAs (cms-patatrack#74)

* Rework points as SoA
  Update backend code
  Update binding modules
  Update python API
  Add tests for host-side point SoA
  Update run_clue for CUDA and HIP
  Update benchmarking scripts
  Move test folder
  Fix device memory access in memcpy
* Feature clusterer import method (cms-patatrack#76)
* Add `import_clusterer` method
* Add test for new clusterer import
* Add docstring
* Fix "if main" of test file
* Addition to gitignore
* Small fix in getGlobalBin (cms-patatrack#75)
* Small fix in getGlobalBin
* Update version
* Formatting
  Fix after merge
* Separate alpaka input and result buffers
  Fix partial dimensional clustering
  Update package version
sbaldu · Jan 15, 2025 · cf67ee7
1 parent 52fec2f
commit cf67ee7
Show file tree

Hide file tree

Showing 19 changed files with 959 additions and 498 deletions.
diff --git a/CLUEstering/BindingModules/Run.hpp b/CLUEstering/BindingModules/Run.hpp
@@ -7,22 +7,22 @@
 namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE {
 
   template <uint8_t Ndim, typename Kernel>
-  std::vector<std::vector<int>> run(float dc,
-                                    float rhoc,
-                                    float dm,
-                                    int pPBin,
-                                    const std::vector<std::vector<float>>& coordinates,
-                                    const std::vector<float>& weight,
-                                    const Kernel& kernel,
-                                    Queue queue_,
-                                    size_t block_size) {
+  void run(float dc,
+           float rhoc,
+           float dm,
+           int pPBin,
+           std::tuple<float*, int*>&& pData,
+           const PointShape<Ndim>& shape,
+           const Kernel& kernel,
+           Queue queue_,
+           size_t block_size) {
     CLUEAlgoAlpaka<Ndim> algo(dc, rhoc, dm, pPBin, queue_);
 
     // Create the host and device points
-    Points<Ndim> h_points(coordinates, weight);
-    PointsAlpaka<Ndim> d_points(queue_, weight.size());
+    PointsSoA<Ndim> h_points(std::get<0>(pData), std::get<1>(pData), shape);
+    PointsAlpaka<Ndim> d_points(queue_, shape.nPoints);
 
-    return algo.make_clusters(h_points, d_points, kernel, queue_, block_size);
+    algo.make_clusters(h_points, d_points, kernel, queue_, block_size);
   }
 
 };  // namespace ALPAKA_ACCELERATOR_NAMESPACE_CLUE
diff --git a/CLUEstering/BindingModules/binding_cpu.cpp b/CLUEstering/BindingModules/binding_cpu.cpp
@@ -1,12 +1,16 @@
 
 #include <alpaka/alpaka.hpp>
+#include <tuple>
 #include <vector>
 
 #include "Run.hpp"
 
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include <pybind11/functional.h>
+#include <pybind11/numpy.h>
+
+namespace py = pybind11;
 
 namespace alpaka_serial_sync {
   void listDevices(const std::string& backend) {
@@ -24,16 +28,22 @@ namespace alpaka_serial_sync {
   }
 
   template <typename Kernel>
-  std::vector<std::vector<int>> mainRun(float dc,
-                                        float rhoc,
-                                        float dm,
-                                        int pPBin,
-                                        const std::vector<std::vector<float>>& coords,
-                                        const std::vector<float>& weights,
-                                        const Kernel& kernel,
-                                        int Ndim,
-                                        size_t block_size,
-                                        size_t device_id) {
+  void mainRun(float dc,
+               float rhoc,
+               float dm,
+               int pPBin,
+               py::array_t<float> data,
+               py::array_t<int> results,
+               const Kernel& kernel,
+               int Ndim,
+               uint32_t n_points,
+               size_t block_size,
+               size_t device_id) {
+    auto rData = data.request();
+    float* pData = static_cast<float*>(rData.ptr);
+    auto rResults = results.request();
+    int* pResults = static_cast<int*>(rResults.ptr);
+
     const auto dev_acc = alpaka::getDevByIdx(alpaka::Platform<Acc1D>{}, device_id);
 
     // Create the queue
@@ -42,38 +52,117 @@ namespace alpaka_serial_sync {
     // Running the clustering algorithm //
     switch (Ndim) {
       [[unlikely]] case (1):
-        return run<1, Kernel>(
-            dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
+        run<1, Kernel>(dc,
+                       rhoc,
+                       dm,
+                       pPBin,
+                       std::make_tuple(pData, pResults),
+                       PointShape<1>{n_points},
+                       kernel,
+                       queue_,
+                       block_size);
+        return;
       [[likely]] case (2):
-        return run<2, Kernel>(
-            dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
+        run<2, Kernel>(dc,
+                       rhoc,
+                       dm,
+                       pPBin,
+                       std::make_tuple(pData, pResults),
+                       PointShape<2>{n_points},
+                       kernel,
+                       queue_,
+                       block_size);
+        return;
       [[likely]] case (3):
-        return run<3, Kernel>(
-            dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
+        run<3, Kernel>(dc,
+                       rhoc,
+                       dm,
+                       pPBin,
+                       std::make_tuple(pData, pResults),
+                       PointShape<3>{n_points},
+                       kernel,
+                       queue_,
+                       block_size);
+        return;
       [[unlikely]] case (4):
-        return run<4, Kernel>(
-            dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
+        run<4, Kernel>(dc,
+                       rhoc,
+                       dm,
+                       pPBin,
+                       std::make_tuple(pData, pResults),
+                       PointShape<4>{n_points},
+                       kernel,
+                       queue_,
+                       block_size);
+        return;
       [[unlikely]] case (5):
-        return run<5, Kernel>(
-            dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
+        run<5, Kernel>(dc,
+                       rhoc,
+                       dm,
+                       pPBin,
+                       std::make_tuple(pData, pResults),
+                       PointShape<5>{n_points},
+                       kernel,
+                       queue_,
+                       block_size);
+        return;
       [[unlikely]] case (6):
-        return run<6, Kernel>(
-            dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
+        run<6, Kernel>(dc,
+                       rhoc,
+                       dm,
+                       pPBin,
+                       std::make_tuple(pData, pResults),
+                       PointShape<6>{n_points},
+                       kernel,
+                       queue_,
+                       block_size);
+        return;
       [[unlikely]] case (7):
-        return run<7, Kernel>(
-            dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
+        run<7, Kernel>(dc,
+                       rhoc,
+                       dm,
+                       pPBin,
+                       std::make_tuple(pData, pResults),
+                       PointShape<7>{n_points},
+                       kernel,
+                       queue_,
+                       block_size);
+        return;
       [[unlikely]] case (8):
-        return run<8, Kernel>(
-            dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
+        run<8, Kernel>(dc,
+                       rhoc,
+                       dm,
+                       pPBin,
+                       std::make_tuple(pData, pResults),
+                       PointShape<8>{n_points},
+                       kernel,
+                       queue_,
+                       block_size);
+        return;
       [[unlikely]] case (9):
-        return run<9, Kernel>(
-            dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
+        run<9, Kernel>(dc,
+                       rhoc,
+                       dm,
+                       pPBin,
+                       std::make_tuple(pData, pResults),
+                       PointShape<9>{n_points},
+                       kernel,
+                       queue_,
+                       block_size);
+        return;
       [[unlikely]] case (10):
-        return run<10, Kernel>(
-            dc, rhoc, dm, pPBin, coords, weights, kernel, queue_, block_size);
+        run<10, Kernel>(dc,
+                        rhoc,
+                        dm,
+                        pPBin,
+                        std::make_tuple(pData, pResults),
+                        PointShape<10>{n_points},
+                        kernel,
+                        queue_,
+                        block_size);
+        return;
       [[unlikely]] default:
         std::cout << "This library only works up to 10 dimensions\n";
-        return {};
     }
   }
 
@@ -88,10 +177,11 @@ namespace alpaka_serial_sync {
                                   float,
                                   float,
                                   int,
-                                  const std::vector<std::vector<float>>&,
-                                  const std::vector<float>&,
+                                  py::array_t<float>,
+                                  py::array_t<int>,
                                   const FlatKernel&,
                                   int,
+                                  uint32_t,
                                   size_t,
                                   size_t>(&mainRun<FlatKernel>),
           "mainRun");
@@ -100,10 +190,11 @@ namespace alpaka_serial_sync {
                                   float,
                                   float,
                                   int,
-                                  const std::vector<std::vector<float>>&,
-                                  const std::vector<float>&,
+                                  py::array_t<float>,
+                                  py::array_t<int>,
                                   const ExponentialKernel&,
                                   int,
+                                  uint32_t,
                                   size_t,
                                   size_t>(&mainRun<ExponentialKernel>),
           "mainRun");
@@ -112,10 +203,11 @@ namespace alpaka_serial_sync {
                                   float,
                                   float,
                                   int,
-                                  const std::vector<std::vector<float>>&,
-                                  const std::vector<float>&,
+                                  py::array_t<float>,
+                                  py::array_t<int>,
                                   const GaussianKernel&,
                                   int,
+                                  uint32_t,
                                   size_t,
                                   size_t>(&mainRun<GaussianKernel>),
           "mainRun");