Skip to content

Commit

Permalink
Merge pull request #449 from beomki-yeo/refactor-clusterization2
Browse files Browse the repository at this point in the history
(Experimental) Refactor Clusterization
  • Loading branch information
krasznaa authored Oct 10, 2023
2 parents d8f0bfd + 87b7dac commit af6dcca
Show file tree
Hide file tree
Showing 23 changed files with 663 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ TRACCC_HOST inline void calc_cluster_properties(
/// @param[in] module is the cell module where the cluster belongs to
/// @param[in] module_link is the module index
///
TRACCC_HOST void fill_measurement(
TRACCC_HOST inline void fill_measurement(
measurement_collection_types::host& measurements,
const cell_collection_types::host& cluster, const cell_module& module,
const unsigned int module_link) {
Expand All @@ -102,7 +102,7 @@ TRACCC_HOST void fill_measurement(
if (totalWeight > 0.) {
measurement m;
m.module_link = module_link;
m.surface_link = detray::geometry::barcode{module.module};
m.surface_link = module.surface_link;
// normalize the cell position
m.local = mean;
// normalize the variance
Expand Down
7 changes: 5 additions & 2 deletions core/include/traccc/edm/cell.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
#include "traccc/edm/container.hpp"
#include "traccc/geometry/pixel_data.hpp"

// Detray include(s).
#include "detray/geometry/barcode.hpp"

namespace traccc {

/// Definition of a detector module
Expand All @@ -23,7 +26,7 @@ namespace traccc {
///
struct cell_module {

geometry_id module = 0;
detray::geometry::barcode surface_link{0u};
transform3 placement = transform3{};
scalar threshold = 0;

Expand All @@ -37,7 +40,7 @@ using cell_module_collection_types = collection_types<cell_module>;
/// Equality operator for cell module
TRACCC_HOST_DEVICE
inline bool operator==(const cell_module& lhs, const cell_module& rhs) {
return lhs.module == rhs.module;
return lhs.surface_link == rhs.surface_link;
}

/// Definition for one detector cell
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ inline void aggregate_cluster(
*/
out.local = mean;
out.variance = var;
out.surface_link = this_module.surface_link;
out.module_link = module_link;
out.surface_link = detray::geometry::barcode{this_module.module};
// The following will need to be filled properly "soon".
out.meas_dim = 2u;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ namespace traccc::device {
* cluster.
*/
TRACCC_HOST_DEVICE
bool is_adjacent(channel_id ac0, channel_id ac1, channel_id bc0,
channel_id bc1) {
inline bool is_adjacent(channel_id ac0, channel_id ac1, channel_id bc0,
channel_id bc1) {
unsigned int p0 = (ac0 - bc0);
unsigned int p1 = (ac1 - bc1);

Expand Down
2 changes: 2 additions & 0 deletions device/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ traccc_add_library( traccc_cuda cuda TYPE SHARED
"src/seeding/seeding_algorithm.cpp"
"src/cca/component_connection.cu"
# Clusterization
"include/traccc/cuda/clusterization/experimental/clusterization_algorithm.hpp"
"src/clusterization/experimental/clusterization_algorithm.cu"
"include/traccc/cuda/clusterization/clusterization_algorithm.hpp"
"src/clusterization/clusterization_algorithm.cu"
# Finding
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2023 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

// Local include(s).
#include "traccc/cuda/utils/stream.hpp"

// Project include(s).
#include "traccc/edm/cell.hpp"
#include "traccc/edm/measurement.hpp"
#include "traccc/utils/algorithm.hpp"
#include "traccc/utils/memory_resource.hpp"

// VecMem include(s).
#include <vecmem/utils/copy.hpp>

namespace traccc::cuda::experimental {

/// Algorithm performing hit clusterization
///
/// This algorithm implements hit clusterization in a massively-parallel
/// approach. Each thread handles a pre-determined number of detector cells.
///
/// This algorithm returns a buffer which is not necessarily filled yet. A
/// synchronisation statement is required before destroying this buffer.
///
class clusterization_algorithm
: public algorithm<measurement_collection_types::buffer(
const cell_collection_types::const_view&,
const cell_module_collection_types::const_view&)> {

public:
/// Constructor for clusterization algorithm
///
/// @param mr The memory resource(s) to use in the algorithm
/// @param copy The copy object to use for copying data between device
/// and host memory blocks
/// @param str The CUDA stream to perform the operations in
/// @param target_cells_per_partition the average number of cells in each
/// partition
///
clusterization_algorithm(const traccc::memory_resource& mr,
vecmem::copy& copy, stream& str,
const unsigned short target_cells_per_partition);
// const unsigned short target_cells_per_partition);

/// Callable operator for clusterization algorithm
///
/// @param cells a collection of cells
/// @param modules a collection of modules
/// @return a spacepoint collection (buffer) and a collection (buffer)
/// of links from cells to the spacepoints they belong to.
output_type operator()(
const cell_collection_types::const_view& cells,
const cell_module_collection_types::const_view& modules) const override;

private:
/// The average number of cells in each partition
unsigned short m_target_cells_per_partition;
/// The memory resource(s) to use
traccc::memory_resource m_mr;
/// The copy object to use
vecmem::copy& m_copy;
/// The CUDA stream to use
stream& m_stream;
};

} // namespace traccc::cuda::experimental
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

// CUDA Library include(s).
#include "../../utils/utils.hpp"
#include "traccc/cuda/clusterization/experimental/clusterization_algorithm.hpp"
#include "traccc/cuda/utils/barrier.hpp"
#include "traccc/cuda/utils/definitions.hpp"

// Project include(s)
#include "traccc/clusterization/device/aggregate_cluster.hpp"
#include "traccc/clusterization/device/ccl_kernel.hpp"
#include "traccc/clusterization/device/reduce_problem_cell.hpp"

// Vecmem include(s).
#include <vecmem/utils/copy.hpp>

namespace traccc::cuda::experimental {

namespace {
/// These indices in clusterization will only range from 0 to
/// max_cells_per_partition, so we only need a short.
using index_t = unsigned short;

static constexpr int TARGET_CELLS_PER_THREAD = 8;
static constexpr int MAX_CELLS_PER_THREAD = 12;
} // namespace

namespace kernels {

/// CUDA kernel for running @c traccc::device::ccl_kernel
__global__ void ccl_kernel(
const cell_collection_types::const_view cells_view,
const cell_module_collection_types::const_view modules_view,
const index_t max_cells_per_partition,
const index_t target_cells_per_partition,
measurement_collection_types::view measurements_view,
unsigned int& measurement_count,
vecmem::data::vector_view<unsigned int> cell_links) {
__shared__ unsigned int partition_start, partition_end;
__shared__ unsigned int outi;
extern __shared__ index_t shared_v[];
index_t* f = &shared_v[0];
index_t* f_next = &shared_v[max_cells_per_partition];
traccc::cuda::barrier barry_r;

device::ccl_kernel(threadIdx.x, blockDim.x, blockIdx.x, cells_view,
modules_view, max_cells_per_partition,
target_cells_per_partition, partition_start,
partition_end, outi, f, f_next, barry_r,
measurements_view, measurement_count, cell_links);
}

} // namespace kernels

clusterization_algorithm::clusterization_algorithm(
const traccc::memory_resource& mr, vecmem::copy& copy, stream& str,
const unsigned short target_cells_per_partition)
: m_mr(mr),
m_copy(copy),
m_stream(str),
m_target_cells_per_partition(target_cells_per_partition) {}

clusterization_algorithm::output_type clusterization_algorithm::operator()(
const cell_collection_types::const_view& cells,
const cell_module_collection_types::const_view& modules) const {

// Get a convenience variable for the stream that we'll be using.
cudaStream_t stream = details::get_stream(m_stream);

// Number of cells
const cell_collection_types::view::size_type num_cells =
m_copy.get_size(cells);

if (num_cells == 0) {
return {0, m_mr.main};
}

// Create result object for the CCL kernel with size overestimation
measurement_collection_types::buffer measurements_buffer(num_cells,
m_mr.main);
m_copy.setup(measurements_buffer);

// Counter for number of measurements
vecmem::unique_alloc_ptr<unsigned int> num_measurements_device =
vecmem::make_unique_alloc<unsigned int>(m_mr.main);
CUDA_ERROR_CHECK(cudaMemsetAsync(num_measurements_device.get(), 0,
sizeof(unsigned int), stream));

const unsigned short max_cells_per_partition =
(m_target_cells_per_partition * MAX_CELLS_PER_THREAD +
TARGET_CELLS_PER_THREAD - 1) /
TARGET_CELLS_PER_THREAD;
const unsigned int threads_per_partition =
(m_target_cells_per_partition + TARGET_CELLS_PER_THREAD - 1) /
TARGET_CELLS_PER_THREAD;
const unsigned int num_partitions =
(num_cells + m_target_cells_per_partition - 1) /
m_target_cells_per_partition;

// Create buffer for linking cells to their spacepoints.
vecmem::data::vector_buffer<unsigned int> cell_links(num_cells, m_mr.main);
m_copy.setup(cell_links);

// Launch ccl kernel. Each thread will handle a single cell.
kernels::
ccl_kernel<<<num_partitions, threads_per_partition,
2 * max_cells_per_partition * sizeof(index_t), stream>>>(
cells, modules, max_cells_per_partition,
m_target_cells_per_partition, measurements_buffer,
*num_measurements_device, cell_links);

CUDA_ERROR_CHECK(cudaGetLastError());

// Copy number of measurements to host
vecmem::unique_alloc_ptr<unsigned int> num_measurements_host =
vecmem::make_unique_alloc<unsigned int>(
(m_mr.host != nullptr) ? *(m_mr.host) : m_mr.main);
CUDA_ERROR_CHECK(cudaMemcpyAsync(
num_measurements_host.get(), num_measurements_device.get(),
sizeof(unsigned int), cudaMemcpyDeviceToHost, stream));
m_stream.synchronize();

// Create a new measurement buffer with a right size
measurement_collection_types::buffer new_measurements_buffer(
*num_measurements_host, m_mr.main);
m_copy.setup(new_measurements_buffer);

vecmem::device_vector<measurement> measurements_device(measurements_buffer);
vecmem::device_vector<measurement> new_measurements_device(
new_measurements_buffer);

CUDA_ERROR_CHECK(cudaMemcpyAsync(
new_measurements_device.begin(), measurements_device.begin(),
sizeof(measurement) * (*num_measurements_host),
cudaMemcpyDeviceToDevice, stream));

m_stream.synchronize();

return new_measurements_buffer;
}

} // namespace traccc::cuda::experimental
2 changes: 2 additions & 0 deletions device/sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ enable_language( SYCL )
traccc_add_library( traccc_sycl sycl TYPE SHARED
# header files
"include/traccc/sycl/clusterization/clusterization_algorithm.hpp"
"include/traccc/sycl/clusterization/experimental/clusterization_algorithm.hpp"
"include/traccc/sycl/fitting/fitting_algorithm.hpp"
"include/traccc/sycl/seeding/experimental/spacepoint_formation.hpp"
"include/traccc/sycl/seeding/seeding_algorithm.hpp"
Expand All @@ -27,6 +28,7 @@ traccc_add_library( traccc_sycl sycl TYPE SHARED
"include/traccc/sycl/utils/barrier.hpp"
# implementation files
"src/clusterization/clusterization_algorithm.sycl"
"src/clusterization/experimental/clusterization_algorithm.sycl"
"src/fitting/fitting_algorithm.sycl"
"src/seeding/experimental/spacepoint_formation.sycl"
"src/seeding/seed_finding.sycl"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2023 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

// SYCL library include(s).
#include "traccc/sycl/utils/queue_wrapper.hpp"

// Project include(s).
#include "traccc/edm/cell.hpp"
#include "traccc/edm/measurement.hpp"
#include "traccc/utils/algorithm.hpp"
#include "traccc/utils/memory_resource.hpp"

// VecMem include(s).
#include <vecmem/utils/copy.hpp>

namespace traccc::sycl::experimental {

/// Algorithm performing hit clusterization
///
/// This algorithm implements hit clusterization in a massively-parallel
/// approach. Each thread handles a pre-determined number of detector cells.
///
/// This algorithm returns a buffer which is not necessarily filled yet. A
/// synchronisation statement is required before destroying this buffer.
///
class clusterization_algorithm
: public algorithm<measurement_collection_types::buffer(
const cell_collection_types::const_view&,
const cell_module_collection_types::const_view&)> {

public:
/// Constructor for clusterization algorithm
///
/// @param mr The memory resource(s) to use in the algorithm
/// @param copy The copy object to use for copying data between device
/// and host memory blocks
/// @param str The CUDA stream to perform the operations in
/// @param target_cells_per_partition the average number of cells in each
/// partition
///
clusterization_algorithm(const traccc::memory_resource& mr,
vecmem::copy& copy, queue_wrapper queue,
const unsigned short target_cells_per_partition);
// const unsigned short target_cells_per_partition);

/// Callable operator for clusterization algorithm
///
/// @param cells a collection of cells
/// @param modules a collection of modules
/// @return a spacepoint collection (buffer) and a collection (buffer)
/// of links from cells to the spacepoints they belong to.
output_type operator()(
const cell_collection_types::const_view& cells,
const cell_module_collection_types::const_view& modules) const override;

private:
/// The average number of cells in each partition
unsigned short m_target_cells_per_partition;
/// The maximum number of threads in a work group
unsigned int m_max_work_group_size;

/// The memory resource(s) to use
traccc::memory_resource m_mr;
/// The copy object to use
vecmem::copy& m_copy;
/// The SYCL queue object
mutable queue_wrapper m_queue;
};

} // namespace traccc::sycl::experimental
Loading

0 comments on commit af6dcca

Please sign in to comment.