From cef106da55eb2d62619394d79113afad7896f47c Mon Sep 17 00:00:00 2001 From: beomki-yeo Date: Wed, 18 Oct 2023 23:19:27 +0200 Subject: [PATCH] Sort measurements before Ckf --- .../experimental/clusterization_algorithm.cu | 8 +++++ device/cuda/src/finding/finding_algorithm.cu | 32 ++++++------------- .../clusterization_algorithm.sycl | 6 ++++ tests/cuda/test_ckf_sparse_tracks.cpp | 3 ++ 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/device/cuda/src/clusterization/experimental/clusterization_algorithm.cu b/device/cuda/src/clusterization/experimental/clusterization_algorithm.cu index a46d15f858..ce1e635f56 100644 --- a/device/cuda/src/clusterization/experimental/clusterization_algorithm.cu +++ b/device/cuda/src/clusterization/experimental/clusterization_algorithm.cu @@ -19,6 +19,10 @@ // Vecmem include(s). #include +// Thrust include(s). +#include +#include + namespace traccc::cuda::experimental { namespace { @@ -141,6 +145,10 @@ clusterization_algorithm::output_type clusterization_algorithm::operator()( m_stream.synchronize(); + // Sort the measurements w.r.t geometry barcode + thrust::sort(thrust::cuda::par.on(stream), new_measurements_device.begin(), + new_measurements_device.end(), measurement_sort_comp()); + return new_measurements_buffer; } diff --git a/device/cuda/src/finding/finding_algorithm.cu b/device/cuda/src/finding/finding_algorithm.cu index b0e9d3fe57..f1a5fdd0e9 100644 --- a/device/cuda/src/finding/finding_algorithm.cu +++ b/device/cuda/src/finding/finding_algorithm.cu @@ -222,27 +222,16 @@ finding_algorithm::operator()( * Measurement Operations *****************************************************************/ - // Copy the measurements - measurement_collection_types::buffer sorted_measurements_buffer( - m_copy->get_size(measurements), m_mr.main); - measurement_collection_types::device sorted_measurements( - sorted_measurements_buffer); measurement_collection_types::const_device measurements_device( measurements); - thrust::copy(thrust::device, measurements_device.begin(), - measurements_device.end(), sorted_measurements.begin()); - - // Sort the measurements w.r.t geometry barcode - thrust::sort(thrust::device, sorted_measurements.begin(), - sorted_measurements.end(), measurement_sort_comp()); // Get copy of barcode uniques measurement_collection_types::buffer uniques_buffer{ - sorted_measurements.size(), m_mr.main}; + measurements_device.size(), m_mr.main}; measurement_collection_types::device uniques(uniques_buffer); measurement* end = thrust::unique_copy( - thrust::device, sorted_measurements.begin(), sorted_measurements.end(), + thrust::device, measurements_device.begin(), measurements_device.end(), uniques.begin(), measurement_equal_comp()); unsigned int n_modules = end - uniques.begin(); @@ -251,8 +240,8 @@ finding_algorithm::operator()( m_mr.main}; vecmem::device_vector upper_bounds(upper_bounds_buffer); - thrust::upper_bound(thrust::device, sorted_measurements.begin(), - sorted_measurements.end(), uniques.begin(), + thrust::upper_bound(thrust::device, measurements_device.begin(), + measurements_device.end(), uniques.begin(), uniques.begin() + n_modules, upper_bounds.begin(), measurement_sort_comp()); @@ -264,7 +253,7 @@ finding_algorithm::operator()( upper_bounds.end(), sizes.begin()); // Number of total measurements - const unsigned int n_total_measurements = sorted_measurements.size(); + const unsigned int n_total_measurements = measurements_device.size(); /***************************************************************** * Kernel1: Create barcode sequence @@ -362,10 +351,9 @@ finding_algorithm::operator()( if (nBlocks > 0) { kernels::find_tracks <<>>( - m_cfg, det_view, sorted_measurements_buffer, - barcodes_buffer, upper_bounds_buffer, in_params_buffer, - n_threads_buffer, step, - (*global_counter_device).n_measurements_per_thread, + m_cfg, det_view, measurements, barcodes_buffer, + upper_bounds_buffer, in_params_buffer, n_threads_buffer, + step, (*global_counter_device).n_measurements_per_thread, (*global_counter_device).n_total_threads, updated_params_buffer, link_map[step], (*global_counter_device).n_candidates); @@ -507,8 +495,8 @@ finding_algorithm::operator()( nThreads = WARP_SIZE * 2; nBlocks = (n_tips_total + nThreads - 1) / nThreads; kernels::build_tracks<<>>( - sorted_measurements_buffer, seeds_buffer, links_buffer, - param_to_link_buffer, tips_buffer, track_candidates_buffer); + measurements, seeds_buffer, links_buffer, param_to_link_buffer, + tips_buffer, track_candidates_buffer); CUDA_ERROR_CHECK(cudaGetLastError()); CUDA_ERROR_CHECK(cudaDeviceSynchronize()); diff --git a/device/sycl/src/clusterization/experimental/clusterization_algorithm.sycl b/device/sycl/src/clusterization/experimental/clusterization_algorithm.sycl index 4f81352885..3a276bb24a 100644 --- a/device/sycl/src/clusterization/experimental/clusterization_algorithm.sycl +++ b/device/sycl/src/clusterization/experimental/clusterization_algorithm.sycl @@ -159,6 +159,12 @@ clusterization_algorithm::output_type clusterization_algorithm::operator()( sizeof(measurement) * (*num_measurements_host)) .wait_and_throw(); + // @NOTE Uncomment once the onedpl is available + // oneapi::dpl::experimental::sort_async( + // oneapi::dpl::execution::dpcpp_default, + // new_measurements_device.begin(), new_measurements_device.end(), + // measurement_sort_comp()); + return new_measurements_buffer; } diff --git a/tests/cuda/test_ckf_sparse_tracks.cpp b/tests/cuda/test_ckf_sparse_tracks.cpp index 456ed48027..73abd49af2 100644 --- a/tests/cuda/test_ckf_sparse_tracks.cpp +++ b/tests/cuda/test_ckf_sparse_tracks.cpp @@ -179,6 +179,9 @@ TEST_P(CkfSparseTrackTests, Run) { traccc::measurement_collection_types::host& measurements_per_event = readOut.measurements; + std::sort(measurements_per_event.begin(), measurements_per_event.end(), + measurement_sort_comp()); + traccc::measurement_collection_types::buffer measurements_buffer( measurements_per_event.size(), mr.main); copy(vecmem::get_data(measurements_per_event), measurements_buffer);