From 7a2874163ba14a70374550549555e47a914d7110 Mon Sep 17 00:00:00 2001 From: Guilherme Date: Mon, 13 Mar 2023 10:57:52 +0100 Subject: [PATCH] Made cuda seeding and track parameters estimation use asynchronous copies and kernel launches --- .../traccc/cuda/seeding/seed_finding.hpp | 17 ++- .../traccc/cuda/seeding/seeding_algorithm.hpp | 12 +- .../cuda/seeding/spacepoint_binning.hpp | 12 +- .../cuda/seeding/track_params_estimation.hpp | 15 ++- device/cuda/src/seeding/seed_finding.cu | 123 ++++++++++-------- device/cuda/src/seeding/seeding_algorithm.cpp | 10 +- device/cuda/src/seeding/spacepoint_binning.cu | 38 +++--- .../src/seeding/track_params_estimation.cu | 30 ++--- examples/run/cuda/full_chain_algorithm.cpp | 12 +- examples/run/cuda/seeding_example_cuda.cpp | 10 +- examples/run/cuda/seq_example_cuda.cpp | 6 +- 11 files changed, 164 insertions(+), 121 deletions(-) diff --git a/device/cuda/include/traccc/cuda/seeding/seed_finding.hpp b/device/cuda/include/traccc/cuda/seeding/seed_finding.hpp index 5f2d050052..d4abd19036 100644 --- a/device/cuda/include/traccc/cuda/seeding/seed_finding.hpp +++ b/device/cuda/include/traccc/cuda/seeding/seed_finding.hpp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2021-2022 CERN for the benefit of the ACTS project + * (c) 2021-2023 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -8,6 +8,7 @@ #pragma once // Project include(s). +#include "traccc/cuda/utils/stream.hpp" #include "traccc/edm/alt_seed.hpp" #include "traccc/edm/spacepoint.hpp" #include "traccc/seeding/detail/seeding_config.hpp" @@ -34,10 +35,14 @@ class seed_finding : public algorithm m_copy; + + /// The copy object to use + vecmem::copy& m_copy; + /// The CUDA stream to use + stream& m_stream; }; } // namespace traccc::cuda diff --git a/device/cuda/include/traccc/cuda/seeding/seeding_algorithm.hpp b/device/cuda/include/traccc/cuda/seeding/seeding_algorithm.hpp index 38407e2c52..d78af1bcab 100644 --- a/device/cuda/include/traccc/cuda/seeding/seeding_algorithm.hpp +++ b/device/cuda/include/traccc/cuda/seeding/seeding_algorithm.hpp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2021-2022 CERN for the benefit of the ACTS project + * (c) 2021-2023 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -10,6 +10,7 @@ // Library include(s). #include "traccc/cuda/seeding/seed_finding.hpp" #include "traccc/cuda/seeding/spacepoint_binning.hpp" +#include "traccc/cuda/utils/stream.hpp" // Project include(s). #include "traccc/edm/alt_seed.hpp" @@ -17,7 +18,7 @@ #include "traccc/utils/algorithm.hpp" // VecMem include(s). -#include +#include // traccc library include(s). #include "traccc/utils/memory_resource.hpp" @@ -32,8 +33,13 @@ class seeding_algorithm : public algorithm m_axes; traccc::memory_resource m_mr; - std::unique_ptr m_copy; + + /// The copy object to use + vecmem::copy& m_copy; + /// The CUDA stream to use + stream& m_stream; }; // class spacepoint_binning diff --git a/device/cuda/include/traccc/cuda/seeding/track_params_estimation.hpp b/device/cuda/include/traccc/cuda/seeding/track_params_estimation.hpp index 8549387776..49d3f40df6 100644 --- a/device/cuda/include/traccc/cuda/seeding/track_params_estimation.hpp +++ b/device/cuda/include/traccc/cuda/seeding/track_params_estimation.hpp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2021-2022 CERN for the benefit of the ACTS project + * (c) 2021-2023 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -8,6 +8,7 @@ #pragma once // Project include(s) +#include "traccc/cuda/utils/stream.hpp" #include "traccc/edm/alt_seed.hpp" #include "traccc/edm/spacepoint.hpp" #include "traccc/edm/track_parameters.hpp" @@ -30,7 +31,11 @@ struct track_params_estimation /// Constructor for track_params_estimation /// /// @param mr is the memory resource - track_params_estimation(const traccc::memory_resource& mr); + /// @param copy The copy object to use for copying data between device + /// and host memory blocks + /// @param str The CUDA stream to perform the operations in + track_params_estimation(const traccc::memory_resource& mr, + vecmem::copy& copy, stream& str); /// Callable operator for track_params_esitmation /// @@ -45,8 +50,10 @@ struct track_params_estimation private: /// Memory resource used by the algorithm traccc::memory_resource m_mr; - /// Copy object used by the algorithm - std::unique_ptr m_copy; + /// The copy object to use + vecmem::copy& m_copy; + /// The CUDA stream to use + stream& m_stream; }; } // namespace cuda diff --git a/device/cuda/src/seeding/seed_finding.cu b/device/cuda/src/seeding/seed_finding.cu index ce1b78b3a4..e7a7e56aab 100644 --- a/device/cuda/src/seeding/seed_finding.cu +++ b/device/cuda/src/seeding/seed_finding.cu @@ -6,6 +6,7 @@ */ // Local include(s). +#include "../utils/utils.hpp" #include "traccc/cuda/seeding/seed_finding.hpp" #include "traccc/cuda/utils/definitions.hpp" @@ -141,40 +142,38 @@ __global__ void select_seeds( seed_finding::seed_finding(const seedfinder_config& config, const seedfilter_config& filter_config, - const traccc::memory_resource& mr) + const traccc::memory_resource& mr, + vecmem::copy& copy, stream& str) : m_seedfinder_config(config.toInternalUnits()), m_seedfilter_config(filter_config.toInternalUnits()), - m_mr(mr) { - - // Initialize m_copy ptr based on memory resources that were given - if (mr.host) { - m_copy = std::make_unique(); - } else { - m_copy = std::make_unique(); - } -} + m_mr(mr), + m_copy(copy), + m_stream(str) {} seed_finding::output_type seed_finding::operator()( const spacepoint_collection_types::const_view& spacepoints_view, const sp_grid_const_view& g2_view) const { + // Get a convenience variable for the stream that we'll be using. + cudaStream_t stream = details::get_stream(m_stream); + // Get the sizes from the grid view - auto grid_sizes = m_copy->get_sizes(g2_view._data_view); + auto grid_sizes = m_copy.get_sizes(g2_view._data_view); // Create prefix sum buffer vecmem::data::vector_buffer sp_grid_prefix_sum_buff = - make_prefix_sum_buff(grid_sizes, *m_copy, m_mr); + make_prefix_sum_buff(grid_sizes, m_copy, m_mr, m_stream); // Set up the doublet counter buffer. device::doublet_counter_collection_types::buffer doublet_counter_buffer = { - m_copy->get_size(sp_grid_prefix_sum_buff), 0, m_mr.main}; - m_copy->setup(doublet_counter_buffer); + m_copy.get_size(sp_grid_prefix_sum_buff), 0, m_mr.main}; + m_copy.setup(doublet_counter_buffer); // Calculate the number of threads and thread blocks to run the doublet // counting kernel for. const unsigned int nDoubletCountThreads = WARP_SIZE * 2; const unsigned int nDoubletCountBlocks = - (m_copy->get_size(sp_grid_prefix_sum_buff) + nDoubletCountThreads - 1) / + (m_copy.get_size(sp_grid_prefix_sum_buff) + nDoubletCountThreads - 1) / nDoubletCountThreads; // Counter for the total number of doublets and triplets @@ -182,54 +181,59 @@ seed_finding::output_type seed_finding::operator()( globalCounter_device = vecmem::make_unique_alloc( m_mr.main); - CUDA_ERROR_CHECK(cudaMemset(globalCounter_device.get(), 0, - sizeof(device::seeding_global_counter))); + CUDA_ERROR_CHECK(cudaMemsetAsync(globalCounter_device.get(), 0, + sizeof(device::seeding_global_counter), + stream)); // Count the number of doublets that we need to produce. - kernels::count_doublets<<>>( + kernels::count_doublets<<>>( m_seedfinder_config, g2_view, sp_grid_prefix_sum_buff, doublet_counter_buffer, (*globalCounter_device).m_nMidBot, (*globalCounter_device).m_nMidTop); CUDA_ERROR_CHECK(cudaGetLastError()); - CUDA_ERROR_CHECK(cudaDeviceSynchronize()); + m_stream.synchronize(); // Get the summary values. device::seeding_global_counter globalCounter_host; - CUDA_ERROR_CHECK(cudaMemcpy(&globalCounter_host, globalCounter_device.get(), - sizeof(device::seeding_global_counter), - cudaMemcpyDeviceToHost)); + CUDA_ERROR_CHECK(cudaMemcpyAsync(&globalCounter_host, + globalCounter_device.get(), + sizeof(device::seeding_global_counter), + cudaMemcpyDeviceToHost, stream)); + m_stream.synchronize(); // Set up the doublet counter buffers. device::device_doublet_collection_types::buffer doublet_buffer_mb = { globalCounter_host.m_nMidBot, m_mr.main}; - m_copy->setup(doublet_buffer_mb); + m_copy.setup(doublet_buffer_mb); device::device_doublet_collection_types::buffer doublet_buffer_mt = { globalCounter_host.m_nMidTop, m_mr.main}; - m_copy->setup(doublet_buffer_mt); + m_copy.setup(doublet_buffer_mt); // Calculate the number of threads and thread blocks to run the doublet // finding kernel for. const unsigned int nDoubletFindThreads = WARP_SIZE * 2; const unsigned int doublet_counter_buffer_size = - m_copy->get_size(doublet_counter_buffer); + m_copy.get_size(doublet_counter_buffer); const unsigned int nDoubletFindBlocks = (doublet_counter_buffer_size + nDoubletFindThreads - 1) / nDoubletFindThreads; // Find all of the spacepoint doublets. - kernels::find_doublets<<>>( - m_seedfinder_config, g2_view, doublet_counter_buffer, doublet_buffer_mb, - doublet_buffer_mt); + kernels:: + find_doublets<<>>( + m_seedfinder_config, g2_view, doublet_counter_buffer, + doublet_buffer_mb, doublet_buffer_mt); // Set up the triplet counter buffers device::triplet_counter_spM_collection_types::buffer triplet_counter_spM_buffer = {doublet_counter_buffer_size, m_mr.main}; - m_copy->setup(triplet_counter_spM_buffer); - m_copy->memset(triplet_counter_spM_buffer, 0); + m_copy.setup(triplet_counter_spM_buffer); + m_copy.memset(triplet_counter_spM_buffer, 0); device::triplet_counter_collection_types::buffer triplet_counter_midBot_buffer = {globalCounter_host.m_nMidBot, 0, m_mr.main}; - m_copy->setup(triplet_counter_midBot_buffer); + m_copy.setup(triplet_counter_midBot_buffer); // Calculate the number of threads and thread blocks to run the doublet // counting kernel for. @@ -240,10 +244,11 @@ seed_finding::output_type seed_finding::operator()( // Wait here for the find doublets kernel to finish CUDA_ERROR_CHECK(cudaGetLastError()); - CUDA_ERROR_CHECK(cudaDeviceSynchronize()); + m_stream.synchronize(); // Count the number of triplets that we need to produce. - kernels::count_triplets<<>>( + kernels::count_triplets<<>>( m_seedfinder_config, g2_view, doublet_counter_buffer, doublet_buffer_mb, doublet_buffer_mt, triplet_counter_spM_buffer, triplet_counter_midBot_buffer); @@ -257,37 +262,42 @@ seed_finding::output_type seed_finding::operator()( // Wait here for the count triplets kernel to finish CUDA_ERROR_CHECK(cudaGetLastError()); - CUDA_ERROR_CHECK(cudaDeviceSynchronize()); + m_stream.synchronize(); // Reduce the triplet counts per spM. - kernels::reduce_triplet_counts<<>>( + kernels::reduce_triplet_counts<<>>( doublet_counter_buffer, triplet_counter_spM_buffer, (*globalCounter_device).m_nTriplets); CUDA_ERROR_CHECK(cudaGetLastError()); - CUDA_ERROR_CHECK(cudaDeviceSynchronize()); + m_stream.synchronize(); - CUDA_ERROR_CHECK(cudaMemcpy(&globalCounter_host, globalCounter_device.get(), - sizeof(device::seeding_global_counter), - cudaMemcpyDeviceToHost)); + CUDA_ERROR_CHECK(cudaMemcpyAsync(&globalCounter_host, + globalCounter_device.get(), + sizeof(device::seeding_global_counter), + cudaMemcpyDeviceToHost, stream)); + m_stream.synchronize(); // Set up the triplet buffer. device::device_triplet_collection_types::buffer triplet_buffer = { globalCounter_host.m_nTriplets, m_mr.main}; - m_copy->setup(triplet_buffer); + m_copy.setup(triplet_buffer); // Calculate the number of threads and thread blocks to run the triplet // finding kernel for. const unsigned int nTripletFindThreads = WARP_SIZE * 2; const unsigned int nTripletFindBlocks = - (m_copy->get_size(triplet_counter_midBot_buffer) + nTripletFindThreads - + (m_copy.get_size(triplet_counter_midBot_buffer) + nTripletFindThreads - 1) / nTripletFindThreads; // Find all of the spacepoint triplets. - kernels::find_triplets<<>>( - m_seedfinder_config, m_seedfilter_config, g2_view, - doublet_counter_buffer, doublet_buffer_mt, triplet_counter_spM_buffer, - triplet_counter_midBot_buffer, triplet_buffer); + kernels:: + find_triplets<<>>( + m_seedfinder_config, m_seedfilter_config, g2_view, + doublet_counter_buffer, doublet_buffer_mt, + triplet_counter_spM_buffer, triplet_counter_midBot_buffer, + triplet_buffer); // Calculate the number of threads and thread blocks to run the weight // updating kernel for. @@ -298,20 +308,20 @@ seed_finding::output_type seed_finding::operator()( // Wait here for the find triplets kernel to finish CUDA_ERROR_CHECK(cudaGetLastError()); - CUDA_ERROR_CHECK(cudaDeviceSynchronize()); + m_stream.synchronize(); // Update the weights of all spacepoint triplets. kernels::update_triplet_weights<<< nWeightUpdatingBlocks, nWeightUpdatingThreads, sizeof(scalar) * m_seedfilter_config.compatSeedLimit * - nWeightUpdatingThreads>>>( - m_seedfilter_config, g2_view, triplet_counter_spM_buffer, - triplet_counter_midBot_buffer, triplet_buffer); + nWeightUpdatingThreads, + stream>>>(m_seedfilter_config, g2_view, triplet_counter_spM_buffer, + triplet_counter_midBot_buffer, triplet_buffer); // Create result object: collection of seeds alt_seed_collection_types::buffer seed_buffer( globalCounter_host.m_nTriplets, 0, m_mr.main); - m_copy->setup(seed_buffer); + m_copy.setup(seed_buffer); // Calculate the number of threads and thread blocks to run the seed // selecting kernel for. @@ -322,18 +332,19 @@ seed_finding::output_type seed_finding::operator()( // Wait here for the update triplet weights kernel to finish CUDA_ERROR_CHECK(cudaGetLastError()); - CUDA_ERROR_CHECK(cudaDeviceSynchronize()); + m_stream.synchronize(); // Create seeds out of selected triplets kernels::select_seeds<<>>( - m_seedfilter_config, spacepoints_view, g2_view, - triplet_counter_spM_buffer, triplet_counter_midBot_buffer, - triplet_buffer, seed_buffer); + nSeedSelectingThreads, + stream>>>(m_seedfilter_config, spacepoints_view, + g2_view, triplet_counter_spM_buffer, + triplet_counter_midBot_buffer, + triplet_buffer, seed_buffer); CUDA_ERROR_CHECK(cudaGetLastError()); - CUDA_ERROR_CHECK(cudaDeviceSynchronize()); + m_stream.synchronize(); return seed_buffer; } diff --git a/device/cuda/src/seeding/seeding_algorithm.cpp b/device/cuda/src/seeding/seeding_algorithm.cpp index 4828680878..879aaec65c 100644 --- a/device/cuda/src/seeding/seeding_algorithm.cpp +++ b/device/cuda/src/seeding/seeding_algorithm.cpp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2021-2022 CERN for the benefit of the ACTS project + * (c) 2021-2023 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -58,10 +58,12 @@ traccc::spacepoint_grid_config default_spacepoint_grid_config() { namespace traccc::cuda { -seeding_algorithm::seeding_algorithm(const traccc::memory_resource& mr) +seeding_algorithm::seeding_algorithm(const traccc::memory_resource& mr, + vecmem::copy& copy, stream& str) : m_spacepoint_binning(default_seedfinder_config(), - default_spacepoint_grid_config(), mr), - m_seed_finding(default_seedfinder_config(), seedfilter_config(), mr) {} + default_spacepoint_grid_config(), mr, copy, str), + m_seed_finding(default_seedfinder_config(), seedfilter_config(), mr, copy, + str) {} seeding_algorithm::output_type seeding_algorithm::operator()( const spacepoint_collection_types::const_view& spacepoints_view) const { diff --git a/device/cuda/src/seeding/spacepoint_binning.cu b/device/cuda/src/seeding/spacepoint_binning.cu index 441ef474f0..62d3b29e8c 100644 --- a/device/cuda/src/seeding/spacepoint_binning.cu +++ b/device/cuda/src/seeding/spacepoint_binning.cu @@ -1,11 +1,12 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2021-2022 CERN for the benefit of the ACTS project + * (c) 2021-2023 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ // Local include(s). +#include "../utils/utils.hpp" #include "traccc/cuda/seeding/spacepoint_binning.hpp" #include "traccc/cuda/utils/definitions.hpp" @@ -45,33 +46,30 @@ __global__ void populate_grid( spacepoint_binning::spacepoint_binning( const seedfinder_config& config, const spacepoint_grid_config& grid_config, - const traccc::memory_resource& mr) + const traccc::memory_resource& mr, vecmem::copy& copy, stream& str) : m_config(config.toInternalUnits()), m_axes(get_axes(grid_config.toInternalUnits(), (mr.host ? *(mr.host) : mr.main))), - m_mr(mr) { - - // Initialize m_copy ptr based on memory resources that were given - if (mr.host) { - m_copy = std::make_unique(); - } else { - m_copy = std::make_unique(); - } -} + m_mr(mr), + m_copy(copy), + m_stream(str) {} sp_grid_buffer spacepoint_binning::operator()( const spacepoint_collection_types::const_view& spacepoints_view) const { + // Get a convenience variable for the stream that we'll be using. + cudaStream_t stream = details::get_stream(m_stream); + // Get the spacepoint sizes from the view - auto sp_size = m_copy->get_size(spacepoints_view); + auto sp_size = m_copy.get_size(spacepoints_view); // Set up the container that will be filled with the required capacities for // the spacepoint grid. const std::size_t grid_bins = m_axes.first.n_bins * m_axes.second.n_bins; vecmem::data::vector_buffer grid_capacities_buff(grid_bins, m_mr.main); - m_copy->setup(grid_capacities_buff); - m_copy->memset(grid_capacities_buff, 0); + m_copy.setup(grid_capacities_buff); + m_copy.memset(grid_capacities_buff, 0); vecmem::data::vector_view grid_capacities_view = grid_capacities_buff; @@ -80,31 +78,31 @@ sp_grid_buffer spacepoint_binning::operator()( const unsigned int num_blocks = (sp_size + num_threads - 1) / num_threads; // Fill the grid capacity container. - kernels::count_grid_capacities<<>>( + kernels::count_grid_capacities<<>>( m_config, m_axes.first, m_axes.second, spacepoints_view, grid_capacities_view); CUDA_ERROR_CHECK(cudaGetLastError()); - CUDA_ERROR_CHECK(cudaDeviceSynchronize()); // Copy grid capacities back to the host vecmem::vector grid_capacities_host(m_mr.host ? m_mr.host : &(m_mr.main)); - (*m_copy)(grid_capacities_buff, grid_capacities_host); + m_copy(grid_capacities_buff, grid_capacities_host); + m_stream.synchronize(); // Create the grid buffer. sp_grid_buffer grid_buffer( m_axes.first, m_axes.second, std::vector(grid_bins, 0), std::vector(grid_capacities_host.begin(), grid_capacities_host.end()), m_mr.main, m_mr.host); - m_copy->setup(grid_buffer._buffer); + m_copy.setup(grid_buffer._buffer); sp_grid_view grid_view = grid_buffer; // Populate the grid. - kernels::populate_grid<<>>( + kernels::populate_grid<<>>( m_config, spacepoints_view, grid_view); CUDA_ERROR_CHECK(cudaGetLastError()); - CUDA_ERROR_CHECK(cudaDeviceSynchronize()); + m_stream.synchronize(); // Return the freshly filled buffer. return grid_buffer; diff --git a/device/cuda/src/seeding/track_params_estimation.cu b/device/cuda/src/seeding/track_params_estimation.cu index 5990c809fc..c57dc2993e 100644 --- a/device/cuda/src/seeding/track_params_estimation.cu +++ b/device/cuda/src/seeding/track_params_estimation.cu @@ -1,13 +1,16 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2021-2022 CERN for the benefit of the ACTS project + * (c) 2021-2023 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ -// Project include(s). +// Local include(s). +#include "../utils/utils.hpp" #include "traccc/cuda/seeding/track_params_estimation.hpp" #include "traccc/cuda/utils/definitions.hpp" + +// Project include(s). #include "traccc/seeding/device/estimate_track_params.hpp" // VecMem include(s). @@ -29,28 +32,23 @@ __global__ void estimate_track_params( } // namespace kernels track_params_estimation::track_params_estimation( - const traccc::memory_resource& mr) - : m_mr(mr) { - - // Initialize m_copy ptr based on memory resources that were given - if (mr.host) { - m_copy = std::make_unique(); - } else { - m_copy = std::make_unique(); - } -} + const traccc::memory_resource& mr, vecmem::copy& copy, stream& str) + : m_mr(mr), m_copy(copy), m_stream(str) {} track_params_estimation::output_type track_params_estimation::operator()( const spacepoint_collection_types::const_view& spacepoints_view, const alt_seed_collection_types::const_view& seeds_view) const { + // Get a convenience variable for the stream that we'll be using. + cudaStream_t stream = details::get_stream(m_stream); + // Get the size of the seeds view - const std::size_t seeds_size = m_copy->get_size(seeds_view); + const std::size_t seeds_size = m_copy.get_size(seeds_view); // Create device buffer for the parameters bound_track_parameters_collection_types::buffer params_buffer(seeds_size, m_mr.main); - m_copy->setup(params_buffer); + m_copy.setup(params_buffer); // Check if anything needs to be done. if (seeds_size == 0) { @@ -67,12 +65,12 @@ track_params_estimation::output_type track_params_estimation::operator()( unsigned int num_blocks = (seeds_size + num_threads - 1) / num_threads; // run the kernel - kernels::estimate_track_params<<>>( + kernels::estimate_track_params<<>>( spacepoints_view, seeds_view, params_buffer); // cuda error check CUDA_ERROR_CHECK(cudaGetLastError()); - CUDA_ERROR_CHECK(cudaDeviceSynchronize()); + m_stream.synchronize(); return params_buffer; } diff --git a/examples/run/cuda/full_chain_algorithm.cpp b/examples/run/cuda/full_chain_algorithm.cpp index 3964132a10..897e759fee 100644 --- a/examples/run/cuda/full_chain_algorithm.cpp +++ b/examples/run/cuda/full_chain_algorithm.cpp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2022 CERN for the benefit of the ACTS project + * (c) 2022-2023 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -39,9 +39,10 @@ full_chain_algorithm::full_chain_algorithm( m_target_cells_per_partition(target_cells_per_partition), m_clusterization(memory_resource{*m_cached_device_mr, &m_host_mr}, m_copy, m_stream, m_target_cells_per_partition), - m_seeding(memory_resource{*m_cached_device_mr, &m_host_mr}), + m_seeding(memory_resource{*m_cached_device_mr, &m_host_mr}, m_copy, + m_stream), m_track_parameter_estimation( - memory_resource{*m_cached_device_mr, &m_host_mr}) { + memory_resource{*m_cached_device_mr, &m_host_mr}, m_copy, m_stream) { // Tell the user what device is being used. int device = 0; @@ -63,9 +64,10 @@ full_chain_algorithm::full_chain_algorithm(const full_chain_algorithm& parent) m_target_cells_per_partition(parent.m_target_cells_per_partition), m_clusterization(memory_resource{*m_cached_device_mr, &m_host_mr}, m_copy, m_stream, m_target_cells_per_partition), - m_seeding(memory_resource{*m_cached_device_mr, &m_host_mr}), + m_seeding(memory_resource{*m_cached_device_mr, &m_host_mr}, m_copy, + m_stream), m_track_parameter_estimation( - memory_resource{*m_cached_device_mr, &m_host_mr}) {} + memory_resource{*m_cached_device_mr, &m_host_mr}, m_copy, m_stream) {} full_chain_algorithm::~full_chain_algorithm() { diff --git a/examples/run/cuda/seeding_example_cuda.cpp b/examples/run/cuda/seeding_example_cuda.cpp index 04d8a65cb0..95cd2c14b6 100644 --- a/examples/run/cuda/seeding_example_cuda.cpp +++ b/examples/run/cuda/seeding_example_cuda.cpp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2021-2022 CERN for the benefit of the ACTS project + * (c) 2021-2023 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -23,6 +23,7 @@ // VecMem include(s). #include #include +#include #include // System include(s). @@ -52,10 +53,13 @@ int seq_run(const traccc::seeding_input_config& i_cfg, traccc::seeding_algorithm sa(host_mr); traccc::track_params_estimation tp(host_mr); + traccc::cuda::stream stream; + vecmem::cuda::copy copy; + vecmem::cuda::async_copy async_copy{stream.cudaStream()}; - traccc::cuda::seeding_algorithm sa_cuda{mr}; - traccc::cuda::track_params_estimation tp_cuda{mr}; + traccc::cuda::seeding_algorithm sa_cuda{mr, async_copy, stream}; + traccc::cuda::track_params_estimation tp_cuda{mr, async_copy, stream}; // performance writer traccc::seeding_performance_writer sd_performance_writer( diff --git a/examples/run/cuda/seq_example_cuda.cpp b/examples/run/cuda/seq_example_cuda.cpp index 452dbd000f..55ee204114 100644 --- a/examples/run/cuda/seq_example_cuda.cpp +++ b/examples/run/cuda/seq_example_cuda.cpp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2021-2022 CERN for the benefit of the ACTS project + * (c) 2021-2023 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -79,8 +79,8 @@ int seq_run(const traccc::full_tracking_input_config& i_cfg, traccc::cuda::clusterization_algorithm ca_cuda( mr, async_copy, stream, common_opts.target_cells_per_partition); - traccc::cuda::seeding_algorithm sa_cuda(mr); - traccc::cuda::track_params_estimation tp_cuda(mr); + traccc::cuda::seeding_algorithm sa_cuda(mr, async_copy, stream); + traccc::cuda::track_params_estimation tp_cuda(mr, async_copy, stream); // performance writer traccc::seeding_performance_writer sd_performance_writer(