From 829f02130ee8922adaa40866cdfd96156fe96482 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Wed, 22 Jan 2025 18:31:13 +0100 Subject: [PATCH] Introduction of the raft::device_resources_snmg type --- cpp/include/raft/comms/nccl_clique.hpp | 156 ------------- .../raft/core/device_resources_snmg.hpp | 217 ++++++++++++++++++ .../raft/core/resource/nccl_clique.hpp | 66 ------ cpp/include/raft/core/resources.hpp | 3 +- docs/source/cpp_api/core_resources.rst | 17 ++ 5 files changed, 236 insertions(+), 223 deletions(-) delete mode 100644 cpp/include/raft/comms/nccl_clique.hpp create mode 100644 cpp/include/raft/core/device_resources_snmg.hpp delete mode 100644 cpp/include/raft/core/resource/nccl_clique.hpp diff --git a/cpp/include/raft/comms/nccl_clique.hpp b/cpp/include/raft/comms/nccl_clique.hpp deleted file mode 100644 index c6520af753..0000000000 --- a/cpp/include/raft/comms/nccl_clique.hpp +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include - -#include - -/** - * @brief Error checking macro for NCCL runtime API functions. - * - * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an - * exception detailing the NCCL error that occurred - */ -#define RAFT_NCCL_TRY(call) \ - do { \ - ncclResult_t const status = (call); \ - if (ncclSuccess != status) { \ - std::string msg{}; \ - SET_ERROR_MSG(msg, \ - "NCCL error encountered at: ", \ - "call='%s', Reason=%d:%s", \ - #call, \ - status, \ - ncclGetErrorString(status)); \ - throw raft::logic_error(msg); \ - } \ - } while (0); - -namespace raft::comms { -void build_comms_nccl_only(raft::resources* handle, ncclComm_t nccl_comm, int num_ranks, int rank); -} - -namespace raft::comms { - -struct nccl_clique { - using pool_mr = rmm::mr::pool_memory_resource; - - /** - * Instantiates a NCCL clique with all available GPUs - * - * @param[in] percent_of_free_memory percentage of device memory to pre-allocate as memory pool - * - */ - nccl_clique(int percent_of_free_memory = 80) - : root_rank_(0), - percent_of_free_memory_(percent_of_free_memory), - per_device_pools_(0), - device_resources_(0) - { - cudaGetDeviceCount(&num_ranks_); - device_ids_.resize(num_ranks_); - std::iota(device_ids_.begin(), device_ids_.end(), 0); - nccl_comms_.resize(num_ranks_); - nccl_clique_init(); - } - - /** - * Instantiates a NCCL clique - * - * Usage example: - * @code{.cpp} - * int n_devices; - * cudaGetDeviceCount(&n_devices); - * std::vector device_ids(n_devices); - * std::iota(device_ids.begin(), device_ids.end(), 0); - * cuvs::neighbors::mg::nccl_clique& clique(device_ids); // first device is the root rank - * @endcode - * - * @param[in] device_ids list of device IDs to be used to initiate the clique - * @param[in] percent_of_free_memory percentage of device memory to pre-allocate as memory pool - * - */ - nccl_clique(const std::vector& device_ids, int percent_of_free_memory = 80) - : root_rank_(0), - num_ranks_(device_ids.size()), - percent_of_free_memory_(percent_of_free_memory), - device_ids_(device_ids), - nccl_comms_(device_ids.size()), - per_device_pools_(0), - device_resources_(0) - { - nccl_clique_init(); - } - - void nccl_clique_init() - { - RAFT_NCCL_TRY(ncclCommInitAll(nccl_comms_.data(), num_ranks_, device_ids_.data())); - - for (int rank = 0; rank < num_ranks_; rank++) { - RAFT_CUDA_TRY(cudaSetDevice(device_ids_[rank])); - - // create a pool memory resource for each device - auto old_mr = rmm::mr::get_current_device_resource(); - per_device_pools_.push_back(std::make_unique( - old_mr, rmm::percent_of_free_device_memory(percent_of_free_memory_))); - rmm::cuda_device_id id(device_ids_[rank]); - rmm::mr::set_per_device_resource(id, per_device_pools_.back().get()); - - // create a device resource handle for each device - device_resources_.emplace_back(); - - // add NCCL communications to the device resource handle - raft::comms::build_comms_nccl_only( - &device_resources_[rank], nccl_comms_[rank], num_ranks_, rank); - } - - for (int rank = 0; rank < num_ranks_; rank++) { - RAFT_CUDA_TRY(cudaSetDevice(device_ids_[rank])); - raft::resource::sync_stream(device_resources_[rank]); - } - } - - const raft::device_resources& set_current_device_to_root_rank() const - { - int root_device_id = device_ids_[root_rank_]; - RAFT_CUDA_TRY(cudaSetDevice(root_device_id)); - return device_resources_[root_rank_]; - } - - ~nccl_clique() - { -#pragma omp parallel for // necessary to avoid hangs - for (int rank = 0; rank < num_ranks_; rank++) { - cudaSetDevice(device_ids_[rank]); - ncclCommDestroy(nccl_comms_[rank]); - rmm::cuda_device_id id(device_ids_[rank]); - rmm::mr::set_per_device_resource(id, nullptr); - } - } - - int root_rank_; - int num_ranks_; - int percent_of_free_memory_; - std::vector device_ids_; - std::vector nccl_comms_; - std::vector> per_device_pools_; - std::vector device_resources_; -}; - -} // namespace raft::comms diff --git a/cpp/include/raft/core/device_resources_snmg.hpp b/cpp/include/raft/core/device_resources_snmg.hpp new file mode 100644 index 0000000000..f20a81a1c6 --- /dev/null +++ b/cpp/include/raft/core/device_resources_snmg.hpp @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +#include +#include + +/** + * @brief Error checking macro for NCCL runtime API functions. + * + * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an + * exception detailing the NCCL error that occurred + */ +#define RAFT_NCCL_TRY(call) \ + do { \ + ncclResult_t const status = (call); \ + if (ncclSuccess != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, \ + "NCCL error encountered at: ", \ + "call='%s', Reason=%d:%s", \ + #call, \ + status, \ + ncclGetErrorString(status)); \ + throw raft::logic_error(msg); \ + } \ + } while (0); + +namespace raft { + +/** + * @brief SNMG (single-node multi-GPU) resource container object that stores a NCCL clique and all + * necessary resources used for calling device functions, cuda kernels, libraries and/or NCCL + * communications on each GPU. Note the `device_resources_snmg` object can also be used as a classic + * `device_resources` object. The associated resources will be the ones of the GPU used during + * object instantiation and a GPU switch operation will be ordered during the retrieval of said + * resources. + * + * The `device_resources_snmg` class is intended to be used in a single process to manage several + * GPUs. Please note that NCCL communications are the responsibility of the user. Blocking NCCL + * calls will sometimes require the use of several threads to avoid hangs. + */ +class device_resources_snmg : public device_resources { + public: + /** + * @brief Construct a SNMG resources instance with all available GPUs + */ + device_resources_snmg() : device_resources(), root_rank_(0) + { + cudaGetDevice(&main_gpu_id_); + + int num_ranks; + RAFT_CUDA_TRY(cudaGetDeviceCount(&num_ranks)); + device_ids_.resize(num_ranks); + std::iota(device_ids_.begin(), device_ids_.end(), 0); + nccl_comms_.resize(num_ranks); + initialize(); + } + + /** + * @brief Construct a SNMG resources instance with a subset of available GPUs + * + * @param[in] device_ids List of device IDs to be used by the NCCL clique + */ + device_resources_snmg(const std::vector& device_ids) + : device_resources(), root_rank_(0), device_ids_(device_ids), nccl_comms_(device_ids.size()) + { + cudaGetDevice(&main_gpu_id_); + + initialize(); + } + + /** + * @brief SNMG resources instance copy constructor + * + * @param[in] clique A SNMG resources instance + */ + device_resources_snmg(const device_resources_snmg& clique) + : device_resources(clique), + root_rank_(clique.root_rank_), + main_gpu_id_(clique.main_gpu_id_), + device_ids_(clique.device_ids_), + nccl_comms_(clique.nccl_comms_), + device_resources_(clique.device_resources_) + { + } + + device_resources_snmg(device_resources_snmg&&) = delete; + device_resources_snmg& operator=(device_resources_snmg&&) = delete; + + /** + * @brief Set root rank of NCCL clique + */ + inline int set_root_rank(int rank) { this->root_rank_ = rank; } + + /** + * @brief Get root rank of NCCL clique + */ + inline int get_root_rank() const { return this->root_rank_; } + + /** + * @brief Get number of ranks in NCCL clique + */ + inline int get_num_ranks() const { return this->device_ids_.size(); } + + /** + * @brief Get device ID of rank in NCCL clique + */ + inline int get_device_id(int rank) const { return this->device_ids_[rank]; } + + /** + * @brief Get NCCL comm object of rank in NCCL clique + */ + inline ncclComm_t get_nccl_comm(int rank) const { return this->nccl_comms_[rank]; } + + /** + * @brief Get raft::device_resources object of rank in NCCL clique + */ + inline const raft::device_resources& get_device_resources(int rank) const + { + return this->device_resources_[rank]; + } + + /** + * @brief Set current device ID to root rank and return its raft::device_resources object + */ + inline const raft::device_resources& set_current_device_to_root_rank() const + { + return set_current_device_to_rank(get_root_rank()); + } + + /** + * @brief Set current device ID to rank and return its raft::device_resources object + */ + inline const raft::device_resources& set_current_device_to_rank(int rank) const + { + RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); + return get_device_resources(rank); + } + + /** + * @brief Set a memory pool on all GPUs of the clique + */ + void set_memory_pool(int percent_of_free_memory) const + { + for (int rank = 0; rank < get_num_ranks(); rank++) { + RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); + size_t limit = + rmm::percent_of_free_device_memory(percent_of_free_memory); // check limit for each device + raft::resource::set_workspace_to_pool_resource(get_device_resources(rank), limit); + } + cudaSetDevice(this->main_gpu_id_); + } + + bool has_resource_factory(resource::resource_type resource_type) const override + { + cudaSetDevice(this->main_gpu_id_); + return raft::resources::has_resource_factory(resource_type); + } + + /** Destroys all held-up resources */ + ~device_resources_snmg() + { +#pragma omp parallel for // necessary to avoid hangs + for (int rank = 0; rank < get_num_ranks(); rank++) { + RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); + RAFT_NCCL_TRY(ncclCommDestroy(get_nccl_comm(rank))); + } + cudaSetDevice(this->main_gpu_id_); + } + + private: + /** + * @brief Initializes the NCCL clique and raft::device_resources objects + */ + void initialize() + { + RAFT_NCCL_TRY(ncclCommInitAll(nccl_comms_.data(), get_num_ranks(), device_ids_.data())); + + for (int rank = 0; rank < get_num_ranks(); rank++) { + RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); + device_resources_.emplace_back(); + + // ideally add the ncclComm_t to the device_resources object with + // raft::comms::build_comms_nccl_only + } + cudaSetDevice(this->main_gpu_id_); + } + + int root_rank_; + int main_gpu_id_; + std::vector device_ids_; + std::vector nccl_comms_; + std::vector device_resources_; + +}; // class device_resources_snmg + +} // namespace raft diff --git a/cpp/include/raft/core/resource/nccl_clique.hpp b/cpp/include/raft/core/resource/nccl_clique.hpp deleted file mode 100644 index edda5043ae..0000000000 --- a/cpp/include/raft/core/resource/nccl_clique.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include - -#include - -namespace raft::resource { - -class nccl_clique_resource : public resource { - public: - nccl_clique_resource() : clique_(std::make_unique()) {} - ~nccl_clique_resource() override {} - void* get_resource() override { return clique_.get(); } - - private: - std::unique_ptr clique_; -}; - -/** Factory that knows how to construct a specific raft::resource to populate the res_t. */ -class nccl_clique_resource_factory : public resource_factory { - public: - resource_type get_resource_type() override { return resource_type::NCCL_CLIQUE; } - resource* make_resource() override { return new nccl_clique_resource(); } -}; - -/** - * @defgroup nccl_clique_resource resource functions - * @{ - */ - -/** - * Retrieves a NCCL clique from raft res if it exists, otherwise initializes it and return it. - * - * @param[in] res the raft resources object - * @return NCCL clique - */ -inline const raft::comms::nccl_clique& get_nccl_clique(resources const& res) -{ - if (!res.has_resource_factory(resource_type::NCCL_CLIQUE)) { - res.add_resource_factory(std::make_shared()); - } - return *res.get_resource(resource_type::NCCL_CLIQUE); -}; - -/** - * @} - */ - -} // namespace raft::resource diff --git a/cpp/include/raft/core/resources.hpp b/cpp/include/raft/core/resources.hpp index b0827d8e11..44525edb23 100644 --- a/cpp/include/raft/core/resources.hpp +++ b/cpp/include/raft/core/resources.hpp @@ -72,6 +72,7 @@ class resources { resources(const resources& res) : factories_(res.factories_), resources_(res.resources_) {} resources(resources&&) = delete; resources& operator=(resources&&) = delete; + virtual ~resources() {} /** * @brief Returns true if a resource_factory has been registered for the @@ -79,7 +80,7 @@ class resources { * @param resource_type resource type to check * @return true if resource_factory is registered for the given resource_type */ - bool has_resource_factory(resource::resource_type resource_type) const + virtual bool has_resource_factory(resource::resource_type resource_type) const { std::lock_guard _(mutex_); return factories_.at(resource_type).first != resource::resource_type::LAST_KEY; diff --git a/docs/source/cpp_api/core_resources.rst b/docs/source/cpp_api/core_resources.rst index 0da11acae6..3c242af848 100644 --- a/docs/source/cpp_api/core_resources.rst +++ b/docs/source/cpp_api/core_resources.rst @@ -55,6 +55,23 @@ namespace *raft::core* :project: RAFT :members: +SNMG Device Resources +--------------------- + +The `raft::device_resources_snmg` provides a convenient way to design SNMG +(single-node multi-GPU) algorithms. It initiates device-related resources +for a set of devices forming clique. This includes NCCL communications. +GPUs can be addressed and exchanges be made over multiple threads +for performance or convenience. + +``#include `` + +namespace *raft::core* + +.. doxygenclass:: raft::device_resources_snmg + :project: RAFT + :members: + Resource Functions ------------------