diff --git a/cpp/include/raft/comms/nccl_clique.hpp b/cpp/include/raft/comms/nccl_clique.hpp new file mode 100644 index 0000000000..c6520af753 --- /dev/null +++ b/cpp/include/raft/comms/nccl_clique.hpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include + +/** + * @brief Error checking macro for NCCL runtime API functions. + * + * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an + * exception detailing the NCCL error that occurred + */ +#define RAFT_NCCL_TRY(call) \ + do { \ + ncclResult_t const status = (call); \ + if (ncclSuccess != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, \ + "NCCL error encountered at: ", \ + "call='%s', Reason=%d:%s", \ + #call, \ + status, \ + ncclGetErrorString(status)); \ + throw raft::logic_error(msg); \ + } \ + } while (0); + +namespace raft::comms { +void build_comms_nccl_only(raft::resources* handle, ncclComm_t nccl_comm, int num_ranks, int rank); +} + +namespace raft::comms { + +struct nccl_clique { + using pool_mr = rmm::mr::pool_memory_resource; + + /** + * Instantiates a NCCL clique with all available GPUs + * + * @param[in] percent_of_free_memory percentage of device memory to pre-allocate as memory pool + * + */ + nccl_clique(int percent_of_free_memory = 80) + : root_rank_(0), + percent_of_free_memory_(percent_of_free_memory), + per_device_pools_(0), + device_resources_(0) + { + cudaGetDeviceCount(&num_ranks_); + device_ids_.resize(num_ranks_); + std::iota(device_ids_.begin(), device_ids_.end(), 0); + nccl_comms_.resize(num_ranks_); + nccl_clique_init(); + } + + /** + * Instantiates a NCCL clique + * + * Usage example: + * @code{.cpp} + * int n_devices; + * cudaGetDeviceCount(&n_devices); + * std::vector device_ids(n_devices); + * std::iota(device_ids.begin(), device_ids.end(), 0); + * cuvs::neighbors::mg::nccl_clique& clique(device_ids); // first device is the root rank + * @endcode + * + * @param[in] device_ids list of device IDs to be used to initiate the clique + * @param[in] percent_of_free_memory percentage of device memory to pre-allocate as memory pool + * + */ + nccl_clique(const std::vector& device_ids, int percent_of_free_memory = 80) + : root_rank_(0), + num_ranks_(device_ids.size()), + percent_of_free_memory_(percent_of_free_memory), + device_ids_(device_ids), + nccl_comms_(device_ids.size()), + per_device_pools_(0), + device_resources_(0) + { + nccl_clique_init(); + } + + void nccl_clique_init() + { + RAFT_NCCL_TRY(ncclCommInitAll(nccl_comms_.data(), num_ranks_, device_ids_.data())); + + for (int rank = 0; rank < num_ranks_; rank++) { + RAFT_CUDA_TRY(cudaSetDevice(device_ids_[rank])); + + // create a pool memory resource for each device + auto old_mr = rmm::mr::get_current_device_resource(); + per_device_pools_.push_back(std::make_unique( + old_mr, rmm::percent_of_free_device_memory(percent_of_free_memory_))); + rmm::cuda_device_id id(device_ids_[rank]); + rmm::mr::set_per_device_resource(id, per_device_pools_.back().get()); + + // create a device resource handle for each device + device_resources_.emplace_back(); + + // add NCCL communications to the device resource handle + raft::comms::build_comms_nccl_only( + &device_resources_[rank], nccl_comms_[rank], num_ranks_, rank); + } + + for (int rank = 0; rank < num_ranks_; rank++) { + RAFT_CUDA_TRY(cudaSetDevice(device_ids_[rank])); + raft::resource::sync_stream(device_resources_[rank]); + } + } + + const raft::device_resources& set_current_device_to_root_rank() const + { + int root_device_id = device_ids_[root_rank_]; + RAFT_CUDA_TRY(cudaSetDevice(root_device_id)); + return device_resources_[root_rank_]; + } + + ~nccl_clique() + { +#pragma omp parallel for // necessary to avoid hangs + for (int rank = 0; rank < num_ranks_; rank++) { + cudaSetDevice(device_ids_[rank]); + ncclCommDestroy(nccl_comms_[rank]); + rmm::cuda_device_id id(device_ids_[rank]); + rmm::mr::set_per_device_resource(id, nullptr); + } + } + + int root_rank_; + int num_ranks_; + int percent_of_free_memory_; + std::vector device_ids_; + std::vector nccl_comms_; + std::vector> per_device_pools_; + std::vector device_resources_; +}; + +} // namespace raft::comms diff --git a/cpp/include/raft/core/device_resources_snmg.hpp b/cpp/include/raft/core/device_resources_snmg.hpp deleted file mode 100644 index f20a81a1c6..0000000000 --- a/cpp/include/raft/core/device_resources_snmg.hpp +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include -#include - -#include -#include - -/** - * @brief Error checking macro for NCCL runtime API functions. - * - * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an - * exception detailing the NCCL error that occurred - */ -#define RAFT_NCCL_TRY(call) \ - do { \ - ncclResult_t const status = (call); \ - if (ncclSuccess != status) { \ - std::string msg{}; \ - SET_ERROR_MSG(msg, \ - "NCCL error encountered at: ", \ - "call='%s', Reason=%d:%s", \ - #call, \ - status, \ - ncclGetErrorString(status)); \ - throw raft::logic_error(msg); \ - } \ - } while (0); - -namespace raft { - -/** - * @brief SNMG (single-node multi-GPU) resource container object that stores a NCCL clique and all - * necessary resources used for calling device functions, cuda kernels, libraries and/or NCCL - * communications on each GPU. Note the `device_resources_snmg` object can also be used as a classic - * `device_resources` object. The associated resources will be the ones of the GPU used during - * object instantiation and a GPU switch operation will be ordered during the retrieval of said - * resources. - * - * The `device_resources_snmg` class is intended to be used in a single process to manage several - * GPUs. Please note that NCCL communications are the responsibility of the user. Blocking NCCL - * calls will sometimes require the use of several threads to avoid hangs. - */ -class device_resources_snmg : public device_resources { - public: - /** - * @brief Construct a SNMG resources instance with all available GPUs - */ - device_resources_snmg() : device_resources(), root_rank_(0) - { - cudaGetDevice(&main_gpu_id_); - - int num_ranks; - RAFT_CUDA_TRY(cudaGetDeviceCount(&num_ranks)); - device_ids_.resize(num_ranks); - std::iota(device_ids_.begin(), device_ids_.end(), 0); - nccl_comms_.resize(num_ranks); - initialize(); - } - - /** - * @brief Construct a SNMG resources instance with a subset of available GPUs - * - * @param[in] device_ids List of device IDs to be used by the NCCL clique - */ - device_resources_snmg(const std::vector& device_ids) - : device_resources(), root_rank_(0), device_ids_(device_ids), nccl_comms_(device_ids.size()) - { - cudaGetDevice(&main_gpu_id_); - - initialize(); - } - - /** - * @brief SNMG resources instance copy constructor - * - * @param[in] clique A SNMG resources instance - */ - device_resources_snmg(const device_resources_snmg& clique) - : device_resources(clique), - root_rank_(clique.root_rank_), - main_gpu_id_(clique.main_gpu_id_), - device_ids_(clique.device_ids_), - nccl_comms_(clique.nccl_comms_), - device_resources_(clique.device_resources_) - { - } - - device_resources_snmg(device_resources_snmg&&) = delete; - device_resources_snmg& operator=(device_resources_snmg&&) = delete; - - /** - * @brief Set root rank of NCCL clique - */ - inline int set_root_rank(int rank) { this->root_rank_ = rank; } - - /** - * @brief Get root rank of NCCL clique - */ - inline int get_root_rank() const { return this->root_rank_; } - - /** - * @brief Get number of ranks in NCCL clique - */ - inline int get_num_ranks() const { return this->device_ids_.size(); } - - /** - * @brief Get device ID of rank in NCCL clique - */ - inline int get_device_id(int rank) const { return this->device_ids_[rank]; } - - /** - * @brief Get NCCL comm object of rank in NCCL clique - */ - inline ncclComm_t get_nccl_comm(int rank) const { return this->nccl_comms_[rank]; } - - /** - * @brief Get raft::device_resources object of rank in NCCL clique - */ - inline const raft::device_resources& get_device_resources(int rank) const - { - return this->device_resources_[rank]; - } - - /** - * @brief Set current device ID to root rank and return its raft::device_resources object - */ - inline const raft::device_resources& set_current_device_to_root_rank() const - { - return set_current_device_to_rank(get_root_rank()); - } - - /** - * @brief Set current device ID to rank and return its raft::device_resources object - */ - inline const raft::device_resources& set_current_device_to_rank(int rank) const - { - RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); - return get_device_resources(rank); - } - - /** - * @brief Set a memory pool on all GPUs of the clique - */ - void set_memory_pool(int percent_of_free_memory) const - { - for (int rank = 0; rank < get_num_ranks(); rank++) { - RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); - size_t limit = - rmm::percent_of_free_device_memory(percent_of_free_memory); // check limit for each device - raft::resource::set_workspace_to_pool_resource(get_device_resources(rank), limit); - } - cudaSetDevice(this->main_gpu_id_); - } - - bool has_resource_factory(resource::resource_type resource_type) const override - { - cudaSetDevice(this->main_gpu_id_); - return raft::resources::has_resource_factory(resource_type); - } - - /** Destroys all held-up resources */ - ~device_resources_snmg() - { -#pragma omp parallel for // necessary to avoid hangs - for (int rank = 0; rank < get_num_ranks(); rank++) { - RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); - RAFT_NCCL_TRY(ncclCommDestroy(get_nccl_comm(rank))); - } - cudaSetDevice(this->main_gpu_id_); - } - - private: - /** - * @brief Initializes the NCCL clique and raft::device_resources objects - */ - void initialize() - { - RAFT_NCCL_TRY(ncclCommInitAll(nccl_comms_.data(), get_num_ranks(), device_ids_.data())); - - for (int rank = 0; rank < get_num_ranks(); rank++) { - RAFT_CUDA_TRY(cudaSetDevice(get_device_id(rank))); - device_resources_.emplace_back(); - - // ideally add the ncclComm_t to the device_resources object with - // raft::comms::build_comms_nccl_only - } - cudaSetDevice(this->main_gpu_id_); - } - - int root_rank_; - int main_gpu_id_; - std::vector device_ids_; - std::vector nccl_comms_; - std::vector device_resources_; - -}; // class device_resources_snmg - -} // namespace raft diff --git a/cpp/include/raft/core/resource/nccl_clique.hpp b/cpp/include/raft/core/resource/nccl_clique.hpp new file mode 100644 index 0000000000..edda5043ae --- /dev/null +++ b/cpp/include/raft/core/resource/nccl_clique.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include + +namespace raft::resource { + +class nccl_clique_resource : public resource { + public: + nccl_clique_resource() : clique_(std::make_unique()) {} + ~nccl_clique_resource() override {} + void* get_resource() override { return clique_.get(); } + + private: + std::unique_ptr clique_; +}; + +/** Factory that knows how to construct a specific raft::resource to populate the res_t. */ +class nccl_clique_resource_factory : public resource_factory { + public: + resource_type get_resource_type() override { return resource_type::NCCL_CLIQUE; } + resource* make_resource() override { return new nccl_clique_resource(); } +}; + +/** + * @defgroup nccl_clique_resource resource functions + * @{ + */ + +/** + * Retrieves a NCCL clique from raft res if it exists, otherwise initializes it and return it. + * + * @param[in] res the raft resources object + * @return NCCL clique + */ +inline const raft::comms::nccl_clique& get_nccl_clique(resources const& res) +{ + if (!res.has_resource_factory(resource_type::NCCL_CLIQUE)) { + res.add_resource_factory(std::make_shared()); + } + return *res.get_resource(resource_type::NCCL_CLIQUE); +}; + +/** + * @} + */ + +} // namespace raft::resource diff --git a/cpp/include/raft/core/resources.hpp b/cpp/include/raft/core/resources.hpp index 44525edb23..b0827d8e11 100644 --- a/cpp/include/raft/core/resources.hpp +++ b/cpp/include/raft/core/resources.hpp @@ -72,7 +72,6 @@ class resources { resources(const resources& res) : factories_(res.factories_), resources_(res.resources_) {} resources(resources&&) = delete; resources& operator=(resources&&) = delete; - virtual ~resources() {} /** * @brief Returns true if a resource_factory has been registered for the @@ -80,7 +79,7 @@ class resources { * @param resource_type resource type to check * @return true if resource_factory is registered for the given resource_type */ - virtual bool has_resource_factory(resource::resource_type resource_type) const + bool has_resource_factory(resource::resource_type resource_type) const { std::lock_guard _(mutex_); return factories_.at(resource_type).first != resource::resource_type::LAST_KEY; diff --git a/docs/source/cpp_api/core_resources.rst b/docs/source/cpp_api/core_resources.rst index 3c242af848..0da11acae6 100644 --- a/docs/source/cpp_api/core_resources.rst +++ b/docs/source/cpp_api/core_resources.rst @@ -55,23 +55,6 @@ namespace *raft::core* :project: RAFT :members: -SNMG Device Resources ---------------------- - -The `raft::device_resources_snmg` provides a convenient way to design SNMG -(single-node multi-GPU) algorithms. It initiates device-related resources -for a set of devices forming clique. This includes NCCL communications. -GPUs can be addressed and exchanges be made over multiple threads -for performance or convenience. - -``#include `` - -namespace *raft::core* - -.. doxygenclass:: raft::device_resources_snmg - :project: RAFT - :members: - Resource Functions ------------------