Skip to content

Commit

Permalink
rdma/sendrecv: Lower thread requirement to DOMAIN
Browse files Browse the repository at this point in the history
Lower the required threading limits to FI_THREAD_DOMAIN, with control
progress set to FI_PROGRESS_CONTROL_UNIFIED.  On Libfabric providers
that use the utility completion queues, this will result in no locks
for both the send/recv and cq polling calls inside Libfabric, since
we already have domain-level exclusivity in the transports.

Signed-off-by: Brian Barrett <[email protected]>
  • Loading branch information
bwbarrett committed Jan 28, 2025
1 parent 94bc377 commit 0e0782e
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 11 deletions.
1 change: 1 addition & 0 deletions m4/check_pkg_libfabric.m4
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ AC_DEFUN([CHECK_PKG_LIBFABRIC], [
FI_OPT_MAX_MSG_SIZE,
FI_OPT_SHARED_MEMORY_PERMITTED,
FI_MR_DMABUF,
FI_PROGRESS_CONTROL_UNIFIED,
FI_OPT_INJECT_RMA_SIZE],
[], [], [AC_INCLUDES_DEFAULT
[#include <rdma/fi_endpoint.h>
Expand Down
22 changes: 15 additions & 7 deletions src/nccl_ofi_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -7728,14 +7728,22 @@ static void get_hints(struct fi_info *hints)
hints->domain_attr->mr_mode = FI_MR_LOCAL | FI_MR_HMEM | FI_MR_VIRT_ADDR |
FI_MR_ALLOCATED | FI_MR_PROV_KEY;
hints->domain_attr->mr_key_size = (size_t) ofi_nccl_mr_key_size();
hints->domain_attr->threading = FI_THREAD_SAFE;

/* Set progress mode to unspec to use the provider's default
* mode. We hard poll for completion, but if a provider is
* faster with async progress, then we don't really care and
* should let it do that. */
hints->domain_attr->threading = FI_THREAD_DOMAIN;

/* If libfabric is new enough to support
* FI_PROGRESS_CONTROL_UNIFIED, specify MANUAL /
* CONTROL_UNIFIED progress, to remove the domain lock from
* the completion queue polling. Otherwise, set
* PROGRESS_UNSPEC to allow the provider to pick what it
* thinks will go fastest.
*/
#if HAVE_DECL_FI_PROGRESS_CONTROL_UNIFIED
hints->domain_attr->control_progress = FI_PROGRESS_CONTROL_UNIFIED;
hints->domain_attr->data_progress = FI_PROGRESS_MANUAL;
#else
hints->domain_attr->control_progress = FI_PROGRESS_UNSPEC;
hints->domain_attr->data_progress = FI_PROGRESS_UNSPEC;
hints->domain_attr->data_progress = FI_PROGRESS_UNSPEC;;
#endif
}


Expand Down
19 changes: 15 additions & 4 deletions src/nccl_ofi_sendrecv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2571,11 +2571,22 @@ static void sendrecv_get_hints(struct fi_info *hints, int req_gdr)

hints->ep_attr->type = FI_EP_RDM;

hints->domain_attr->threading = FI_THREAD_SAFE;

/* Set progress mode to unspec to use the provider's default mode. */
hints->domain_attr->threading = FI_THREAD_DOMAIN;

/* If libfabric is new enough to support
* FI_PROGRESS_CONTROL_UNIFIED, specify MANUAL /
* CONTROL_UNIFIED progress, to remove the domain lock from
* the completion queue polling. Otherwise, set
* PROGRESS_UNSPEC to allow the provider to pick what it
* thinks will go fastest.
*/
#if HAVE_DECL_FI_PROGRESS_CONTROL_UNIFIED
hints->domain_attr->control_progress = FI_PROGRESS_CONTROL_UNIFIED;
hints->domain_attr->data_progress = FI_PROGRESS_MANUAL;
#else
hints->domain_attr->control_progress = FI_PROGRESS_UNSPEC;
hints->domain_attr->data_progress = FI_PROGRESS_UNSPEC;
hints->domain_attr->data_progress = FI_PROGRESS_UNSPEC;;
#endif

/* Set MR mode bits to indicate FI_MR_BASIC registration */
hints->domain_attr->mr_mode |= FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY;
Expand Down

0 comments on commit 0e0782e

Please sign in to comment.