Skip to content

Commit

Permalink
Support ext-net v9 API from the NCCL Plugin interface
Browse files Browse the repository at this point in the history
Update the Nvidia interface to support the v9 interface introduced
in NCCL 2.24.  The primary changes for NCCL are:
1. NIC fusion / virtual NIC support
2. API supports indicating when a completion is not needed (e.g. LL128)
3. DeviceProps supports max transfer size for p2p/coll operations
  • Loading branch information
yexiang-aws committed Jan 22, 2025
1 parent 9561001 commit 72736a0
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,6 @@ m4/lt~obsolete.m4
.idea/
.devenv/
.direnv

.vscode
install/
2 changes: 2 additions & 0 deletions include/nccl_ofi_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ ncclResult_t nccl_net_ofi_regMrDmaBuf(void* comm, void* data, size_t size, int t
/* Release a memory registration handle on the given communicator. */
ncclResult_t nccl_net_ofi_deregMr(void *comm, void *mhandle);
/* Tagged send; the message length is carried as an int. */
ncclResult_t nccl_net_ofi_isend(void *sendComm, void* data, int size, int tag, void *mhandle, void** request);
/* v4 send entry point: same as above but without a tag parameter. */
ncclResult_t nccl_net_ofi_isend_v4(void* sendComm, void* data, int size, void* mhandle, void** request);
/* v9 send entry point: identical parameter list except that size is a size_t. */
ncclResult_t nccl_net_ofi_isend_v9(void *sendComm, void* data, size_t size, int tag, void *mhandle, void** request);
/* Grouped receive of n buffers; per-buffer lengths are passed as an int array. */
ncclResult_t nccl_net_ofi_irecv(void* recvComm, int n, void** buffers, int* sizes, int *tags, void** mhandles, void** request);
/* v4 receive entry point: a single buffer, no tag parameter. */
ncclResult_t nccl_net_ofi_irecv_v4(void* recvComm, void* data, int size, void* mhandle, void** request);
/* v9 receive entry point: identical parameter list except that sizes is a size_t array. */
ncclResult_t nccl_net_ofi_irecv_v9(void* recvComm, int n, void** buffers, size_t* sizes, int *tags, void** mhandles, void** request);
/* Poll a request; reports completion through *done and a length through *size. */
ncclResult_t nccl_net_ofi_test(void *request, int *done, int *size);
/* Grouped flush of n receive buffers; yields a request handle. */
ncclResult_t nccl_net_ofi_iflush(void* recvComm, int n, void** buffers, int* sizes, void** mhandles, void** request);
/* v3 flush entry point: a single buffer and no request handle. */
ncclResult_t nccl_net_ofi_flush_v3(void* recvComm, void* data, int size, void* mhandle);
Expand Down
12 changes: 12 additions & 0 deletions src/nccl_ofi_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,13 @@ ncclResult_t nccl_net_ofi_isend_v4(void* sendComm, void* data, int size,
}


/*
 * ext-net v9 send entry point.
 *
 * The v9 interface expresses the message length as a size_t, while
 * nccl_net_ofi_isend() takes an int.  This wrapper narrows the length
 * and forwards every other argument unchanged, returning whatever
 * nccl_net_ofi_isend() returns.
 *
 * NOTE(review): a length above INT_MAX does not survive the narrowing
 * conversion and no error is reported here -- confirm that callers can
 * never pass such a length, or reject it explicitly.
 */
ncclResult_t nccl_net_ofi_isend_v9(void* sendComm, void* data, size_t size,
				   int tag, void* mhandle, void** request)
{
	/* Same conversion the call would perform implicitly, made visible. */
	const int legacy_size = (int)size;

	return nccl_net_ofi_isend(sendComm, data, legacy_size, tag, mhandle, request);
}


ncclResult_t nccl_net_ofi_irecv(void* rComm, int n, void** buffers, int* sizes,
int *tags, void** mhandles, void** req)
{
Expand Down Expand Up @@ -744,6 +751,11 @@ ncclResult_t nccl_net_ofi_irecv_v4(void* recvComm, void* data, int size,
return nccl_net_ofi_irecv(recvComm, 1, &data, &size, &tag, &mhandle, request);
}

/*
 * ext-net v9 grouped-receive entry point.
 *
 * The v9 interface passes the per-buffer lengths as an array of size_t,
 * while nccl_net_ofi_irecv() expects an array of int.  The two element
 * types generally differ in width, so the array cannot simply be
 * reinterpreted through a pointer cast: with 8-byte size_t and 4-byte
 * int, element i of the int view would alias half of element i/2 of the
 * caller's array.  Each length is therefore converted into a temporary
 * int array that is handed to the int-based implementation.
 *
 * recvComm, data, tags, mhandles and request are forwarded unchanged.
 * sizes holds n receive-buffer lengths; a length above the int range is
 * saturated to the largest int, so the posted buffer is described as
 * smaller than it is rather than as an unrelated wrapped value.
 *
 * Returns whatever nccl_net_ofi_irecv() returns.
 */
ncclResult_t nccl_net_ofi_irecv_v9(void* recvComm, int n, void** data,
				   size_t* sizes, int* tags, void** mhandles, void** request)
{
	/* Nothing to convert; let the implementation see the NULL itself. */
	if (sizes == NULL) {
		return nccl_net_ofi_irecv(recvComm, n, data, NULL, tags, mhandles, request);
	}

	/* Largest int value, assuming int has no padding bits. */
	const int int_max = (int)(~0u >> 1);

	/*
	 * A non-positive n is forwarded as-is for the implementation to
	 * judge, but the temporary array always has at least one element
	 * because a zero or negative array bound is undefined behaviour.
	 * NOTE(review): n is expected to be small (bounded by the maxRecvs
	 * value advertised in getProperties) -- confirm before relying on
	 * stack storage for larger groups.
	 */
	const int count = (n > 0) ? n : 0;
	int narrowed[count > 0 ? count : 1];

	narrowed[0] = 0;
	for (int i = 0; i < count; i++) {
		narrowed[i] = (sizes[i] > (size_t)int_max) ? int_max : (int)sizes[i];
	}

	/*
	 * The temporary array only lives for the duration of this call;
	 * nccl_net_ofi_irecv_v4() already passes addresses of its own
	 * parameters the same way.
	 */
	return nccl_net_ofi_irecv(recvComm, n, data, narrowed, tags, mhandles, request);
}

ncclResult_t nccl_net_ofi_test(void* req, int* done, int* size)
{
Expand Down
80 changes: 80 additions & 0 deletions src/nccl_ofi_interface_nvidia.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,61 @@
#include "nccl_ofi.h"
#include "nccl_ofi_api.h"

/*
 * Populate the ext-net v9 property structure for device dev_id.
 *
 * The plugin's own property record is fetched through
 * nccl_net_ofi_get_properties() and translated field by field.  If that
 * lookup fails its result is returned and props is left untouched;
 * otherwise ncclSuccess is returned.
 */
static ncclResult_t getProperties_v9(int dev_id, ncclNetProperties_v9_t* props)
{
	nccl_ofi_properties_t plugin_props;
	const ncclResult_t status = nccl_net_ofi_get_properties(dev_id, &plugin_props);

	if (status != ncclSuccess) {
		return status;
	}

	/* Device identity. */
	props->name = plugin_props.name;
	props->pciPath = plugin_props.pci_path;
	props->guid = plugin_props.guid;
	props->port = plugin_props.port_number;

	/*
	 * Host pointers are always accepted; CUDA and dma-buf pointers are
	 * added only when the plugin reports support for them.
	 */
	props->ptrSupport = NCCL_PTR_HOST
		| (plugin_props.hmem_support ? NCCL_PTR_CUDA : 0)
		| (plugin_props.dmabuf_support ? NCCL_PTR_DMABUF : 0);

	/*
	 * regIsGlobal=1 tells NCCL two things about the plugin:
	 *
	 *  - memory registrations are global, i.e. usable by every
	 *    communicator, and
	 *  - the plugin keeps a fast MR cache, so a regMr() call on a
	 *    buffer (same address and size) that was registered before
	 *    quickly returns the existing registration.
	 *
	 * With that guarantee, a buffer registered by the user through
	 * ncclCommRegister() is registered globally once per net device
	 * via regMr().  Later, when the net proxy thread starts a
	 * communication task on such a buffer, it calls regMr() again and
	 * relies on the plugin-managed cache to hand back the existing
	 * registration quickly.
	 */
	props->regIsGlobal = plugin_props.regIsGlobal;

	/* Link characteristics and resource limits. */
	props->speed = plugin_props.port_speed;
	props->latency = plugin_props.latency;
	props->maxComms = plugin_props.max_communicators;
	props->maxRecvs = plugin_props.max_group_receives;

	/* Host-driven networking only: no device-side offload version. */
	props->netDeviceType = NCCL_NET_DEVICE_HOST;
	props->netDeviceVersion = NCCL_NET_DEVICE_INVALID_VERSION;

	/* Each reported device maps to exactly one device: itself. */
	props->vProps.ndevs = 1;
	props->vProps.devs[0] = dev_id;

	/*
	 * NOTE(review): these limits are advertised as
	 * NCCL_MAX_NET_SIZE_BYTES, while the send/receive paths behind the
	 * v9 wrappers carry lengths as int -- confirm the advertised
	 * maximum cannot exceed what those paths can represent.
	 */
	props->maxP2pBytes = NCCL_MAX_NET_SIZE_BYTES;
	props->maxCollBytes = NCCL_MAX_NET_SIZE_BYTES;

	return ncclSuccess;
}

static ncclResult_t getProperties_v8(int dev_id, ncclNetProperties_v8_t* props)
{
nccl_ofi_properties_t ofi_properties;
Expand Down Expand Up @@ -319,6 +374,29 @@ NCCL_OFI_EXPORT_SYMBOL ncclNet_v8_t ncclNetPlugin_v8 = {
.irecvConsumed = NULL,
};

/*
 * Exported ext-net v9 function table.
 *
 * Only getProperties, isend and irecv use v9-specific entry points;
 * connect and accept go through the connect_v7/accept_v7 helpers and
 * the remaining callbacks are the unversioned nccl_net_ofi_* functions.
 */
NCCL_OFI_EXPORT_SYMBOL ncclNet_v9_t ncclNetPlugin_v9 = {
/* Default name; nvidia_plugin_name_fixup() may replace it at load time. */
.name = "Libfabric",
.init = nccl_net_ofi_init,
.devices = nccl_net_ofi_devices,
.getProperties = getProperties_v9,
.listen = nccl_net_ofi_listen,
.connect = connect_v7,
.accept = accept_v7,
.regMr = nccl_net_ofi_regMr,
.regMrDmaBuf = nccl_net_ofi_regMrDmaBuf,
.deregMr = nccl_net_ofi_deregMr,
/* v9 wrappers: message lengths arrive as size_t / size_t*. */
.isend = nccl_net_ofi_isend_v9,
.irecv = nccl_net_ofi_irecv_v9,
.iflush = nccl_net_ofi_iflush,
.test = nccl_net_ofi_test,
.closeSend = nccl_net_ofi_closeSend,
.closeRecv = nccl_net_ofi_closeRecv,
.closeListen = nccl_net_ofi_closeListen,
/* Callbacks this table does not provide. */
.getDeviceMr = NULL,
.irecvConsumed = NULL,
.makeVDevice = NULL,
};


/*
* Versions 1.11.0 and prior of the plugin set the name to
Expand All @@ -339,6 +417,7 @@ __attribute__((constructor)) static void nvidia_plugin_name_fixup(void)
ncclNetPlugin_v6.name = "AWS Libfabric";
ncclNetPlugin_v7.name = "AWS Libfabric";
ncclNetPlugin_v8.name = "AWS Libfabric";
ncclNetPlugin_v9.name = "AWS Libfabric";
} else if (net_env != NULL && 0 == strcasecmp(net_env, "OFI")) {
ncclNetPlugin_v2.name = "OFI";
ncclNetPlugin_v3.name = "OFI";
Expand All @@ -347,5 +426,6 @@ __attribute__((constructor)) static void nvidia_plugin_name_fixup(void)
ncclNetPlugin_v6.name = "OFI";
ncclNetPlugin_v7.name = "OFI";
ncclNetPlugin_v8.name = "OFI";
ncclNetPlugin_v9.name = "OFI";
}
}
2 changes: 1 addition & 1 deletion tests/functional/nccl_connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ int main(int argc, char* argv[])
nccl_net_ofi_send_comm_t *sComm = NULL;
nccl_net_ofi_listen_comm_t *lComm = NULL;
nccl_net_ofi_recv_comm_t *rComm = NULL;
ncclNetDeviceHandle_v8_t *s_ignore, *r_ignore;
ncclNetDeviceHandle_v9_t *s_ignore, *r_ignore;
char src_handle[NCCL_NET_HANDLE_MAXSIZE] = {};
char handle[NCCL_NET_HANDLE_MAXSIZE] = {};
test_nccl_net_t *extNet = NULL;
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/nccl_message_transfer.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ int main(int argc, char* argv[])
nccl_net_ofi_listen_comm_t *lComm = NULL;
nccl_net_ofi_recv_comm_t *rComm = NULL;
test_nccl_net_t *extNet = NULL;
ncclNetDeviceHandle_v8_t *s_ignore, *r_ignore;
ncclNetDeviceHandle_v9_t *s_ignore, *r_ignore;
char src_handle[NCCL_NET_HANDLE_MAXSIZE] = {};

ofi_log_function = logger;
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ int main(int argc, char *argv[])
char handle[NCCL_NET_HANDLE_MAXSIZE] = {};
char src_handle_prev[NCCL_NET_HANDLE_MAXSIZE] = {};
char src_handle_next[NCCL_NET_HANDLE_MAXSIZE] = {};
ncclNetDeviceHandle_v8_t *s_ignore, *r_ignore;
ncclNetDeviceHandle_v9_t *s_ignore, *r_ignore;
test_nccl_net_t *extNet = NULL;

ofi_log_function = logger;
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/test-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
} while(false);

// Can be changed when porting new versions to the plugin
#define NCCL_PLUGIN_SYMBOL ncclNetPlugin_v8
#define NCCL_PLUGIN_SYMBOL ncclNetPlugin_v9

// Type aliases the functional tests use for the plugin table and its
// property structure.
// NOTE(review): NCCL_PLUGIN_SYMBOL names the v9 table, whose isend/irecv
// take size_t lengths, while these aliases still refer to the v8 types --
// confirm whether they should move to the v9 types together with the
// symbol.
typedef ncclNet_v8_t test_nccl_net_t;
typedef ncclNetProperties_v8_t test_nccl_properties_t;
Expand Down

0 comments on commit 72736a0

Please sign in to comment.