Skip to content

Commit

Permalink
MPI tests: Validate NCCL v2.6.4 API
Browse files Browse the repository at this point in the history
Modify tests to invoke and test new NCCL `getProperties` API.

Signed-off-by: Rashika Kheria <[email protected]>
  • Loading branch information
rashikakheria committed May 29, 2020
1 parent 0b714ea commit dff97df
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 6 deletions.
21 changes: 19 additions & 2 deletions tests/nccl_connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,13 @@ int main(int argc, char* argv[])
char name[MPI_MAX_PROCESSOR_NAME];

/* Plugin defines */
int ndev;
int ndev, dev;
sendComm_t *sComm = NULL;
listenComm_t *lComm = NULL;
recvComm_t *rComm = NULL;
char src_handle[NCCL_NET_HANDLE_MAXSIZE] = {0};
ncclNet_t *extNet = NULL;

ncclDebugLogger_t ofi_log_function = NULL;
ofi_log_function = logger;

MPI_Init(&argc, &argv);
Expand All @@ -42,6 +41,24 @@ int main(int argc, char* argv[])
OFINCCLCHECK(extNet->devices(&ndev));
NCCL_OFI_INFO(NCCL_INIT, "Received %d network devices", ndev);

#if (NCCL_VERSION_CODE >= NCCL_VERSION(2, 6, 4))
/* Get Properties for the device */
for (dev = 0; dev < ndev; dev++) {
ncclNetProperties_v3_t props = {0};
OFINCCLCHECK(extNet->getProperties(dev, &props));
print_dev_props(dev, &props);
}
#else
/* Get PCIe path and plugin memory pointer support */
for (dev = 0; dev < ndev; dev++) {
char *path = NULL;
int supported_types = 0;
extNet->pciPath(dev, &path);
OFINCCLCHECK(extNet->ptrSupport(dev, &supported_types));
NCCL_OFI_TRACE(NCCL_INIT, "Dev %d has path %s and supports pointers of type %d", dev, path, supported_types);
}
#endif

/* Listen API */
char handle[NCCL_NET_HANDLE_MAXSIZE];
NCCL_OFI_INFO(NCCL_INIT, "Server: Listening on dev 0");
Expand Down
21 changes: 19 additions & 2 deletions tests/nccl_message_transfer.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@ int main(int argc, char* argv[])
char name[MPI_MAX_PROCESSOR_NAME];

/* Plugin defines */
int ndev;
int ndev, dev;
sendComm_t *sComm = NULL;
listenComm_t *lComm = NULL;
recvComm_t *rComm = NULL;
ncclNet_t *extNet = NULL;
char src_handle[NCCL_NET_HANDLE_MAXSIZE] = {0};

ncclDebugLogger_t ofi_log_function;
ofi_log_function = logger;

/* Initialisation for data transfer */
Expand Down Expand Up @@ -52,6 +51,24 @@ int main(int argc, char* argv[])
OFINCCLCHECK(extNet->devices(&ndev));
NCCL_OFI_INFO(NCCL_NET, "Received %d network devices", ndev);

#if (NCCL_VERSION_CODE >= NCCL_VERSION(2, 6, 4))
/* Get Properties for the device */
for (dev = 0; dev < ndev; dev++) {
ncclNetProperties_v3_t props = {0};
OFINCCLCHECK(extNet->getProperties(dev, &props));
print_dev_props(dev, &props);
}
#else
/* Get PCIe path and plugin memory pointer support */
for (dev = 0; dev < ndev; dev++) {
char *path = NULL;
int supported_types = 0;
extNet->pciPath(dev, &path);
OFINCCLCHECK(extNet->ptrSupport(dev, &supported_types));
NCCL_OFI_TRACE(NCCL_INIT, "Dev %d has path %s and supports pointers of type %d", dev, path, supported_types);
}
#endif

/* Listen API */
char handle[NCCL_NET_HANDLE_MAXSIZE];
NCCL_OFI_INFO(NCCL_NET, "Server: Listening on dev 0");
Expand Down
21 changes: 19 additions & 2 deletions tests/ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ int main(int argc, char *argv[])
char name[MPI_MAX_PROCESSOR_NAME];

/* Plugin defines */
int ndev;
int ndev, dev;
sendComm_t *sComm_prev = NULL, *sComm_next = NULL;
listenComm_t *lComm = NULL;
recvComm_t *rComm = NULL;
Expand All @@ -19,7 +19,6 @@ int main(int argc, char *argv[])
char src_handle_next[NCCL_NET_HANDLE_MAXSIZE] = {0};
ncclNet_t *extNet = NULL;

ncclDebugLogger_t ofi_log_function;
ofi_log_function = logger;

/* Initialisation for data transfer */
Expand Down Expand Up @@ -62,6 +61,24 @@ int main(int argc, char *argv[])
OFINCCLCHECK(extNet->devices(&ndev));
NCCL_OFI_INFO(NCCL_NET, "Received %d network devices", ndev);

#if (NCCL_VERSION_CODE >= NCCL_VERSION(2, 6, 4))
/* Get Properties for the device */
for (dev = 0; dev < ndev; dev++) {
ncclNetProperties_v3_t props = {0};
OFINCCLCHECK(extNet->getProperties(dev, &props));
print_dev_props(dev, &props);
}
#else
/* Get PCIe path and plugin memory pointer support */
for (dev = 0; dev < ndev; dev++) {
char *path = NULL;
int supported_types = 0;
extNet->pciPath(dev, &path);
OFINCCLCHECK(extNet->ptrSupport(dev, &supported_types));
NCCL_OFI_TRACE(NCCL_INIT, "Dev %d has path %s and supports pointers of type %d", dev, path, supported_types);
}
#endif

/* Listen API */
NCCL_OFI_INFO(NCCL_NET, "Server: Listening on device 0");
OFINCCLCHECK(extNet->listen(0, (void *)&handle, (void **)&lComm));
Expand Down
13 changes: 13 additions & 0 deletions tests/test-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,19 @@ void logger(ncclDebugLogLevel level, unsigned long flags, const char *filefunc,
va_end(vargs);
}

#if (NCCL_VERSION_CODE >= NCCL_VERSION(2, 6, 4))
/*
 * Log the properties reported for a single network device.
 *
 * Emits one banner line followed by one line per property (PCIe path,
 * pointer support, GUID, speed, port and maximum communicators), all
 * through NCCL_OFI_TRACE with the NCCL_NET flag.
 *
 * @param dev    Device index; used only in the banner line.
 * @param props  Properties to print. Must not be NULL: it is dereferenced
 *               without a check.
 */
void print_dev_props(int dev, ncclNetProperties_v3_t *props)
{
	NCCL_OFI_TRACE(NCCL_NET, "****************** Device %d Properties ******************", dev);
	NCCL_OFI_TRACE(NCCL_NET, "%s: PCIe Path: %s", props->name, props->pciPath);
	NCCL_OFI_TRACE(NCCL_NET, "%s: Plugin Support: %d", props->name, props->ptrSupport);
	/*
	 * The GUID is a 64-bit unsigned value in the NCCL v3 properties
	 * struct, so "%d" does not match the argument type. Widen explicitly
	 * and print with "%llu", which needs no extra header.
	 */
	NCCL_OFI_TRACE(NCCL_NET, "%s: Device GUID: %llu", props->name, (unsigned long long)props->guid);
	NCCL_OFI_TRACE(NCCL_NET, "%s: Device Speed: %d", props->name, props->speed);
	NCCL_OFI_TRACE(NCCL_NET, "%s: Device Port: %d", props->name, props->port);
	NCCL_OFI_TRACE(NCCL_NET, "%s: Device Maximum Communicators: %d", props->name, props->maxComms);
}
#endif

ncclNet_t *get_extNet(void)
{
void *netPluginLib = NULL;
Expand Down

0 comments on commit dff97df

Please sign in to comment.