From a7a09e7255261274aa918676ed543479e752e4d5 Mon Sep 17 00:00:00 2001
From: Raghu Raja
Date: Tue, 27 Feb 2024 23:19:44 +0000
Subject: [PATCH] fix: Close the libfabric domain before closing the fabric

Signed-off-by: Raghu Raja
---
 src/nccl_ofi_rdma.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/nccl_ofi_rdma.c b/src/nccl_ofi_rdma.c
index 28cfd90e6..fe9a39b49 100644
--- a/src/nccl_ofi_rdma.c
+++ b/src/nccl_ofi_rdma.c
@@ -5819,9 +5819,15 @@ static void release_device_ofi_resources(nccl_net_ofi_rdma_device_t *device)
 	nccl_net_ofi_rdma_device_rail_t *end = device->device_rails + device->num_rails;
 
 	for (; begin != end; ++begin) {
-		if (begin->info) fi_freeinfo(begin->info);
-		if (begin->fabric) fi_close(&begin->fabric->fid);
-		if (begin->domain) fi_close(&begin->domain->fid);
+		if (begin->domain) {
+			fi_close(&begin->domain->fid);
+		}
+		if (begin->fabric) {
+			fi_close(&begin->fabric->fid);
+		}
+		if (begin->info) {
+			fi_freeinfo(begin->info);
+		}
 	}
 }
 