Skip to content

Commit

Permalink
prov/cxi: Test CUDA with DMA buf FD recycling
Browse files Browse the repository at this point in the history
When an MR is freed, the CXI provider should free the DMA buf FD used for
the CUDA region. Failing to do this will result in FDs being exhausted.

Signed-off-by: Ian Ziemba <[email protected]>
  • Loading branch information
iziemba authored and j-xiong committed Jan 23, 2025
1 parent 4431fe5 commit ba880cc
Showing 1 changed file with 35 additions and 0 deletions.
35 changes: 35 additions & 0 deletions prov/cxi/test/cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -580,3 +580,38 @@ Test(cuda, verify_force_dev_reg_local)
cxit_destroy_cqs();
cxit_teardown_ep();
}

/*
 * Stress MR registration of a single CUDA allocation: register the same
 * device buffer and immediately close the resulting MR, 2048 times in a row.
 * Every registration and every close must succeed.
 *
 * NOTE(review): the environment variables below presumably select dmabuf
 * based CUDA registration and turn off the CUDA MR cache monitor so that each
 * pass performs a real registration -- confirm against the provider docs.
 * NOTE(review): the pass count presumably exceeds the usual per-process FD
 * limit, so an FD leaked on MR close would surface as a registration
 * failure -- confirm.
 */
Test(cuda, dmabuf_stress)
{
	const size_t len = 1024 * 1024;
	void *dev_buf;
	struct fid_mr *mr;
	cudaError_t cerr;
	int rc;
	int pass = 0;

	/* Environment must be in place before the endpoint resources are set up. */
	rc = setenv("FI_HMEM_CUDA_USE_DMABUF", "1", 1);
	cr_assert_eq(rc, 0, "setenv failed: %d", -errno);

	rc = setenv("FI_MR_CUDA_CACHE_MONITOR_ENABLED", "0", 1);
	cr_assert_eq(rc, 0, "setenv failed: %d", -errno);

	/* One device allocation is shared by every registration below. */
	cerr = cudaMalloc(&dev_buf, len);
	cr_assert_eq(cerr, cudaSuccess, "cudaMalloc failed: %d", cerr);

	cxit_setup_msg();

	while (pass < 2048) {
		rc = fi_mr_reg(cxit_domain, dev_buf, len,
			       FI_READ | FI_WRITE, 0, 0, 0, &mr, NULL);
		cr_assert_eq(rc, FI_SUCCESS, "fi_mr_reg failed: %d", rc);

		/* Release the MR right away so nothing accumulates across passes. */
		rc = fi_close(&mr->fid);
		cr_assert_eq(rc, FI_SUCCESS, "fi_close MR failed: %d", rc);

		pass++;
	}

	cxit_teardown_msg();

	/* Device memory is released only after the endpoint has been torn down. */
	cerr = cudaFree(dev_buf);
	cr_assert_eq(cerr, cudaSuccess, "cudaFree failed: %d", cerr);
}

0 comments on commit ba880cc

Please sign in to comment.