Skip to content

Commit

Permalink
hmem/cuda: Add dmabuf fd ops functions
Browse files Browse the repository at this point in the history
Implement the get_dmabuf_fd API for the CUDA interface.

Signed-off-by: Shi Jin <[email protected]>
  • Loading branch information
shijin-aws committed Oct 30, 2023
1 parent 95874ad commit 8e1e20c
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 3 deletions.
3 changes: 3 additions & 0 deletions include/ofi_hmem.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ int cuda_dev_reg_copy_from_hmem(uint64_t handle, void *dest, const void *src,
bool cuda_is_ipc_enabled(void);
int cuda_get_ipc_handle_size(size_t *size);
bool cuda_is_gdrcopy_enabled(void);
bool cuda_is_dmabuf_supported(void);
int cuda_get_dmabuf_fd(void *addr, uint64_t size, int *fd,
uint64_t *offset);

void cuda_gdrcopy_to_dev(uint64_t handle, void *dev,
const void *host, size_t size);
Expand Down
2 changes: 1 addition & 1 deletion src/hmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ struct ofi_hmem_ops hmem_ops[] = {
.dev_unregister = cuda_dev_unregister,
.dev_reg_copy_to_hmem = cuda_dev_reg_copy_to_hmem,
.dev_reg_copy_from_hmem = cuda_dev_reg_copy_from_hmem,
.get_dmabuf_fd = ofi_hmem_no_get_dmabuf_fd,
.get_dmabuf_fd = cuda_get_dmabuf_fd,
},
[FI_HMEM_ROCR] = {
.initialized = false,
Expand Down
133 changes: 131 additions & 2 deletions src/hmem_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

#include "ofi_hmem.h"
#include "ofi.h"
#include "ofi_mem.h"

#if HAVE_CUDA

Expand All @@ -58,7 +59,10 @@
_(cuPointerGetAttribute) \
_(cuPointerSetAttribute) \
_(cuDeviceCanAccessPeer) \
_(cuMemGetAddressRange)
_(cuMemGetAddressRange) \
_(cuMemGetHandleForAddressRange) \
_(cuDeviceGetAttribute) \
_(cuDeviceGet)

#define CUDA_RUNTIME_FUNCS_DEF(_) \
_(cudaMemcpy) \
Expand Down Expand Up @@ -86,6 +90,7 @@ static struct {
bool p2p_access_supported;
bool use_gdrcopy;
bool use_ipc;
bool dmabuf_supported;
void *driver_handle;
void *runtime_handle;
void *nvml_handle;
Expand All @@ -96,7 +101,8 @@ static struct {
.use_ipc = false,
.driver_handle = NULL,
.runtime_handle = NULL,
.nvml_handle = NULL
.nvml_handle = NULL,
.dmabuf_supported = false
};

static struct {
Expand All @@ -119,6 +125,13 @@ static struct {
size_t* psize, CUdeviceptr dptr);
CUresult (*cuDeviceCanAccessPeer)(int *canAccessPeer,
CUdevice srcDevice, CUdevice dstDevice);
CUresult (*cuMemGetHandleForAddressRange)(void* handle,
CUdeviceptr dptr, size_t size,
CUmemRangeHandleType handleType,
unsigned long long flags);
CUresult (*cuDeviceGetAttribute)(int* pi,
CUdevice_attribute attrib, CUdevice dev);
CUresult (*cuDeviceGet)(CUdevice* device, int ordinal);
cudaError_t (*cudaHostRegister)(void *ptr, size_t size,
unsigned int flags);
cudaError_t (*cudaHostUnregister)(void *ptr);
Expand Down Expand Up @@ -199,6 +212,16 @@ CUresult ofi_cuPointerGetAttribute(void *data, CUpointer_attribute attribute,
return cuda_ops.cuPointerGetAttribute(data, attribute, ptr);
}

/*
 * Log a warning describing a failed CUDA driver API call.
 *
 * cu_result:     the CUresult returned by the failing call
 * cuda_api_name: string naming the driver API that failed
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and stays safe inside an unbraced if/else. The name and string
 * pointers start out NULL and are replaced by "unknown" when the lookup does
 * not fill them in, so a NULL pointer is never handed to "%s".
 */
#define CUDA_DRIVER_LOG_ERR(cu_result, cuda_api_name)			\
	do {								\
		const char *cu_error_name = NULL;			\
		const char *cu_error_str = NULL;			\
		cuda_ops.cuGetErrorName((cu_result), &cu_error_name);	\
		cuda_ops.cuGetErrorString((cu_result), &cu_error_str);	\
		FI_WARN(&core_prov, FI_LOG_CORE, "%s failed: %s:%s\n",	\
			(cuda_api_name),				\
			cu_error_name ? cu_error_name : "unknown",	\
			cu_error_str ? cu_error_str : "unknown");	\
	} while (0)

/**
* @brief Set CU_POINTER_ATTRIBUTE_SYNC_MEMOPS for a cuda ptr
* to ensure any synchronous copies are completed prior
Expand Down Expand Up @@ -613,6 +636,92 @@ static int cuda_hmem_detect_p2p_access_support(void)
return FI_SUCCESS;
}

/**
 * @brief Detect dmabuf support on the current platform
 *
 * Queries CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED on cuda device 0 and stores
 * the outcome in cuda_attr.dmabuf_supported. When no cuda device is counted
 * the query is skipped and cuda_attr.dmabuf_supported is left untouched.
 *
 * @return	FI_SUCCESS if the dmabuf support check completed or was skipped
 * 		-FI_EIO upon CUDA API error
 */
static int cuda_hmem_detect_dmabuf_support(void)
{
	CUresult cuda_ret;
	CUdevice dev;
	int is_supported = 0;

	/*
	 * Only device 0 is inspected below, so a single device is enough to
	 * run the query; bail out only when there is no device at all.
	 */
	if (cuda_attr.device_count < 1)
		return FI_SUCCESS;

	cuda_ret = cuda_ops.cuDeviceGet(&dev, 0);
	if (cuda_ret != CUDA_SUCCESS) {
		CUDA_DRIVER_LOG_ERR(cuda_ret, "cuDeviceGet");
		return -FI_EIO;
	}

	cuda_ret = cuda_ops.cuDeviceGetAttribute(&is_supported,
				CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED, dev);
	if (cuda_ret != CUDA_SUCCESS) {
		CUDA_DRIVER_LOG_ERR(cuda_ret, "cuDeviceGetAttribute");
		return -FI_EIO;
	}

	FI_INFO(&core_prov, FI_LOG_CORE,
		"cuda dmabuf support status: %d\n", is_supported);
	cuda_attr.dmabuf_supported = (is_supported == 1);
	return FI_SUCCESS;
}

/**
 * @brief Get dmabuf fd and offset for a given cuda memory allocation
 *
 * The requested buffer [addr, addr + size) is widened to host-page
 * boundaries before the dmabuf handle is requested; *offset reports where
 * addr sits inside that page-aligned range.
 *
 * @param addr		the starting address of the cuda memory allocation
 * @param size		the length of the cuda memory allocation
 * @param fd		the fd of the dmabuf region (output)
 * @param offset	the offset of the buf in the dmabuf region (output)
 * @return	FI_SUCCESS if dmabuf fd and offset are retrieved successfully
 * 		-FI_EOPNOTSUPP if dmabuf is not supported on the cuda device
 * 		-FI_EIO upon CUDA API error
 */
int cuda_get_dmabuf_fd(void *addr, uint64_t size, int *fd,
		       uint64_t *offset)
{
	CUdeviceptr aligned_ptr;
	CUresult cuda_ret;

	size_t aligned_size;
	size_t host_page_size = ofi_get_page_size();
	uintptr_t last_byte;

	if (!cuda_is_dmabuf_supported())
		return -FI_EOPNOTSUPP;

	/*
	 * The page end must be derived from the last byte of the buffer, not
	 * from the one-past-the-end address: otherwise a buffer that finishes
	 * exactly on a page boundary would be extended by a whole extra page.
	 * A zero-length request still maps to the single page holding addr.
	 */
	last_byte = (uintptr_t) addr + (size ? size - 1 : 0);

	aligned_ptr = (uintptr_t) ofi_get_page_start(addr, host_page_size);
	/* The page end is treated as inclusive here, hence the "+ 1". */
	aligned_size = (uintptr_t) ofi_get_page_end((void *) last_byte,
						    host_page_size) -
		       (uintptr_t) aligned_ptr + 1;

	cuda_ret = cuda_ops.cuMemGetHandleForAddressRange(
				(void *) fd,
				aligned_ptr, aligned_size,
				CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD,
				0);
	if (cuda_ret != CUDA_SUCCESS) {
		CUDA_DRIVER_LOG_ERR(cuda_ret, "cuMemGetHandleForAddressRange");
		return -FI_EIO;
	}

	*offset = (uintptr_t) addr - (uintptr_t) aligned_ptr;

	FI_INFO(&core_prov, FI_LOG_CORE,
		"Get dma buf handle with fd: %d, offset: %lu"
		", page aligned base address: %p"
		", page aligned size: %lu, cuda allocation address %p"
		", cuda allocation length: %lu\n",
		*fd, *offset,
		(void *) aligned_ptr, aligned_size,
		(void *) addr, size);
	return FI_SUCCESS;
}

int cuda_hmem_init(void)
{
int ret;
Expand All @@ -635,6 +744,10 @@ int cuda_hmem_init(void)
if (ret != FI_SUCCESS)
goto dl_cleanup;

ret = cuda_hmem_detect_dmabuf_support();
if (ret != FI_SUCCESS)
goto dl_cleanup;

ret = 1;
fi_param_get_bool(NULL, "hmem_cuda_use_gdrcopy",
&ret);
Expand Down Expand Up @@ -783,6 +896,11 @@ bool cuda_is_gdrcopy_enabled(void)
return cuda_attr.use_gdrcopy;
}

/**
 * @brief Report the dmabuf support flag recorded in cuda_attr
 *
 * @return	the current value of cuda_attr.dmabuf_supported
 */
bool cuda_is_dmabuf_supported(void)
{
	const bool supported = cuda_attr.dmabuf_supported;

	return supported;
}

#else

int cuda_copy_to_dev(uint64_t device, void *dev, const void *host, size_t size)
Expand Down Expand Up @@ -878,6 +996,17 @@ bool cuda_is_gdrcopy_enabled(void)
return false;
}

/* Stub on the #else side of the build: dmabuf is never reported as supported. */
bool cuda_is_dmabuf_supported(void)
{
	const bool supported = false;

	return supported;
}

/*
 * Stub on the #else side of the build: no dmabuf fd can be produced, so the
 * arguments are ignored and -FI_ENOSYS is returned unconditionally.
 */
int cuda_get_dmabuf_fd(void *addr, uint64_t size, int *fd,
		       uint64_t *offset)
{
	const int not_implemented = -FI_ENOSYS;

	return not_implemented;
}

int cuda_set_sync_memops(void *ptr)
{
return FI_SUCCESS;
Expand Down

0 comments on commit 8e1e20c

Please sign in to comment.