From 4a9a35a4041d7bbc93f90c9da7c7cbdf3621aca6 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 28 Nov 2023 10:00:26 +0100 Subject: [PATCH] Enable support for asynchronous memory in ROCm 5.3 and later Include protections for 0-sized allocations in ROCm 5.3-5.5 . --- include/alpaka/core/ApiHipRt.hpp | 30 +++++++++++++++---- .../alpaka/mem/buf/BufUniformCudaHipRt.hpp | 18 +++++++---- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/include/alpaka/core/ApiHipRt.hpp b/include/alpaka/core/ApiHipRt.hpp index e76294f068fb..69590a4583ea 100644 --- a/include/alpaka/core/ApiHipRt.hpp +++ b/include/alpaka/core/ApiHipRt.hpp @@ -181,9 +181,17 @@ namespace alpaka static inline Error_t freeAsync([[maybe_unused]] void* devPtr, [[maybe_unused]] Stream_t stream) { - // hipFreeAsync is implemented only in ROCm 5.2.0 and later. -# if HIP_VERSION >= 50'200'000 - return ::hipFreeAsync(devPtr, stream); + // stream-ordered memory operations are fully implemented only in ROCm 5.3.0 and later. +# if HIP_VERSION >= 50'300'000 + // hipFreeAsync fails on a null pointer deallocation + if(devPtr) + { + return ::hipFreeAsync(devPtr, stream); + } + else + { + return ::hipSuccess; + } # else // Not implemented. return errorUnknown; @@ -289,9 +297,21 @@ namespace alpaka [[maybe_unused]] size_t size, [[maybe_unused]] Stream_t stream) { - // hipMallocAsync is implemented only in ROCm 5.2.0 and later. -# if HIP_VERSION >= 50'200'000 + // stream-ordered memory operations are fully implemented only in ROCm 5.3.0 and later. +# if HIP_VERSION >= 50'600'000 return ::hipMallocAsync(devPtr, size, stream); +# elif HIP_VERSION >= 50'300'000 + // before ROCm 5.6.0, hipMallocAsync fails for an allocation of 0 bytes + if(size > 0) + { + return ::hipMallocAsync(devPtr, size, stream); + } + else + { + // make sure the pointer can safely be passed to hipFreeAsync + *devPtr = nullptr; + return ::hipSuccess; + } # else // Not implemented. return errorUnknown; diff --git a/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp b/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp index 79340323ff01..649812097cba 100644 --- a/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp +++ b/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp @@ -280,8 +280,8 @@ namespace alpaka # endif # if defined(ALPAKA_ACC_GPU_HIP_ENABLED) static_assert( - !std::is_same_v, - "HIP devices do not support stream-ordered memory buffers."); + std::is_same_v && TApi::version >= BOOST_VERSION_NUMBER(5, 3, 0), + "Support for stream-ordered memory buffers requires HIP/ROCm 5.3 or higher."); # endif static_assert( TDim::value <= 1, @@ -317,16 +317,22 @@ namespace alpaka } }; -# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) //! The CUDA/HIP stream-ordered memory allocation capability trait specialization. template struct HasAsyncBufSupport> : std::bool_constant< - std::is_same_v && TApi::version >= BOOST_VERSION_NUMBER(11, 2, 0) - && TDim::value <= 1> + TDim::value <= 1 + && ( +# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) + std::is_same_v && TApi::version >= BOOST_VERSION_NUMBER(11, 2, 0) +# elif defined(ALPAKA_ACC_GPU_HIP_ENABLED) + std::is_same_v && TApi::version >= BOOST_VERSION_NUMBER(5, 3, 0) +# else + false +# endif + )> { }; -# endif //! The pinned/mapped memory allocation trait specialization for the CUDA/HIP devices. template