From c3b03f2b4992eaa008fdae1519861a6147cfe8f7 Mon Sep 17 00:00:00 2001 From: Cory Bloor Date: Tue, 5 Jul 2022 19:05:11 -0600 Subject: [PATCH 1/3] Remove Git from build requirements (#243) --- README.md | 1 - cmake/Dependencies.cmake | 6 ------ 2 files changed, 7 deletions(-) diff --git a/README.md b/README.md index c0fd29458..8cb25264d 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ Thrust is a parallel algorithm library. This library has been ported to [HIP](ht ### Software -* Git * CMake (3.5.1 or later) * AMD [ROCm](https://rocm.github.io/install.html) platform (1.8.0 or later) * Including [HipCC](https://github.com/ROCm-Developer-Tools/HIP) compiler, which must be diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index a5f25352d..eae0b0c72 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -12,12 +12,6 @@ # For downloading, building, and installing required dependencies include(cmake/DownloadProject.cmake) -# GIT -find_package(Git REQUIRED) -if (NOT Git_FOUND) - message(FATAL_ERROR "Please ensure Git is installed on the system") -endif() - # rocPRIM (https://github.com/ROCmSoftwarePlatform/rocPRIM) if(NOT DOWNLOAD_ROCPRIM) find_package(rocprim QUIET) From 221f21e061656e2866daf29a0078a39c86eb0e96 Mon Sep 17 00:00:00 2001 From: Nol Moonen Date: Mon, 18 Jul 2022 17:14:24 +0200 Subject: [PATCH 2/3] Enable tests that rely on device malloc, remove kernel checks (#234) * enable tests that rely on device malloc, remove kernel checks * Updates for language. * disable device malloc tests for windows Co-authored-by: Saad Rahim <44449863+saadrahim@users.noreply.github.com> --- CHANGELOG.md | 4 + test/test_partition.cpp | 2 - test/test_sort.cpp | 84 ++++++------- test/test_sort_by_key.cpp | 108 ++++++++--------- test/test_stable_sort.cpp | 83 ++++++------- test/test_stable_sort_by_key.cpp | 110 +++++++++--------- thrust/system/detail/sequential/partition.h | 5 - .../detail/sequential/stable_merge_sort.inl | 18 --- .../detail/sequential/stable_radix_sort.inl | 10 -- 9 files changed, 198 insertions(+), 226 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d05840e4..3c9cc4d05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ Full documentation for rocThrust is available at [https://rocthrust.readthedocs.io/en/latest/](https://rocthrust.readthedocs.io/en/latest/) +## (Unreleased) rocThrust 2.16.0 for ROCm 5.3 +### Changed +- rocThrust functionality dependent on device malloc works is functional as ROCm 5.2 reneabled device malloc. Device launched `thrust::sort` and `thrust::sort_by_key` are available for use. + ## rocThrust 2.15.0 for ROCm 5.2 ### Added - Packages for tests and benchmark executable on all supported OSes using CPack. diff --git a/test/test_partition.cpp b/test/test_partition.cpp index fa4043282..69b903f00 100644 --- a/test/test_partition.cpp +++ b/test/test_partition.cpp @@ -1906,8 +1906,6 @@ TEST(PartitionTests,TestPartitionCopyDevice) } } -//TODO: Tests fails with error message "Memory access fault by GPU node-1 (Agent handle: 0x1ade7d0) on address 0x7fac25a00000. Reason: Page not present or supervisor privilege." - // __global__ // THRUST_HIP_LAUNCH_BOUNDS_DEFAULT // void StablePartitionKernel(int const N, int* array) diff --git a/test/test_sort.cpp b/test/test_sort.cpp index 1e950cf3b..0a7685f01 100644 --- a/test/test_sort.cpp +++ b/test/test_sort.cpp @@ -437,45 +437,47 @@ TEST(SortTests, TestSortBoolDescending) } } +#ifndef _WIN32 //TODO: refactor this test into a different set of tests -// __global__ -// THRUST_HIP_LAUNCH_BOUNDS_DEFAULT -// void SortKernel(int const N, int* array) -// { -// if(threadIdx.x == 0) -// { -// thrust::device_ptr begin(array); -// thrust::device_ptr end(array + N); -// thrust::sort(thrust::hip::par, begin, end); -// } -// } -// -// TEST(SortTests, TestSortDevice) -// { -// SCOPED_TRACE(testing::Message() << "with device_id= " << test::set_device_from_ctest()); -// -// for(auto size : get_sizes() ) -// { -// SCOPED_TRACE(testing::Message() << "with size= " << size); -// -// for(auto seed : get_seeds()) -// { -// SCOPED_TRACE(testing::Message() << "with seed= " << seed); -// -// thrust::host_vector h_data = get_random_data(size, 0, size, seed); -// -// thrust::device_vector d_data = h_data; -// -// thrust::sort(h_data.begin(), h_data.end()); -// hipLaunchKernelGGL(SortKernel, -// dim3(1, 1, 1), -// dim3(128, 1, 1), -// 0, -// 0, -// size, -// thrust::raw_pointer_cast(&d_data[0])); -// -// ASSERT_EQ(h_data, d_data); -// } -// } -// } +__global__ +THRUST_HIP_LAUNCH_BOUNDS_DEFAULT +void SortKernel(int const N, int* array) +{ + if(threadIdx.x == 0) + { + thrust::device_ptr begin(array); + thrust::device_ptr end(array + N); + thrust::sort(thrust::hip::par, begin, end); + } +} + +TEST(SortTests, TestSortDevice) +{ + SCOPED_TRACE(testing::Message() << "with device_id= " << test::set_device_from_ctest()); + + for(auto size : get_sizes() ) + { + SCOPED_TRACE(testing::Message() << "with size= " << size); + + for(auto seed : get_seeds()) + { + SCOPED_TRACE(testing::Message() << "with seed= " << seed); + + thrust::host_vector h_data = get_random_data(size, 0, size, seed); + + thrust::device_vector d_data = h_data; + + thrust::sort(h_data.begin(), h_data.end()); + hipLaunchKernelGGL(SortKernel, + dim3(1, 1, 1), + dim3(128, 1, 1), + 0, + 0, + size, + thrust::raw_pointer_cast(&d_data[0])); + + ASSERT_EQ(h_data, d_data); + } + } +} +#endif diff --git a/test/test_sort_by_key.cpp b/test/test_sort_by_key.cpp index 3172295be..66dc301a9 100644 --- a/test/test_sort_by_key.cpp +++ b/test/test_sort_by_key.cpp @@ -256,57 +256,57 @@ TEST(SortByKeyTests, TestSortByKeyBoolDescending) } } -//TODO: Tests fails with error message "Memory access fault by GPU node-1 (Agent handle: 0x1ade7d0) on address 0x7fac25a00000. Reason: Page not present or supervisor privilege." - -// __global__ -// THRUST_HIP_LAUNCH_BOUNDS_DEFAULT -// void SortByKeyKernel(int const N, int* keys, short* values) -// { -// if(threadIdx.x == 0) -// { -// thrust::device_ptr keys_begin(keys); -// thrust::device_ptr keys_end(keys + N); -// thrust::device_ptr val(values); -// thrust::sort_by_key(thrust::hip::par, keys_begin, keys_end, val); -// } -// } -// -// TEST(SortByKeyTests, TestSortByKeyDevice) -// { -// SCOPED_TRACE(testing::Message() << "with device_id= " << test::set_device_from_ctest()); -// -// for (auto size: get_sizes() ) -// { -// SCOPED_TRACE(testing::Message() << "with size= " << size); -// -// for(auto seed : get_seeds()) -// { -// SCOPED_TRACE(testing::Message() << "with seed= " << seed); -// -// thrust::host_vector h_keys = get_random_data(size, 0, size, seed); -// -// thrust::host_vector h_values -// = get_random_data(size, -// std::numeric_limits::min(), -// std::numeric_limits::max(), -// seed); -// -// thrust::device_vector d_keys = h_keys; -// thrust::device_vector d_values = h_values; -// -// thrust::sort_by_key(h_keys.begin(), h_keys.end(), h_values.begin()); -// hipLaunchKernelGGL(SortByKeyKernel, -// dim3(1, 1, 1), -// dim3(128, 1, 1), -// 0, -// 0, -// size, -// thrust::raw_pointer_cast(&d_keys[0]), -// thrust::raw_pointer_cast(&d_values[0])); -// -// ASSERT_EQ(h_keys, d_keys); -// // Only keys are compared here, the sequential stable_merge_sort that's used in -// // CUDA and HIP don't generate the correct value sorting -// } -// } -// } +#ifndef _WIN32 +__global__ +THRUST_HIP_LAUNCH_BOUNDS_DEFAULT +void SortByKeyKernel(int const N, int* keys, short* values) +{ + if(threadIdx.x == 0) + { + thrust::device_ptr keys_begin(keys); + thrust::device_ptr keys_end(keys + N); + thrust::device_ptr val(values); + thrust::sort_by_key(thrust::hip::par, keys_begin, keys_end, val); + } +} + +TEST(SortByKeyTests, TestSortByKeyDevice) +{ + SCOPED_TRACE(testing::Message() << "with device_id= " << test::set_device_from_ctest()); + + for (auto size: get_sizes() ) + { + SCOPED_TRACE(testing::Message() << "with size= " << size); + + for(auto seed : get_seeds()) + { + SCOPED_TRACE(testing::Message() << "with seed= " << seed); + + thrust::host_vector h_keys = get_random_data(size, 0, size, seed); + + thrust::host_vector h_values + = get_random_data(size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed); + + thrust::device_vector d_keys = h_keys; + thrust::device_vector d_values = h_values; + + thrust::sort_by_key(h_keys.begin(), h_keys.end(), h_values.begin()); + hipLaunchKernelGGL(SortByKeyKernel, + dim3(1, 1, 1), + dim3(128, 1, 1), + 0, + 0, + size, + thrust::raw_pointer_cast(&d_keys[0]), + thrust::raw_pointer_cast(&d_values[0])); + + ASSERT_EQ(h_keys, d_keys); + // Only keys are compared here, the sequential stable_merge_sort that's used in + // CUDA and HIP don't generate the correct value sorting + } + } +} +#endif diff --git a/test/test_stable_sort.cpp b/test/test_stable_sort.cpp index 221630e94..e7f88c464 100644 --- a/test/test_stable_sort.cpp +++ b/test/test_stable_sort.cpp @@ -189,45 +189,46 @@ TYPED_TEST(StableSortVectorTests, TestStableSortWithIndirection) ASSERT_EQ(data[6], T(2)); } +#ifndef _WIN32 +__global__ +THRUST_HIP_LAUNCH_BOUNDS_DEFAULT +void StableSortKernel(int const N, int* array) +{ + if(threadIdx.x == 0) + { + thrust::device_ptr begin(array); + thrust::device_ptr end(array + N); + thrust::stable_sort(thrust::hip::par, begin, end); + } +} -// __global__ -// THRUST_HIP_LAUNCH_BOUNDS_DEFAULT -// void StableSortKernel(int const N, int* array) -// { -// if(threadIdx.x == 0) -// { -// thrust::device_ptr begin(array); -// thrust::device_ptr end(array + N); -// thrust::stable_sort(thrust::hip::par, begin, end); -// } -// } -// -// TEST(StableSortTests, TestStableSortDevice) -// { -// SCOPED_TRACE(testing::Message() << "with device_id= " << test::set_device_from_ctest()); -// -// for(auto size : get_sizes() ) -// { -// SCOPED_TRACE(testing::Message() << "with size= " << size); -// -// for(auto seed : get_seeds()) -// { -// SCOPED_TRACE(testing::Message() << "with seed= " << seed); -// -// thrust::host_vector h_data = get_random_data(size, 0, size, seed); -// -// thrust::device_vector d_data = h_data; -// -// thrust::stable_sort(h_data.begin(), h_data.end()); -// hipLaunchKernelGGL(StableSortKernel, -// dim3(1, 1, 1), -// dim3(128, 1, 1), -// 0, -// 0, -// size, -// thrust::raw_pointer_cast(&d_data[0])); -// -// ASSERT_EQ(h_data, d_data); -// } -// } -// } +TEST(StableSortTests, TestStableSortDevice) +{ + SCOPED_TRACE(testing::Message() << "with device_id= " << test::set_device_from_ctest()); + + for(auto size : get_sizes() ) + { + SCOPED_TRACE(testing::Message() << "with size= " << size); + + for(auto seed : get_seeds()) + { + SCOPED_TRACE(testing::Message() << "with seed= " << seed); + + thrust::host_vector h_data = get_random_data(size, 0, size, seed); + + thrust::device_vector d_data = h_data; + + thrust::stable_sort(h_data.begin(), h_data.end()); + hipLaunchKernelGGL(StableSortKernel, + dim3(1, 1, 1), + dim3(128, 1, 1), + 0, + 0, + size, + thrust::raw_pointer_cast(&d_data[0])); + + ASSERT_EQ(h_data, d_data); + } + } +} +#endif diff --git a/test/test_stable_sort_by_key.cpp b/test/test_stable_sort_by_key.cpp index 653f75b9b..4843c3978 100644 --- a/test/test_stable_sort_by_key.cpp +++ b/test/test_stable_sort_by_key.cpp @@ -181,58 +181,58 @@ TYPED_TEST(StableSortByKeyVectorPrimitiveTests, TestStableSortByKey) } } -//TODO: Tests fails with error message "Memory access fault by GPU node-1 (Agent handle: 0x1ade7d0) on address 0x7fac25a00000. Reason: Page not present or supervisor privilege." - -// __global__ -// THRUST_HIP_LAUNCH_BOUNDS_DEFAULT -// void StableSortByKeyKernel(int const N, int* keys, short* values) -// { -// if(threadIdx.x == 0) -// { -// thrust::device_ptr keys_begin(keys); -// thrust::device_ptr keys_end(keys + N); -// thrust::device_ptr val(values); -// //TODO: The thrust::hip::par throw exception, we should fix it -// thrust::stable_sort_by_key(thrust::hip::par, keys_begin, keys_end, val); -// } -// } -// -// TEST(StableSortByKeyTests, TestStableSortByKeyDevice) -// { -// SCOPED_TRACE(testing::Message() << "with device_id= " << test::set_device_from_ctest()); -// -// for (auto size: get_sizes() ) -// { -// SCOPED_TRACE(testing::Message() << "with size= " << size); -// -// for(auto seed : get_seeds()) -// { -// SCOPED_TRACE(testing::Message() << "with seed= " << seed); -// -// thrust::host_vector h_keys = get_random_data(size, 0, size, seed); -// -// thrust::host_vector h_values -// = get_random_data(size, -// std::numeric_limits::min(), -// std::numeric_limits::max(), -// seed); -// -// thrust::device_vector d_keys = h_keys; -// thrust::device_vector d_values = h_values; -// -// thrust::stable_sort_by_key(h_keys.begin(), h_keys.end(), h_values.begin()); -// hipLaunchKernelGGL(StableSortByKeyKernel, -// dim3(1, 1, 1), -// dim3(128, 1, 1), -// 0, -// 0, -// size, -// thrust::raw_pointer_cast(&d_keys[0]), -// thrust::raw_pointer_cast(&d_values[0])); -// -// ASSERT_EQ(h_keys, d_keys); -// // Only keys are compared here, the sequential stable_merge_sort that's used in -// // CUDA and HIP don't generate the correct value sorting -// } -// } -// } +#ifndef _WIN32 +__global__ +THRUST_HIP_LAUNCH_BOUNDS_DEFAULT +void StableSortByKeyKernel(int const N, int* keys, short* values) +{ + if(threadIdx.x == 0) + { + thrust::device_ptr keys_begin(keys); + thrust::device_ptr keys_end(keys + N); + thrust::device_ptr val(values); + //TODO: The thrust::hip::par throw exception, we should fix it + thrust::stable_sort_by_key(thrust::hip::par, keys_begin, keys_end, val); + } +} + +TEST(StableSortByKeyTests, TestStableSortByKeyDevice) +{ + SCOPED_TRACE(testing::Message() << "with device_id= " << test::set_device_from_ctest()); + + for (auto size: get_sizes() ) + { + SCOPED_TRACE(testing::Message() << "with size= " << size); + + for(auto seed : get_seeds()) + { + SCOPED_TRACE(testing::Message() << "with seed= " << seed); + + thrust::host_vector h_keys = get_random_data(size, 0, size, seed); + + thrust::host_vector h_values + = get_random_data(size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed); + + thrust::device_vector d_keys = h_keys; + thrust::device_vector d_values = h_values; + + thrust::stable_sort_by_key(h_keys.begin(), h_keys.end(), h_values.begin()); + hipLaunchKernelGGL(StableSortByKeyKernel, + dim3(1, 1, 1), + dim3(128, 1, 1), + 0, + 0, + size, + thrust::raw_pointer_cast(&d_keys[0]), + thrust::raw_pointer_cast(&d_values[0])); + + ASSERT_EQ(h_keys, d_keys); + // Only keys are compared here, the sequential stable_merge_sort that's used in + // CUDA and HIP don't generate the correct value sorting + } + } +} +#endif diff --git a/thrust/system/detail/sequential/partition.h b/thrust/system/detail/sequential/partition.h index 0443306c7..730aa1596 100644 --- a/thrust/system/detail/sequential/partition.h +++ b/thrust/system/detail/sequential/partition.h @@ -176,11 +176,6 @@ __host__ __device__ typedef thrust::detail::temporary_array TempRange; typedef typename TempRange::iterator TempIterator; - #ifdef __HIP_DEVICE_COMPILE__ - THRUST_HIP_PRINTF("Abort: Stable Partition is currently disabled for memory objects allocated on the device. HIP malloc does not support device side memory allocation.\n"); - abort(); - #endif - TempRange temp(exec, first, last); for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter) diff --git a/thrust/system/detail/sequential/stable_merge_sort.inl b/thrust/system/detail/sequential/stable_merge_sort.inl index df5be051e..bfeafc9e0 100644 --- a/thrust/system/detail/sequential/stable_merge_sort.inl +++ b/thrust/system/detail/sequential/stable_merge_sort.inl @@ -46,11 +46,6 @@ void inplace_merge(sequential::execution_policy &exec, { typedef typename thrust::iterator_value::type value_type; - #ifdef __HIP_DEVICE_COMPILE__ - THRUST_HIP_PRINTF("Abort: Inplace Merge is currently disabled for memory objects allocated on the device. HIP malloc does not support device side memory allocation.\n"); - abort(); - #endif - thrust::detail::temporary_array a(exec, first, middle); thrust::detail::temporary_array b(exec, middle, last); @@ -76,11 +71,6 @@ void inplace_merge_by_key(sequential::execution_policy &exec, RandomAccessIterator2 middle2 = first2 + (middle1 - first1); RandomAccessIterator2 last2 = first2 + (last1 - first1); - #ifdef __HIP_DEVICE_COMPILE__ - THRUST_HIP_PRINTF("Abort: Inplace Merge is currently disabled for memory objects allocated on the device. HIP malloc does not support device side memory allocation.\n"); - abort(); - #endif - thrust::detail::temporary_array lhs1(exec, first1, middle1); thrust::detail::temporary_array rhs1(exec, middle1, last1); thrust::detail::temporary_array lhs2(exec, first2, middle2); @@ -229,10 +219,6 @@ void iterative_stable_merge_sort(sequential::execution_policy &ex } else { - #ifdef __HIP_DEVICE_COMPILE__ - THRUST_HIP_PRINTF("Abort: Iterative Stable Merge Sort is currently disabled for memory objects allocated on the device. HIP malloc does not support device side memory allocation.\n"); - abort(); - #endif thrust::detail::temporary_array temp(exec, n); // insertion sort each 32 element partition @@ -291,10 +277,6 @@ void iterative_stable_merge_sort_by_key(sequential::execution_policy keys_temp(exec, n); thrust::detail::temporary_array values_temp(exec, n); diff --git a/thrust/system/detail/sequential/stable_radix_sort.inl b/thrust/system/detail/sequential/stable_radix_sort.inl index 7ce3f075d..8953ce55d 100644 --- a/thrust/system/detail/sequential/stable_radix_sort.inl +++ b/thrust/system/detail/sequential/stable_radix_sort.inl @@ -561,11 +561,6 @@ void stable_radix_sort(sequential::execution_policy &exec, size_t N = last - first; - #ifdef __HIP_DEVICE_COMPILE__ - THRUST_HIP_PRINTF("Abort: Stable Radix Sort is currently disabled for memory objects allocated on the device. HIP malloc does not support device side memory allocation.\n"); - abort(); - #endif - thrust::detail::temporary_array temp(exec, N); radix_sort_detail::radix_sort(exec, first, temp.begin(), N); @@ -586,11 +581,6 @@ void stable_radix_sort_by_key(sequential::execution_policy &exec, size_t N = last1 - first1; - #ifdef __HIP_DEVICE_COMPILE__ - THRUST_HIP_PRINTF("Abort: Stable Radix Sort by Key is currently disabled for memory objects allocated on the device. HIP malloc does not support device side memory allocation.\n"); - abort(); - #endif - thrust::detail::temporary_array temp1(exec, N); thrust::detail::temporary_array temp2(exec, N); From 767898e523d6a5a9ecb7f164734f704476e79569 Mon Sep 17 00:00:00 2001 From: arvindcheru <90783369+arvindcheru@users.noreply.github.com> Date: Tue, 2 Aug 2022 23:06:12 -0400 Subject: [PATCH 3/3] HIP Bin prefix path updated to rocm_path as per reorg structure (#246) --- toolchain-linux.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/toolchain-linux.cmake b/toolchain-linux.cmake index 15d356c23..9fc49aede 100644 --- a/toolchain-linux.cmake +++ b/toolchain-linux.cmake @@ -4,9 +4,9 @@ #set(CMAKE_GENERATOR_PLATFORM x64) if (DEFINED ENV{ROCM_PATH}) - set(rocm_bin "$ENV{ROCM_PATH}/hip/bin") + set(rocm_bin "$ENV{ROCM_PATH}/bin") else() - set(rocm_bin "/opt/rocm/hip/bin") + set(rocm_bin "/opt/rocm/bin") endif()