diff --git a/c2h/generators.cu b/c2h/generators.cu index 8044eabe6fe..4abdfe896ea 100644 --- a/c2h/generators.cu +++ b/c2h/generators.cu @@ -478,15 +478,15 @@ template void init_key_segments(const c2h::device_vector& segment_offsets, float* out, std::size_t element_size); template void init_key_segments( const c2h::device_vector& segment_offsets, custom_type_state_t* out, std::size_t element_size); -#ifdef _CCCL_HAS_NVFP16 +#ifdef TEST_HALF_T template void init_key_segments(const c2h::device_vector& segment_offsets, half_t* out, std::size_t element_size); -#endif // _CCCL_HAS_NVFP16 +#endif // TEST_HALF_T -#ifdef _CCCL_HAS_NVBF16 +#ifdef TEST_BF_T template void init_key_segments(const c2h::device_vector& segment_offsets, bfloat16_t* out, std::size_t element_size); -#endif // _CCCL_HAS_NVBF16 +#endif // TEST_BF_T } // namespace detail template @@ -552,15 +552,15 @@ INSTANTIATE(double); INSTANTIATE(bool); INSTANTIATE(char); -#ifdef _CCCL_HAS_NVFP16 +#ifdef TEST_HALF_T INSTANTIATE(half_t); INSTANTIATE(__half); -#endif // _CCCL_HAS_NVFP16 +#endif // TEST_HALF_T -#ifdef _CCCL_HAS_NVBF16 +#ifdef TEST_BF_T INSTANTIATE(bfloat16_t); INSTANTIATE(__nv_bfloat16); -#endif // _CCCL_HAS_NVBF16 +#endif // TEST_BF_T #undef INSTANTIATE_RND #undef INSTANTIATE_MOD diff --git a/c2h/include/c2h/extended_types.h b/c2h/include/c2h/extended_types.h index d8e53acd71e..753e225f15a 100644 --- a/c2h/include/c2h/extended_types.h +++ b/c2h/include/c2h/extended_types.h @@ -30,11 +30,15 @@ #include #ifndef TEST_HALF_T -# define TEST_HALF_T _CCCL_HAS_NVFP16 +# if defined(_CCCL_HAS_NVFP16) && defined(_LIBCUDACXX_HAS_NVFP16) +# define TEST_HALF_T +# endif #endif #ifndef TEST_BF_T -# define TEST_BF_T _CCCL_HAS_NVBF16 +# if defined(_CCCL_HAS_NVBF16) && defined(_LIBCUDACXX_HAS_NVBF16) +# define TEST_BF_T +# endif #endif #ifdef TEST_HALF_T diff --git a/cub/test/catch2_segmented_sort_helper.cuh b/cub/test/catch2_segmented_sort_helper.cuh index f8a081a125a..5f8b2b09c62 100644 --- a/cub/test/catch2_segmented_sort_helper.cuh +++ b/cub/test/catch2_segmented_sort_helper.cuh @@ -265,7 +265,7 @@ struct unwrap_value_t_impl using type = T; }; -#if TEST_HALF_T +#ifdef TEST_HALF_T template <> struct unwrap_value_t_impl { @@ -273,7 +273,7 @@ struct unwrap_value_t_impl }; #endif -#if TEST_BF_T +#ifdef TEST_BF_T template <> struct unwrap_value_t_impl { diff --git a/cub/test/catch2_test_device_histogram.cu b/cub/test/catch2_test_device_histogram.cu index e258fb4bcb6..2ed5d621661 100644 --- a/cub/test/catch2_test_device_histogram.cu +++ b/cub/test/catch2_test_device_histogram.cu @@ -69,7 +69,7 @@ auto cast_if_half_pointer(T* p) -> T* return p; } -#if TEST_HALF_T +#ifdef TEST_HALF_T auto cast_if_half_pointer(half_t* p) -> __half* { return reinterpret_cast<__half*>(p); @@ -412,7 +412,7 @@ using types = std::uint32_t, std::int64_t, std::uint64_t, -#if TEST_HALF_T +#ifdef TEST_HALF_T half_t, #endif float, diff --git a/cub/test/catch2_test_device_reduce.cu b/cub/test/catch2_test_device_reduce.cu index d6cfdb11521..fb046a3c7d5 100644 --- a/cub/test/catch2_test_device_reduce.cu +++ b/cub/test/catch2_test_device_reduce.cu @@ -77,14 +77,13 @@ using full_type_list = c2h::type_list, type_pair>; // clang-format off using full_type_list = c2h::type_list< type_pair -#if TEST_HALF_T -, type_pair // testing half -#endif -#if TEST_BF_T -, type_pair // testing bf16 - +#ifdef TEST_HALF_T +, type_pair +#endif // TEST_HALF_T +#ifdef TEST_BF_T +, type_pair +#endif // TEST_BF_T >; -#endif // clang-format on #elif TEST_TYPES == 4 // DPX SIMD instructions diff --git a/cub/test/catch2_test_device_reduce.cuh b/cub/test/catch2_test_device_reduce.cuh index ed9a806d19e..6fbdfa9b4c5 100644 --- a/cub/test/catch2_test_device_reduce.cuh +++ b/cub/test/catch2_test_device_reduce.cuh @@ -46,7 +46,7 @@ #include #include -#if TEST_HALF_T +#ifdef TEST_HALF_T // Half support is provided by SM53+. We currently test against a few older architectures. // The specializations below can be removed once we drop these architectures. @@ -107,8 +107,13 @@ __host__ __device__ __forceinline__ // return a; } + +CUB_NAMESPACE_END + #endif // TEST_HALF_T +CUB_NAMESPACE_BEGIN + /** * @brief Introduces the required NumericTraits for `c2h::custom_type_t`. */ @@ -173,7 +178,7 @@ struct ExtendedFloatSum return result; } -#if TEST_HALF_T +#ifdef TEST_HALF_T __host__ __device__ __half operator()(__half a, __half b) const { uint16_t result = this->operator()(half_t{a}, half_t(b)).raw(); @@ -181,7 +186,7 @@ struct ExtendedFloatSum } #endif -#if TEST_BF_T +#ifdef TEST_BF_T __device__ __nv_bfloat16 operator()(__nv_bfloat16 a, __nv_bfloat16 b) const { uint16_t result = this->operator()(bfloat16_t{a}, bfloat16_t(b)).raw(); @@ -196,7 +201,7 @@ inline It unwrap_it(It it) return it; } -#if TEST_HALF_T +#ifdef TEST_HALF_T inline __half* unwrap_it(half_t* it) { return reinterpret_cast<__half*>(it); @@ -211,7 +216,7 @@ inline thrust::constant_iterator<__half, OffsetT> unwrap_it(thrust::constant_ite } #endif -#if TEST_BF_T +#ifdef TEST_BF_T inline __nv_bfloat16* unwrap_it(bfloat16_t* it) { return reinterpret_cast<__nv_bfloat16*>(it); diff --git a/cub/test/catch2_test_device_reduce_by_key.cu b/cub/test/catch2_test_device_reduce_by_key.cu index ee8726219f2..6d21e9c64f1 100644 --- a/cub/test/catch2_test_device_reduce_by_key.cu +++ b/cub/test/catch2_test_device_reduce_by_key.cu @@ -58,10 +58,10 @@ using full_type_list = c2h::type_list, typ // clang-format off using full_type_list = c2h::type_list< type_triple -#if TEST_HALF_T +#ifdef TEST_HALF_T , type_triple // testing half #endif -#if TEST_BF_T +#ifdef TEST_BF_T , type_triple // testing bf16 #endif >; diff --git a/cub/test/catch2_test_device_scan.cu b/cub/test/catch2_test_device_scan.cu index d9cf517f55d..4b2e5820d27 100644 --- a/cub/test/catch2_test_device_scan.cu +++ b/cub/test/catch2_test_device_scan.cu @@ -64,10 +64,10 @@ using full_type_list = c2h::type_list, type_pair>; // clang-format off using full_type_list = c2h::type_list< type_pair -#if TEST_HALF_T +#ifdef TEST_HALF_T , type_pair // testing half #endif -#if TEST_BF_T +#ifdef TEST_BF_T , type_pair // testing bf16 #endif >; diff --git a/cub/test/catch2_test_device_scan_by_key.cu b/cub/test/catch2_test_device_scan_by_key.cu index 6942d22d09e..6b6b0e9ef7a 100644 --- a/cub/test/catch2_test_device_scan_by_key.cu +++ b/cub/test/catch2_test_device_scan_by_key.cu @@ -69,10 +69,10 @@ using full_type_list = // clang-format off using full_type_list = c2h::type_list< type_quad -#if TEST_HALF_T +#ifdef TEST_HALF_T , type_quad // testing half #endif -#if TEST_BF_T +#ifdef TEST_BF_T , type_quad // testing bf16 #endif >; diff --git a/cub/test/catch2_test_device_segmented_reduce.cu b/cub/test/catch2_test_device_segmented_reduce.cu index c524a7ef753..5fd75b15248 100644 --- a/cub/test/catch2_test_device_segmented_reduce.cu +++ b/cub/test/catch2_test_device_segmented_reduce.cu @@ -65,10 +65,10 @@ using full_type_list = c2h::type_list, type_pair>; // clang-format off using full_type_list = c2h::type_list< type_pair -#if TEST_HALF_T +#ifdef TEST_HALF_T , type_pair // testing half #endif -#if TEST_BF_T +#ifdef TEST_BF_T , type_pair // testing bf16 #endif >; diff --git a/cub/test/catch2_test_device_segmented_sort_keys.cu b/cub/test/catch2_test_device_segmented_sort_keys.cu index 3d392e8e8f6..a4d27d7df30 100644 --- a/cub/test/catch2_test_device_segmented_sort_keys.cu +++ b/cub/test/catch2_test_device_segmented_sort_keys.cu @@ -28,6 +28,7 @@ #include "insert_nested_NVTX_range_guard.h" // above header needs to be included first #include +#include #include "catch2_radix_sort_helper.cuh" #include "catch2_segmented_sort_helper.cuh" @@ -37,17 +38,23 @@ // graph launch. // %PARAM% TEST_LAUNCH lid 0:1 +static_assert(::cuda::std::__is_extended_floating_point<__half>::value); +static_assert(::cuda::is_floating_point_v<__half>); + +cub::Twiddle<__half>::UnsignedBits a; +cub::Twiddle::UnsignedBits b; + DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedSort::StableSortKeys, stable_sort_keys); using key_types = c2h::type_list, c2h::type_list, c2h::type_list -#if TEST_HALF_T +#ifdef TEST_HALF_T , c2h::type_list #endif -#if TEST_BF_T +#ifdef TEST_BF_T , c2h::type_list #endif