diff --git a/lapack/src/KokkosLapack_gesv.hpp b/lapack/src/KokkosLapack_gesv.hpp index 281d6a5651..1b183981fe 100644 --- a/lapack/src/KokkosLapack_gesv.hpp +++ b/lapack/src/KokkosLapack_gesv.hpp @@ -63,9 +63,15 @@ void gesv(const ExecutionSpace& space, const AMatrix& A, const BXMV& B, const IP static_assert(Kokkos::SpaceAccessibility::accessible); static_assert(Kokkos::SpaceAccessibility::accessible); #if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) +#if defined(KOKKOS_ENABLE_CUDA) if constexpr (!std::is_same_v) { static_assert(Kokkos::SpaceAccessibility::accessible); } +#elif defined(KOKKOS_ENABLE_HIP) + if constexpr (!std::is_same_v) { + static_assert(Kokkos::SpaceAccessibility::accessible); + } +#endif #else static_assert(Kokkos::SpaceAccessibility::accessible); #endif @@ -96,6 +102,7 @@ void gesv(const ExecutionSpace& space, const AMatrix& A, const BXMV& B, const IP // Check for no pivoting case. Only MAGMA supports no pivoting interface #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL +#if defined(KOKKOS_ENABLE_CUDA) if ((!std::is_same::value) && (IPIV0 == 0) && (IPIV.data() == nullptr)) { std::ostringstream os; @@ -103,6 +110,15 @@ void gesv(const ExecutionSpace& space, const AMatrix& A, const BXMV& B, const IP << "LAPACK TPL does not support no pivoting."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } +#elif defined(KOKKOS_ENABLE_HIP) + if ((!std::is_same::value) && (IPIV0 == 0) && + (IPIV.data() == nullptr)) { + std::ostringstream os; + os << "KokkosLapack::gesv: IPIV: " << IPIV0 << ". " + << "LAPACK TPL does not support no pivoting."; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } +#endif #endif #else // not have MAGMA TPL #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL diff --git a/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp index 472b79ce85..50a6863b80 100644 --- a/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp @@ -52,23 +52,28 @@ KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLe namespace KokkosLapack { namespace Impl { -#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, MEMSPACE) \ - template <> \ - struct gesv_tpl_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct gesv_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; - -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#if defined(KOKKOS_ENABLE_CUDA) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +#endif +#if defined(KOKKOS_ENABLE_HIP) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +#endif } // namespace Impl } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA diff --git a/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp index 559f5d0509..06bc012570 100644 --- a/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp @@ -197,42 +197,48 @@ void magmaGesvWrapper(const ExecSpace& space, const AViewType& A, const BViewTyp Kokkos::Profiling::popRegion(); } -#define KOKKOSLAPACK_GESV_MAGMA(SCALAR, LAYOUT, MEM_SPACE) \ - template <> \ - struct GESV, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ - gesv_eti_spec_avail, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using BViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void gesv(const Kokkos::Cuda& space, const AViewType& A, const BViewType& B, const PViewType& IPIV) { \ - magmaGesvWrapper(space, A, B, IPIV); \ - } \ +#define KOKKOSLAPACK_GESV_MAGMA(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ + template <> \ + struct GESV< \ + EXEC_SPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + gesv_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using BViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void gesv(const EXEC_SPACE& space, const AViewType& A, const BViewType& B, const PViewType& IPIV) { \ + magmaGesvWrapper(space, A, B, IPIV); \ + } \ }; -KOKKOSLAPACK_GESV_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) - +#if defined(KOKKOS_ENABLE_CUDA) +KOKKOSLAPACK_GESV_MAGMA(float, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_MAGMA(double, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace) +#endif +#if defined(KOKKOS_ENABLE_HIP) +KOKKOSLAPACK_GESV_MAGMA(float, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_MAGMA(double, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) +#endif } // namespace Impl } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA diff --git a/lapack/unit_test/Test_Lapack_gesv.hpp b/lapack/unit_test/Test_Lapack_gesv.hpp index 653ed2cbf2..fb3f371927 100644 --- a/lapack/unit_test/Test_Lapack_gesv.hpp +++ b/lapack/unit_test/Test_Lapack_gesv.hpp @@ -15,11 +15,12 @@ //@HEADER // only enable this test where KokkosLapack supports gesv: -// CUDA+(MAGMA or CUSOLVER), HIP+ROCSOLVER and HOST+LAPACK -#if (defined(TEST_CUDA_LAPACK_CPP) && \ - (defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) || defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER))) || \ - (defined(TEST_HIP_LAPACK_CPP) && defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ - (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ +// CUDA+(MAGMA or CUSOLVER), HIP+(MAGMA or ROCSOLVER) and HOST+LAPACK +#if (defined(TEST_CUDA_LAPACK_CPP) && \ + (defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) || defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER))) || \ + (defined(TEST_HIP_LAPACK_CPP) && \ + (defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) || defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER))) || \ + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || defined(TEST_THREADS_LAPACK_CPP))) #include @@ -97,8 +98,13 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { bool notpl_runtime_err = false; #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL +#if defined(KOKKOS_ENABLE_CUDA) nopivot_runtime_err = (!std::is_same::value) && (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); +#elif defined(KOKKOS_ENABLE_HIP) + nopivot_runtime_err = (!std::is_same::value) && + (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); +#endif notpl_runtime_err = false; #else notpl_runtime_err = true; @@ -200,8 +206,13 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, int nrhs) bool notpl_runtime_err = false; #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL +#if defined(KOKKOS_ENABLE_CUDA) nopivot_runtime_err = (!std::is_same::value) && (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); +#elif defined(KOKKOS_ENABLE_HIP) + nopivot_runtime_err = (!std::is_same::value) && + (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); +#endif notpl_runtime_err = false; #else notpl_runtime_err = true; @@ -222,9 +233,9 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, int nrhs) // Get the solution vector. Kokkos::deep_copy(h_B, B); - // Checking vs ref on CPU, this eps is about 10^-9 + // Checking vs ref on CPU, this eps is about 10^-8 typedef typename ats::mag_type mag_type; - const mag_type eps = 1.0e7 * ats::epsilon(); + const mag_type eps = 1.0e8 * ats::epsilon(); bool test_flag = true; for (int j = 0; j < nrhs; j++) { for (int i = 0; i < N; i++) { @@ -268,6 +279,19 @@ int test_gesv(const char* mode) { Test::impl_test_gesv(&mode[0], "N", 64); // no padding Test::impl_test_gesv(&mode[0], "N", 1024); // no padding + Test::impl_test_gesv(&mode[0], "Y", + 13); // padding + Test::impl_test_gesv(&mode[0], "Y", + 179); // padding + } +#elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_HIP) + if constexpr (std::is_same_v) { + Test::impl_test_gesv(&mode[0], "N", 2); // no padding + Test::impl_test_gesv(&mode[0], "N", 13); // no padding + Test::impl_test_gesv(&mode[0], "N", 179); // no padding + Test::impl_test_gesv(&mode[0], "N", 64); // no padding + Test::impl_test_gesv(&mode[0], "N", 1024); // no padding + Test::impl_test_gesv(&mode[0], "Y", 13); // padding Test::impl_test_gesv(&mode[0], "Y", @@ -307,6 +331,17 @@ int test_gesv_mrhs(const char* mode) { Test::impl_test_gesv_mrhs(&mode[0], "N", 64, 5); // no padding Test::impl_test_gesv_mrhs(&mode[0], "N", 1024, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "Y", 13, 5); // padding + Test::impl_test_gesv_mrhs(&mode[0], "Y", 179, 5); // padding + } +#elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_HIP) + if constexpr (std::is_same_v) { + Test::impl_test_gesv_mrhs(&mode[0], "N", 2, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 13, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 179, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 64, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "N", 1024, 5); // no padding + Test::impl_test_gesv_mrhs(&mode[0], "Y", 13, 5); // padding Test::impl_test_gesv_mrhs(&mode[0], "Y", 179, 5); // padding }