diff --git a/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Internal.hpp b/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Internal.hpp index 574280c82b..6c8178b5cd 100644 --- a/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Internal.hpp +++ b/batched/dense/impl/KokkosBatched_ApplyHouseholder_Serial_Internal.hpp @@ -34,8 +34,8 @@ struct SerialApplyLeftHouseholderInternal { /* */ ValueType* u2, const int u2s, /* */ ValueType* a1t, const int a1ts, /* */ ValueType* A2, const int as0, const int as1, - /* */ ValueType* w1t) { - typedef ValueType value_type; + /* */ ValueType* w1t, const int ws0) { + using value_type = ValueType; /// u2 m x 1 /// a1t 1 x n @@ -54,15 +54,15 @@ struct SerialApplyLeftHouseholderInternal { for (int j = 0; j < n; ++j) { value_type tmp = a1t[j * a1ts]; for (int i = 0; i < m; ++i) tmp += Kokkos::ArithTraits::conj(u2[i * u2s]) * A2[i * as0 + j * as1]; - w1t[j] = tmp * inv_tau; // /= (*tau); + w1t[j * ws0] = tmp * inv_tau; // /= (*tau); } // a1t -= w1t (axpy) - for (int j = 0; j < n; ++j) a1t[j * a1ts] -= w1t[j]; + for (int j = 0; j < n; ++j) a1t[j * a1ts] -= w1t[j * ws0]; // A2 -= u2 w1t (ger) for (int j = 0; j < n; ++j) - for (int i = 0; i < m; ++i) A2[i * as0 + j * as1] -= u2[i * u2s] * w1t[j]; + for (int i = 0; i < m; ++i) A2[i * as0 + j * as1] -= u2[i * u2s] * w1t[j * ws0]; return 0; } @@ -74,8 +74,8 @@ struct SerialApplyRightHouseholderInternal { /* */ ValueType* u2, const int u2s, /* */ ValueType* a1, const int a1s, /* */ ValueType* A2, const int as0, const int as1, - /* */ ValueType* w1) { - typedef ValueType value_type; + /* */ ValueType* w1, const int ws0) { + using value_type = ValueType; /// u2 n x 1 /// a1 m x 1 /// A2 m x n @@ -93,15 +93,15 @@ struct SerialApplyRightHouseholderInternal { for (int i = 0; i < m; ++i) { value_type tmp = a1[i * a1s]; for (int j = 0; j < n; ++j) tmp += A2[i * as0 + j * as1] * u2[j * u2s]; - w1[i] = tmp * inv_tau; // \= (*tau); + w1[i * ws0] = tmp * inv_tau; // \= (*tau); } // a1 -= w1 (axpy) - for (int i = 0; i < m; ++i) a1[i * a1s] -= w1[i]; + for (int i = 0; i < m; ++i) a1[i * a1s] -= w1[i * ws0]; // A2 -= w1 * u2' (ger with conjugate) for (int j = 0; j < n; ++j) - for (int i = 0; i < m; ++i) A2[i * as0 + j * as1] -= w1[i] * Kokkos::ArithTraits::conj(u2[j * u2s]); + for (int i = 0; i < m; ++i) A2[i * as0 + j * as1] -= w1[i * ws0] * Kokkos::ArithTraits::conj(u2[j * u2s]); return 0; } diff --git a/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Impl.hpp b/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Impl.hpp index f1f2d29089..02335f319f 100644 --- a/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Impl.hpp +++ b/batched/dense/impl/KokkosBatched_ApplyQ_Serial_Impl.hpp @@ -33,7 +33,7 @@ KOKKOS_INLINE_FUNCTION int SerialApplyQ @@ -42,7 +42,7 @@ KOKKOS_INLINE_FUNCTION int SerialApplyQ @@ -51,7 +51,7 @@ KOKKOS_INLINE_FUNCTION int SerialApplyQ KOKKOS_INLINE_FUNCTION int SerialQR::invoke(const AViewType &A, const tViewType &t, const wViewType &w) { return SerialQR_Internal::invoke(A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), t.data(), - t.stride_0(), w.data()); + t.stride_0(), w.data(), w.stride_0()); } } // namespace KokkosBatched diff --git a/batched/dense/impl/KokkosBatched_QR_Serial_Internal.hpp b/batched/dense/impl/KokkosBatched_QR_Serial_Internal.hpp index 851bd85ab9..04eda350f8 100644 --- a/batched/dense/impl/KokkosBatched_QR_Serial_Internal.hpp +++ b/batched/dense/impl/KokkosBatched_QR_Serial_Internal.hpp @@ -36,7 +36,7 @@ struct SerialQR_Internal { const int n, // n = NumCols(A) /* */ ValueType *A, const int as0, const int as1, /* */ ValueType *t, const int ts, - /* */ ValueType *w) { + /* */ ValueType *w, const int ws) { typedef ValueType value_type; /// Given a matrix A, it computes QR decomposition of the matrix @@ -69,7 +69,7 @@ struct SerialQR_Internal { // left apply householder to A22 SerialApplyLeftHouseholderInternal::invoke(m_A22, n_A22, tau, A_part3x3.A21, as0, A_part3x3.A12, as1, - A_part3x3.A22, as0, as1, w); + A_part3x3.A22, as0, as1, w, ws); /// ----------------------------------------------------- A_part2x2.mergeToATL(A_part3x3); t_part2x1.mergeToAT(t_part3x1); diff --git a/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp b/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp index 56a5619e06..108c0223f1 100644 --- a/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp +++ b/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp @@ -176,11 +176,11 @@ struct SerialSVDInternal { if (n - i > 1) { KokkosBatched::SerialApplyLeftHouseholderInternal::invoke( m - i - 1, n - i - 1, &tau, &SVDIND(A, i + 1, i), As0, &SVDIND(A, i, i + 1), As1, &SVDIND(A, i + 1, i + 1), - As0, As1, work); + As0, As1, work, 1); } if (U) { KokkosBatched::SerialApplyRightHouseholderInternal::invoke( - m, m - i - 1, &tau, &SVDIND(A, i + 1, i), As0, &SVDIND(U, 0, i), Us0, &SVDIND(U, 0, i + 1), Us0, Us1, work); + m, m - i - 1, &tau, &SVDIND(A, i + 1, i), As0, &SVDIND(U, 0, i), Us0, &SVDIND(U, 0, i + 1), Us0, Us1, work, 1); } // Zero out A subdiag explicitly (NOTE: may not be necessary...) for (int j = i + 1; j < m; j++) { @@ -193,12 +193,12 @@ struct SerialSVDInternal { if (m - i > 1) { KokkosBatched::SerialApplyRightHouseholderInternal::invoke( m - i - 1, n - i - 2, &tau, &SVDIND(A, i, i + 2), As1, &SVDIND(A, i + 1, i + 1), As0, - &SVDIND(A, i + 1, i + 2), As0, As1, work); + &SVDIND(A, i + 1, i + 2), As0, As1, work, 1); } if (Vt) { KokkosBatched::SerialApplyLeftHouseholderInternal::invoke( n - i - 2, n, &tau, &SVDIND(A, i, i + 2), As1, &SVDIND(Vt, i + 1, 0), Vts1, &SVDIND(Vt, i + 2, 0), Vts0, - Vts1, work); + Vts1, work, 1); } // Zero out A superdiag row explicitly for (int j = i + 2; j < n; j++) { diff --git a/batched/dense/src/KokkosBatched_ApplyQ_Decl.hpp b/batched/dense/src/KokkosBatched_ApplyQ_Decl.hpp index 9d17a8b435..589ca192dd 100644 --- a/batched/dense/src/KokkosBatched_ApplyQ_Decl.hpp +++ b/batched/dense/src/KokkosBatched_ApplyQ_Decl.hpp @@ -64,11 +64,11 @@ struct ApplyQ { KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t, const BViewType &B, const wViewType &w) { int r_val = 0; - if (std::is_same::value) { + if constexpr (std::is_same_v) { r_val = SerialApplyQ::invoke(A, t, B, w); - } else if (std::is_same::value) { + } else if constexpr (std::is_same_v) { r_val = TeamApplyQ::invoke(member, A, t, B, w); - } else if (std::is_same::value) { + } else if constexpr (std::is_same_v) { r_val = TeamVectorApplyQ::invoke(member, A, t, B, w); } return r_val; diff --git a/batched/dense/unit_test/Test_Batched_SerialQR.hpp b/batched/dense/unit_test/Test_Batched_SerialQR.hpp index 42dad37984..d24f1d8c3a 100644 --- a/batched/dense/unit_test/Test_Batched_SerialQR.hpp +++ b/batched/dense/unit_test/Test_Batched_SerialQR.hpp @@ -53,29 +53,19 @@ struct qrFunctor { int error = 0; KokkosBatched::SerialQR::invoke(A, tau, w); - Kokkos::printf("A%d = [%5.2f, %5.2f]\n [%5.2f, %5.2f]\n [%5.2f, %5.2f]\n", - matIdx, A(0, 0), A(0, 1), A(1, 0), A(1, 1), A(2, 0), A(2, 1)); - Kokkos::printf("tau%d = [%5.3f, %5.3f]\n", matIdx, tau(0), tau(1)); // Store identity in Q for (int diagIdx = 0; diagIdx < Q.extent_int(0); ++diagIdx) { Q(diagIdx, diagIdx) = SC_one; } - Kokkos::printf("Q stores I\nQ%d = [%5.2f, %5.2f, %5.2f]\n [%5.2f, %5.2f, %5.2f]\n [%5.2f, %5.2f, %5.2f]\n", - matIdx, Q(0, 0), Q(0, 1), Q(0, 2), Q(1, 0), Q(1, 1), Q(1, 2), Q(2, 0), Q(2, 1), Q(2, 2)); // Call ApplyQ on Q - w(0) = 0.0, w(1) = 0.0; + for(int idx = 0; idx < w.extent_int(0); ++idx) { w(idx) = 0.0; } KokkosBatched::SerialApplyQ::invoke(A, tau, Q, w); - Kokkos::printf("\nQ stores Q\nQ%d = [%5.2f, %5.2f, %5.2f]\n [%5.2f, %5.2f, %5.2f]\n [%5.2f, %5.2f, %5.2f]\n", - matIdx, Q(0, 0), Q(0, 1), Q(0, 2), Q(1, 0), Q(1, 1), Q(1, 2), Q(2, 0), Q(2, 1), Q(2, 2)); // Now apply Q' to Q - w(0) = 0.0, w(1) = 0.0; - // KokkosBatched::SerialApplyQ::invoke(A, tau, Q, w); + for(int idx = 0; idx < w.extent_int(0); ++idx) { w(idx) = 0.0; } KokkosBatched::SerialApplyQ::invoke(A, tau, Q, w); - Kokkos::printf("\nQ stores Q'Q=I\nQ%d = [%5.2f, %5.2f, %5.2f]\n [%5.2f, %5.2f, %5.2f]\n [%5.2f, %5.2f, %5.2f]\n", - matIdx, Q(0, 0), Q(0, 1), Q(0, 2), Q(1, 0), Q(1, 1), Q(1, 2), Q(2, 0), Q(2, 1), Q(2, 2)); // At this point Q stores Q'Q // which should be the identity matrix @@ -203,7 +193,7 @@ void test_QR_square() { Kokkos::parallel_for( "serialFormQ", 1, KOKKOS_LAMBDA(int) { KokkosBatched::SerialQR_FormQ_Internal::invoke(m, n, A.data(), A.stride(0), A.stride(1), t.data(), t.stride(0), - Q.data(), Q.stride(0), Q.stride(1), w.data()); + Q.data(), Q.stride(0), Q.stride(1), w.data(), w.stride(0)); }); typename MatrixViewType::HostMirror Q_h = Kokkos::create_mirror_view(Q); Kokkos::deep_copy(Q_h, Q); @@ -311,7 +301,7 @@ void test_QR_rectangular() { Kokkos::parallel_for( "serialFormQ", 1, KOKKOS_LAMBDA(int) { KokkosBatched::SerialQR_FormQ_Internal::invoke(m, n, A.data(), A.stride(0), A.stride(1), t.data(), t.stride(0), - Q.data(), Q.stride(0), Q.stride(1), w.data()); + Q.data(), Q.stride(0), Q.stride(1), w.data(), w.stride(0)); }); typename MatrixViewType::HostMirror Q_h = Kokkos::create_mirror_view(Q); Kokkos::deep_copy(Q_h, Q); @@ -354,32 +344,32 @@ void test_QR_batch() { using ExecutionSpace = typename Device::execution_space; - // { // Square matrix case - // constexpr int numMat = 1; // 314 - // constexpr int maxMatSize = 6; // 36 - // Kokkos::View tau("tau", numMat, maxMatSize); - // Kokkos::View tmp("work buffer", numMat, maxMatSize); - // Kokkos::View As("A matrices", numMat, maxMatSize, maxMatSize); - // Kokkos::View Bs("B matrices", numMat, maxMatSize, maxMatSize); - // Kokkos::View Qs("Q matrices", numMat, maxMatSize, maxMatSize); - // Kokkos::View global_error("global number of error"); - - // Kokkos::Random_XorShift64_Pool rand_pool(2718); - // constexpr double max_val = 1000; - // { - // Scalar randStart, randEnd; - // Test::getRandomBounds(max_val, randStart, randEnd); - // Kokkos::fill_random(ExecutionSpace{}, As, rand_pool, randStart, randEnd); - // } - // Kokkos::deep_copy(Bs, As); - - // qrFunctor myFunc(As, tau, tmp, Qs, Bs, global_error); - // Kokkos::parallel_for("KokkosBatched::test_QR_batch", Kokkos::RangePolicy(0, numMat), myFunc); - - // typename Kokkos::View::HostMirror global_error_h = Kokkos::create_mirror_view(global_error); - // Kokkos::deep_copy(global_error_h, global_error); - // EXPECT_EQ(global_error_h(), 0); - // } + { // Square matrix case + constexpr int numMat = 314; // 314 + constexpr int maxMatSize = 36; // 36 + Kokkos::View tau("tau", numMat, maxMatSize); + Kokkos::View tmp("work buffer", numMat, maxMatSize); + Kokkos::View As("A matrices", numMat, maxMatSize, maxMatSize); + Kokkos::View Bs("B matrices", numMat, maxMatSize, maxMatSize); + Kokkos::View Qs("Q matrices", numMat, maxMatSize, maxMatSize); + Kokkos::View global_error("global number of error"); + + Kokkos::Random_XorShift64_Pool rand_pool(2718); + constexpr double max_val = 1000; + { + Scalar randStart, randEnd; + Test::getRandomBounds(max_val, randStart, randEnd); + Kokkos::fill_random(ExecutionSpace{}, As, rand_pool, randStart, randEnd); + } + Kokkos::deep_copy(Bs, As); + + qrFunctor myFunc(As, tau, tmp, Qs, Bs, global_error); + Kokkos::parallel_for("KokkosBatched::test_QR_batch", Kokkos::RangePolicy(0, numMat), myFunc); + + typename Kokkos::View::HostMirror global_error_h = Kokkos::create_mirror_view(global_error); + Kokkos::deep_copy(global_error_h, global_error); + EXPECT_EQ(global_error_h(), 0); + } { // Rectangular matrix case constexpr int numMat = 2; // 314 @@ -410,12 +400,6 @@ void test_QR_batch() { As_h(1, 0, 0) = 3.0; As_h(1, 0, 1) = 5.0; As_h(1, 1, 0) = 4.0; As_h(1, 1, 1) = 0.0; As_h(1, 2, 0) = 0.0; As_h(1, 2, 1) = -3.0; - std::cout << "A0 = [" << As_h(0, 0, 0) << ", " << As_h(0, 0, 1) << "]\n" - << " [" << As_h(0, 1, 0) << ", " << As_h(0, 1, 1) << "]\n" - << " [" << As_h(0, 2, 0) << ", " << As_h(0, 2, 1) << "]\n" << std::endl; - std::cout << "A1 = [" << As_h(1, 0, 0) << ", " << As_h(1, 0, 1) << "]\n" - << " [" << As_h(1, 1, 0) << ", " << As_h(1, 1, 1) << "]\n" - << " [" << As_h(1, 2, 0) << ", " << As_h(1, 2, 1) << "]" << std::endl; Kokkos::deep_copy(As, As_h); }