Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

batched - dense: Testing and fixing Serial QR #2342

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ struct SerialApplyLeftHouseholderInternal {
/* */ ValueType* u2, const int u2s,
/* */ ValueType* a1t, const int a1ts,
/* */ ValueType* A2, const int as0, const int as1,
/* */ ValueType* w1t) {
typedef ValueType value_type;
/* */ ValueType* w1t, const int ws0) {
using value_type = ValueType;

/// u2 m x 1
/// a1t 1 x n
Expand All @@ -54,15 +54,15 @@ struct SerialApplyLeftHouseholderInternal {
for (int j = 0; j < n; ++j) {
value_type tmp = a1t[j * a1ts];
for (int i = 0; i < m; ++i) tmp += Kokkos::ArithTraits<value_type>::conj(u2[i * u2s]) * A2[i * as0 + j * as1];
w1t[j] = tmp * inv_tau; // /= (*tau);
w1t[j * ws0] = tmp * inv_tau; // /= (*tau);
}

// a1t -= w1t (axpy)
for (int j = 0; j < n; ++j) a1t[j * a1ts] -= w1t[j];
for (int j = 0; j < n; ++j) a1t[j * a1ts] -= w1t[j * ws0];

// A2 -= u2 w1t (ger)
for (int j = 0; j < n; ++j)
for (int i = 0; i < m; ++i) A2[i * as0 + j * as1] -= u2[i * u2s] * w1t[j];
for (int i = 0; i < m; ++i) A2[i * as0 + j * as1] -= u2[i * u2s] * w1t[j * ws0];

return 0;
}
Expand All @@ -74,8 +74,8 @@ struct SerialApplyRightHouseholderInternal {
/* */ ValueType* u2, const int u2s,
/* */ ValueType* a1, const int a1s,
/* */ ValueType* A2, const int as0, const int as1,
/* */ ValueType* w1) {
typedef ValueType value_type;
/* */ ValueType* w1, const int ws0) {
using value_type = ValueType;
/// u2 n x 1
/// a1 m x 1
/// A2 m x n
Expand All @@ -93,15 +93,16 @@ struct SerialApplyRightHouseholderInternal {
for (int i = 0; i < m; ++i) {
value_type tmp = a1[i * a1s];
for (int j = 0; j < n; ++j) tmp += A2[i * as0 + j * as1] * u2[j * u2s];
w1[i] = tmp * inv_tau; // \= (*tau);
w1[i * ws0] = tmp * inv_tau; // \= (*tau);
}

// a1 -= w1 (axpy)
for (int i = 0; i < m; ++i) a1[i * a1s] -= w1[i];
for (int i = 0; i < m; ++i) a1[i * a1s] -= w1[i * ws0];

// A2 -= w1 * u2' (ger with conjugate)
for (int j = 0; j < n; ++j)
for (int i = 0; i < m; ++i) A2[i * as0 + j * as1] -= w1[i] * Kokkos::ArithTraits<ValueType>::conj(u2[j * u2s]);
for (int i = 0; i < m; ++i)
A2[i * as0 + j * as1] -= w1[i * ws0] * Kokkos::ArithTraits<ValueType>::conj(u2[j * u2s]);

return 0;
}
Expand Down
6 changes: 3 additions & 3 deletions batched/dense/impl/KokkosBatched_ApplyQ_Serial_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ KOKKOS_INLINE_FUNCTION int SerialApplyQ<Side::Left, Trans::NoTranspose, Algo::Ap
const AViewType &A, const tViewType &t, const BViewType &B, const wViewType &w) {
return SerialApplyQ_LeftForwardInternal::invoke(B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(),
A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(),
B.stride_1(), w.data());
B.stride_1(), w.data(), w.stride_0());
}

template <>
Expand All @@ -42,7 +42,7 @@ KOKKOS_INLINE_FUNCTION int SerialApplyQ<Side::Left, Trans::Transpose, Algo::Appl
const AViewType &A, const tViewType &t, const BViewType &B, const wViewType &w) {
return SerialApplyQ_LeftBackwardInternal::invoke(B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(),
A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(),
B.stride_1(), w.data());
B.stride_1(), w.data(), w.stride_0());
}

template <>
Expand All @@ -51,7 +51,7 @@ KOKKOS_INLINE_FUNCTION int SerialApplyQ<Side::Right, Trans::NoTranspose, Algo::A
const AViewType &A, const tViewType &t, const BViewType &B, const wViewType &w) {
return SerialApplyQ_RightForwardInternal::invoke(B.extent(0), B.extent(1), A.extent(1), A.data(), A.stride_0(),
A.stride_1(), t.data(), t.stride_0(), B.data(), B.stride_0(),
B.stride_1(), w.data());
B.stride_1(), w.data(), w.strid_0());
}

} // namespace KokkosBatched
Expand Down
13 changes: 6 additions & 7 deletions batched/dense/impl/KokkosBatched_ApplyQ_Serial_Internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct SerialApplyQ_LeftForwardInternal {
/* */ ValueType *A, const int as0, const int as1,
/* */ ValueType *t, const int ts,
/* */ ValueType *B, const int bs0, const int bs1,
/* */ ValueType *w) {
/* */ ValueType *w, const int ws) {
typedef ValueType value_type;

/// Given a matrix A that includes a series of householder vectors,
Expand Down Expand Up @@ -73,7 +73,7 @@ struct SerialApplyQ_LeftForwardInternal {
/// -----------------------------------------------------
// left apply householder to partitioned B1 and B2
SerialApplyLeftHouseholderInternal::invoke(m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1, bs1, B_part3x1.A2, bs0,
bs1, w);
bs1, w, ws);

/// -----------------------------------------------------
A_part2x2.mergeToABR(A_part3x3);
Expand All @@ -90,7 +90,7 @@ struct SerialApplyQ_LeftBackwardInternal {
/* */ ValueType *A, const int as0, const int as1,
/* */ ValueType *t, const int ts,
/* */ ValueType *B, const int bs0, const int bs1,
/* */ ValueType *w) {
/* */ ValueType *w, const int ws) {
typedef ValueType value_type;

/// Given a matrix A that includes a series of householder vectors,
Expand Down Expand Up @@ -127,8 +127,7 @@ struct SerialApplyQ_LeftBackwardInternal {
/// -----------------------------------------------------
// left apply householder to partitioned B1 and B2
SerialApplyLeftHouseholderInternal::invoke(m_A2, n, tau, A_part3x3.A21, as0, B_part3x1.A1, bs1, B_part3x1.A2, bs0,
bs1, w);

bs1, w, ws);
/// -----------------------------------------------------
A_part2x2.mergeToATL(A_part3x3);
t_part2x1.mergeToAT(t_part3x1);
Expand All @@ -144,7 +143,7 @@ struct SerialApplyQ_RightForwardInternal {
/* */ ValueType *A, const int as0, const int as1,
/* */ ValueType *t, const int ts,
/* */ ValueType *B, const int bs0, const int bs1,
/* */ ValueType *w) {
/* */ ValueType *w, const int ws) {
typedef ValueType value_type;

/// Given a matrix A that includes a series of householder vectors,
Expand Down Expand Up @@ -181,7 +180,7 @@ struct SerialApplyQ_RightForwardInternal {
/// -----------------------------------------------------
// right apply householder to partitioned B1 and B2
SerialApplyRightHouseholderInternal::invoke(m, n_B2, tau, A_part3x3.A21, as0, B_part1x3.A1, bs0, B_part1x3.A2,
bs0, bs1, w);
bs0, bs1, w, ws);
/// -----------------------------------------------------
A_part2x2.mergeToATL(A_part3x3);
t_part2x1.mergeToAT(t_part3x1);
Expand Down
18 changes: 11 additions & 7 deletions batched/dense/impl/KokkosBatched_QR_FormQ_Serial_Internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ struct SerialQR_FormQ_Internal {
/* */ ValueType* A, const int as0, const int as1,
/* */ ValueType* t, const int ts,
/* */ ValueType* Q, const int qs0, const int qs1,
/* */ ValueType* w, const bool is_Q_zero = false) {
typedef ValueType value_type;
/* */ ValueType* w, const int ws, const bool is_Q_zero = false) {
using value_type = ValueType;

/// Given a matrix A that includes QR factorization
/// it forms a unitary matrix Q
Expand All @@ -49,12 +49,16 @@ struct SerialQR_FormQ_Internal {
/// B is m x m

// set identity
if (is_Q_zero)
SerialSetInternal::invoke(m, value_type(1), Q, qs0 + qs1);
else
SerialSetIdentityInternal::invoke(m, Q, qs0, qs1);
if (is_Q_zero) {
for (int idx = 0; idx < m; ++idx) {
Q[(qs0 + qs1) * idx] = value_type(1);
// SerialSetInternal::invoke(m, value_type(1), Q, qs0 + qs1);
}
} else {
SerialSetIdentityInternal::invoke(m, m, Q, qs0, qs1);
}

return SerialApplyQ_LeftNoTransForwardInternal ::invoke(m, m, k, A, as0, as1, t, ts, Q, qs0, qs1, w);
return SerialApplyQ_LeftForwardInternal::invoke(m, m, k, A, as0, as1, t, ts, Q, qs0, qs1, w, ws);
}
};

Expand Down
2 changes: 1 addition & 1 deletion batched/dense/impl/KokkosBatched_QR_Serial_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ template <typename AViewType, typename tViewType, typename wViewType>
KOKKOS_INLINE_FUNCTION int SerialQR<Algo::QR::Unblocked>::invoke(const AViewType &A, const tViewType &t,
const wViewType &w) {
return SerialQR_Internal::invoke(A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), t.data(),
t.stride_0(), w.data());
t.stride_0(), w.data(), w.stride_0());
}

} // namespace KokkosBatched
Expand Down
6 changes: 3 additions & 3 deletions batched/dense/impl/KokkosBatched_QR_Serial_Internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct SerialQR_Internal {
const int n, // n = NumCols(A)
/* */ ValueType *A, const int as0, const int as1,
/* */ ValueType *t, const int ts,
/* */ ValueType *w) {
/* */ ValueType *w, const int ws) {
typedef ValueType value_type;

/// Given a matrix A, it computes QR decomposition of the matrix
Expand All @@ -53,7 +53,7 @@ struct SerialQR_Internal {
A_part2x2.partWithATL(A, m, n, 0, 0);
t_part2x1.partWithAT(t, m, 0);

for (int m_atl = 0; m_atl < m; ++m_atl) {
for (int m_atl = 0; m_atl < Kokkos::min(m, n); ++m_atl) {
// part 2x2 into 3x3
A_part3x3.partWithABR(A_part2x2, 1, 1);
const int m_A22 = m - m_atl - 1;
Expand All @@ -69,7 +69,7 @@ struct SerialQR_Internal {

// left apply householder to A22
SerialApplyLeftHouseholderInternal::invoke(m_A22, n_A22, tau, A_part3x3.A21, as0, A_part3x3.A12, as1,
A_part3x3.A22, as0, as1, w);
A_part3x3.A22, as0, as1, w, ws);
/// -----------------------------------------------------
A_part2x2.mergeToATL(A_part3x3);
t_part2x1.mergeToAT(t_part3x1);
Expand Down
11 changes: 6 additions & 5 deletions batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,12 @@ struct SerialSVDInternal {
if (n - i > 1) {
KokkosBatched::SerialApplyLeftHouseholderInternal::invoke<value_type>(
m - i - 1, n - i - 1, &tau, &SVDIND(A, i + 1, i), As0, &SVDIND(A, i, i + 1), As1, &SVDIND(A, i + 1, i + 1),
As0, As1, work);
As0, As1, work, 1);
}
if (U) {
KokkosBatched::SerialApplyRightHouseholderInternal::invoke<value_type>(
m, m - i - 1, &tau, &SVDIND(A, i + 1, i), As0, &SVDIND(U, 0, i), Us0, &SVDIND(U, 0, i + 1), Us0, Us1, work);
KokkosBatched::SerialApplyRightHouseholderInternal::invoke<value_type>(m, m - i - 1, &tau, &SVDIND(A, i + 1, i),
As0, &SVDIND(U, 0, i), Us0,
&SVDIND(U, 0, i + 1), Us0, Us1, work, 1);
}
// Zero out A subdiag explicitly (NOTE: may not be necessary...)
for (int j = i + 1; j < m; j++) {
Expand All @@ -193,12 +194,12 @@ struct SerialSVDInternal {
if (m - i > 1) {
KokkosBatched::SerialApplyRightHouseholderInternal::invoke<value_type>(
m - i - 1, n - i - 2, &tau, &SVDIND(A, i, i + 2), As1, &SVDIND(A, i + 1, i + 1), As0,
&SVDIND(A, i + 1, i + 2), As0, As1, work);
&SVDIND(A, i + 1, i + 2), As0, As1, work, 1);
}
if (Vt) {
KokkosBatched::SerialApplyLeftHouseholderInternal::invoke<value_type>(
n - i - 2, n, &tau, &SVDIND(A, i, i + 2), As1, &SVDIND(Vt, i + 1, 0), Vts1, &SVDIND(Vt, i + 2, 0), Vts0,
Vts1, work);
Vts1, work, 1);
}
// Zero out A superdiag row explicitly
for (int j = i + 2; j < n; j++) {
Expand Down
6 changes: 3 additions & 3 deletions batched/dense/src/KokkosBatched_ApplyQ_Decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ struct ApplyQ {
KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const tViewType &t,
const BViewType &B, const wViewType &w) {
int r_val = 0;
if (std::is_same<ArgMode, Mode::Serial>::value) {
if constexpr (std::is_same_v<ArgMode, Mode::Serial>) {
r_val = SerialApplyQ<ArgSide, ArgTrans, ArgAlgo>::invoke(A, t, B, w);
} else if (std::is_same<ArgMode, Mode::Team>::value) {
} else if constexpr (std::is_same_v<ArgMode, Mode::Team>) {
r_val = TeamApplyQ<MemberType, ArgSide, ArgTrans, ArgAlgo>::invoke(member, A, t, B, w);
} else if (std::is_same<ArgMode, Mode::Team>::value) {
} else if constexpr (std::is_same_v<ArgMode, Mode::TeamVector>) {
r_val = TeamVectorApplyQ<MemberType, ArgSide, ArgTrans, ArgAlgo>::invoke(member, A, t, B, w);
}
return r_val;
Expand Down
1 change: 1 addition & 0 deletions batched/dense/unit_test/Test_Batched_Dense.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "Test_Batched_SerialLU.hpp"
#include "Test_Batched_SerialLU_Real.hpp"
#include "Test_Batched_SerialLU_Complex.hpp"
#include "Test_Batched_SerialQR.hpp"
#include "Test_Batched_SerialSolveLU.hpp"
#include "Test_Batched_SerialSolveLU_Real.hpp"
#include "Test_Batched_SerialSolveLU_Complex.hpp"
Expand Down
Loading
Loading