From f338e5f79efbaf5ee5cbc9e9f018d32a7f54f6f0 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Tue, 7 May 2024 04:21:53 -0600
Subject: [PATCH 01/27] Backup

---
 .../KokkosLapack_geqrf_eti_spec_inst.cpp.in   |  26 +
 .../KokkosLapack_geqrf_eti_spec_avail.hpp.in  |  24 +
 lapack/impl/KokkosLapack_geqrf_impl.hpp       |  34 ++
 lapack/impl/KokkosLapack_geqrf_spec.hpp       | 140 +++++
 lapack/src/KokkosLapack_geqrf.hpp             | 198 +++++++
 .../KokkosLapack_geqrf_tpl_spec_avail.hpp     | 163 +++++
 .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 557 ++++++++++++++++++
 lapack/unit_test/Test_Lapack_geqrf.hpp        | 444 ++++++++++++++
 8 files changed, 1586 insertions(+)
 create mode 100644 lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in
 create mode 100644 lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in
 create mode 100644 lapack/impl/KokkosLapack_geqrf_impl.hpp
 create mode 100644 lapack/impl/KokkosLapack_geqrf_spec.hpp
 create mode 100644 lapack/src/KokkosLapack_geqrf.hpp
 create mode 100644 lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
 create mode 100644 lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
 create mode 100644 lapack/unit_test/Test_Lapack_geqrf.hpp

diff --git a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in
new file mode 100644
index 0000000000..9558d0f6cc
--- /dev/null
+++ b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in
@@ -0,0 +1,26 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+
+#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true
+#include "KokkosKernels_config.h"
+#include "KokkosLapack_geqrf_spec.hpp"
+
+namespace KokkosLapack {
+namespace Impl {
+@LAPACK_GEQRF_ETI_INST_BLOCK@
+  }  // namespace Impl
+}  // namespace KokkosLapack
diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in
new file mode 100644
index 0000000000..c4619b9c07
--- /dev/null
+++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in
@@ -0,0 +1,24 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#ifndef KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_
+#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_
+namespace KokkosLapack {
+namespace Impl {
+@LAPACK_GEQRF_ETI_AVAIL_BLOCK@
+  }  // namespace Impl
+}  // namespace KokkosLapack
+#endif
diff --git a/lapack/impl/KokkosLapack_geqrf_impl.hpp b/lapack/impl/KokkosLapack_geqrf_impl.hpp
new file mode 100644
index 0000000000..ea20018073
--- /dev/null
+++ b/lapack/impl/KokkosLapack_geqrf_impl.hpp
@@ -0,0 +1,34 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#ifndef KOKKOSLAPACK_IMPL_GEQRF_HPP_
+#define KOKKOSLAPACK_IMPL_GEQRF_HPP_
+
+/// \file KokkosLapack_geqrf_impl.hpp
+/// \brief Implementation(s) of QR factorization.
+
+#include <KokkosKernels_config.h>
+#include <Kokkos_ArithTraits.hpp>
+
+namespace KokkosLapack {
+namespace Impl {
+
+// NOTE: Might add the implementation of KokkosLapack::geqrf later
+
+}  // namespace Impl
+}  // namespace KokkosLapack
+
+#endif  // KOKKOSLAPACK_IMPL_GEQRF_HPP_
diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp
new file mode 100644
index 0000000000..d0083cb151
--- /dev/null
+++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp
@@ -0,0 +1,140 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+#ifndef KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_
+#define KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_
+
+#include <KokkosKernels_config.h>
+#include <Kokkos_Core.hpp>
+#include <Kokkos_ArithTraits.hpp>
+
+// Include the actual functors
+#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY
+#include <KokkosLapack_geqrf_impl.hpp>
+#endif
+
+namespace KokkosLapack {
+namespace Impl {
+// Trait reporting whether an ETI specialization exists (false by default)
+template <class ExecutionSpace, class AMatrix, class TWArray>
+struct geqrf_eti_spec_avail {
+  enum : bool { value = false };
+};
+}  // namespace Impl
+}  // namespace KokkosLapack
+
+//
+// Macro for declaration of full specialization availability
+// KokkosLapack::Impl::GEQRF.  This is NOT for users!!!  All
+// the declarations of full specializations go in this header file.
+// We may spread out definitions (see _INST macro below) across one or
+// more .cpp files.
+// The specialization names exactly the three template arguments of
+// geqrf_eti_spec_avail: the execution space, the rank-2 view type of A, and
+// the rank-1 view type shared by Tau and Work.
+//
+#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE,       \
+                                         EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \
+  template <>                                                             \
+  struct geqrf_eti_spec_avail<                                            \
+      EXEC_SPACE_TYPE,                                                    \
+      Kokkos::View<SCALAR_TYPE **, LAYOUT_TYPE,                           \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
+      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                            \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {            \
+    enum : bool { value = true };                                         \
+  };
+
+// Include the actual specialization declarations
+#include <KokkosLapack_geqrf_tpl_spec_avail.hpp>
+#include <generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp>
+
+namespace KokkosLapack {
+namespace Impl {
+
+// Unification layer: the two bool parameters default to the TPL and ETI
+// availability traits, so specializations on them can supply geqrf().
+/// \brief Implementation of KokkosLapack::geqrf.
+template <class ExecutionSpace, class AMatrix, class TWArray,
+          bool tpl_spec_avail =
+              geqrf_tpl_spec_avail<ExecutionSpace, AMatrix, TWArray>::value,
+          bool eti_spec_avail =
+              geqrf_eti_spec_avail<ExecutionSpace, AMatrix, TWArray>::value>
+struct GEQRF {
+  static void geqrf(const ExecutionSpace &space, const AMatrix &A, const TWArray &Tau,
+                   const TWArray &Work);
+};
+
+#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY
+//! Specialization of GEQRF used when no TPL is available (tpl flag false).
+// There is no native implementation yet, so geqrf() always throws.
+template <class ExecutionSpace, class AMatrix, class TWArray>
+struct GEQRF<ExecutionSpace, AMatrix, TWArray, false,
+             KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {
+  static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */,
+                    const TWArray & /* Tau */, const TWArray & /* Work */) {
+    // NOTE: Might add the implementation of KokkosLapack::geqrf later
+    throw std::runtime_error(
+        "No fallback implementation of GEQRF (general QR factorization) "
+        "exists. Enable LAPACK, CUSOLVER, ROCSOLVER or MAGMA TPL.");
+  }
+};
+
+#endif
+}  // namespace Impl
+}  // namespace KokkosLapack
+
+//
+// Macro for declaration of full specialization of
+// KokkosLapack::Impl::GEQRF.  This is NOT for users!!!  All
+// the declarations of full specializations go in this header file.
+// We may spread out definitions (see _INST macro below) across one or
+// more .cpp files.
+// The argument list must match the primary GEQRF template: execution space,
+// rank-2 view type of A, rank-1 view type shared by Tau and Work, then the
+// tpl_spec_avail (false) and eti_spec_avail (true) flags.
+//
+#define KOKKOSLAPACK_GEQRF_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE,       \
+                                        EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \
+  extern template struct GEQRF<                                          \
+      EXEC_SPACE_TYPE,                                                   \
+      Kokkos::View<SCALAR_TYPE **, LAYOUT_TYPE,                          \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
+      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                           \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
+      false, true>;
+
+// Macro for explicit instantiation of KokkosLapack::Impl::GEQRF with the
+// same argument list as KOKKOSLAPACK_GEQRF_ETI_SPEC_DECL: execution space,
+// view type of A, view type of Tau/Work, then tpl = false and eti = true.
+#define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE,       \
+                                        EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \
+  template struct GEQRF<                                                 \
+      EXEC_SPACE_TYPE,                                                   \
+      Kokkos::View<SCALAR_TYPE **, LAYOUT_TYPE,                          \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
+      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                           \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
+      false, true>;
+
+#include <KokkosLapack_geqrf_tpl_spec_decl.hpp>
+
+#endif  // KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_
diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
new file mode 100644
index 0000000000..ba360ad830
--- /dev/null
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -0,0 +1,156 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+/// \file KokkosLapack_geqrf.hpp
+/// \brief Local dense QR factorization
+///
+/// This file provides KokkosLapack::geqrf. This function performs a
+/// local (no MPI) QR factorization of a M-by-N matrix A.
+
+#ifndef KOKKOSLAPACK_GEQRF_HPP_
+#define KOKKOSLAPACK_GEQRF_HPP_
+
+#include <cstdint>
+#include <sstream>
+#include <type_traits>
+
+#include "KokkosLapack_geqrf_spec.hpp"
+#include "KokkosKernels_Error.hpp"
+
+namespace KokkosLapack {
+
+/// \brief Computes a QR factorization of a matrix A
+///
+/// \tparam ExecutionSpace the space where the kernel will run.
+/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View.
+/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View.
+///
+/// \param space [in] Execution space instance used to specify how to execute
+///                   the geqrf kernels.
+/// \param A [in,out] On entry, the M-by-N matrix to be factorized.
+///                   On exit, the elements on and above the diagonal contain
+///                   the min(M,N)-by-N upper trapezoidal matrix R (R is
+///                   upper triangular if M >= N); the elements below the
+///                   diagonal, with the array Tau, represent the unitary
+///                   matrix Q as a product of min(M,N) elementary reflectors.
+/// \param Tau [out]  One-dimensional array of size min(M,N) that contains
+///                   the scalar factors of the elementary reflectors.
+/// \param Work [out] One-dimensional array of size max(1,LWORK).
+///                   If min(M,N) == 0, then LWORK must be >= 1.
+///                   If min(M,N) != 0, then LWORK must be >= N.
+///                   If the QR factorization is successful, then the first
+///                   position of Work contains the optimal LWORK.
+///
+/// \throws a runtime exception (KokkosKernels::Impl::throw_runtime_exception)
+///         if the extent of Tau is not min(M,N), or if Work is shorter than
+///         the minimum LWORK stated above.
+///
+template <class ExecutionSpace, class AMatrix, class TWArray>
+void geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
+           const TWArray& Work) {
+  // NOTE: There is no native implementation of geqrf; a TPL must be enabled
+  //       (LAPACK for host views; cuSOLVER, MAGMA or rocSOLVER for device
+  //       views), otherwise the fallback specialization throws at run time.
+
+  static_assert(Kokkos::is_view<AMatrix>::value,
+                "KokkosLapack::geqrf: A must be a Kokkos::View.");
+  static_assert(Kokkos::is_view<TWArray>::value,
+                "KokkosLapack::geqrf: Tau and Work must be Kokkos::View.");
+  static_assert(static_cast<int>(AMatrix::rank) == 2,
+                "KokkosLapack::geqrf: A must have rank 2.");
+  static_assert(static_cast<int>(TWArray::rank) == 1,
+                "KokkosLapack::geqrf: Tau and Work must have rank 1.");
+  static_assert(
+      Kokkos::SpaceAccessibility<ExecutionSpace,
+                                 typename AMatrix::memory_space>::accessible,
+      "KokkosLapack::geqrf: A must be accessible from ExecutionSpace.");
+  static_assert(
+      Kokkos::SpaceAccessibility<ExecutionSpace,
+                                 typename TWArray::memory_space>::accessible,
+      "KokkosLapack::geqrf: Tau and Work must be accessible from "
+      "ExecutionSpace.");
+
+  const int64_t M     = A.extent(0);
+  const int64_t N     = A.extent(1);
+  const int64_t minMN = (M < N) ? M : N;
+  const int64_t Tau0  = Tau.extent(0);
+  const int64_t Work0 = Work.extent(0);
+
+  // Tau stores one scalar factor per elementary reflector.
+  if (Tau0 != minMN) {
+    std::ostringstream os;
+    os << "KokkosLapack::geqrf: Tau must have extent min(M,N): "
+       << "A: " << M << " x " << N << ", Tau: " << Tau0;
+    KokkosKernels::Impl::throw_runtime_exception(os.str());
+  }
+
+  // LWORK must be at least 1 for an empty problem and at least N otherwise.
+  const int64_t minWork = (minMN == 0) ? 1 : N;
+  if (Work0 < minWork) {
+    std::ostringstream os;
+    os << "KokkosLapack::geqrf: Work must have extent >= " << minWork
+       << ": A: " << M << " x " << N << ", Work: " << Work0;
+    KokkosKernels::Impl::throw_runtime_exception(os.str());
+  }
+
+  // Hand unmanaged views with canonical types to the implementation layer so
+  // that the TPL/ETI specializations can be matched.
+  using AMatrix_Internal = Kokkos::View<
+      typename AMatrix::non_const_value_type**, typename AMatrix::array_layout,
+      typename AMatrix::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
+  using TWArray_Internal = Kokkos::View<
+      typename TWArray::non_const_value_type*, typename TWArray::array_layout,
+      typename TWArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
+
+  AMatrix_Internal A_i    = A;
+  TWArray_Internal Tau_i  = Tau;
+  TWArray_Internal Work_i = Work;
+
+  KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal,
+                            TWArray_Internal>::geqrf(space, A_i, Tau_i, Work_i);
+}
+
+/// \brief Computes a QR factorization of a matrix A
+///
+/// Convenience overload that runs on a default-constructed instance of
+/// AMatrix::execution_space.
+///
+/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View.
+/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View.
+///
+/// \param A [in,out] On entry, the M-by-N matrix to be factorized.
+///                   On exit, the elements on and above the diagonal contain
+///                   the min(M,N)-by-N upper trapezoidal matrix R (R is
+///                   upper triangular if M >= N); the elements below the
+///                   diagonal, with the array Tau, represent the unitary
+///                   matrix Q as a product of min(M,N) elementary reflectors.
+/// \param Tau [out]  One-dimensional array of size min(M,N) that contains
+///                   the scalar factors of the elementary reflectors.
+/// \param Work [out] One-dimensional array of size max(1,LWORK).
+///                   If min(M,N) == 0, then LWORK must be >= 1.
+///                   If min(M,N) != 0, then LWORK must be >= N.
+///                   If the QR factorization is successful, then the first
+///                   position of Work contains the optimal LWORK.
+///
+template <class AMatrix, class TWArray>
+void geqrf(const AMatrix& A, const TWArray& Tau, const TWArray& Work) {
+  typename AMatrix::execution_space space{};
+  geqrf(space, A, Tau, Work);
+}
+
+}  // namespace KokkosLapack
+
+#endif  // KOKKOSLAPACK_GEQRF_HPP_
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
new file mode 100644
index 0000000000..733f0510e0
--- /dev/null
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
@@ -0,0 +1,163 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_HPP_
+#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_HPP_
+
+namespace KokkosLapack {
+namespace Impl {
+// Trait reporting whether a TPL specialization of GEQRF exists (default: no)
+template <class ExecutionSpace, class AMatrix, class TWArray>
+struct geqrf_tpl_spec_avail {
+  enum : bool { value = false };
+};
+
+// Generic host-side LAPACK: LayoutLeft views in HostSpace, any ExecSpace
+#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK
+
+#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, MEMSPACE) \
+  template <class ExecSpace>                                               \
+  struct geqrf_tpl_spec_avail<                                             \
+      ExecSpace,                                                           \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>,  \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,              \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>,   \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,              \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>,   \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {           \
+    enum : bool { value = true };                                          \
+  };
+
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft,
+                                        Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft,
+                                        Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex<double>,
+                                        Kokkos::LayoutLeft, Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex<float>,
+                                        Kokkos::LayoutLeft, Kokkos::HostSpace)
+#endif  // KOKKOSKERNELS_ENABLE_TPL_LAPACK
+}  // namespace Impl
+}  // namespace KokkosLapack
+
+// MAGMA. NOTE(review): specializes the same types as CUSOLVER below; verify.
+#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA
+#include "magma_v2.h"
+
+namespace KokkosLapack {
+namespace Impl {
+#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, MEMSPACE)    \
+  template <>                                                                \
+  struct geqrf_tpl_spec_avail<                                               \
+      Kokkos::Cuda,                                                          \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>, \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {             \
+    enum : bool { value = true };                                            \
+  };
+
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft,
+                                       Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft,
+                                       Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex<double>,
+                                       Kokkos::LayoutLeft, Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex<float>,
+                                       Kokkos::LayoutLeft, Kokkos::CudaSpace)
+}  // namespace Impl
+}  // namespace KokkosLapack
+#endif  // KOKKOSKERNELS_ENABLE_TPL_MAGMA
+
+// CUSOLVER: Kokkos::Cuda, LayoutLeft views in CudaSpace (and UVM if enabled)
+#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER
+namespace KokkosLapack {
+namespace Impl {
+
+#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \
+  template <>                                                                \
+  struct geqrf_tpl_spec_avail<                                               \
+      Kokkos::Cuda,                                                          \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>, \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {             \
+    enum : bool { value = true };                                            \
+  };
+
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft,
+                                          Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft,
+                                          Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex<double>,
+                                          Kokkos::LayoutLeft, Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex<float>,
+                                          Kokkos::LayoutLeft, Kokkos::CudaSpace)
+
+#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft,
+                                          Kokkos::CudaUVMSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft,
+                                          Kokkos::CudaUVMSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex<double>,
+                                          Kokkos::LayoutLeft,
+                                          Kokkos::CudaUVMSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex<float>,
+                                          Kokkos::LayoutLeft,
+                                          Kokkos::CudaUVMSpace)
+#endif  // KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE
+
+}  // namespace Impl
+}  // namespace KokkosLapack
+#endif  // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER
+
+#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER  // Kokkos::HIP, views in HIPSpace
+#include <rocsolver/rocsolver.h>
+
+namespace KokkosLapack {
+namespace Impl {
+
+#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(SCALAR, LAYOUT, MEMSPACE) \
+  template <>                                                                 \
+  struct geqrf_tpl_spec_avail<                                                \
+      Kokkos::HIP,                                                            \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,   \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                 \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,    \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                 \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,    \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {              \
+    enum : bool { value = true };                                             \
+  };
+
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft,
+                                           Kokkos::HIPSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft,
+                                           Kokkos::HIPSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex<double>,
+                                           Kokkos::LayoutLeft, Kokkos::HIPSpace)
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex<float>,
+                                           Kokkos::LayoutLeft, Kokkos::HIPSpace)
+
+}  // namespace Impl
+}  // namespace KokkosLapack
+#endif  // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER
+
+#endif
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
new file mode 100644
index 0000000000..8a5b37812d
--- /dev/null
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -0,0 +1,557 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_
+#define KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_
+
+namespace KokkosLapack {
+namespace Impl {
+template <class AViewType, class BViewType, class PViewType>
+inline void geqrf_print_specialization() {
+  // Diagnostic hook: when specialization checking is enabled, print which TPL
+  // (MAGMA takes precedence over LAPACK) is reported for these view types.
+#if defined(KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION)
+#if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA)
+  printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n",
+         typeid(AViewType).name(), typeid(BViewType).name(),
+         typeid(PViewType).name());
+#elif defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK)
+  printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n",
+         typeid(AViewType).name(), typeid(BViewType).name(),
+         typeid(PViewType).name());
+#endif  // KOKKOSKERNELS_ENABLE_TPL_MAGMA / KOKKOSKERNELS_ENABLE_TPL_LAPACK
+#endif  // KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION
+}
+}  // namespace Impl
+}  // namespace KokkosLapack
+
+// Generic host-side LAPACK (e.g. MKL or any other LAPACK implementation)
+#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK
+#include <KokkosLapack_Host_tpl.hpp>
+
+namespace KokkosLapack {
+namespace Impl {
+
+template <class AViewType, class BViewType, class IPIVViewType>
+void lapackGeqrfWrapper(const AViewType& A, const BViewType& B,
+                       const IPIVViewType& IPIV) {
+  using Scalar = typename AViewType::non_const_value_type;
+  // Host LAPACK path: forwards A, B and IPIV to HostLapack<...>::geqrf.
+  const bool with_pivot = !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr));
+  // Sizes are narrowed to int; a zero stride(1) is replaced by 1.
+  const int N    = static_cast<int>(A.extent(1));
+  const int AST  = static_cast<int>(A.stride(1));
+  const int LDA  = (AST == 0) ? 1 : AST;
+  const int BST  = static_cast<int>(B.stride(1));
+  const int LDB  = (BST == 0) ? 1 : BST;
+  const int NRHS = static_cast<int>(B.extent(1));
+  // NOTE(review): info is handed to HostLapack but never inspected afterwards.
+  int info = 0;
+  // NOTE(review): no call is made when IPIV is empty with a null data pointer.
+  if (with_pivot) {
+    if constexpr (Kokkos::ArithTraits<Scalar>::is_complex) {
+      using MagType = typename Kokkos::ArithTraits<Scalar>::mag_type;
+      // Complex data pointers are reinterpreted as std::complex<MagType>*.
+      HostLapack<std::complex<MagType>>::geqrf(
+          N, NRHS, reinterpret_cast<std::complex<MagType>*>(A.data()), LDA,
+          IPIV.data(), reinterpret_cast<std::complex<MagType>*>(B.data()), LDB,
+          info);
+    } else {
+      HostLapack<Scalar>::geqrf(N, NRHS, A.data(), LDA, IPIV.data(), B.data(),
+                               LDB, info);
+    }
+  }
+}
+
+#define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE)         \
+  template <>                                                                  \
+  struct GEQRF<                                                                 \
+      EXECSPACE,                                                               \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,     \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,     \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<int*, LAYOUT, Kokkos::Device<EXECSPACE, Kokkos::HostSpace>, \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      true,                                                                    \
+      geqrf_eti_spec_avail<                                                     \
+          EXECSPACE,                                                           \
+          Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>, \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>, \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<int*, LAYOUT,                                           \
+                       Kokkos::Device<EXECSPACE, Kokkos::HostSpace>,           \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
+    using AViewType =                                                          \
+        Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,   \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+    using BViewType =                                                          \
+        Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,   \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+    using PViewType = Kokkos::View<                                            \
+        int*, LAYOUT, Kokkos::Device<EXECSPACE, Kokkos::HostSpace>,            \
+        Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                              \
+    /* Profiles the call, reports the specialization, then runs LAPACK. */     \
+    static void geqrf(const EXECSPACE& /* space */, const AViewType& A,         \
+                     const BViewType& B, const PViewType& IPIV) {              \
+      Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR   \
+                                    "]");                                      \
+      geqrf_print_specialization<AViewType, BViewType, PViewType>();            \
+      lapackGeqrfWrapper(A, B, IPIV);                                           \
+      Kokkos::Profiling::popRegion();                                          \
+    }                                                                          \
+  };  // PViewType is spelled like the specialization's pivot-view argument
+// Instantiate the LAPACK-backed GEQRF for each enabled host execution space.
+#if defined(KOKKOS_ENABLE_SERIAL)
+KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial,
+                         Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial,
+                         Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<float>, Kokkos::LayoutLeft,
+                         Kokkos::Serial, Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<double>, Kokkos::LayoutLeft,
+                         Kokkos::Serial, Kokkos::HostSpace)
+#endif  // KOKKOS_ENABLE_SERIAL
+
+#if defined(KOKKOS_ENABLE_OPENMP)
+KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP,
+                         Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP,
+                         Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<float>, Kokkos::LayoutLeft,
+                         Kokkos::OpenMP, Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<double>, Kokkos::LayoutLeft,
+                         Kokkos::OpenMP, Kokkos::HostSpace)
+#endif  // KOKKOS_ENABLE_OPENMP
+
+#if defined(KOKKOS_ENABLE_THREADS)
+KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads,
+                         Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads,
+                         Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<float>, Kokkos::LayoutLeft,
+                         Kokkos::Threads, Kokkos::HostSpace)
+KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<double>, Kokkos::LayoutLeft,
+                         Kokkos::Threads, Kokkos::HostSpace)
+#endif  // KOKKOS_ENABLE_THREADS
+
+}  // namespace Impl
+}  // namespace KokkosLapack
+#endif  // KOKKOSKERNELS_ENABLE_TPL_LAPACK
+
+// MAGMA
+#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA
+#include <KokkosLapack_magma.hpp>
+
+namespace KokkosLapack {
+namespace Impl {
+
+template <class ExecSpace, class AViewType, class BViewType, class IPIVViewType>
+void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A,
+                      const BViewType& B, const IPIVViewType& IPIV) {
+  using scalar_type = typename AViewType::non_const_value_type;
+  // MAGMA path: picks the magma_?geqrf* call that matches scalar_type.
+  Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," +
+                                Kokkos::ArithTraits<scalar_type>::name() + "]");
+  geqrf_print_specialization<AViewType, BViewType, IPIVViewType>();
+  // An empty IPIV with a null data pointer selects the *_nopiv_gpu calls.
+  const bool with_pivot = !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr));
+  // Sizes are converted to magma_int_t; a zero stride(1) is replaced by 1.
+  magma_int_t N    = static_cast<magma_int_t>(A.extent(1));
+  magma_int_t AST  = static_cast<magma_int_t>(A.stride(1));
+  magma_int_t LDA  = (AST == 0) ? 1 : AST;
+  magma_int_t BST  = static_cast<magma_int_t>(B.stride(1));
+  magma_int_t LDB  = (BST == 0) ? 1 : BST;
+  magma_int_t NRHS = static_cast<magma_int_t>(B.extent(1));
+  // NOTE(review): s is not used below; presumably this initializes MAGMA.
+  KokkosLapack::Impl::MagmaSingleton& s =
+      KokkosLapack::Impl::MagmaSingleton::singleton();
+  magma_int_t info = 0;  // passed to MAGMA below; never checked afterwards
+  // Fence the caller's execution space instance before the MAGMA calls.
+  space.fence();
+  if constexpr (std::is_same_v<scalar_type, float>) {
+    if (with_pivot) {
+      magma_sgeqrf_gpu(N, NRHS, reinterpret_cast<magmaFloat_ptr>(A.data()), LDA,
+                      IPIV.data(), reinterpret_cast<magmaFloat_ptr>(B.data()),
+                      LDB, &info);
+    } else {
+      magma_sgeqrf_nopiv_gpu(N, NRHS, reinterpret_cast<magmaFloat_ptr>(A.data()),
+                            LDA, reinterpret_cast<magmaFloat_ptr>(B.data()),
+                            LDB, &info);
+    }
+  }
+  // Double-precision real scalars.
+  if constexpr (std::is_same_v<scalar_type, double>) {
+    if (with_pivot) {
+      magma_dgeqrf_gpu(N, NRHS, reinterpret_cast<magmaDouble_ptr>(A.data()), LDA,
+                      IPIV.data(), reinterpret_cast<magmaDouble_ptr>(B.data()),
+                      LDB, &info);
+    } else {
+      magma_dgeqrf_nopiv_gpu(
+          N, NRHS, reinterpret_cast<magmaDouble_ptr>(A.data()), LDA,
+          reinterpret_cast<magmaDouble_ptr>(B.data()), LDB, &info);
+    }
+  }
+  // Single-precision complex scalars.
+  if constexpr (std::is_same_v<scalar_type, Kokkos::complex<float>>) {
+    if (with_pivot) {
+      magma_cgeqrf_gpu(
+          N, NRHS, reinterpret_cast<magmaFloatComplex_ptr>(A.data()), LDA,
+          IPIV.data(), reinterpret_cast<magmaFloatComplex_ptr>(B.data()), LDB,
+          &info);
+    } else {
+      magma_cgeqrf_nopiv_gpu(
+          N, NRHS, reinterpret_cast<magmaFloatComplex_ptr>(A.data()), LDA,
+          reinterpret_cast<magmaFloatComplex_ptr>(B.data()), LDB, &info);
+    }
+  }
+  // Double-precision complex scalars.
+  if constexpr (std::is_same_v<scalar_type, Kokkos::complex<double>>) {
+    if (with_pivot) {
+      magma_zgeqrf_gpu(
+          N, NRHS, reinterpret_cast<magmaDoubleComplex_ptr>(A.data()), LDA,
+          IPIV.data(), reinterpret_cast<magmaDoubleComplex_ptr>(B.data()), LDB,
+          &info);
+    } else {
+      magma_zgeqrf_nopiv_gpu(
+          N, NRHS, reinterpret_cast<magmaDoubleComplex_ptr>(A.data()), LDA,
+          reinterpret_cast<magmaDoubleComplex_ptr>(B.data()), LDB, &info);
+    }
+  }
+  ExecSpace().fence();  // NOTE(review): default-constructed, not `space`
+  Kokkos::Profiling::popRegion();
+}
+
+#define KOKKOSLAPACK_GEQRF_MAGMA(SCALAR, LAYOUT, MEM_SPACE)                    \
+  template <>                                                                 \
+  struct GEQRF<                                                                \
+      Kokkos::Cuda,                                                           \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
+      Kokkos::View<magma_int_t*, LAYOUT,                                      \
+                   Kokkos::Device<Kokkos::DefaultHostExecutionSpace,          \
+                                  Kokkos::HostSpace>,                         \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
+      true,                                                                   \
+      geqrf_eti_spec_avail<                                                    \
+          Kokkos::Cuda,                                                       \
+          Kokkos::View<SCALAR**, LAYOUT,                                      \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,               \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
+          Kokkos::View<SCALAR**, LAYOUT,                                      \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,               \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
+          Kokkos::View<magma_int_t*, LAYOUT,                                  \
+                       Kokkos::Device<Kokkos::DefaultHostExecutionSpace,      \
+                                      Kokkos::HostSpace>,                     \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {    \
+    using AViewType = Kokkos::View<SCALAR**, LAYOUT,                          \
+                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,   \
+                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;  \
+    using BViewType = Kokkos::View<SCALAR**, LAYOUT,                          \
+                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,   \
+                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;  \
+    using PViewType = Kokkos::View<                                           \
+        magma_int_t*, LAYOUT,                                                 \
+        Kokkos::Device<Kokkos::DefaultHostExecutionSpace, Kokkos::HostSpace>, \
+        Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                             \
+    /* Delegates to magmaGeqrfWrapper, which also does the profiling. */      \
+    static void geqrf(const Kokkos::Cuda& space, const AViewType& A,           \
+                     const BViewType& B, const PViewType& IPIV) {             \
+      magmaGeqrfWrapper(space, A, B, IPIV);                                    \
+    }                                                                         \
+  };  // A and B are Kokkos::Cuda views; the pivot view is a HostSpace view
+// MAGMA-backed GEQRF for real and complex scalars, LayoutLeft, CudaSpace.
+KOKKOSLAPACK_GEQRF_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex<float>, Kokkos::LayoutLeft,
+                        Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex<double>, Kokkos::LayoutLeft,
+                        Kokkos::CudaSpace)
+
+}  // namespace Impl
+}  // namespace KokkosLapack
+#endif  // KOKKOSKERNELS_ENABLE_TPL_MAGMA
+
+// CUSOLVER
+#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER
+#include "KokkosLapack_cusolver.hpp"
+
+namespace KokkosLapack {
+namespace Impl {
+
+template <class ExecutionSpace, class IPIVViewType, class AViewType,
+          class BViewType>
+void cusolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV,
+                         const AViewType& A, const BViewType& B) {
+  using memory_space = typename AViewType::memory_space;
+  using Scalar       = typename BViewType::non_const_value_type;
+  using ALayout_t    = typename AViewType::array_layout;
+  using BLayout_t    = typename BViewType::array_layout;
+  // Binds the shared cuSOLVER handle to space's stream, runs getrf + getrs.
+  const int m   = A.extent_int(0);
+  const int n   = A.extent_int(1);
+  const int lda = static_cast<int>(
+      std::is_same_v<ALayout_t, Kokkos::LayoutRight> ? A.stride(0)
+                                                     : A.stride(1));
+  // NOTE(review): getrf/getrs are LU factorize/solve; no geqrf call is made.
+  const int nrhs = B.extent_int(1);
+  const int ldb  = static_cast<int>(
+      std::is_same_v<BLayout_t, Kokkos::LayoutRight> ? B.stride(0)
+                                                     : B.stride(1));
+  int lwork = 0;
+  Kokkos::View<int, memory_space> info("getrf info");
+  // The handle's stream is reset to the default stream before returning.
+  CudaLapackSingleton& s = CudaLapackSingleton::singleton();
+  KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
+      cusolverDnSetStream(s.handle, space.cuda_stream()));
+  if constexpr (std::is_same_v<Scalar, float>) {
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
+        cusolverDnSgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork));
+    Kokkos::View<float*, memory_space> Workspace("getrf workspace", lwork);
+    // Factorization step: A, the workspace, IPIV and info go to getrf.
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgetrf(s.handle, m, n, A.data(),
+                                                    lda, Workspace.data(),
+                                                    IPIV.data(), info.data()));
+    // Solve step: getrs consumes A and IPIV together with B.
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
+        cusolverDnSgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda,
+                         IPIV.data(), B.data(), ldb, info.data()));
+  }
+  if constexpr (std::is_same_v<Scalar, double>) {
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
+        cusolverDnDgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork));
+    Kokkos::View<double*, memory_space> Workspace("getrf workspace", lwork);
+
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgetrf(s.handle, m, n, A.data(),
+                                                    lda, Workspace.data(),
+                                                    IPIV.data(), info.data()));
+
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
+        cusolverDnDgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda,
+                         IPIV.data(), B.data(), ldb, info.data()));
+  }
+  if constexpr (std::is_same_v<Scalar, Kokkos::complex<float>>) {
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrf_bufferSize(
+        s.handle, m, n, reinterpret_cast<cuComplex*>(A.data()), lda, &lwork));
+    Kokkos::View<cuComplex*, memory_space> Workspace("getrf workspace", lwork);
+
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
+        cusolverDnCgetrf(s.handle, m, n, reinterpret_cast<cuComplex*>(A.data()),
+                         lda, reinterpret_cast<cuComplex*>(Workspace.data()),
+                         IPIV.data(), info.data()));
+
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrs(
+        s.handle, CUBLAS_OP_N, m, nrhs, reinterpret_cast<cuComplex*>(A.data()),
+        lda, IPIV.data(), reinterpret_cast<cuComplex*>(B.data()), ldb,
+        info.data()));
+  }
+  if constexpr (std::is_same_v<Scalar, Kokkos::complex<double>>) {
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf_bufferSize(
+        s.handle, m, n, reinterpret_cast<cuDoubleComplex*>(A.data()), lda,
+        &lwork));
+    Kokkos::View<cuDoubleComplex*, memory_space> Workspace("getrf workspace",
+                                                           lwork);
+
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf(
+        s.handle, m, n, reinterpret_cast<cuDoubleComplex*>(A.data()), lda,
+        reinterpret_cast<cuDoubleComplex*>(Workspace.data()), IPIV.data(),
+        info.data()));
+
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrs(
+        s.handle, CUBLAS_OP_N, m, nrhs,
+        reinterpret_cast<cuDoubleComplex*>(A.data()), lda, IPIV.data(),
+        reinterpret_cast<cuDoubleComplex*>(B.data()), ldb, info.data()));
+  }
+  KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, nullptr));
+}
+
+#define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE)                  \
+  template <>                                                                  \
+  struct GEQRF<                                                                 \
+      Kokkos::Cuda,                                                            \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,      \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      true,                                                                    \
+      geqrf_eti_spec_avail<                                                     \
+          Kokkos::Cuda,                                                        \
+          Kokkos::View<SCALAR**, LAYOUT,                                       \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<SCALAR**, LAYOUT,                                       \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
+    using AViewType = Kokkos::View<SCALAR**, LAYOUT,                           \
+                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
+                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
+    using BViewType = Kokkos::View<SCALAR**, LAYOUT,                           \
+                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
+                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
+    using PViewType =                                                          \
+        Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+    /* Profiles the call, reports the specialization, then runs cuSOLVER. */   \
+    static void geqrf(const Kokkos::Cuda& space, const AViewType& A,            \
+                     const BViewType& B, const PViewType& IPIV) {              \
+      Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR \
+                                    "]");                                      \
+      geqrf_print_specialization<AViewType, BViewType, PViewType>();            \
+      /* The wrapper takes IPIV ahead of A and B. */                           \
+      cusolverGeqrfWrapper(space, IPIV, A, B);                                  \
+      Kokkos::Profiling::popRegion();                                          \
+    }                                                                          \
+  };  // all three views, including the int pivot view, live in MEM_SPACE
+// cuSOLVER-backed GEQRF for real and complex scalars, LayoutLeft, CudaSpace.
+KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<float>, Kokkos::LayoutLeft,
+                           Kokkos::CudaSpace)
+KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
+                           Kokkos::CudaSpace)
+// The same four scalars again for CudaUVMSpace when that memory space is on.
+#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE)
+KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace)
+KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace)
+KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<float>, Kokkos::LayoutLeft,
+                           Kokkos::CudaUVMSpace)
+KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
+                           Kokkos::CudaUVMSpace)
+#endif  // KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE
+
+}  // namespace Impl
+}  // namespace KokkosLapack
+#endif  // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER
+
+// ROCSOLVER
+#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER
+#include <KokkosBlas_tpl_spec.hpp>
+#include <rocsolver/rocsolver.h>
+
+namespace KokkosLapack {
+namespace Impl {
+
+template <class ExecutionSpace, class IPIVViewType, class AViewType,
+          class BViewType>
+void rocsolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV,
+                          const AViewType& A, const BViewType& B) {
+  using Scalar    = typename BViewType::non_const_value_type;
+  using ALayout_t = typename AViewType::array_layout;
+  using BLayout_t = typename BViewType::array_layout;
+  // Calls rocsolver_?geqrf on space's HIP stream via the shared rocBLAS handle.
+  const rocblas_int N    = static_cast<rocblas_int>(A.extent(0));
+  const rocblas_int nrhs = static_cast<rocblas_int>(B.extent(1));
+  const rocblas_int lda  = static_cast<rocblas_int>(
+      std::is_same_v<ALayout_t, Kokkos::LayoutRight> ? A.stride(0)
+                                                     : A.stride(1));
+  const rocblas_int ldb = static_cast<rocblas_int>(
+      std::is_same_v<BLayout_t, Kokkos::LayoutRight> ? B.stride(0)
+                                                     : B.stride(1));
+  Kokkos::View<rocblas_int, ExecutionSpace> info("rocsolver info");
+  // NOTE(review): rocSOLVER documents geqrf as (handle, m, n, A, lda, ipiv).
+  KokkosBlas::Impl::RocBlasSingleton& s =
+      KokkosBlas::Impl::RocBlasSingleton::singleton();
+  KOKKOS_ROCBLAS_SAFE_CALL_IMPL(
+      rocblas_set_stream(s.handle, space.hip_stream()));
+  if constexpr (std::is_same_v<Scalar, float>) {
+    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgeqrf(s.handle, N, nrhs, A.data(),
+                                                  lda, IPIV.data(), B.data(),
+                                                  ldb, info.data()));
+  }
+  if constexpr (std::is_same_v<Scalar, double>) {
+    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgeqrf(s.handle, N, nrhs, A.data(),
+                                                  lda, IPIV.data(), B.data(),
+                                                  ldb, info.data()));
+  }
+  if constexpr (std::is_same_v<Scalar, Kokkos::complex<float>>) {
+    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf(
+        s.handle, N, nrhs, reinterpret_cast<rocblas_float_complex*>(A.data()),
+        lda, IPIV.data(), reinterpret_cast<rocblas_float_complex*>(B.data()),
+        ldb, info.data()));
+  }
+  if constexpr (std::is_same_v<Scalar, Kokkos::complex<double>>) {
+    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf(
+        s.handle, N, nrhs, reinterpret_cast<rocblas_double_complex*>(A.data()),
+        lda, IPIV.data(), reinterpret_cast<rocblas_double_complex*>(B.data()),
+        ldb, info.data()));
+  }
+  KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, nullptr));
+}
+
+#define KOKKOSLAPACK_GEQRF_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE)                 \
+  template <>                                                                  \
+  struct GEQRF<                                                                 \
+      Kokkos::HIP,                                                             \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,   \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,   \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<rocblas_int*, LAYOUT,                                       \
+                   Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                     \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      true,                                                                    \
+      geqrf_eti_spec_avail<                                                     \
+          Kokkos::HIP,                                                         \
+          Kokkos::View<SCALAR**, LAYOUT,                                       \
+                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<SCALAR**, LAYOUT,                                       \
+                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<rocblas_int*, LAYOUT,                                   \
+                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
+    using AViewType =                                                          \
+        Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+    using BViewType =                                                          \
+        Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+    using PViewType = Kokkos::View<rocblas_int*, LAYOUT,                       \
+                                   Kokkos::Device<Kokkos::HIP, MEM_SPACE>,     \
+                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
+    /* Profiles the call, reports the specialization, then runs rocSOLVER. */  \
+    static void geqrf(const Kokkos::HIP& space, const AViewType& A,             \
+                     const BViewType& B, const PViewType& IPIV) {              \
+      Kokkos::Profiling::pushRegion(                                           \
+          "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]");                    \
+      geqrf_print_specialization<AViewType, BViewType, PViewType>();            \
+      /* The wrapper takes IPIV ahead of A and B. */                           \
+      rocsolverGeqrfWrapper(space, IPIV, A, B);                                 \
+      Kokkos::Profiling::popRegion();                                          \
+    }                                                                          \
+  };  // all three views, including the rocblas_int pivot view, use MEM_SPACE
+// rocSOLVER-backed GEQRF for real and complex scalars, LayoutLeft, HIPSpace.
+KOKKOSLAPACK_GEQRF_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace)
+KOKKOSLAPACK_GEQRF_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace)
+KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex<float>, Kokkos::LayoutLeft,
+                            Kokkos::HIPSpace)
+KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
+                            Kokkos::HIPSpace)
+
+}  // namespace Impl
+}  // namespace KokkosLapack
+#endif  // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER
+
+#endif
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
new file mode 100644
index 0000000000..f9e93180b1
--- /dev/null
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -0,0 +1,444 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+// only enable this test where KokkosLapack supports geqrf:
+// CUDA+(MAGMA or CUSOLVER), HIP+ROCSOLVER and HOST+LAPACK
+#if (defined(TEST_CUDA_LAPACK_CPP) &&                                       \
+     (defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) ||                            \
+      defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER))) ||                       \
+    (defined(TEST_HIP_LAPACK_CPP) &&                                        \
+     defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) ||                        \
+    (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) &&                            \
+     (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \
+      defined(TEST_THREADS_LAPACK_CPP)))
+
+#include <gtest/gtest.h>
+#include <Kokkos_Core.hpp>
+#include <Kokkos_Random.hpp>
+
+#include <KokkosLapack_geqrf.hpp>
+#include <KokkosBlas2_gemv.hpp>
+#include <KokkosBlas3_gemm.hpp>
+#include <KokkosKernels_TestUtils.hpp>
+
+namespace Test {
+
+template <class ViewTypeA, class ViewTypeB, class Device, bool MAGMA>
+void impl_test_geqrf(const char* mode, const char* padding, int N) {
+  using execution_space = typename Device::execution_space;
+  using ScalarA         = typename ViewTypeA::value_type;
+  using ats             = Kokkos::ArithTraits<ScalarA>;
+
+  execution_space space{};
+
+  Kokkos::Random_XorShift64_Pool<execution_space> rand_pool(13718);
+
+  int ldda, lddb;
+
+  if (padding[0] == 'Y') {  // rounded up to multiple of 32
+    ldda = ((N + 32 - 1) / 32) * 32;
+    lddb = ldda;
+  } else {
+    ldda = N;
+    lddb = N;
+  }
+
+  // Create device views
+  ViewTypeA A("A", ldda, N);
+  ViewTypeB X0("X0", N);
+  ViewTypeB B("B", lddb);
+
+  // Create host mirrors of device views.
+  typename ViewTypeB::HostMirror h_X0 = Kokkos::create_mirror_view(X0);
+  typename ViewTypeB::HostMirror h_B  = Kokkos::create_mirror(B);
+
+  // Initialize data.
+  Kokkos::fill_random(
+      A, rand_pool,
+      Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max());
+  Kokkos::fill_random(
+      X0, rand_pool,
+      Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max());
+
+  // Generate RHS B = A*X0.
+  ScalarA alpha = 1.0;
+  ScalarA beta  = 0.0;
+
+  KokkosBlas::gemv("N", alpha, A, X0, beta, B);
+  Kokkos::fence();
+
+  // Deep copy device view to host view.
+  Kokkos::deep_copy(h_X0, X0);
+
+  // Allocate IPIV view on host
+  using ViewTypeP = typename std::conditional<
+      MAGMA, Kokkos::View<int*, Kokkos::LayoutLeft, Kokkos::HostSpace>,
+      Kokkos::View<int*, Kokkos::LayoutLeft, execution_space>>::type;
+  ViewTypeP ipiv;
+  int Nt = 0;
+  if (mode[0] == 'Y') {
+    Nt   = N;
+    ipiv = ViewTypeP("IPIV", Nt);
+  }
+
+  // Solve.
+  try {
+    KokkosLapack::geqrf(space, A, B, ipiv);
+  } catch (const std::runtime_error& error) {
+    // Check for expected runtime errors due to:
+    // no-pivoting case (note: only MAGMA supports no-pivoting interface)
+    // and no-tpl case
+    bool nopivot_runtime_err = false;
+    bool notpl_runtime_err   = false;
+#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA   // have MAGMA TPL
+#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK  // and have LAPACK TPL
+    nopivot_runtime_err = (!std::is_same<typename Device::memory_space,
+                                         Kokkos::CudaSpace>::value) &&
+                          (ipiv.extent(0) == 0) && (ipiv.data() == nullptr);
+    notpl_runtime_err = false;
+#else
+    notpl_runtime_err = true;
+#endif
+#else                                   // not have MAGMA TPL
+#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK  // but have LAPACK TPL
+    nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr);
+    notpl_runtime_err   = false;
+#else
+    notpl_runtime_err = true;
+#endif
+#endif
+    if (!nopivot_runtime_err && !notpl_runtime_err) FAIL();
+    return;
+  }
+  Kokkos::fence();
+
+  // Get the solution vector.
+  Kokkos::deep_copy(h_B, B);
+
+  // Checking vs ref on CPU, this eps is about 10^-9
+  typedef typename ats::mag_type mag_type;
+  const mag_type eps = 3.0e7 * ats::epsilon();
+  bool test_flag     = true;
+  for (int i = 0; i < N; i++) {
+    if (ats::abs(h_B(i) - h_X0(i)) > eps) {
+      test_flag = false;
+      printf(
+          "    Error %d, pivot %c, padding %c: result( %.15lf ) !="
+          "solution( %.15lf ) at (%d), error=%.15e, eps=%.15e\n",
+          N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), int(i),
+          ats::abs(h_B(i) - h_X0(i)), eps);
+      break;
+    }
+  }
+  ASSERT_EQ(test_flag, true);
+}
+
+template <class ViewTypeA, class ViewTypeB, class Device, bool MAGMA>
+void impl_test_geqrf_mrhs(const char* mode, const char* padding, int N,
+                         int nrhs) {
+  using execution_space = typename Device::execution_space;
+  using ScalarA         = typename ViewTypeA::value_type;
+  using ats             = Kokkos::ArithTraits<ScalarA>;
+
+  execution_space space{};
+
+  Kokkos::Random_XorShift64_Pool<execution_space> rand_pool(13718);
+
+  int ldda, lddb;
+
+  if (padding[0] == 'Y') {  // rounded up to multiple of 32
+    ldda = ((N + 32 - 1) / 32) * 32;
+    lddb = ldda;
+  } else {
+    ldda = N;
+    lddb = N;
+  }
+
+  // Create device views
+  ViewTypeA A("A", ldda, N);
+  ViewTypeB X0("X0", N, nrhs);
+  ViewTypeB B("B", lddb, nrhs);
+
+  // Create host mirrors of device views.
+  typename ViewTypeB::HostMirror h_X0 = Kokkos::create_mirror_view(X0);
+  typename ViewTypeB::HostMirror h_B  = Kokkos::create_mirror(B);
+
+  // Initialize data.
+  Kokkos::fill_random(
+      A, rand_pool,
+      Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max());
+  Kokkos::fill_random(
+      X0, rand_pool,
+      Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max());
+
+  // Generate RHS B = A*X0.
+  ScalarA alpha = 1.0;
+  ScalarA beta  = 0.0;
+
+  KokkosBlas::gemm("N", "N", alpha, A, X0, beta, B);
+  Kokkos::fence();
+
+  // Deep copy device view to host view.
+  Kokkos::deep_copy(h_X0, X0);
+
+  // Allocate IPIV view on host
+  using ViewTypeP = typename std::conditional<
+      MAGMA, Kokkos::View<int*, Kokkos::LayoutLeft, Kokkos::HostSpace>,
+      Kokkos::View<int*, Kokkos::LayoutLeft, execution_space>>::type;
+  ViewTypeP ipiv;
+  int Nt = 0;
+  if (mode[0] == 'Y') {
+    Nt   = N;
+    ipiv = ViewTypeP("IPIV", Nt);
+  }
+
+  // Solve.
+  try {
+    KokkosLapack::geqrf(space, A, B, ipiv);
+  } catch (const std::runtime_error& error) {
+    // Check for expected runtime errors due to:
+    // no-pivoting case (note: only MAGMA supports no-pivoting interface)
+    // and no-tpl case
+    bool nopivot_runtime_err = false;
+    bool notpl_runtime_err   = false;
+#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA   // have MAGMA TPL
+#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK  // and have LAPACK TPL
+    nopivot_runtime_err = (!std::is_same<typename Device::memory_space,
+                                         Kokkos::CudaSpace>::value) &&
+                          (ipiv.extent(0) == 0) && (ipiv.data() == nullptr);
+    notpl_runtime_err = false;
+#else
+    notpl_runtime_err = true;
+#endif
+#else                                   // not have MAGMA TPL
+#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK  // but have LAPACK TPL
+    nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr);
+    notpl_runtime_err   = false;
+#else
+    notpl_runtime_err = true;
+#endif
+#endif
+    if (!nopivot_runtime_err && !notpl_runtime_err) FAIL();
+    return;
+  }
+  Kokkos::fence();
+
+  // Get the solution vector.
+  Kokkos::deep_copy(h_B, B);
+
+  // Checking vs ref on CPU, this eps is about 10^-9
+  typedef typename ats::mag_type mag_type;
+  const mag_type eps = 1.0e7 * ats::epsilon();
+  bool test_flag     = true;
+  for (int j = 0; j < nrhs; j++) {
+    for (int i = 0; i < N; i++) {
+      if (ats::abs(h_B(i, j) - h_X0(i, j)) > eps) {
+        test_flag = false;
+        // printf( "    Error %d, pivot %c, padding %c: result( %.15lf ) !=
+        // solution( %.15lf ) at (%ld) at rhs %d\n", N, mode[0], padding[0],
+        // ats::abs(h_B(i,j)), ats::abs(h_X0(i,j)), i, j );
+        break;
+      }
+    }
+    if (test_flag == false) break;
+  }
+  ASSERT_EQ(test_flag, true);
+}
+
+}  // namespace Test
+
+template <class Scalar, class Device>
+int test_geqrf(const char* mode) {
+#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \
+    (!defined(KOKKOSKERNELS_ETI_ONLY) &&      \
+     !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
+  using view_type_a_ll = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
+  using view_type_b_ll = Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device>;
+
+#if (defined(TEST_CUDA_LAPACK_CPP) &&                                       \
+     defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) ||                         \
+    (defined(TEST_HIP_LAPACK_CPP) &&                                        \
+     defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) ||                        \
+    (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) &&                            \
+     (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \
+      defined(TEST_THREADS_LAPACK_CPP)))
+  Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, false>(
+      &mode[0], "N", 2);  // no padding
+  Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, false>(
+      &mode[0], "N", 13);  // no padding
+  Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, false>(
+      &mode[0], "N", 179);  // no padding
+  Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, false>(
+      &mode[0], "N", 64);  // no padding
+  Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, false>(
+      &mode[0], "N", 1024);  // no padding
+
+#elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_CUDA)
+  if constexpr (std::is_same_v<Kokkos::Cuda,
+                               typename Device::execution_space>) {
+    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "N", 2);  // no padding
+    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "N", 13);  // no padding
+    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "N", 179);  // no padding
+    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "N", 64);  // no padding
+    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "N", 1024);  // no padding
+
+    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "Y",
+        13);  // padding
+    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "Y",
+        179);  // padding
+  }
+#endif
+#endif
+
+  // Supress unused parameters on CUDA10
+  (void)mode;
+  return 1;
+}
+
+template <class Scalar, class Device>
+int test_geqrf_mrhs(const char* mode) {
+#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \
+    (!defined(KOKKOSKERNELS_ETI_ONLY) &&      \
+     !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
+  using view_type_a_ll = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
+  using view_type_b_ll = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
+
+#if (defined(TEST_CUDA_LAPACK_CPP) &&                                       \
+     defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) ||                         \
+    (defined(TEST_HIP_LAPACK_CPP) &&                                        \
+     defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) ||                        \
+    (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) &&                            \
+     (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \
+      defined(TEST_THREADS_LAPACK_CPP)))
+  Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, false>(
+      &mode[0], "N", 2, 5);  // no padding
+  Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, false>(
+      &mode[0], "N", 13, 5);  // no padding
+  Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, false>(
+      &mode[0], "N", 179, 5);  // no padding
+  Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, false>(
+      &mode[0], "N", 64, 5);  // no padding
+  Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, false>(
+      &mode[0], "N", 1024, 5);  // no padding
+
+// When appropriate run MAGMA specific tests
+#elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_CUDA)
+  if constexpr (std::is_same_v<Kokkos::Cuda,
+                               typename Device::execution_space>) {
+    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "N", 2, 5);  // no padding
+    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "N", 13, 5);  // no padding
+    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "N", 179, 5);  // no padding
+    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "N", 64, 5);  // no padding
+    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "N", 1024, 5);  // no padding
+
+    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "Y", 13, 5);  // padding
+    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
+        &mode[0], "Y", 179, 5);  // padding
+  }
+#endif
+#endif
+
+  // Supress unused parameters on CUDA10
+  (void)mode;
+  return 1;
+}
+
+#if defined(KOKKOSKERNELS_INST_FLOAT) || \
+    (!defined(KOKKOSKERNELS_ETI_ONLY) && \
+     !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
+TEST_F(TestCategory, geqrf_float) {
+  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_float");
+  test_geqrf<float, TestDevice>("N");  // No pivoting
+  test_geqrf<float, TestDevice>("Y");  // Partial pivoting
+  Kokkos::Profiling::popRegion();
+}
+
+TEST_F(TestCategory, geqrf_mrhs_float) {
+  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_float");
+  test_geqrf_mrhs<float, TestDevice>("N");  // No pivoting
+  test_geqrf_mrhs<float, TestDevice>("Y");  // Partial pivoting
+  Kokkos::Profiling::popRegion();
+}
+#endif
+
+#if defined(KOKKOSKERNELS_INST_DOUBLE) || \
+    (!defined(KOKKOSKERNELS_ETI_ONLY) &&  \
+     !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
+TEST_F(TestCategory, geqrf_double) {
+  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_double");
+  test_geqrf<double, TestDevice>("N");  // No pivoting
+  test_geqrf<double, TestDevice>("Y");  // Partial pivoting
+  Kokkos::Profiling::popRegion();
+}
+
+TEST_F(TestCategory, geqrf_mrhs_double) {
+  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_double");
+  test_geqrf_mrhs<double, TestDevice>("N");  // No pivoting
+  test_geqrf_mrhs<double, TestDevice>("Y");  // Partial pivoting
+  Kokkos::Profiling::popRegion();
+}
+#endif
+
+#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \
+    (!defined(KOKKOSKERNELS_ETI_ONLY) &&          \
+     !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
+TEST_F(TestCategory, geqrf_complex_double) {
+  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double");
+  test_geqrf<Kokkos::complex<double>, TestDevice>("N");  // No pivoting
+  test_geqrf<Kokkos::complex<double>, TestDevice>("Y");  // Partial pivoting
+  Kokkos::Profiling::popRegion();
+}
+
+TEST_F(TestCategory, geqrf_mrhs_complex_double) {
+  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_complex_double");
+  test_geqrf_mrhs<Kokkos::complex<double>, TestDevice>("N");  // No pivoting
+  test_geqrf_mrhs<Kokkos::complex<double>, TestDevice>("Y");  // Partial pivoting
+  Kokkos::Profiling::popRegion();
+}
+#endif
+
+#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \
+    (!defined(KOKKOSKERNELS_ETI_ONLY) &&         \
+     !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
+TEST_F(TestCategory, geqrf_complex_float) {
+  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float");
+  test_geqrf<Kokkos::complex<float>, TestDevice>("N");  // No pivoting
+  test_geqrf<Kokkos::complex<float>, TestDevice>("Y");  // Partial pivoting
+  Kokkos::Profiling::popRegion();
+}
+
+TEST_F(TestCategory, geqrf_mrhs_complex_float) {
+  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_complex_float");
+  test_geqrf_mrhs<Kokkos::complex<float>, TestDevice>("N");  // No pivoting
+  test_geqrf_mrhs<Kokkos::complex<float>, TestDevice>("Y");  // Partial pivoting
+  Kokkos::Profiling::popRegion();
+}
+#endif
+
+#endif  // CUDA+(MAGMA or CUSOLVER) or HIP+ROCSOLVER or LAPACK+HOST

From 04fbf5fda48de0c56705799b59e0aca9607781c3 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Tue, 14 May 2024 05:46:53 -0600
Subject: [PATCH 02/27] Backup

---
 lapack/CMakeLists.txt                         |   7 +
 lapack/impl/KokkosLapack_geqrf_spec.hpp       |  13 +-
 lapack/src/KokkosLapack_geqrf.hpp             | 116 ++++--------
 .../KokkosLapack_geqrf_tpl_spec_avail.hpp     |   8 -
 .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 171 +++++-------------
 lapack/unit_test/Test_Lapack_geqrf.hpp        |   2 +
 6 files changed, 90 insertions(+), 227 deletions(-)

diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt
index 804a2b7542..2bd27c3681 100644
--- a/lapack/CMakeLists.txt
+++ b/lapack/CMakeLists.txt
@@ -71,3 +71,10 @@ KOKKOSKERNELS_GENERATE_ETI(Lapack_svd svd
   SOURCE_LIST SOURCES
   TYPE_LISTS  FLOATS LAYOUTS DEVICES
 )
+
+KOKKOSKERNELS_GENERATE_ETI(Lapack_geqrf geqrf
+  COMPONENTS  lapack
+  HEADER_LIST ETI_HEADERS
+  SOURCE_LIST SOURCES
+  TYPE_LISTS  FLOATS LAYOUTS DEVICES
+)
diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp
index d0083cb151..b990e7550f 100644
--- a/lapack/impl/KokkosLapack_geqrf_spec.hpp
+++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp
@@ -50,10 +50,7 @@ struct geqrf_eti_spec_avail {
       Kokkos::View<SCALAR_TYPE **, LAYOUT_TYPE,                           \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
-      Kokkos::View<SCALAR_TYPE **, LAYOUT_TYPE,                           \
-                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
-      Kokkos::View<int *, LAYOUT_TYPE,                                    \
+      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                            \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {            \
     enum : bool { value = true };                                         \
@@ -83,7 +80,7 @@ struct GEQRF {
 //! Full specialization of geqrf for multi vectors.
 // Unification layer
 template <class ExecutionSpace, class AMatrix, class TWArray>
-struct GEQRF<ExecutionSpace, AMatrix, TWArray, TWArray, false,
+struct GEQRF<ExecutionSpace, AMatrix, TWArray, false,
             KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {
   static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */,
                    const TWArray & /* Tau */, const TWArray & /* Work */) {
@@ -115,9 +112,6 @@ struct GEQRF<ExecutionSpace, AMatrix, TWArray, TWArray, false,
       Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                           \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
-      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                           \
-                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
       false, true>;
 
 #define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE,       \
@@ -130,9 +124,6 @@ struct GEQRF<ExecutionSpace, AMatrix, TWArray, TWArray, false,
       Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                           \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
-      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                           \
-                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
       false, true>;
 
 #include <KokkosLapack_geqrf_tpl_spec_decl.hpp>
diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
index ba360ad830..e5c59c996e 100644
--- a/lapack/src/KokkosLapack_geqrf.hpp
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -66,106 +66,56 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
                                  typename AMatrix::memory_space>::accessible);
   static_assert(
       Kokkos::SpaceAccessibility<ExecutionSpace,
-                                 typename BXMV::memory_space>::accessible);
-#if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA)
-  if constexpr (!std::is_same_v<ExecutionSpace, Kokkos::Cuda>) {
-    static_assert(
-        Kokkos::SpaceAccessibility<ExecutionSpace,
-                                   typename IPIVV::memory_space>::accessible);
-  }
-#else
-  static_assert(
-      Kokkos::SpaceAccessibility<ExecutionSpace,
-                                 typename IPIVV::memory_space>::accessible);
-#endif
+                                 typename TWArray::memory_space>::accessible);
+
   static_assert(Kokkos::is_view<AMatrix>::value,
                 "KokkosLapack::geqrf: A must be a Kokkos::View.");
-  static_assert(Kokkos::is_view<BXMV>::value,
-                "KokkosLapack::geqrf: B must be a Kokkos::View.");
-  static_assert(Kokkos::is_view<IPIVV>::value,
-                "KokkosLapack::geqrf: IPIV must be a Kokkos::View.");
+  static_assert(Kokkos::is_view<TWArray>::value,
+                "KokkosLapack::geqrf: Tau and Work must be Kokkos::View.");
   static_assert(static_cast<int>(AMatrix::rank) == 2,
                 "KokkosLapack::geqrf: A must have rank 2.");
-  static_assert(
-      static_cast<int>(BXMV::rank) == 1 || static_cast<int>(BXMV::rank) == 2,
-      "KokkosLapack::geqrf: B must have either rank 1 or rank 2.");
-  static_assert(static_cast<int>(IPIVV::rank) == 1,
-                "KokkosLapack::geqrf: IPIV must have rank 1.");
-
-  int64_t IPIV0 = IPIV.extent(0);
-  int64_t A0    = A.extent(0);
-  int64_t A1    = A.extent(1);
-  int64_t B0    = B.extent(0);
+  static_assert(static_cast<int>(TWArray::rank) == 1,
+                "KokkosLapack::geqrf: Tau and Work must have rank 1.");
 
-  // Check validity of pivot argument
-  bool valid_pivot =
-      (IPIV0 == A1) || ((IPIV0 == 0) && (IPIV.data() == nullptr));
-  if (!(valid_pivot)) {
-    std::ostringstream os;
-    os << "KokkosLapack::geqrf: IPIV: " << IPIV0 << ". "
-       << "Valid options include zero-extent 1-D view (no pivoting), or 1-D "
-          "View with size of "
-       << A0 << " (partial pivoting).";
-g    KokkosKernels::Impl::throw_runtime_exception(os.str());
-  }
+  int64_t m = A.extent(0);
+  int64_t n = A.extent(1);
 
-  // Check for no pivoting case. Only MAGMA supports no pivoting interface
-#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA   // have MAGMA TPL
-#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK  // and have LAPACK TPL
-  if ((!std::is_same<typename AMatrix::device_type::memory_space,
-                     Kokkos::CudaSpace>::value) &&
-      (IPIV0 == 0) && (IPIV.data() == nullptr)) {
+  // Check validity of dimensions
+  if (Tau.extent(0) != std::min(m,n)) {
     std::ostringstream os;
-    os << "KokkosLapack::geqrf: IPIV: " << IPIV0 << ". "
-       << "LAPACK TPL does not support no pivoting.";
+    os << "KokkosLapack::geqrf: length of Tau must be equal to min(m,n): "
+       << " A: " << m << " x " << n << ", Tau length = " << Tau.extent(0);
     KokkosKernels::Impl::throw_runtime_exception(os.str());
   }
-#endif
-#else                                   // not have MAGMA TPL
-#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK  // but have LAPACK TPL
-  if ((IPIV0 == 0) && (IPIV.data() == nullptr)) {
-    std::ostringstream os;
-    os << "KokkosLapack::geqrf: IPIV: " << IPIV0 << ". "
-       << "LAPACK TPL does not support no pivoting.";
-    KokkosKernels::Impl::throw_runtime_exception(os.str());
+  if ((m == 0) || (n == 0)) {
+    if (Work.extent(0) < 1) {
+      std::ostringstream os;
+      os << "KokkosLapack::geqrf: In case min(m,n) == 0, then Work must have length >= 1: "
+         << " A: " << m << " x " << n << ", Work length = " << Work.extent(0);
+      KokkosKernels::Impl::throw_runtime_exception(os.str());
+    }
   }
-#endif
-#endif
-
-  // Check compatibility of dimensions at run time.
-  if ((A0 < A1) || (A0 != B0)) {
-    std::ostringstream os;
-    os << "KokkosLapack::geqrf: Dimensions of A, and B do not match: "
-       << " A: " << A.extent(0) << " x " << A.extent(1) << " B: " << B.extent(0)
-       << " x " << B.extent(1);
-    KokkosKernels::Impl::throw_runtime_exception(os.str());
+  else {
+    if (Work.extent(0) < n) {
+      std::ostringstream os;
+      os << "KokkosLapack::geqrf: In case min(m,n) != 0, then Work must have length >= n: "
+         << " A: " << m << " x " << n << ", Work length = " << Work.extent(0);
+      KokkosKernels::Impl::throw_runtime_exception(os.str());
+    }
   }
 
   typedef Kokkos::View<
       typename AMatrix::non_const_value_type**, typename AMatrix::array_layout,
       typename AMatrix::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
       AMatrix_Internal;
-  typedef Kokkos::View<typename BXMV::non_const_value_type**,
-                       typename BXMV::array_layout, typename BXMV::device_type,
+  typedef Kokkos::View<typename TWArray::non_const_value_type*,
+                       typename TWArray::array_layout, typename TWArray::device_type,
                        Kokkos::MemoryTraits<Kokkos::Unmanaged> >
-      BXMV_Internal;
-  typedef Kokkos::View<
-      typename IPIVV::non_const_value_type*, typename IPIVV::array_layout,
-      typename IPIVV::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
-      IPIVV_Internal;
-  AMatrix_Internal A_i = A;
-  // BXMV_Internal B_i = B;
-  IPIVV_Internal IPIV_i = IPIV;
-
-  if (BXMV::rank == 1) {
-    auto B_i = BXMV_Internal(B.data(), B.extent(0), 1);
-    KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, BXMV_Internal,
-                             IPIVV_Internal>::geqrf(space, A_i, B_i, IPIV_i);
-  } else {  // BXMV::rank == 2
-    auto B_i = BXMV_Internal(B.data(), B.extent(0), B.extent(1));
-    KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, BXMV_Internal,
-                             IPIVV_Internal>::geqrf(space, A_i, B_i, IPIV_i);
-  }
+      TWArray_Internal;
+  AMatrix_Internal A_i    = A;
+  TWArray_Internal Tau_i  = Tau;
+  TWArray_Internal Work_i = Work;
+  KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TWArray_Internal>::geqrf(space, A_i, Tau_i, Work_i);
 }
 
 /// \brief Computes a QR factorization of a matrix A
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
index 733f0510e0..d4f1ff107e 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
@@ -34,8 +34,6 @@ struct geqrf_tpl_spec_avail {
       ExecSpace,                                                           \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>,  \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged> >,              \
-      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>,   \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,              \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>,   \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {           \
     enum : bool { value = true };                                          \
@@ -65,8 +63,6 @@ namespace Impl {
       Kokkos::Cuda,                                                          \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>, \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                \
-      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {             \
     enum : bool { value = true };                                            \
@@ -95,8 +91,6 @@ namespace Impl {
       Kokkos::Cuda,                                                          \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>, \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                \
-      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {             \
     enum : bool { value = true };                                            \
@@ -140,8 +134,6 @@ namespace Impl {
       Kokkos::HIP,                                                            \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,   \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                 \
-      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,    \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                 \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,    \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {              \
     enum : bool { value = true };                                             \
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index 8a5b37812d..9b6e1700a3 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -17,20 +17,20 @@
 #ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_
 #define KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_
 
+// AquiEEP
+
 namespace KokkosLapack {
 namespace Impl {
-template <class AViewType, class BViewType, class PViewType>
+template <class AViewType, class TWViewType>
 inline void geqrf_print_specialization() {
 #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA
-  printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n",
-         typeid(AViewType).name(), typeid(BViewType).name(),
-         typeid(PViewType).name());
+  printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s >\n",
+         typeid(AViewType).name(), typeid(TWViewType).name());
 #else
 #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK
-  printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n",
-         typeid(AViewType).name(), typeid(BViewType).name(),
-         typeid(PViewType).name());
+  printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s >\n",
+         typeid(AViewType).name(), typeid(TWViewType).name());
 #endif
 #endif
 #endif
@@ -45,13 +45,11 @@ inline void geqrf_print_specialization() {
 namespace KokkosLapack {
 namespace Impl {
 
-template <class AViewType, class BViewType, class IPIVViewType>
-void lapackGeqrfWrapper(const AViewType& A, const BViewType& B,
-                       const IPIVViewType& IPIV) {
+template <class AViewType, class TWViewType>
+void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
+                       const TWViewType& Work) {
   using Scalar = typename AViewType::non_const_value_type;
 
-  const bool with_pivot = !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr));
-
   const int N    = static_cast<int>(A.extent(1));
   const int AST  = static_cast<int>(A.stride(1));
   const int LDA  = (AST == 0) ? 1 : AST;
@@ -61,19 +59,8 @@ void lapackGeqrfWrapper(const AViewType& A, const BViewType& B,
 
   int info = 0;
 
-  if (with_pivot) {
-    if constexpr (Kokkos::ArithTraits<Scalar>::is_complex) {
-      using MagType = typename Kokkos::ArithTraits<Scalar>::mag_type;
-
-      HostLapack<std::complex<MagType>>::geqrf(
-          N, NRHS, reinterpret_cast<std::complex<MagType>*>(A.data()), LDA,
-          IPIV.data(), reinterpret_cast<std::complex<MagType>*>(B.data()), LDB,
-          info);
-    } else {
       HostLapack<Scalar>::geqrf(N, NRHS, A.data(), LDA, IPIV.data(), B.data(),
                                LDB, info);
-    }
-  }
 }
 
 #define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE)         \
@@ -82,36 +69,28 @@ void lapackGeqrfWrapper(const AViewType& A, const BViewType& B,
       EXECSPACE,                                                               \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,     \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,     \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      Kokkos::View<int*, LAYOUT, Kokkos::Device<EXECSPACE, Kokkos::HostSpace>, \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,     \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       true,                                                                    \
       geqrf_eti_spec_avail<                                                     \
           EXECSPACE,                                                           \
           Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>, \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>, \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<int*, LAYOUT,                                           \
-                       Kokkos::Device<EXECSPACE, Kokkos::HostSpace>,           \
+          Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>, \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType =                                                          \
         Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,   \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using BViewType =                                                          \
-        Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,   \
-                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using PViewType =                                                          \
-        Kokkos::View<int*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,       \
+    using TWViewType =                                                          \
+        Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,   \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
                                                                                \
     static void geqrf(const EXECSPACE& /* space */, const AViewType& A,         \
-                     const BViewType& B, const PViewType& IPIV) {              \
+                     const TWViewType& Tau, const TWViewType& Work) {              \
       Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR   \
                                     "]");                                      \
-      geqrf_print_specialization<AViewType, BViewType, PViewType>();            \
-      lapackGeqrfWrapper(A, B, IPIV);                                           \
+      geqrf_print_specialization<AViewType, TWViewType>();            \
+      lapackGeqrfWrapper(A, Tau, Work);                                           \
       Kokkos::Profiling::popRegion();                                          \
     }                                                                          \
   };
@@ -160,16 +139,14 @@ KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<double>, Kokkos::LayoutLeft,
 namespace KokkosLapack {
 namespace Impl {
 
-template <class ExecSpace, class AViewType, class BViewType, class IPIVViewType>
+template <class ExecSpace, class AViewType, class TWViewType>
 void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A,
-                      const BViewType& B, const IPIVViewType& IPIV) {
+                      const TWViewType& Tau, const TWViewType& Work) {
   using scalar_type = typename AViewType::non_const_value_type;
 
   Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," +
                                 Kokkos::ArithTraits<scalar_type>::name() + "]");
-  geqrf_print_specialization<AViewType, BViewType, IPIVViewType>();
-
-  const bool with_pivot = !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr));
+  geqrf_print_specialization<AViewType, TWViewType>();
 
   magma_int_t N    = static_cast<magma_int_t>(A.extent(1));
   magma_int_t AST  = static_cast<magma_int_t>(A.stride(1));
@@ -184,53 +161,27 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A,
 
   space.fence();
   if constexpr (std::is_same_v<scalar_type, float>) {
-    if (with_pivot) {
-      magma_sgeqrf_gpu(N, NRHS, reinterpret_cast<magmaFloat_ptr>(A.data()), LDA,
-                      IPIV.data(), reinterpret_cast<magmaFloat_ptr>(B.data()),
-                      LDB, &info);
-    } else {
       magma_sgeqrf_nopiv_gpu(N, NRHS, reinterpret_cast<magmaFloat_ptr>(A.data()),
                             LDA, reinterpret_cast<magmaFloat_ptr>(B.data()),
                             LDB, &info);
-    }
   }
 
   if constexpr (std::is_same_v<scalar_type, double>) {
-    if (with_pivot) {
-      magma_dgeqrf_gpu(N, NRHS, reinterpret_cast<magmaDouble_ptr>(A.data()), LDA,
-                      IPIV.data(), reinterpret_cast<magmaDouble_ptr>(B.data()),
-                      LDB, &info);
-    } else {
       magma_dgeqrf_nopiv_gpu(
           N, NRHS, reinterpret_cast<magmaDouble_ptr>(A.data()), LDA,
           reinterpret_cast<magmaDouble_ptr>(B.data()), LDB, &info);
-    }
   }
 
   if constexpr (std::is_same_v<scalar_type, Kokkos::complex<float>>) {
-    if (with_pivot) {
-      magma_cgeqrf_gpu(
-          N, NRHS, reinterpret_cast<magmaFloatComplex_ptr>(A.data()), LDA,
-          IPIV.data(), reinterpret_cast<magmaFloatComplex_ptr>(B.data()), LDB,
-          &info);
-    } else {
       magma_cgeqrf_nopiv_gpu(
           N, NRHS, reinterpret_cast<magmaFloatComplex_ptr>(A.data()), LDA,
           reinterpret_cast<magmaFloatComplex_ptr>(B.data()), LDB, &info);
-    }
   }
 
   if constexpr (std::is_same_v<scalar_type, Kokkos::complex<double>>) {
-    if (with_pivot) {
-      magma_zgeqrf_gpu(
-          N, NRHS, reinterpret_cast<magmaDoubleComplex_ptr>(A.data()), LDA,
-          IPIV.data(), reinterpret_cast<magmaDoubleComplex_ptr>(B.data()), LDB,
-          &info);
-    } else {
       magma_zgeqrf_nopiv_gpu(
           N, NRHS, reinterpret_cast<magmaDoubleComplex_ptr>(A.data()), LDA,
           reinterpret_cast<magmaDoubleComplex_ptr>(B.data()), LDB, &info);
-    }
   }
   ExecSpace().fence();
   Kokkos::Profiling::popRegion();
@@ -242,11 +193,7 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A,
       Kokkos::Cuda,                                                           \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
-      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
-      Kokkos::View<magma_int_t*, LAYOUT,                                      \
-                   Kokkos::Device<Kokkos::DefaultHostExecutionSpace,          \
-                                  Kokkos::HostSpace>,                         \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
       true,                                                                   \
       geqrf_eti_spec_avail<                                                    \
@@ -254,27 +201,19 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A,
           Kokkos::View<SCALAR**, LAYOUT,                                      \
                        Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,               \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
-          Kokkos::View<SCALAR**, LAYOUT,                                      \
+          Kokkos::View<SCALAR*, LAYOUT,                                      \
                        Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,               \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
-          Kokkos::View<magma_int_t*, LAYOUT,                                  \
-                       Kokkos::Device<Kokkos::DefaultHostExecutionSpace,      \
-                                      Kokkos::HostSpace>,                     \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {    \
     using AViewType = Kokkos::View<SCALAR**, LAYOUT,                          \
                                    Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,   \
                                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>;  \
-    using BViewType = Kokkos::View<SCALAR**, LAYOUT,                          \
+    using TWViewType = Kokkos::View<SCALAR*, LAYOUT,                          \
                                    Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,   \
                                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>;  \
-    using PViewType = Kokkos::View<                                           \
-        magma_int_t*, LAYOUT,                                                 \
-        Kokkos::Device<Kokkos::DefaultHostExecutionSpace, Kokkos::HostSpace>, \
-        Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                             \
                                                                               \
     static void geqrf(const Kokkos::Cuda& space, const AViewType& A,           \
-                     const BViewType& B, const PViewType& IPIV) {             \
-      magmaGeqrfWrapper(space, A, B, IPIV);                                    \
+                     const TWViewType& Tau, const TWViewType& Work) {             \
+      magmaGeqrfWrapper(space, A, Tau, Work);                                    \
     }                                                                         \
   };
 
@@ -296,14 +235,13 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex<double>, Kokkos::LayoutLeft,
 namespace KokkosLapack {
 namespace Impl {
 
-template <class ExecutionSpace, class IPIVViewType, class AViewType,
-          class BViewType>
-void cusolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV,
-                         const AViewType& A, const BViewType& B) {
+template <class ExecutionSpace, class AViewType, class TWViewType>
+void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
+                         const AViewType& A, const TWViewType& Tau) {
   using memory_space = typename AViewType::memory_space;
-  using Scalar       = typename BViewType::non_const_value_type;
+  using Scalar       = typename TWViewType::non_const_value_type;
   using ALayout_t    = typename AViewType::array_layout;
-  using BLayout_t    = typename BViewType::array_layout;
+  using BLayout_t    = typename TWViewType::array_layout;
 
   const int m   = A.extent_int(0);
   const int n   = A.extent_int(1);
@@ -388,9 +326,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV,
       Kokkos::Cuda,                                                            \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,      \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       true,                                                                    \
       geqrf_eti_spec_avail<                                                     \
@@ -398,26 +334,21 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV,
           Kokkos::View<SCALAR**, LAYOUT,                                       \
                        Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<SCALAR**, LAYOUT,                                       \
+          Kokkos::View<SCALAR*, LAYOUT,                                       \
                        Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType = Kokkos::View<SCALAR**, LAYOUT,                           \
                                    Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
                                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
-    using BViewType = Kokkos::View<SCALAR**, LAYOUT,                           \
+    using TWViewType = Kokkos::View<SCALAR*, LAYOUT,                           \
                                    Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
                                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
-    using PViewType =                                                          \
-        Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
-                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
                                                                                \
     static void geqrf(const Kokkos::Cuda& space, const AViewType& A,            \
-                     const BViewType& B, const PViewType& IPIV) {              \
+                     const TWViewType& Tau, const TWViewType& Work) {              \
       Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR \
                                     "]");                                      \
-      geqrf_print_specialization<AViewType, BViewType, PViewType>();            \
+      geqrf_print_specialization<AViewType, TWViewType>();            \
                                                                                \
       cusolverGeqrfWrapper(space, IPIV, A, B);                                  \
       Kokkos::Profiling::popRegion();                                          \
@@ -452,13 +383,12 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
 namespace KokkosLapack {
 namespace Impl {
 
-template <class ExecutionSpace, class IPIVViewType, class AViewType,
-          class BViewType>
-void rocsolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV,
-                          const AViewType& A, const BViewType& B) {
-  using Scalar    = typename BViewType::non_const_value_type;
+template <class ExecutionSpace, class AViewType, class TWViewType>
+void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
+                          const AViewType& A, const TWViewType& Tau) {
+  using Scalar    = typename TWViewType::non_const_value_type;
   using ALayout_t = typename AViewType::array_layout;
-  using BLayout_t = typename BViewType::array_layout;
+  using BLayout_t = typename TWViewType::array_layout;
 
   const rocblas_int N    = static_cast<rocblas_int>(A.extent(0));
   const rocblas_int nrhs = static_cast<rocblas_int>(B.extent(1));
@@ -505,10 +435,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV
       Kokkos::HIP,                                                             \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,   \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,   \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      Kokkos::View<rocblas_int*, LAYOUT,                                       \
-                   Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                     \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,   \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       true,                                                                    \
       geqrf_eti_spec_avail<                                                     \
@@ -516,27 +443,21 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV
           Kokkos::View<SCALAR**, LAYOUT,                                       \
                        Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<SCALAR**, LAYOUT,                                       \
-                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<rocblas_int*, LAYOUT,                                   \
+          Kokkos::View<SCALAR*, LAYOUT,                                       \
                        Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType =                                                          \
         Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using BViewType =                                                          \
-        Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
+    using TWViewType =                                                          \
+        Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using PViewType = Kokkos::View<rocblas_int*, LAYOUT,                       \
-                                   Kokkos::Device<Kokkos::HIP, MEM_SPACE>,     \
-                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
                                                                                \
     static void geqrf(const Kokkos::HIP& space, const AViewType& A,             \
-                     const BViewType& B, const PViewType& IPIV) {              \
+                     const TWViewType& Tau, const TWViewType& Work) {              \
       Kokkos::Profiling::pushRegion(                                           \
           "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]");                    \
-      geqrf_print_specialization<AViewType, BViewType, PViewType>();            \
+      geqrf_print_specialization<AViewType, TWViewType>();            \
                                                                                \
       rocsolverGeqrfWrapper(space, IPIV, A, B);                                 \
       Kokkos::Profiling::popRegion();                                          \
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index f9e93180b1..c246b86a29 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -25,6 +25,8 @@
      (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \
       defined(TEST_THREADS_LAPACK_CPP)))
 
+// AquiEEP
+
 #include <gtest/gtest.h>
 #include <Kokkos_Core.hpp>
 #include <Kokkos_Random.hpp>

From 1caf1476f2941f59a8ea57e284b9dfdb0b70338f Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Sun, 19 May 2024 01:10:37 -0600
Subject: [PATCH 03/27] Backup

---
 lapack/impl/KokkosLapack_geqrf_spec.hpp       |  24 +-
 lapack/src/KokkosLapack_geqrf.hpp             |  29 ++-
 lapack/tpls/KokkosLapack_Host_tpl.cpp         |  47 ++++
 lapack/tpls/KokkosLapack_Host_tpl.hpp         |   2 +
 .../KokkosLapack_geqrf_tpl_spec_avail.hpp     |   8 +-
 .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 179 +++++++-------
 lapack/unit_test/Test_Lapack_geqrf.hpp        | 224 +-----------------
 7 files changed, 197 insertions(+), 316 deletions(-)

diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp
index b990e7550f..98d532489b 100644
--- a/lapack/impl/KokkosLapack_geqrf_spec.hpp
+++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp
@@ -28,7 +28,7 @@
 namespace KokkosLapack {
 namespace Impl {
 // Specialization struct which defines whether a specialization exists
-template <class ExecutionSpace, class AVT, class TWVT>
+template <class ExecutionSpace, class AVT, class TWVT, class RT>
 struct geqrf_eti_spec_avail {
   enum : bool { value = false };
 };
@@ -52,6 +52,8 @@ struct geqrf_eti_spec_avail {
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
       Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                            \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
+      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,           \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {            \
     enum : bool { value = true };                                         \
   };
@@ -64,26 +66,24 @@ namespace KokkosLapack {
 namespace Impl {
 
 // Unification layer
-/// \brief Implementation of KokkosLapack::geqrf.
-
-template <class ExecutionSpace, class AMatrix, class TWArray,
+template <class ExecutionSpace, class AMatrix, class TWArray, class RType,
           bool tpl_spec_avail =
-              geqrf_tpl_spec_avail<ExecutionSpace, AMatrix, TWArray>::value,
+              geqrf_tpl_spec_avail<ExecutionSpace, AMatrix, TWArray, RType>::value,
           bool eti_spec_avail =
-              geqrf_eti_spec_avail<ExecutionSpace, AMatrix, TWArray>::value>
+              geqrf_eti_spec_avail<ExecutionSpace, AMatrix, TWArray, RType>::value>
 struct GEQRF {
   static void geqrf(const ExecutionSpace &space, const AMatrix &A, const TWArray &Tau,
-                   const TWArray &Work);
+                    const TWArray &Work, const RType &R);
 };
 
 #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY
 //! Full specialization of geqrf for multi vectors.
 // Unification layer
-template <class ExecutionSpace, class AMatrix, class TWArray>
-struct GEQRF<ExecutionSpace, AMatrix, TWArray, false,
+template <class ExecutionSpace, class AMatrix, class TWArray, class RType>
+struct GEQRF<ExecutionSpace, AMatrix, TWArray, RType, false,
             KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {
   static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */,
-                   const TWArray & /* Tau */, const TWArray & /* Work */) {
+                    const TWArray & /* Tau */, const TWArray & /* Work */, const RType & /* R */) {
     // NOTE: Might add the implementation of KokkosLapack::geqrf later
     throw std::runtime_error(
         "No fallback implementation of GEQRF (general QR factorization) "
@@ -112,6 +112,8 @@ struct GEQRF<ExecutionSpace, AMatrix, TWArray, false,
       Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                           \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
+      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,          \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
       false, true>;
 
 #define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE,       \
@@ -124,6 +126,8 @@ struct GEQRF<ExecutionSpace, AMatrix, TWArray, false,
       Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                           \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
+      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,          \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
       false, true>;
 
 #include <KokkosLapack_geqrf_tpl_spec_decl.hpp>
diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
index e5c59c996e..78bea3a4e7 100644
--- a/lapack/src/KokkosLapack_geqrf.hpp
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -44,16 +44,19 @@ namespace KokkosLapack {
 ///                   upper triangular if M >= N); the elements below the
 ///                   diagonal, with the array Tau, represent the unitary
 ///                   matrix Q as a product of min(M,N) elementary reflectors.
-/// \param Tau [out]  One-dimensional array of size min(M,N) that contain
+/// \param Tau [out]  One-dimensional array of size min(M,N) that contains
 ///                   the scalar factors of the elementary reflectors.
 /// \param Work [out] One-dimensional array of size max(1,LWORK).
 ///                   If min(M,N) == 0, then LWORK must be >= 1.
 ///                   If min(M,N) != 0, then LWORK must be >= N.
 ///                   If the QR factorization is successful, then the first
 ///                   position of Work contains the optimal LWORK.
+/// \return           = 0: successful exit
+///                   < 0: if equal to '-i', the i-th argument had an illegal
+///                        value
 ///
 template <class ExecutionSpace, class AMatrix, class TWArray>
-void geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
+int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
           const TWArray& Work) {
   // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and
   // rocSOLVER TPLs.
@@ -115,7 +118,18 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
   AMatrix_Internal A_i    = A;
   TWArray_Internal Tau_i  = Tau;
   TWArray_Internal Work_i = Work;
-  KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TWArray_Internal>::geqrf(space, A_i, Tau_i, Work_i);
+
+  // This is the return value type and should always reside on host
+  using RViewInternalType =
+      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
+
+  int result;
+  RViewInternalType R = RViewInternalType(&result);
+
+  KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TWArray_Internal, RViewInternalType>::geqrf(space, A_i, Tau_i, Work_i, R);
+
+  return result;
 }
 
 /// \brief Computes a QR factorization of a matrix A
@@ -129,18 +143,21 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
 ///                   upper triangular if M >= N); the elements below the
 ///                   diagonal, with the array Tau, represent the unitary
 ///                   matrix Q as a product of min(M,N) elementary reflectors.
-/// \param Tau [out]  One-dimensional array of size min(M,N) that contain
+/// \param Tau [out]  One-dimensional array of size min(M,N) that contains
 ///                   the scalar factors of the elementary reflectors.
 /// \param Work [out] One-dimensional array of size max(1,LWORK).
 ///                   If min(M,N) == 0, then LWORK must be >= 1.
 ///                   If min(M,N) != 0, then LWORK must be >= N.
 ///                   If the QR factorization is successful, then the first
 ///                   position of Work contains the optimal LWORK.
+/// \return           = 0: successful exit
+///                   < 0: if equal to '-i', the i-th argument had an illegal
+///                        value
 ///
 template <class AMatrix, class TWArray>
-void geqrf(const AMatrix& A, const TWArray& Tau, const TWArray& Work) {
+int geqrf(const AMatrix& A, const TWArray& Tau, const TWArray& Work) {
   typename AMatrix::execution_space space{};
-  geqrf(space, A, Tau, Work);
+  return geqrf(space, A, Tau, Work);
 }
 
 }  // namespace KokkosLapack
diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp
index add0a802bd..89085619e8 100644
--- a/lapack/tpls/KokkosLapack_Host_tpl.cpp
+++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp
@@ -82,6 +82,20 @@ void F77_BLAS_MANGLE(ctrtri, CTRTRI)(const char*, const char*, int*,
                                      const std::complex<float>*, int*, int*);
 void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*,
                                      const std::complex<double>*, int*, int*);
+
+///
+/// Geqrf
+///
+
+void F77_BLAS_MANGLE(sgeqrf, SGEQRF)(int*, int*, float*, int*, float*, float*, int*,
+                                   int*);
+void F77_BLAS_MANGLE(dgeqrf, DGEQRF)(int*, int*, double*, int*, double*, double*,
+                                   int*, int*);
+void F77_BLAS_MANGLE(cgeqrf, CGEQRF)(int*, int*, std::complex<float>*, int*, std::complex<float>*,
+                                   std::complex<float>*, int*, int*);
+void F77_BLAS_MANGLE(zgeqrf, ZGEQRF)(int*, int*, std::complex<double>*, int*,
+                                   std::complex<double>*, std::complex<double>*, int*, int*);
+
 }
 
 #define F77_FUNC_SGESV F77_BLAS_MANGLE(sgesv, SGESV)
@@ -99,6 +113,11 @@ void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*,
 #define F77_FUNC_CTRTRI F77_BLAS_MANGLE(ctrtri, CTRTRI)
 #define F77_FUNC_ZTRTRI F77_BLAS_MANGLE(ztrtri, ZTRTRI)
 
+#define F77_FUNC_SGEQRF F77_BLAS_MANGLE(sgeqrf, SGEQRF)
+#define F77_FUNC_DGEQRF F77_BLAS_MANGLE(dgeqrf, DGEQRF)
+#define F77_FUNC_CGEQRF F77_BLAS_MANGLE(cgeqrf, CGEQRF)
+#define F77_FUNC_ZGEQRF F77_BLAS_MANGLE(zgeqrf, ZGEQRF)
+
 namespace KokkosLapack {
 namespace Impl {
 
@@ -127,6 +146,13 @@ int HostLapack<float>::trtri(const char uplo, const char diag, int n,
   F77_FUNC_STRTRI(&uplo, &diag, &n, a, &lda, &info);
   return info;
 }
+template <>
+int HostLapack<float>::geqrf(int m, int n, float* a, int lda, float* tau,
+                             float* work, int lwork) {
+  int info = 0;
+  F77_FUNC_SGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info);
+  return info;
+}
 
 ///
 /// double
@@ -153,6 +179,13 @@ int HostLapack<double>::trtri(const char uplo, const char diag, int n,
   F77_FUNC_DTRTRI(&uplo, &diag, &n, a, &lda, &info);
   return info;
 }
+template <>
+int HostLapack<double>::geqrf(int m, int n, double* a, int lda, double* tau,
+                              double* work, int lwork) {
+  int info = 0;
+  F77_FUNC_DGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info);
+  return info;
+}
 
 ///
 /// std::complex<float>
@@ -182,6 +215,13 @@ int HostLapack<std::complex<float> >::trtri(const char uplo, const char diag,
   F77_FUNC_CTRTRI(&uplo, &diag, &n, a, &lda, &info);
   return info;
 }
+template <>
+int HostLapack<std::complex<float>>::geqrf(int m, int n, std::complex<float>* a, int lda, std::complex<float>* tau,
+                                           std::complex<float>* work, int lwork) {
+  int info = 0;
+  F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info);
+  return info;
+}
 
 ///
 /// std::complex<double>
@@ -212,6 +252,13 @@ int HostLapack<std::complex<double> >::trtri(const char uplo, const char diag,
   F77_FUNC_ZTRTRI(&uplo, &diag, &n, a, &lda, &info);
   return info;
 }
+template <>
+int HostLapack<std::complex<double>>::geqrf(int m, int n, std::complex<double>* a, int lda, std::complex<double>* tau,
+                                            std::complex<double>* work, int lwork) {
+  int info = 0;
+  F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info);
+  return info;
+}
 
 }  // namespace Impl
 }  // namespace KokkosLapack
diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp
index 9eca83afea..d651c9ca52 100644
--- a/lapack/tpls/KokkosLapack_Host_tpl.hpp
+++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp
@@ -41,6 +41,8 @@ struct HostLapack {
 
   static int trtri(const char uplo, const char diag, int n, const T *a,
                    int lda);
+
+  static int geqrf(int m, int n, T *a, int lda, T *tau, T *work, int lwork);
 };
 }  // namespace Impl
 }  // namespace KokkosLapack
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
index d4f1ff107e..aaa465a814 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
@@ -20,7 +20,7 @@
 namespace KokkosLapack {
 namespace Impl {
 // Specialization struct which defines whether a specialization exists
-template <class ExecutionSpace, class AMatrix, class TWArray>
+template <class ExecutionSpace, class AMatrix, class TWArray, class RType>
 struct geqrf_tpl_spec_avail {
   enum : bool { value = false };
 };
@@ -33,9 +33,11 @@ struct geqrf_tpl_spec_avail {
   struct geqrf_tpl_spec_avail<                                             \
       ExecSpace,                                                           \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>,  \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,              \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>,   \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {           \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,            \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {             \
     enum : bool { value = true };                                          \
   };
 
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index 9b6e1700a3..559e2854a6 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -21,16 +21,16 @@
 
 namespace KokkosLapack {
 namespace Impl {
-template <class AViewType, class TWViewType>
+template <class AViewType, class TWViewType, class RType>
 inline void geqrf_print_specialization() {
 #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA
-  printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s >\n",
-         typeid(AViewType).name(), typeid(TWViewType).name());
+  printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n",
+         typeid(AViewType).name(), typeid(TWViewType).name(), typeid(RType).name());
 #else
 #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK
-  printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s >\n",
-         typeid(AViewType).name(), typeid(TWViewType).name());
+  printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n",
+         typeid(AViewType).name(), typeid(TWViewType).name(), typeid(RType).name());
 #endif
 #endif
 #endif
@@ -45,52 +45,59 @@ inline void geqrf_print_specialization() {
 namespace KokkosLapack {
 namespace Impl {
 
-template <class AViewType, class TWViewType>
+template <class AViewType, class TWViewType, class RType>
 void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
-                       const TWViewType& Work) {
+                        const TWViewType& Work, const RType& R) {
   using Scalar = typename AViewType::non_const_value_type;
 
-  const int N    = static_cast<int>(A.extent(1));
-  const int AST  = static_cast<int>(A.stride(1));
-  const int LDA  = (AST == 0) ? 1 : AST;
-  const int BST  = static_cast<int>(B.stride(1));
-  const int LDB  = (BST == 0) ? 1 : BST;
-  const int NRHS = static_cast<int>(B.extent(1));
-
-  int info = 0;
-
-      HostLapack<Scalar>::geqrf(N, NRHS, A.data(), LDA, IPIV.data(), B.data(),
-                               LDB, info);
+  using ALayout_t = typename AViewType::array_layout;
+  static_assert(std::is_same_v<ALayout_t, Kokkos::LayoutLeft>,
+                "KokkosLapack - geqrf: A needs to have a Kokkos::LayoutLeft");
+  const int M     = A.extent_int(0);
+  const int N     = A.extent_int(1);
+  const int LDA   = A.stride(1);
+  const int LWORK = static_cast<int>(Work.extent(0));
+
+  R() = HostLapack<Scalar>::geqrf(M, N, A.data(), LDA, Tau.data(), Work.data(),
+                                  LWORK);
 }
 
-#define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE)         \
+#define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE)        \
   template <>                                                                  \
-  struct GEQRF<                                                                 \
+  struct GEQRF<                                                                \
       EXECSPACE,                                                               \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,     \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,     \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,      \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,                \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       true,                                                                    \
-      geqrf_eti_spec_avail<                                                     \
+      geqrf_eti_spec_avail<                                                    \
           EXECSPACE,                                                           \
           Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>, \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>, \
+          Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,  \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,            \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType =                                                          \
         Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,   \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using TWViewType =                                                          \
-        Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,   \
+    using TWViewType =                                                         \
+        Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,    \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+    using RType =                                                              \
+        Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,              \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
                                                                                \
-    static void geqrf(const EXECSPACE& /* space */, const AViewType& A,         \
-                     const TWViewType& Tau, const TWViewType& Work) {              \
-      Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR   \
+    static void geqrf(const EXECSPACE& /* space */, const AViewType& A,        \
+                      const TWViewType& Tau, const TWViewType& Work,           \
+                      const RType& R) {	                                       \
+      Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR  \
                                     "]");                                      \
-      geqrf_print_specialization<AViewType, TWViewType>();            \
-      lapackGeqrfWrapper(A, Tau, Work);                                           \
+      geqrf_print_specialization<AViewType, TWViewType, RType>();              \
+      lapackGeqrfWrapper(A, Tau, Work, R);                                     \
       Kokkos::Profiling::popRegion();                                          \
     }                                                                          \
   };
@@ -132,6 +139,8 @@ KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<double>, Kokkos::LayoutLeft,
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_LAPACK
 
+#if 0 // AquiEEP
+
 // MAGMA
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA
 #include <KokkosLapack_magma.hpp>
@@ -146,7 +155,7 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A,
 
   Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," +
                                 Kokkos::ArithTraits<scalar_type>::name() + "]");
-  geqrf_print_specialization<AViewType, TWViewType>();
+  geqrf_print_specialization<AViewType, TWViewType, RType>();
 
   magma_int_t N    = static_cast<magma_int_t>(A.extent(1));
   magma_int_t AST  = static_cast<magma_int_t>(A.stride(1));
@@ -321,38 +330,38 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
 }
 
 #define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE)                  \
-  template <>                                                                  \
+  template <>                                                                   \
   struct GEQRF<                                                                 \
-      Kokkos::Cuda,                                                            \
-      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      true,                                                                    \
+      Kokkos::Cuda,                                                             \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,   \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                    \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                    \
+      true,                                                                     \
       geqrf_eti_spec_avail<                                                     \
-          Kokkos::Cuda,                                                        \
-          Kokkos::View<SCALAR**, LAYOUT,                                       \
-                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<SCALAR*, LAYOUT,                                       \
-                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
-    using AViewType = Kokkos::View<SCALAR**, LAYOUT,                           \
-                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
-                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
-    using TWViewType = Kokkos::View<SCALAR*, LAYOUT,                           \
-                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
-                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
-                                                                               \
+          Kokkos::Cuda,                                                         \
+          Kokkos::View<SCALAR**, LAYOUT,                                        \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                 \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                \
+          Kokkos::View<SCALAR*, LAYOUT,                                         \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                 \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {      \
+    using AViewType = Kokkos::View<SCALAR**, LAYOUT,                            \
+                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,     \
+                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;    \
+    using TWViewType = Kokkos::View<SCALAR*, LAYOUT,                            \
+                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,     \
+                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;    \
+                                                                                \
     static void geqrf(const Kokkos::Cuda& space, const AViewType& A,            \
-                     const TWViewType& Tau, const TWViewType& Work) {              \
+                     const TWViewType& Tau, const TWViewType& Work) {           \
       Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR \
-                                    "]");                                      \
-      geqrf_print_specialization<AViewType, TWViewType>();            \
-                                                                               \
+                                    "]");                                       \
+      geqrf_print_specialization<AViewType, TWViewType, RType>();               \
+                                                                                \
       cusolverGeqrfWrapper(space, IPIV, A, B);                                  \
-      Kokkos::Profiling::popRegion();                                          \
-    }                                                                          \
+      Kokkos::Profiling::popRegion();                                           \
+    }                                                                           \
   };
 
 KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace)
@@ -430,38 +439,38 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
 }
 
 #define KOKKOSLAPACK_GEQRF_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE)                 \
-  template <>                                                                  \
+  template <>                                                                   \
   struct GEQRF<                                                                 \
-      Kokkos::HIP,                                                             \
-      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,   \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,   \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      true,                                                                    \
+      Kokkos::HIP,                                                              \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,    \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                    \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,     \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                    \
+      true,                                                                     \
       geqrf_eti_spec_avail<                                                     \
-          Kokkos::HIP,                                                         \
-          Kokkos::View<SCALAR**, LAYOUT,                                       \
-                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<SCALAR*, LAYOUT,                                       \
-                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
-    using AViewType =                                                          \
-        Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
-                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+          Kokkos::HIP,                                                          \
+          Kokkos::View<SCALAR**, LAYOUT,                                        \
+                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                  \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                \
+          Kokkos::View<SCALAR*, LAYOUT,                                         \
+                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                  \ 
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {      \
+    using AViewType =                                                           \
+        Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,  \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                  \
     using TWViewType =                                                          \
-        Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
-                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-                                                                               \
+        Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,   \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                  \
+                                                                                \
     static void geqrf(const Kokkos::HIP& space, const AViewType& A,             \
-                     const TWViewType& Tau, const TWViewType& Work) {              \
-      Kokkos::Profiling::pushRegion(                                           \
+                     const TWViewType& Tau, const TWViewType& Work) {           \
+      Kokkos::Profiling::pushRegion(                                            \
           "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]");                    \
-      geqrf_print_specialization<AViewType, TWViewType>();            \
-                                                                               \
+      geqrf_print_specialization<AViewType, TWViewType, RType>();               \
+                                                                                \
       rocsolverGeqrfWrapper(space, IPIV, A, B);                                 \
-      Kokkos::Profiling::popRegion();                                          \
-    }                                                                          \
+      Kokkos::Profiling::popRegion();                                           \
+    }                                                                           \
   };
 
 KOKKOSLAPACK_GEQRF_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace)
@@ -475,4 +484,6 @@ KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER
 
+#endif // AquiEEP
+
 #endif
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index c246b86a29..bb8183f32c 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -38,8 +38,8 @@
 
 namespace Test {
 
-template <class ViewTypeA, class ViewTypeB, class Device, bool MAGMA>
-void impl_test_geqrf(const char* mode, const char* padding, int N) {
+template <class ViewTypeA, class ViewTypeTW, class Device, bool MAGMA>
+void impl_test_geqrf(int M, int N) {
   using execution_space = typename Device::execution_space;
   using ScalarA         = typename ViewTypeA::value_type;
   using ats             = Kokkos::ArithTraits<ScalarA>;
@@ -48,32 +48,24 @@ void impl_test_geqrf(const char* mode, const char* padding, int N) {
 
   Kokkos::Random_XorShift64_Pool<execution_space> rand_pool(13718);
 
-  int ldda, lddb;
-
-  if (padding[0] == 'Y') {  // rounded up to multiple of 32
-    ldda = ((N + 32 - 1) / 32) * 32;
-    lddb = ldda;
-  } else {
-    ldda = N;
-    lddb = N;
+  int lwork(1);
+  if (std::min(M,N) != 0) {
+    lwork = N;
   }
 
   // Create device views
-  ViewTypeA A("A", ldda, N);
-  ViewTypeB X0("X0", N);
-  ViewTypeB B("B", lddb);
+  ViewTypeA  A   ("A", M, N);
+  ViewTypeTW Tau ("Tau", std::min(M,N));
+  ViewTypeTW Work("Work", lddb);
 
   // Create host mirrors of device views.
-  typename ViewTypeB::HostMirror h_X0 = Kokkos::create_mirror_view(X0);
-  typename ViewTypeB::HostMirror h_B  = Kokkos::create_mirror(B);
+  typename ViewTypeTW::HostMirror h_X0 = Kokkos::create_mirror_view(X0);
+  typename ViewTypeTW::HostMirror h_B  = Kokkos::create_mirror(B);
 
   // Initialize data.
   Kokkos::fill_random(
       A, rand_pool,
       Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max());
-  Kokkos::fill_random(
-      X0, rand_pool,
-      Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max());
 
   // Generate RHS B = A*X0.
   ScalarA alpha = 1.0;
@@ -98,7 +90,7 @@ void impl_test_geqrf(const char* mode, const char* padding, int N) {
 
   // Solve.
   try {
-    KokkosLapack::geqrf(space, A, B, ipiv);
+    KokkosLapack::geqrf(space, A, Tau, Work);
   } catch (const std::runtime_error& error) {
     // Check for expected runtime errors due to:
     // no-pivoting case (note: only MAGMA supports no-pivoting interface)
@@ -148,118 +140,6 @@ void impl_test_geqrf(const char* mode, const char* padding, int N) {
   ASSERT_EQ(test_flag, true);
 }
 
-template <class ViewTypeA, class ViewTypeB, class Device, bool MAGMA>
-void impl_test_geqrf_mrhs(const char* mode, const char* padding, int N,
-                         int nrhs) {
-  using execution_space = typename Device::execution_space;
-  using ScalarA         = typename ViewTypeA::value_type;
-  using ats             = Kokkos::ArithTraits<ScalarA>;
-
-  execution_space space{};
-
-  Kokkos::Random_XorShift64_Pool<execution_space> rand_pool(13718);
-
-  int ldda, lddb;
-
-  if (padding[0] == 'Y') {  // rounded up to multiple of 32
-    ldda = ((N + 32 - 1) / 32) * 32;
-    lddb = ldda;
-  } else {
-    ldda = N;
-    lddb = N;
-  }
-
-  // Create device views
-  ViewTypeA A("A", ldda, N);
-  ViewTypeB X0("X0", N, nrhs);
-  ViewTypeB B("B", lddb, nrhs);
-
-  // Create host mirrors of device views.
-  typename ViewTypeB::HostMirror h_X0 = Kokkos::create_mirror_view(X0);
-  typename ViewTypeB::HostMirror h_B  = Kokkos::create_mirror(B);
-
-  // Initialize data.
-  Kokkos::fill_random(
-      A, rand_pool,
-      Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max());
-  Kokkos::fill_random(
-      X0, rand_pool,
-      Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max());
-
-  // Generate RHS B = A*X0.
-  ScalarA alpha = 1.0;
-  ScalarA beta  = 0.0;
-
-  KokkosBlas::gemm("N", "N", alpha, A, X0, beta, B);
-  Kokkos::fence();
-
-  // Deep copy device view to host view.
-  Kokkos::deep_copy(h_X0, X0);
-
-  // Allocate IPIV view on host
-  using ViewTypeP = typename std::conditional<
-      MAGMA, Kokkos::View<int*, Kokkos::LayoutLeft, Kokkos::HostSpace>,
-      Kokkos::View<int*, Kokkos::LayoutLeft, execution_space>>::type;
-  ViewTypeP ipiv;
-  int Nt = 0;
-  if (mode[0] == 'Y') {
-    Nt   = N;
-    ipiv = ViewTypeP("IPIV", Nt);
-  }
-
-  // Solve.
-  try {
-    KokkosLapack::geqrf(space, A, B, ipiv);
-  } catch (const std::runtime_error& error) {
-    // Check for expected runtime errors due to:
-    // no-pivoting case (note: only MAGMA supports no-pivoting interface)
-    // and no-tpl case
-    bool nopivot_runtime_err = false;
-    bool notpl_runtime_err   = false;
-#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA   // have MAGMA TPL
-#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK  // and have LAPACK TPL
-    nopivot_runtime_err = (!std::is_same<typename Device::memory_space,
-                                         Kokkos::CudaSpace>::value) &&
-                          (ipiv.extent(0) == 0) && (ipiv.data() == nullptr);
-    notpl_runtime_err = false;
-#else
-    notpl_runtime_err = true;
-#endif
-#else                                   // not have MAGMA TPL
-#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK  // but have LAPACK TPL
-    nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr);
-    notpl_runtime_err   = false;
-#else
-    notpl_runtime_err = true;
-#endif
-#endif
-    if (!nopivot_runtime_err && !notpl_runtime_err) FAIL();
-    return;
-  }
-  Kokkos::fence();
-
-  // Get the solution vector.
-  Kokkos::deep_copy(h_B, B);
-
-  // Checking vs ref on CPU, this eps is about 10^-9
-  typedef typename ats::mag_type mag_type;
-  const mag_type eps = 1.0e7 * ats::epsilon();
-  bool test_flag     = true;
-  for (int j = 0; j < nrhs; j++) {
-    for (int i = 0; i < N; i++) {
-      if (ats::abs(h_B(i, j) - h_X0(i, j)) > eps) {
-        test_flag = false;
-        // printf( "    Error %d, pivot %c, padding %c: result( %.15lf ) !=
-        // solution( %.15lf ) at (%ld) at rhs %d\n", N, mode[0], padding[0],
-        // ats::abs(h_B(i,j)), ats::abs(h_X0(i,j)), i, j );
-        break;
-      }
-    }
-    if (test_flag == false) break;
-  }
-  ASSERT_EQ(test_flag, true);
-}
-
 }  // namespace Test
 
 template <class Scalar, class Device>
@@ -317,60 +197,6 @@ int test_geqrf(const char* mode) {
   return 1;
 }
 
-template <class Scalar, class Device>
-int test_geqrf_mrhs(const char* mode) {
-#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \
-    (!defined(KOKKOSKERNELS_ETI_ONLY) &&      \
-     !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
-  using view_type_a_ll = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
-  using view_type_b_ll = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
-
-#if (defined(TEST_CUDA_LAPACK_CPP) &&                                       \
-     defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) ||                         \
-    (defined(TEST_HIP_LAPACK_CPP) &&                                        \
-     defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) ||                        \
-    (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) &&                            \
-     (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \
-      defined(TEST_THREADS_LAPACK_CPP)))
-  Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, false>(
-      &mode[0], "N", 2, 5);  // no padding
-  Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, false>(
-      &mode[0], "N", 13, 5);  // no padding
-  Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, false>(
-      &mode[0], "N", 179, 5);  // no padding
-  Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, false>(
-      &mode[0], "N", 64, 5);  // no padding
-  Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, false>(
-      &mode[0], "N", 1024, 5);  // no padding
-
-// When appropriate run MAGMA specific tests
-#elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_CUDA)
-  if constexpr (std::is_same_v<Kokkos::Cuda,
-                               typename Device::execution_space>) {
-    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "N", 2, 5);  // no padding
-    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "N", 13, 5);  // no padding
-    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "N", 179, 5);  // no padding
-    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "N", 64, 5);  // no padding
-    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "N", 1024, 5);  // no padding
-
-    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "Y", 13, 5);  // padding
-    Test::impl_test_geqrf_mrhs<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "Y", 179, 5);  // padding
-  }
-#endif
-#endif
-
-  // Supress unused parameters on CUDA10
-  (void)mode;
-  return 1;
-}
-
 #if defined(KOKKOSKERNELS_INST_FLOAT) || \
     (!defined(KOKKOSKERNELS_ETI_ONLY) && \
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
@@ -380,13 +206,6 @@ TEST_F(TestCategory, geqrf_float) {
   test_geqrf<float, TestDevice>("Y");  // Partial pivoting
   Kokkos::Profiling::popRegion();
 }
-
-TEST_F(TestCategory, geqrf_mrhs_float) {
-  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_float");
-  test_geqrf_mrhs<float, TestDevice>("N");  // No pivoting
-  test_geqrf_mrhs<float, TestDevice>("Y");  // Partial pivoting
-  Kokkos::Profiling::popRegion();
-}
 #endif
 
 #if defined(KOKKOSKERNELS_INST_DOUBLE) || \
@@ -398,13 +217,6 @@ TEST_F(TestCategory, geqrf_double) {
   test_geqrf<double, TestDevice>("Y");  // Partial pivoting
   Kokkos::Profiling::popRegion();
 }
-
-TEST_F(TestCategory, geqrf_mrhs_double) {
-  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_double");
-  test_geqrf_mrhs<double, TestDevice>("N");  // No pivoting
-  test_geqrf_mrhs<double, TestDevice>("Y");  // Partial pivoting
-  Kokkos::Profiling::popRegion();
-}
 #endif
 
 #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \
@@ -416,13 +228,6 @@ TEST_F(TestCategory, geqrf_complex_double) {
   test_geqrf<Kokkos::complex<double>, TestDevice>("Y");  // Partial pivoting
   Kokkos::Profiling::popRegion();
 }
-
-TEST_F(TestCategory, geqrf_mrhs_complex_double) {
-  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_complex_double");
-  test_geqrf_mrhs<Kokkos::complex<double>, TestDevice>("N");  // No pivoting
-  test_geqrf_mrhs<Kokkos::complex<double>, TestDevice>("Y");  // Partial pivoting
-  Kokkos::Profiling::popRegion();
-}
 #endif
 
 #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \
@@ -434,13 +239,6 @@ TEST_F(TestCategory, geqrf_complex_float) {
   test_geqrf<Kokkos::complex<float>, TestDevice>("Y");  // Partial pivoting
   Kokkos::Profiling::popRegion();
 }
-
-TEST_F(TestCategory, geqrf_mrhs_complex_float) {
-  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_complex_float");
-  test_geqrf_mrhs<Kokkos::complex<float>, TestDevice>("N");  // No pivoting
-  test_geqrf_mrhs<Kokkos::complex<float>, TestDevice>("Y");  // Partial pivoting
-  Kokkos::Profiling::popRegion();
-}
 #endif
 
 #endif  // CUDA+(MAGMA or CUSOLVER) or HIP+ROCSOLVER or LAPACK+HOST

From a8fc00d279992783bb07e6b92b072ec9be9070f7 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Sun, 19 May 2024 04:35:54 -0600
Subject: [PATCH 04/27] Backup

---
 lapack/src/KokkosLapack_geqrf.hpp             |  18 +-
 .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp |  14 +-
 lapack/unit_test/Test_Lapack.hpp              |   1 +
 lapack/unit_test/Test_Lapack_geqrf.hpp        | 155 +++++-------------
 4 files changed, 64 insertions(+), 124 deletions(-)

diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
index 78bea3a4e7..8e422aaa11 100644
--- a/lapack/src/KokkosLapack_geqrf.hpp
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -80,29 +80,31 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
   static_assert(static_cast<int>(TWArray::rank) == 1,
                 "KokkosLapack::geqrf: Tau and Work must have rank 1.");
 
-  int64_t m = A.extent(0);
-  int64_t n = A.extent(1);
+  int64_t m     = A.extent(0);
+  int64_t n     = A.extent(1);
+  int64_t tau0  = Tau.extent(0);
+  int64_t work0 = Work.extent(0);
 
   // Check validity of dimensions
-  if (Tau.extent(0) != std::min(m,n)) {
+  if (tau0 != std::min(m,n)) {
     std::ostringstream os;
     os << "KokkosLapack::geqrf: length of Tau must be equal to min(m,n): "
-       << " A: " << m << " x " << n << ", Tau length = " << Tau.extent(0);
+       << " A: " << m << " x " << n << ", Tau length = " << tau0;
     KokkosKernels::Impl::throw_runtime_exception(os.str());
   }
   if ((m == 0) || (n == 0)) {
-    if (Work.extent(0) < 1) {
+    if (work0 < 1) {
       std::ostringstream os;
       os << "KokkosLapack::geqrf: In case min(m,n) == 0, then Work must have length >= 1: "
-         << " A: " << m << " x " << n << ", Work length = " << Work.extent(0);
+         << " A: " << m << " x " << n << ", Work length = " << work0;
       KokkosKernels::Impl::throw_runtime_exception(os.str());
     }
   }
   else {
-    if (Work.extent(0) < n) {
+    if (work0 < n) {
       std::ostringstream os;
       os << "KokkosLapack::geqrf: In case min(m,n) != 0, then Work must have length >= n: "
-         << " A: " << m << " x " << n << ", Work length = " << Work.extent(0);
+         << " A: " << m << " x " << n << ", Work length = " << work0;
       KokkosKernels::Impl::throw_runtime_exception(os.str());
     }
   }
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index 559e2854a6..69cd6fb4f3 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -58,8 +58,18 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
   const int LDA   = A.stride(1);
   const int LWORK = static_cast<int>(Work.extent(0));
 
-  R() = HostLapack<Scalar>::geqrf(M, N, A.data(), LDA, Tau.data(), Work.data(),
-                                  LWORK);
+  if constexpr (Kokkos::ArithTraits<Scalar>::is_complex) {
+    using MagType = typename Kokkos::ArithTraits<Scalar>::mag_type;
+
+    R() = HostLapack<std::complex<MagType>>::geqrf(M, N,
+             reinterpret_cast<std::complex<MagType>*>(A.data()), LDA,
+             reinterpret_cast<std::complex<MagType>*>(Tau.data()),
+             reinterpret_cast<std::complex<MagType>*>(Work.data()), LWORK);
+  }
+  else {
+    R() = HostLapack<Scalar>::geqrf(M, N, A.data(), LDA, Tau.data(), Work.data(),
+                                    LWORK);
+  }
 }
 
 #define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE)        \
diff --git a/lapack/unit_test/Test_Lapack.hpp b/lapack/unit_test/Test_Lapack.hpp
index 1a717521f8..2bcecaceae 100644
--- a/lapack/unit_test/Test_Lapack.hpp
+++ b/lapack/unit_test/Test_Lapack.hpp
@@ -19,5 +19,6 @@
 #include "Test_Lapack_gesv.hpp"
 #include "Test_Lapack_trtri.hpp"
 #include "Test_Lapack_svd.hpp"
+#include "Test_Lapack_geqrf.hpp"
 
 #endif  // TEST_LAPACK_HPP
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index bb8183f32c..b4e8dc9b9d 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -14,11 +14,10 @@
 //
 //@HEADER
 
-// only enable this test where KokkosLapack supports geqrf:
-// CUDA+(MAGMA or CUSOLVER), HIP+ROCSOLVER and HOST+LAPACK
+// Only enable this test where KokkosLapack supports geqrf:
+// CUDA+CUSOLVER, HIP+ROCSOLVER and HOST+LAPACK
 #if (defined(TEST_CUDA_LAPACK_CPP) &&                                       \
-     (defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) ||                            \
-      defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER))) ||                       \
+      defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) ||                        \
     (defined(TEST_HIP_LAPACK_CPP) &&                                        \
      defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) ||                        \
     (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) &&                            \
@@ -38,95 +37,69 @@
 
 namespace Test {
 
-template <class ViewTypeA, class ViewTypeTW, class Device, bool MAGMA>
-void impl_test_geqrf(int M, int N) {
+template <class ViewTypeA, class ViewTypeTW, class Device>
+void impl_test_geqrf(int m, int n) {
   using execution_space = typename Device::execution_space;
   using ScalarA         = typename ViewTypeA::value_type;
-  using ats             = Kokkos::ArithTraits<ScalarA>;
+  //using ats             = Kokkos::ArithTraits<ScalarA>;
 
   execution_space space{};
 
   Kokkos::Random_XorShift64_Pool<execution_space> rand_pool(13718);
 
   int lwork(1);
-  if (std::min(M,N) != 0) {
-    lwork = N;
+  if (std::min(m,n) != 0) {
+    lwork = n;
   }
 
   // Create device views
-  ViewTypeA  A   ("A", M, N);
-  ViewTypeTW Tau ("Tau", std::min(M,N));
-  ViewTypeTW Work("Work", lddb);
+  ViewTypeA  A   ("A", m, n);
+  ViewTypeTW Tau ("Tau", std::min(m,n));
+  ViewTypeTW Work("Work", lwork);
 
   // Create host mirrors of device views.
-  typename ViewTypeTW::HostMirror h_X0 = Kokkos::create_mirror_view(X0);
-  typename ViewTypeTW::HostMirror h_B  = Kokkos::create_mirror(B);
+  typename ViewTypeTW::HostMirror h_tau  = Kokkos::create_mirror_view(Tau);
+  typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror(Work);
 
   // Initialize data.
-  Kokkos::fill_random(
-      A, rand_pool,
-      Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max());
-
-  // Generate RHS B = A*X0.
-  ScalarA alpha = 1.0;
-  ScalarA beta  = 0.0;
+  if ((m == 3) && (n == 3)) {
+  }
+  else {
+    Kokkos::fill_random( A
+                       , rand_pool
+                       , Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max()
+                       );
+  }
 
-  KokkosBlas::gemv("N", alpha, A, X0, beta, B);
   Kokkos::fence();
 
   // Deep copy device view to host view.
-  Kokkos::deep_copy(h_X0, X0);
+  //Kokkos::deep_copy(h_X0, X0);
 
   // Allocate IPIV view on host
-  using ViewTypeP = typename std::conditional<
-      MAGMA, Kokkos::View<int*, Kokkos::LayoutLeft, Kokkos::HostSpace>,
-      Kokkos::View<int*, Kokkos::LayoutLeft, execution_space>>::type;
+  using ViewTypeP = Kokkos::View<int*, Kokkos::LayoutLeft, execution_space>;
   ViewTypeP ipiv;
-  int Nt = 0;
-  if (mode[0] == 'Y') {
-    Nt   = N;
-    ipiv = ViewTypeP("IPIV", Nt);
-  }
+  int Nt = n;
+  ipiv = ViewTypeP("IPIV", Nt);
 
   // Solve.
   try {
     KokkosLapack::geqrf(space, A, Tau, Work);
-  } catch (const std::runtime_error& error) {
-    // Check for expected runtime errors due to:
-    // no-pivoting case (note: only MAGMA supports no-pivoting interface)
-    // and no-tpl case
-    bool nopivot_runtime_err = false;
-    bool notpl_runtime_err   = false;
-#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA   // have MAGMA TPL
-#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK  // and have LAPACK TPL
-    nopivot_runtime_err = (!std::is_same<typename Device::memory_space,
-                                         Kokkos::CudaSpace>::value) &&
-                          (ipiv.extent(0) == 0) && (ipiv.data() == nullptr);
-    notpl_runtime_err = false;
-#else
-    notpl_runtime_err = true;
-#endif
-#else                                   // not have MAGMA TPL
-#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK  // but have LAPACK TPL
-    nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr);
-    notpl_runtime_err   = false;
-#else
-    notpl_runtime_err = true;
-#endif
-#endif
-    if (!nopivot_runtime_err && !notpl_runtime_err) FAIL();
+  }
+  catch (const std::runtime_error& error) {
     return;
   }
   Kokkos::fence();
 
   // Get the solution vector.
-  Kokkos::deep_copy(h_B, B);
+  //Kokkos::deep_copy(h_B, B);
 
   // Checking vs ref on CPU, this eps is about 10^-9
-  typedef typename ats::mag_type mag_type;
-  const mag_type eps = 3.0e7 * ats::epsilon();
+  //typedef typename ats::mag_type mag_type;
+  //const mag_type eps = 3.0e7 * ats::epsilon();
   bool test_flag     = true;
-  for (int i = 0; i < N; i++) {
+  for (int i = 0; i < n; i++) {
+#if 0
     if (ats::abs(h_B(i) - h_X0(i)) > eps) {
       test_flag = false;
       printf(
@@ -136,6 +109,7 @@ void impl_test_geqrf(int M, int N) {
           ats::abs(h_B(i) - h_X0(i)), eps);
       break;
     }
+#endif
   }
   ASSERT_EQ(test_flag, true);
 }
@@ -143,58 +117,15 @@ void impl_test_geqrf(int M, int N) {
 }  // namespace Test
 
 template <class Scalar, class Device>
-int test_geqrf(const char* mode) {
+void test_geqrf() {
 #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \
     (!defined(KOKKOSKERNELS_ETI_ONLY) &&      \
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
   using view_type_a_ll = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
-  using view_type_b_ll = Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device>;
+  using view_type_tw_ll = Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device>;
 
-#if (defined(TEST_CUDA_LAPACK_CPP) &&                                       \
-     defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) ||                         \
-    (defined(TEST_HIP_LAPACK_CPP) &&                                        \
-     defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) ||                        \
-    (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) &&                            \
-     (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \
-      defined(TEST_THREADS_LAPACK_CPP)))
-  Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, false>(
-      &mode[0], "N", 2);  // no padding
-  Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, false>(
-      &mode[0], "N", 13);  // no padding
-  Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, false>(
-      &mode[0], "N", 179);  // no padding
-  Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, false>(
-      &mode[0], "N", 64);  // no padding
-  Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, false>(
-      &mode[0], "N", 1024);  // no padding
-
-#elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_CUDA)
-  if constexpr (std::is_same_v<Kokkos::Cuda,
-                               typename Device::execution_space>) {
-    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "N", 2);  // no padding
-    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "N", 13);  // no padding
-    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "N", 179);  // no padding
-    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "N", 64);  // no padding
-    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "N", 1024);  // no padding
-
-    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "Y",
-        13);  // padding
-    Test::impl_test_geqrf<view_type_a_ll, view_type_b_ll, Device, true>(
-        &mode[0], "Y",
-        179);  // padding
-  }
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tw_ll, Device>(3, 3);
 #endif
-#endif
-
-  // Supress unused parameters on CUDA10
-  (void)mode;
-  return 1;
 }
 
 #if defined(KOKKOSKERNELS_INST_FLOAT) || \
@@ -202,8 +133,7 @@ int test_geqrf(const char* mode) {
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
 TEST_F(TestCategory, geqrf_float) {
   Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_float");
-  test_geqrf<float, TestDevice>("N");  // No pivoting
-  test_geqrf<float, TestDevice>("Y");  // Partial pivoting
+  test_geqrf<float, TestDevice>();
   Kokkos::Profiling::popRegion();
 }
 #endif
@@ -213,8 +143,7 @@ TEST_F(TestCategory, geqrf_float) {
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
 TEST_F(TestCategory, geqrf_double) {
   Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_double");
-  test_geqrf<double, TestDevice>("N");  // No pivoting
-  test_geqrf<double, TestDevice>("Y");  // Partial pivoting
+  test_geqrf<double, TestDevice>();
   Kokkos::Profiling::popRegion();
 }
 #endif
@@ -224,8 +153,7 @@ TEST_F(TestCategory, geqrf_double) {
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
 TEST_F(TestCategory, geqrf_complex_double) {
   Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double");
-  test_geqrf<Kokkos::complex<double>, TestDevice>("N");  // No pivoting
-  test_geqrf<Kokkos::complex<double>, TestDevice>("Y");  // Partial pivoting
+  test_geqrf<Kokkos::complex<double>, TestDevice>();
   Kokkos::Profiling::popRegion();
 }
 #endif
@@ -235,10 +163,9 @@ TEST_F(TestCategory, geqrf_complex_double) {
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
 TEST_F(TestCategory, geqrf_complex_float) {
   Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float");
-  test_geqrf<Kokkos::complex<float>, TestDevice>("N");  // No pivoting
-  test_geqrf<Kokkos::complex<float>, TestDevice>("Y");  // Partial pivoting
+  test_geqrf<Kokkos::complex<float>, TestDevice>();
   Kokkos::Profiling::popRegion();
 }
 #endif
 
-#endif  // CUDA+(MAGMA or CUSOLVER) or HIP+ROCSOLVER or LAPACK+HOST
+#endif  // CUDA+CUSOLVER or HIP+ROCSOLVER or LAPACK+HOST

From b75c07e82c7151fd69e88bf2b77dbc2bf14c5a48 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Sun, 19 May 2024 21:21:56 -0600
Subject: [PATCH 05/27] Backup

---
 lapack/src/KokkosLapack_geqrf.hpp      |  15 +++-
 lapack/unit_test/Test_Lapack_geqrf.hpp | 108 ++++++++++++++++++++-----
 2 files changed, 101 insertions(+), 22 deletions(-)

diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
index 8e422aaa11..334dbef682 100644
--- a/lapack/src/KokkosLapack_geqrf.hpp
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -40,10 +40,17 @@ namespace KokkosLapack {
 ///                   the geqrf kernels.
 /// \param A [in,out] On entry, the M-by-N matrix to be factorized.
 ///                   On exit, the elements on and above the diagonal contain
-///                   the min(M,N)-by-N upper trapezoidal matrix R (R is
-///                   upper triangular if M >= N); the elements below the
-///                   diagonal, with the array Tau, represent the unitary
-///                   matrix Q as a product of min(M,N) elementary reflectors.
+///                   the min(M,N)-by-N upper trapezoidal matrix R (R is upper
+///                   triangular if M >= N); the elements below the diagonal,
+///                   with the array Tau, represent the unitary matrix Q as a
+///                   product of min(M,N) elementary reflectors. The matrix Q
+///                   is represented as a product of elementary reflectors
+///                     Q = H(1) H(2) . . . H(k), where k = min(M,N).
+///                   Each H(i) has the form
+///                     H(i) = I - Tau * v * v**H
+///                   where tau is a complex scalar, and v is a complex vector
+///                   with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on
+///                   exit in A(i+1:M,i), and tau in Tau(i).
 /// \param Tau [out]  One-dimensional array of size min(M,N) that contains
 ///                   the scalar factors of the elementary reflectors.
 /// \param Work [out] One-dimensional array of size max(1,LWORK).
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index b4e8dc9b9d..0ec9388dd4 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -47,52 +47,124 @@ void impl_test_geqrf(int m, int n) {
 
   Kokkos::Random_XorShift64_Pool<execution_space> rand_pool(13718);
 
-  int lwork(1);
-  if (std::min(m,n) != 0) {
+  int minMN( std::min(m,n) );
+  int lwork( 1 );
+  if (minMN != 0) {
     lwork = n;
   }
 
   // Create device views
   ViewTypeA  A   ("A", m, n);
-  ViewTypeTW Tau ("Tau", std::min(m,n));
+  ViewTypeTW Tau ("Tau", minMN);
   ViewTypeTW Work("Work", lwork);
 
   // Create host mirrors of device views.
+  typename ViewTypeA::HostMirror  h_A    = Kokkos::create_mirror_view(A);
   typename ViewTypeTW::HostMirror h_tau  = Kokkos::create_mirror_view(Tau);
-  typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror(Work);
+  typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work);
 
   // Initialize data.
   if ((m == 3) && (n == 3)) {
+    if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
+      h_A(0, 0).real() = 12.;
+      h_A(0, 1).real() = -51.;
+      h_A(0, 2).real() = 4.;
+
+      h_A(1, 0).real() = 6.;
+      h_A(1, 1).real() = 167.;
+      h_A(1, 2).real() = -68.;
+
+      h_A(2, 0).real() = -4.;
+      h_A(2, 1).real() = 24.;
+      h_A(2, 2).real() = -41.;
+
+      for (int i(0); i < m; ++i) {
+        for (int j(0); j < n; ++j) {
+          h_A(i, j).imag() = 0.;
+	}
+      }
+    }
+    else {
+      h_A(0, 0) = 12.;
+      h_A(0, 1) = -51.;
+      h_A(0, 2) = 4.;
+
+      h_A(1, 0) = 6.;
+      h_A(1, 1) = 167.;
+      h_A(1, 2) = -68.;
+
+      h_A(2, 0) = -4.;
+      h_A(2, 1) = 24.;
+      h_A(2, 2) = -41.;
+    }
+
+    Kokkos::deep_copy(A, h_A);
   }
   else {
     Kokkos::fill_random( A
                        , rand_pool
                        , Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max()
                        );
+    Kokkos::deep_copy(h_A, A);
   }
 
-  Kokkos::fence();
-
-  // Deep copy device view to host view.
-  //Kokkos::deep_copy(h_X0, X0);
+  for (int i(0); i < m; ++i) {
+    for (int j(0); j < n; ++j) {
+      std::cout << "A(" << i << "," << j << ") = " << h_A(i,j) << std::endl;
+    }
+  }
 
-  // Allocate IPIV view on host
-  using ViewTypeP = Kokkos::View<int*, Kokkos::LayoutLeft, execution_space>;
-  ViewTypeP ipiv;
-  int Nt = n;
-  ipiv = ViewTypeP("IPIV", Nt);
+  Kokkos::fence();
 
-  // Solve.
+  // Perform the QR factorization
+  int rc(0);
   try {
-    KokkosLapack::geqrf(space, A, Tau, Work);
+    rc = KokkosLapack::geqrf(space, A, Tau, Work);
   }
-  catch (const std::runtime_error& error) {
+  catch (const std::runtime_error & e) {
+    std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'" << std::endl;
+    FAIL();
     return;
   }
   Kokkos::fence();
 
-  // Get the solution vector.
-  //Kokkos::deep_copy(h_B, B);
+  // Get the results
+  Kokkos::deep_copy(h_A, A);
+  Kokkos::deep_copy(h_tau, Tau);
+  Kokkos::deep_copy(h_work, Work);
+
+  std::cout << "rc = " << rc << std::endl;
+  for (int i(0); i < minMN; ++i) {
+    for (int j(0); j < n; ++j) {
+      std::cout << "R(" << i << "," << j << ") = " << h_A(i,j) << std::endl;
+    }
+  }
+  for (int i(0); i < minMN; ++i) {
+    std::cout << "tau(" << i << ") = " << h_tau[i] << std::endl;
+  }
+  for (int i(0); i < lwork; ++i) {
+    std::cout << "work(" << i << ") = " << h_work[i] << std::endl;
+  }
+
+  // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B).
+  // void gemm( const execution_space                & space
+  //          , const char                             transA[]
+  //          , const char                             transB[]
+  //          , typename AViewType::const_value_type & alpha
+  //          , const AViewType                      & A
+  //          , const BViewType                      & B
+  //          , typename CViewType::const_value_type & beta
+  //          , const CViewType                      & C
+  //          );
+
+  // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}.
+  // void ger( const ExecutionSpace                       & space
+  //         , const char                                   trans[]
+  //         , const typename AViewType::const_value_type & alpha
+  //         , const XViewType                            & x
+  //         , const YViewType                            & y
+  //         , const AViewType                            & A
+  //         );
 
   // Checking vs ref on CPU, this eps is about 10^-9
   //typedef typename ats::mag_type mag_type;

From f9c0c8ef1208e327850a06c73363f712c397ee14 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Sun, 19 May 2024 22:50:09 -0600
Subject: [PATCH 06/27] Backup

---
 lapack/src/KokkosLapack_geqrf.hpp      |  23 ++--
 lapack/unit_test/Test_Lapack_geqrf.hpp | 139 ++++++++++++++++++++-----
 2 files changed, 131 insertions(+), 31 deletions(-)

diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
index 334dbef682..15a5522f37 100644
--- a/lapack/src/KokkosLapack_geqrf.hpp
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -32,9 +32,9 @@ namespace KokkosLapack {
 
 /// \brief Computes a QR factorization of a matrix A
 ///
-/// \tparam ExecutionSpace the space where the kernel will run.
-/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View.
-/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View.
+/// \tparam ExecutionSpace The space where the kernel will run.
+/// \tparam AMatrix        Type of matrix A, as a 2-D Kokkos::View.
+/// \tparam TWArray        Type of arrays Tau and Work, as a 1-D Kokkos::View.
 ///
 /// \param space [in] Execution space instance used to specified how to execute
 ///                   the geqrf kernels.
@@ -58,6 +58,7 @@ namespace KokkosLapack {
 ///                   If min(M,N) != 0, then LWORK must be >= N.
 ///                   If the QR factorization is successful, then the first
 ///                   position of Work contains the optimal LWORK.
+///
 /// \return           = 0: successfull exit
 ///                   < 0: if equal to '-i', the i-th argument had an illegal
 ///                        value
@@ -148,10 +149,17 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
 ///
 /// \param A [in,out] On entry, the M-by-N matrix to be factorized.
 ///                   On exit, the elements on and above the diagonal contain
-///                   the min(M,N)-by-N upper trapezoidal matrix R (R is
-///                   upper triangular if M >= N); the elements below the
-///                   diagonal, with the array Tau, represent the unitary
-///                   matrix Q as a product of min(M,N) elementary reflectors.
+///                   the min(M,N)-by-N upper trapezoidal matrix R (R is upper
+///                   triangular if M >= N); the elements below the diagonal,
+///                   with the array Tau, represent the unitary matrix Q as a
+///                   product of min(M,N) elementary reflectors. The matrix Q
+///                   is represented as a product of elementary reflectors
+///                     Q = H(1) H(2) . . . H(k), where k = min(M,N).
+///                   Each H(i) has the form
+///                     H(i) = I - Tau * v * v**H
+///                   where tau is a complex scalar, and v is a complex vector
+///                   with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on
+///                   exit in A(i+1:M,i), and tau in Tau(i).
 /// \param Tau [out]  One-dimensional array of size min(M,N) that contains
 ///                   the scalar factors of the elementary reflectors.
 /// \param Work [out] One-dimensional array of size max(1,LWORK).
@@ -159,6 +167,7 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
 ///                   If min(M,N) != 0, then LWORK must be >= N.
 ///                   If the QR factorization is successful, then the first
 ///                   position of Work contains the optimal LWORK.
+///
 /// \return           = 0: successfull exit
 ///                   < 0: if equal to '-i', the i-th argument had an illegal
 ///                        value
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index 0ec9388dd4..dfcc7566e9 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -31,12 +31,62 @@
 #include <Kokkos_Random.hpp>
 
 #include <KokkosLapack_geqrf.hpp>
-#include <KokkosBlas2_gemv.hpp>
-#include <KokkosBlas3_gemm.hpp>
+//#include <KokkosBlas2_gemv.hpp>
+//#include <KokkosBlas3_gemm.hpp>
 #include <KokkosKernels_TestUtils.hpp>
 
 namespace Test {
 
+template <class ViewTypeA, class ViewTypeTW>
+void getQR( int                             const   m
+          , int                             const   n
+          , typename ViewTypeA::HostMirror  const & //h_A
+          , typename ViewTypeTW::HostMirror const & //h_tau
+          , typename ViewTypeTW::HostMirror const & //h_work
+          , typename ViewTypeA::HostMirror        & //h_Q
+          , typename ViewTypeA::HostMirror        & h_R
+          , typename ViewTypeA::HostMirror        & //h_QR
+          )
+{
+  using ScalarA = typename ViewTypeA::value_type;
+
+  for (int i(0); i < m; ++i) {
+    for (int j(0); j < n; ++j) {
+      if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
+        h_R(i,j).real() = 0.;
+        h_R(i,j).imag() = 0.;
+      }
+      else {
+        h_R(i,j) = 0.;
+      }
+    }
+  }
+
+  ViewTypeA I("I", m, m);
+  typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I);
+  for (int i(0); i < m; ++i) {
+    for (int j(0); j < m; ++j) {
+      if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
+	if (i == j) {
+          h_I(i,j).real() = 1.;
+	}
+	else {
+          h_I(i,j).real() = 0.;
+	}
+        h_I(i,j).imag() = 0.;
+      }
+      else {
+	if (i == j) {
+          h_I(i,j) = 1.;
+	}
+	else {
+          h_I(i,j) = 0.;
+	}
+      }
+    }
+  }
+}
+
 template <class ViewTypeA, class ViewTypeTW, class Device>
 void impl_test_geqrf(int m, int n) {
   using execution_space = typename Device::execution_space;
@@ -59,43 +109,44 @@ void impl_test_geqrf(int m, int n) {
   ViewTypeTW Work("Work", lwork);
 
   // Create host mirrors of device views.
-  typename ViewTypeA::HostMirror  h_A    = Kokkos::create_mirror_view(A);
-  typename ViewTypeTW::HostMirror h_tau  = Kokkos::create_mirror_view(Tau);
-  typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work);
+  typename ViewTypeA::HostMirror  h_A     = Kokkos::create_mirror_view(A);
+  typename ViewTypeA::HostMirror  h_Aorig = Kokkos::create_mirror_view(A);
+  typename ViewTypeTW::HostMirror h_tau   = Kokkos::create_mirror_view(Tau);
+  typename ViewTypeTW::HostMirror h_work  = Kokkos::create_mirror_view(Work);
 
   // Initialize data.
   if ((m == 3) && (n == 3)) {
     if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
-      h_A(0, 0).real() = 12.;
-      h_A(0, 1).real() = -51.;
-      h_A(0, 2).real() = 4.;
+      h_A(0,0).real() = 12.;
+      h_A(0,1).real() = -51.;
+      h_A(0,2).real() = 4.;
 
-      h_A(1, 0).real() = 6.;
-      h_A(1, 1).real() = 167.;
-      h_A(1, 2).real() = -68.;
+      h_A(1,0).real() = 6.;
+      h_A(1,1).real() = 167.;
+      h_A(1,2).real() = -68.;
 
-      h_A(2, 0).real() = -4.;
-      h_A(2, 1).real() = 24.;
-      h_A(2, 2).real() = -41.;
+      h_A(2,0).real() = -4.;
+      h_A(2,1).real() = 24.;
+      h_A(2,2).real() = -41.;
 
       for (int i(0); i < m; ++i) {
         for (int j(0); j < n; ++j) {
-          h_A(i, j).imag() = 0.;
+          h_A(i,j).imag() = 0.;
 	}
       }
     }
     else {
-      h_A(0, 0) = 12.;
-      h_A(0, 1) = -51.;
-      h_A(0, 2) = 4.;
+      h_A(0,0) = 12.;
+      h_A(0,1) = -51.;
+      h_A(0,2) = 4.;
 
-      h_A(1, 0) = 6.;
-      h_A(1, 1) = 167.;
-      h_A(1, 2) = -68.;
+      h_A(1,0) = 6.;
+      h_A(1,1) = 167.;
+      h_A(1,2) = -68.;
 
-      h_A(2, 0) = -4.;
-      h_A(2, 1) = 24.;
-      h_A(2, 2) = -41.;
+      h_A(2,0) = -4.;
+      h_A(2,1) = 24.;
+      h_A(2,2) = -41.;
     }
 
     Kokkos::deep_copy(A, h_A);
@@ -108,11 +159,15 @@ void impl_test_geqrf(int m, int n) {
     Kokkos::deep_copy(h_A, A);
   }
 
+  Kokkos::deep_copy(h_Aorig, h_A);
+
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
       std::cout << "A(" << i << "," << j << ") = " << h_A(i,j) << std::endl;
     }
   }
+#endif
 
   Kokkos::fence();
 
@@ -126,13 +181,17 @@ void impl_test_geqrf(int m, int n) {
     FAIL();
     return;
   }
+
   Kokkos::fence();
 
+  EXPECT_EQ(rc, 0) << "Failed geqrf() test: rc = " << rc;
+
   // Get the results
   Kokkos::deep_copy(h_A, A);
   Kokkos::deep_copy(h_tau, Tau);
   Kokkos::deep_copy(h_work, Work);
 
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
   std::cout << "rc = " << rc << std::endl;
   for (int i(0); i < minMN; ++i) {
     for (int j(0); j < n; ++j) {
@@ -145,6 +204,38 @@ void impl_test_geqrf(int m, int n) {
   for (int i(0); i < lwork; ++i) {
     std::cout << "work(" << i << ") = " << h_work[i] << std::endl;
   }
+#endif
+
+  ViewTypeA Q ("Q",  m, m);
+  ViewTypeA R ("R",  m, n);
+  ViewTypeA QR("QR", m, n);
+
+  typename ViewTypeA::HostMirror h_Q  = Kokkos::create_mirror_view(Q);
+  typename ViewTypeA::HostMirror h_R  = Kokkos::create_mirror_view(R);
+  typename ViewTypeA::HostMirror h_QR = Kokkos::create_mirror_view(QR);
+
+  getQR<ViewTypeA, ViewTypeTW>(m, n, h_A, h_tau, h_work, h_Q, h_R, h_QR);
+
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+  for (int i(0); i < m; ++i) {
+    for (int j(0); j < m; ++j) {
+      std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl;
+    }
+  }
+  for (int i(0); i < m; ++i) {
+    for (int j(0); j < n; ++j) {
+      std::cout << "R(" << i << "," << j << ") = " << h_R(i,j) << std::endl;
+    }
+  }
+  for (int i(0); i < m; ++i) {
+    for (int j(0); j < n; ++j) {
+      std::cout << "QR(" << i << "," << j << ") = " << h_QR(i,j) << std::endl;
+    }
+  }
+#endif
+
+  if ((m == 3) && (n == 3)) {
+  }
 
   // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B).
   // void gemm( const execution_space                & space

From 4988a3572cd157935758ccacb837b40052bf7706 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Mon, 20 May 2024 10:26:55 -0600
Subject: [PATCH 07/27] Formatting

---
 .../KokkosLapack_geqrf_eti_spec_inst.cpp.in   |   7 +-
 .../KokkosLapack_geqrf_eti_spec_avail.hpp.in  |   6 +-
 lapack/impl/KokkosLapack_geqrf_spec.hpp       |  96 +++----
 lapack/src/KokkosLapack_geqrf.hpp             |  21 +-
 .../KokkosLapack_geqrf_tpl_spec_avail.hpp     |  60 ++---
 .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 237 +++++++++---------
 lapack/unit_test/Test_Lapack_geqrf.hpp        | 169 ++++++-------
 7 files changed, 300 insertions(+), 296 deletions(-)

diff --git a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in
index 9558d0f6cc..2015898d13 100644
--- a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in
+++ b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in
@@ -14,13 +14,12 @@
 //
 //@HEADER
 
-
 #define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true
 #include "KokkosKernels_config.h"
 #include "KokkosLapack_geqrf_spec.hpp"
 
 namespace KokkosLapack {
 namespace Impl {
-@LAPACK_GEQRF_ETI_INST_BLOCK@
-  } //IMPL 
-} //Kokkos
+@LAPACK_GEQRF_ETI_INST_BLOCK @
+}  // namespace Impl
+}  // namespace KokkosLapack
diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in
index c4619b9c07..2726dddd80 100644
--- a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in
+++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in
@@ -18,7 +18,7 @@
 #define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_
 namespace KokkosLapack {
 namespace Impl {
-@LAPACK_GEQRF_ETI_AVAIL_BLOCK@
-  } //IMPL 
-} //Kokkos
+@LAPACK_GEQRF_ETI_AVAIL_BLOCK @
+}  // namespace Impl
+}  // namespace KokkosLapack
 #endif
diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp
index 98d532489b..6970c6dd2c 100644
--- a/lapack/impl/KokkosLapack_geqrf_spec.hpp
+++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp
@@ -42,20 +42,20 @@ struct geqrf_eti_spec_avail {
 // We may spread out definitions (see _INST macro below) across one or
 // more .cpp files.
 //
-#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE,       \
-                                         EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \
-  template <>                                                             \
-  struct geqrf_eti_spec_avail<                                            \
-      EXEC_SPACE_TYPE,                                                    \
-      Kokkos::View<SCALAR_TYPE **, LAYOUT_TYPE,                           \
-                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
-      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                            \
-                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
-      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,           \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {            \
-    enum : bool { value = true };                                         \
+#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE,        \
+                                          EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \
+  template <>                                                              \
+  struct geqrf_eti_spec_avail<                                             \
+      EXEC_SPACE_TYPE,                                                     \
+      Kokkos::View<SCALAR_TYPE **, LAYOUT_TYPE,                            \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,        \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                             \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,        \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,            \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {             \
+    enum : bool { value = true };                                          \
   };
 
 // Include the actual specialization declarations
@@ -66,14 +66,15 @@ namespace KokkosLapack {
 namespace Impl {
 
 // Unification layer
-template <class ExecutionSpace, class AMatrix, class TWArray, class RType,
-          bool tpl_spec_avail =
-              geqrf_tpl_spec_avail<ExecutionSpace, AMatrix, TWArray, RType>::value,
-          bool eti_spec_avail =
-              geqrf_eti_spec_avail<ExecutionSpace, AMatrix, TWArray, RType>::value>
+template <
+    class ExecutionSpace, class AMatrix, class TWArray, class RType,
+    bool tpl_spec_avail =
+        geqrf_tpl_spec_avail<ExecutionSpace, AMatrix, TWArray, RType>::value,
+    bool eti_spec_avail =
+        geqrf_eti_spec_avail<ExecutionSpace, AMatrix, TWArray, RType>::value>
 struct GEQRF {
-  static void geqrf(const ExecutionSpace &space, const AMatrix &A, const TWArray &Tau,
-                    const TWArray &Work, const RType &R);
+  static void geqrf(const ExecutionSpace &space, const AMatrix &A,
+                    const TWArray &Tau, const TWArray &Work, const RType &R);
 };
 
 #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY
@@ -81,9 +82,10 @@ struct GEQRF {
 // Unification layer
 template <class ExecutionSpace, class AMatrix, class TWArray, class RType>
 struct GEQRF<ExecutionSpace, AMatrix, TWArray, RType, false,
-            KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {
+             KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {
   static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */,
-                    const TWArray & /* Tau */, const TWArray & /* Work */, const RType & /* R */) {
+                    const TWArray & /* Tau */, const TWArray & /* Work */,
+                    const RType & /* R */) {
     // NOTE: Might add the implementation of KokkosLapack::geqrf later
     throw std::runtime_error(
         "No fallback implementation of GEQRF (general QR factorization) "
@@ -102,32 +104,32 @@ struct GEQRF<ExecutionSpace, AMatrix, TWArray, RType, false,
 // We may spread out definitions (see _DEF macro below) across one or
 // more .cpp files.
 //
-#define KOKKOSLAPACK_GEQRF_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE,       \
-                                        EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \
-  extern template struct GEQRF<                                          \
-      EXEC_SPACE_TYPE,                                                   \
-      Kokkos::View<SCALAR_TYPE **, LAYOUT_TYPE,                          \
-                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
-      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                           \
-                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
-      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,          \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
+#define KOKKOSLAPACK_GEQRF_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE,        \
+                                         EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \
+  extern template struct GEQRF<                                           \
+      EXEC_SPACE_TYPE,                                                    \
+      Kokkos::View<SCALAR_TYPE **, LAYOUT_TYPE,                           \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
+      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                            \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
+      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,           \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
       false, true>;
 
-#define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE,       \
-                                        EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \
-  template struct GEQRF<                                                 \
-      EXEC_SPACE_TYPE,                                                   \
-      Kokkos::View<SCALAR_TYPE **, LAYOUT_TYPE,                          \
-                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
-      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                           \
-                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,      \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
-      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,          \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,             \
+#define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE,        \
+                                         EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \
+  template struct GEQRF<                                                  \
+      EXEC_SPACE_TYPE,                                                    \
+      Kokkos::View<SCALAR_TYPE **, LAYOUT_TYPE,                           \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
+      Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                            \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
+      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,           \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
       false, true>;
 
 #include <KokkosLapack_geqrf_tpl_spec_decl.hpp>
diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
index 15a5522f37..506d3c60b7 100644
--- a/lapack/src/KokkosLapack_geqrf.hpp
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -94,7 +94,7 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
   int64_t work0 = Work.extent(0);
 
   // Check validity of dimensions
-  if (tau0 != std::min(m,n)) {
+  if (tau0 != std::min(m, n)) {
     std::ostringstream os;
     os << "KokkosLapack::geqrf: length of Tau must be equal to min(m,n): "
        << " A: " << m << " x " << n << ", Tau length = " << tau0;
@@ -103,15 +103,16 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
   if ((m == 0) || (n == 0)) {
     if (work0 < 1) {
       std::ostringstream os;
-      os << "KokkosLapack::geqrf: In case min(m,n) == 0, then Work must have length >= 1: "
+      os << "KokkosLapack::geqrf: In case min(m,n) == 0, then Work must have "
+            "length >= 1: "
          << " A: " << m << " x " << n << ", Work length = " << work0;
       KokkosKernels::Impl::throw_runtime_exception(os.str());
     }
-  }
-  else {
+  } else {
     if (work0 < n) {
       std::ostringstream os;
-      os << "KokkosLapack::geqrf: In case min(m,n) != 0, then Work must have length >= n: "
+      os << "KokkosLapack::geqrf: In case min(m,n) != 0, then Work must have "
+            "length >= n: "
          << " A: " << m << " x " << n << ", Work length = " << work0;
       KokkosKernels::Impl::throw_runtime_exception(os.str());
     }
@@ -121,9 +122,9 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
       typename AMatrix::non_const_value_type**, typename AMatrix::array_layout,
       typename AMatrix::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
       AMatrix_Internal;
-  typedef Kokkos::View<typename TWArray::non_const_value_type*,
-                       typename TWArray::array_layout, typename TWArray::device_type,
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged> >
+  typedef Kokkos::View<
+      typename TWArray::non_const_value_type*, typename TWArray::array_layout,
+      typename TWArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
       TWArray_Internal;
   AMatrix_Internal A_i    = A;
   TWArray_Internal Tau_i  = Tau;
@@ -137,7 +138,9 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
   int result;
   RViewInternalType R = RViewInternalType(&result);
 
-  KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TWArray_Internal, RViewInternalType>::geqrf(space, A_i, Tau_i, Work_i, R);
+  KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TWArray_Internal,
+                            RViewInternalType>::geqrf(space, A_i, Tau_i, Work_i,
+                                                      R);
 
   return result;
 }
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
index aaa465a814..8a1fcf618d 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
@@ -42,13 +42,13 @@ struct geqrf_tpl_spec_avail {
   };
 
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft,
-                                        Kokkos::HostSpace)
+                                         Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft,
-                                        Kokkos::HostSpace)
+                                         Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex<double>,
-                                        Kokkos::LayoutLeft, Kokkos::HostSpace)
+                                         Kokkos::LayoutLeft, Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex<float>,
-                                        Kokkos::LayoutLeft, Kokkos::HostSpace)
+                                         Kokkos::LayoutLeft, Kokkos::HostSpace)
 #endif
 }  // namespace Impl
 }  // namespace KokkosLapack
@@ -64,20 +64,20 @@ namespace Impl {
   struct geqrf_tpl_spec_avail<                                               \
       Kokkos::Cuda,                                                          \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>, \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                 \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {             \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {               \
     enum : bool { value = true };                                            \
   };
 
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft,
-                                       Kokkos::CudaSpace)
+                                        Kokkos::CudaSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft,
-                                       Kokkos::CudaSpace)
+                                        Kokkos::CudaSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex<double>,
-                                       Kokkos::LayoutLeft, Kokkos::CudaSpace)
+                                        Kokkos::LayoutLeft, Kokkos::CudaSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex<float>,
-                                       Kokkos::LayoutLeft, Kokkos::CudaSpace)
+                                        Kokkos::LayoutLeft, Kokkos::CudaSpace)
 }  // namespace Impl
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_MAGMA
@@ -92,32 +92,34 @@ namespace Impl {
   struct geqrf_tpl_spec_avail<                                               \
       Kokkos::Cuda,                                                          \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>, \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                 \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {             \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {               \
     enum : bool { value = true };                                            \
   };
 
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft,
-                                          Kokkos::CudaSpace)
+                                           Kokkos::CudaSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft,
-                                          Kokkos::CudaSpace)
+                                           Kokkos::CudaSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex<double>,
-                                          Kokkos::LayoutLeft, Kokkos::CudaSpace)
+                                           Kokkos::LayoutLeft,
+                                           Kokkos::CudaSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex<float>,
-                                          Kokkos::LayoutLeft, Kokkos::CudaSpace)
+                                           Kokkos::LayoutLeft,
+                                           Kokkos::CudaSpace)
 
 #if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft,
-                                          Kokkos::CudaUVMSpace)
+                                           Kokkos::CudaUVMSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft,
-                                          Kokkos::CudaUVMSpace)
+                                           Kokkos::CudaUVMSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex<double>,
-                                          Kokkos::LayoutLeft,
-                                          Kokkos::CudaUVMSpace)
+                                           Kokkos::LayoutLeft,
+                                           Kokkos::CudaUVMSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex<float>,
-                                          Kokkos::LayoutLeft,
-                                          Kokkos::CudaUVMSpace)
+                                           Kokkos::LayoutLeft,
+                                           Kokkos::CudaUVMSpace)
 #endif
 
 }  // namespace Impl
@@ -135,20 +137,22 @@ namespace Impl {
   struct geqrf_tpl_spec_avail<                                                \
       Kokkos::HIP,                                                            \
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,   \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >,                 \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,    \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> > > {              \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {                \
     enum : bool { value = true };                                             \
   };
 
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft,
-                                           Kokkos::HIPSpace)
+                                            Kokkos::HIPSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft,
-                                           Kokkos::HIPSpace)
+                                            Kokkos::HIPSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex<double>,
-                                           Kokkos::LayoutLeft, Kokkos::HIPSpace)
+                                            Kokkos::LayoutLeft,
+                                            Kokkos::HIPSpace)
 KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex<float>,
-                                           Kokkos::LayoutLeft, Kokkos::HIPSpace)
+                                            Kokkos::LayoutLeft,
+                                            Kokkos::HIPSpace)
 
 }  // namespace Impl
 }  // namespace KokkosLapack
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index 69cd6fb4f3..7fbc5ff391 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -26,11 +26,14 @@ inline void geqrf_print_specialization() {
 #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA
   printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n",
-         typeid(AViewType).name(), typeid(TWViewType).name(), typeid(RType).name());
+         typeid(AViewType).name(), typeid(TWViewType).name(),
+         typeid(RType).name());
 #else
 #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK
-  printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n",
-         typeid(AViewType).name(), typeid(TWViewType).name(), typeid(RType).name());
+  printf(
+      "KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n",
+      typeid(AViewType).name(), typeid(TWViewType).name(),
+      typeid(RType).name());
 #endif
 #endif
 #endif
@@ -61,14 +64,13 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
   if constexpr (Kokkos::ArithTraits<Scalar>::is_complex) {
     using MagType = typename Kokkos::ArithTraits<Scalar>::mag_type;
 
-    R() = HostLapack<std::complex<MagType>>::geqrf(M, N,
-             reinterpret_cast<std::complex<MagType>*>(A.data()), LDA,
-             reinterpret_cast<std::complex<MagType>*>(Tau.data()),
-             reinterpret_cast<std::complex<MagType>*>(Work.data()), LWORK);
-  }
-  else {
-    R() = HostLapack<Scalar>::geqrf(M, N, A.data(), LDA, Tau.data(), Work.data(),
-                                    LWORK);
+    R() = HostLapack<std::complex<MagType>>::geqrf(
+        M, N, reinterpret_cast<std::complex<MagType>*>(A.data()), LDA,
+        reinterpret_cast<std::complex<MagType>*>(Tau.data()),
+        reinterpret_cast<std::complex<MagType>*>(Work.data()), LWORK);
+  } else {
+    R() = HostLapack<Scalar>::geqrf(M, N, A.data(), LDA, Tau.data(),
+                                    Work.data(), LWORK);
   }
 }
 
@@ -97,13 +99,12 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
     using TWViewType =                                                         \
         Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,    \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using RType =                                                              \
-        Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,              \
-                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+    using RType = Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,    \
+                               Kokkos::MemoryTraits<Kokkos::Unmanaged>>;       \
                                                                                \
     static void geqrf(const EXECSPACE& /* space */, const AViewType& A,        \
                       const TWViewType& Tau, const TWViewType& Work,           \
-                      const RType& R) {	                                       \
+                      const RType& R) {                                        \
       Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR  \
                                     "]");                                      \
       geqrf_print_specialization<AViewType, TWViewType, RType>();              \
@@ -114,42 +115,42 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
 
 #if defined(KOKKOS_ENABLE_SERIAL)
 KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial,
-                         Kokkos::HostSpace)
+                          Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial,
-                         Kokkos::HostSpace)
+                          Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<float>, Kokkos::LayoutLeft,
-                         Kokkos::Serial, Kokkos::HostSpace)
+                          Kokkos::Serial, Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<double>, Kokkos::LayoutLeft,
-                         Kokkos::Serial, Kokkos::HostSpace)
+                          Kokkos::Serial, Kokkos::HostSpace)
 #endif
 
 #if defined(KOKKOS_ENABLE_OPENMP)
 KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP,
-                         Kokkos::HostSpace)
+                          Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP,
-                         Kokkos::HostSpace)
+                          Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<float>, Kokkos::LayoutLeft,
-                         Kokkos::OpenMP, Kokkos::HostSpace)
+                          Kokkos::OpenMP, Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<double>, Kokkos::LayoutLeft,
-                         Kokkos::OpenMP, Kokkos::HostSpace)
+                          Kokkos::OpenMP, Kokkos::HostSpace)
 #endif
 
 #if defined(KOKKOS_ENABLE_THREADS)
 KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads,
-                         Kokkos::HostSpace)
+                          Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads,
-                         Kokkos::HostSpace)
+                          Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<float>, Kokkos::LayoutLeft,
-                         Kokkos::Threads, Kokkos::HostSpace)
+                          Kokkos::Threads, Kokkos::HostSpace)
 KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<double>, Kokkos::LayoutLeft,
-                         Kokkos::Threads, Kokkos::HostSpace)
+                          Kokkos::Threads, Kokkos::HostSpace)
 #endif
 
 }  // namespace Impl
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_LAPACK
 
-#if 0 // AquiEEP
+#if 0  // AquiEEP
 
 // MAGMA
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA
@@ -207,33 +208,33 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A,
 }
 
 #define KOKKOSLAPACK_GEQRF_MAGMA(SCALAR, LAYOUT, MEM_SPACE)                    \
-  template <>                                                                 \
+  template <>                                                                  \
   struct GEQRF<                                                                \
-      Kokkos::Cuda,                                                           \
-      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
-      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
-      true,                                                                   \
+      Kokkos::Cuda,                                                            \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,   \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      true,                                                                    \
       geqrf_eti_spec_avail<                                                    \
-          Kokkos::Cuda,                                                       \
-          Kokkos::View<SCALAR**, LAYOUT,                                      \
-                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,               \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
-          Kokkos::View<SCALAR*, LAYOUT,                                      \
-                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,               \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {    \
-    using AViewType = Kokkos::View<SCALAR**, LAYOUT,                          \
-                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,   \
-                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;  \
-    using TWViewType = Kokkos::View<SCALAR*, LAYOUT,                          \
-                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,   \
-                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;  \
-                                                                              \
+          Kokkos::Cuda,                                                        \
+          Kokkos::View<SCALAR**, LAYOUT,                                       \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<SCALAR*, LAYOUT,                                        \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
+    using AViewType = Kokkos::View<SCALAR**, LAYOUT,                           \
+                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
+                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
+    using TWViewType =                                                         \
+        Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+                                                                               \
     static void geqrf(const Kokkos::Cuda& space, const AViewType& A,           \
-                     const TWViewType& Tau, const TWViewType& Work) {             \
-      magmaGeqrfWrapper(space, A, Tau, Work);                                    \
-    }                                                                         \
+                      const TWViewType& Tau, const TWViewType& Work) {         \
+      magmaGeqrfWrapper(space, A, Tau, Work);                                  \
+    }                                                                          \
   };
 
 KOKKOSLAPACK_GEQRF_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace)
@@ -339,39 +340,39 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
   KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL));
 }
 
-#define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE)                  \
-  template <>                                                                   \
-  struct GEQRF<                                                                 \
-      Kokkos::Cuda,                                                             \
-      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,   \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                    \
-      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                    \
-      true,                                                                     \
-      geqrf_eti_spec_avail<                                                     \
-          Kokkos::Cuda,                                                         \
-          Kokkos::View<SCALAR**, LAYOUT,                                        \
-                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                 \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                \
-          Kokkos::View<SCALAR*, LAYOUT,                                         \
-                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                 \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {      \
-    using AViewType = Kokkos::View<SCALAR**, LAYOUT,                            \
-                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,     \
-                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;    \
-    using TWViewType = Kokkos::View<SCALAR*, LAYOUT,                            \
-                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,     \
-                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;    \
-                                                                                \
-    static void geqrf(const Kokkos::Cuda& space, const AViewType& A,            \
-                     const TWViewType& Tau, const TWViewType& Work) {           \
-      Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR \
-                                    "]");                                       \
-      geqrf_print_specialization<AViewType, TWViewType, RType>();               \
-                                                                                \
-      cusolverGeqrfWrapper(space, IPIV, A, B);                                  \
-      Kokkos::Profiling::popRegion();                                           \
-    }                                                                           \
+#define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE)                 \
+  template <>                                                                  \
+  struct GEQRF<                                                                \
+      Kokkos::Cuda,                                                            \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,   \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      true,                                                                    \
+      geqrf_eti_spec_avail<                                                    \
+          Kokkos::Cuda,                                                        \
+          Kokkos::View<SCALAR**, LAYOUT,                                       \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<SCALAR*, LAYOUT,                                        \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
+    using AViewType = Kokkos::View<SCALAR**, LAYOUT,                           \
+                                   Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
+                                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
+    using TWViewType =                                                         \
+        Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+                                                                               \
+    static void geqrf(const Kokkos::Cuda& space, const AViewType& A,           \
+                      const TWViewType& Tau, const TWViewType& Work) {         \
+      Kokkos::Profiling::pushRegion(                                           \
+          "KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR "]");                    \
+      geqrf_print_specialization<AViewType, TWViewType, RType>();              \
+                                                                               \
+      cusolverGeqrfWrapper(space, IPIV, A, B);                                 \
+      Kokkos::Profiling::popRegion();                                          \
+    }                                                                          \
   };
 
 KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace)
@@ -448,39 +449,39 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
   KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL));
 }
 
-#define KOKKOSLAPACK_GEQRF_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE)                 \
-  template <>                                                                   \
-  struct GEQRF<                                                                 \
-      Kokkos::HIP,                                                              \
-      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,    \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                    \
-      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,     \
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                    \
-      true,                                                                     \
-      geqrf_eti_spec_avail<                                                     \
-          Kokkos::HIP,                                                          \
-          Kokkos::View<SCALAR**, LAYOUT,                                        \
-                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                  \
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                \
-          Kokkos::View<SCALAR*, LAYOUT,                                         \
+#define KOKKOSLAPACK_GEQRF_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE)                \
+  template <>                                                                  \
+  struct GEQRF<                                                                \
+      Kokkos::HIP,                                                             \
+      Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,   \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,    \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      true,                                                                    \
+      geqrf_eti_spec_avail<                                                    \
+          Kokkos::HIP,                                                         \
+          Kokkos::View<SCALAR**, LAYOUT,                                       \
+                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<SCALAR*, LAYOUT,                                        \
                        Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                  \ 
-                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {      \
-    using AViewType =                                                           \
-        Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,  \
-                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                  \
-    using TWViewType =                                                          \
-        Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,   \
-                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                  \
-                                                                                \
-    static void geqrf(const Kokkos::HIP& space, const AViewType& A,             \
-                     const TWViewType& Tau, const TWViewType& Work) {           \
-      Kokkos::Profiling::pushRegion(                                            \
-          "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]");                    \
-      geqrf_print_specialization<AViewType, TWViewType, RType>();               \
-                                                                                \
-      rocsolverGeqrfWrapper(space, IPIV, A, B);                                 \
-      Kokkos::Profiling::popRegion();                                           \
-    }                                                                           \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
+    using AViewType =                                                          \
+        Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+    using TWViewType =                                                         \
+        Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,  \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+                                                                               \
+    static void geqrf(const Kokkos::HIP& space, const AViewType& A,            \
+                      const TWViewType& Tau, const TWViewType& Work) {         \
+      Kokkos::Profiling::pushRegion(                                           \
+          "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]");                   \
+      geqrf_print_specialization<AViewType, TWViewType, RType>();              \
+                                                                               \
+      rocsolverGeqrfWrapper(space, IPIV, A, B);                                \
+      Kokkos::Profiling::popRegion();                                          \
+    }                                                                          \
   };
 
 KOKKOSLAPACK_GEQRF_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace)
@@ -494,6 +495,6 @@ KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER
 
-#endif // AquiEEP
+#endif  // AquiEEP
 
 #endif
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index dfcc7566e9..de662365ac 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -17,7 +17,7 @@
 // Only enable this test where KokkosLapack supports geqrf:
 // CUDA+CUSOLVER, HIP+ROCSOLVER and HOST+LAPACK
 #if (defined(TEST_CUDA_LAPACK_CPP) &&                                       \
-      defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) ||                        \
+     defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) ||                         \
     (defined(TEST_HIP_LAPACK_CPP) &&                                        \
      defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) ||                        \
     (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) &&                            \
@@ -38,26 +38,27 @@
 namespace Test {
 
 template <class ViewTypeA, class ViewTypeTW>
-void getQR( int                             const   m
-          , int                             const   n
-          , typename ViewTypeA::HostMirror  const & //h_A
-          , typename ViewTypeTW::HostMirror const & //h_tau
-          , typename ViewTypeTW::HostMirror const & //h_work
-          , typename ViewTypeA::HostMirror        & //h_Q
-          , typename ViewTypeA::HostMirror        & h_R
-          , typename ViewTypeA::HostMirror        & //h_QR
-          )
-{
+void getQR(int const m, int const n,
+           typename ViewTypeA::HostMirror const&  // h_A
+           ,
+           typename ViewTypeTW::HostMirror const&  // h_tau
+           ,
+           typename ViewTypeTW::HostMirror const&  // h_work
+           ,
+           typename ViewTypeA::HostMirror&  // h_Q
+           ,
+           typename ViewTypeA::HostMirror& h_R,
+           typename ViewTypeA::HostMirror&  // h_QR
+) {
   using ScalarA = typename ViewTypeA::value_type;
 
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
       if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
-        h_R(i,j).real() = 0.;
-        h_R(i,j).imag() = 0.;
-      }
-      else {
-        h_R(i,j) = 0.;
+        h_R(i, j).real() = 0.;
+        h_R(i, j).imag() = 0.;
+      } else {
+        h_R(i, j) = 0.;
       }
     }
   }
@@ -67,21 +68,18 @@ void getQR( int                             const   m
   for (int i(0); i < m; ++i) {
     for (int j(0); j < m; ++j) {
       if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
-	if (i == j) {
-          h_I(i,j).real() = 1.;
-	}
-	else {
-          h_I(i,j).real() = 0.;
-	}
-        h_I(i,j).imag() = 0.;
-      }
-      else {
-	if (i == j) {
-          h_I(i,j) = 1.;
-	}
-	else {
-          h_I(i,j) = 0.;
-	}
+        if (i == j) {
+          h_I(i, j).real() = 1.;
+        } else {
+          h_I(i, j).real() = 0.;
+        }
+        h_I(i, j).imag() = 0.;
+      } else {
+        if (i == j) {
+          h_I(i, j) = 1.;
+        } else {
+          h_I(i, j) = 0.;
+        }
       }
     }
   }
@@ -91,80 +89,77 @@ template <class ViewTypeA, class ViewTypeTW, class Device>
 void impl_test_geqrf(int m, int n) {
   using execution_space = typename Device::execution_space;
   using ScalarA         = typename ViewTypeA::value_type;
-  //using ats             = Kokkos::ArithTraits<ScalarA>;
+  // using ats             = Kokkos::ArithTraits<ScalarA>;
 
   execution_space space{};
 
   Kokkos::Random_XorShift64_Pool<execution_space> rand_pool(13718);
 
-  int minMN( std::min(m,n) );
-  int lwork( 1 );
+  int minMN(std::min(m, n));
+  int lwork(1);
   if (minMN != 0) {
     lwork = n;
   }
 
   // Create device views
-  ViewTypeA  A   ("A", m, n);
-  ViewTypeTW Tau ("Tau", minMN);
+  ViewTypeA A("A", m, n);
+  ViewTypeTW Tau("Tau", minMN);
   ViewTypeTW Work("Work", lwork);
 
   // Create host mirrors of device views.
-  typename ViewTypeA::HostMirror  h_A     = Kokkos::create_mirror_view(A);
-  typename ViewTypeA::HostMirror  h_Aorig = Kokkos::create_mirror_view(A);
-  typename ViewTypeTW::HostMirror h_tau   = Kokkos::create_mirror_view(Tau);
-  typename ViewTypeTW::HostMirror h_work  = Kokkos::create_mirror_view(Work);
+  typename ViewTypeA::HostMirror h_A     = Kokkos::create_mirror_view(A);
+  typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A);
+  typename ViewTypeTW::HostMirror h_tau  = Kokkos::create_mirror_view(Tau);
+  typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work);
 
   // Initialize data.
   if ((m == 3) && (n == 3)) {
     if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
-      h_A(0,0).real() = 12.;
-      h_A(0,1).real() = -51.;
-      h_A(0,2).real() = 4.;
+      h_A(0, 0).real() = 12.;
+      h_A(0, 1).real() = -51.;
+      h_A(0, 2).real() = 4.;
 
-      h_A(1,0).real() = 6.;
-      h_A(1,1).real() = 167.;
-      h_A(1,2).real() = -68.;
+      h_A(1, 0).real() = 6.;
+      h_A(1, 1).real() = 167.;
+      h_A(1, 2).real() = -68.;
 
-      h_A(2,0).real() = -4.;
-      h_A(2,1).real() = 24.;
-      h_A(2,2).real() = -41.;
+      h_A(2, 0).real() = -4.;
+      h_A(2, 1).real() = 24.;
+      h_A(2, 2).real() = -41.;
 
       for (int i(0); i < m; ++i) {
         for (int j(0); j < n; ++j) {
-          h_A(i,j).imag() = 0.;
-	}
+          h_A(i, j).imag() = 0.;
+        }
       }
-    }
-    else {
-      h_A(0,0) = 12.;
-      h_A(0,1) = -51.;
-      h_A(0,2) = 4.;
-
-      h_A(1,0) = 6.;
-      h_A(1,1) = 167.;
-      h_A(1,2) = -68.;
-
-      h_A(2,0) = -4.;
-      h_A(2,1) = 24.;
-      h_A(2,2) = -41.;
+    } else {
+      h_A(0, 0) = 12.;
+      h_A(0, 1) = -51.;
+      h_A(0, 2) = 4.;
+
+      h_A(1, 0) = 6.;
+      h_A(1, 1) = 167.;
+      h_A(1, 2) = -68.;
+
+      h_A(2, 0) = -4.;
+      h_A(2, 1) = 24.;
+      h_A(2, 2) = -41.;
     }
 
     Kokkos::deep_copy(A, h_A);
-  }
-  else {
-    Kokkos::fill_random( A
-                       , rand_pool
-                       , Kokkos::rand<Kokkos::Random_XorShift64<execution_space>, ScalarA>::max()
-                       );
+  } else {
+    Kokkos::fill_random(A, rand_pool,
+                        Kokkos::rand<Kokkos::Random_XorShift64<execution_space>,
+                                     ScalarA>::max());
     Kokkos::deep_copy(h_A, A);
   }
 
   Kokkos::deep_copy(h_Aorig, h_A);
 
-#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 1  // def HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
-      std::cout << "A(" << i << "," << j << ") = " << h_A(i,j) << std::endl;
+      std::cout << "A(" << i << "," << j << ") = " << h_A(i, j) << std::endl;
     }
   }
 #endif
@@ -175,9 +170,9 @@ void impl_test_geqrf(int m, int n) {
   int rc(0);
   try {
     rc = KokkosLapack::geqrf(space, A, Tau, Work);
-  }
-  catch (const std::runtime_error & e) {
-    std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'" << std::endl;
+  } catch (const std::runtime_error& e) {
+    std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'"
+              << std::endl;
     FAIL();
     return;
   }
@@ -191,11 +186,11 @@ void impl_test_geqrf(int m, int n) {
   Kokkos::deep_copy(h_tau, Tau);
   Kokkos::deep_copy(h_work, Work);
 
-#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 1  // def HAVE_KOKKOSKERNELS_DEBUG
   std::cout << "rc = " << rc << std::endl;
   for (int i(0); i < minMN; ++i) {
     for (int j(0); j < n; ++j) {
-      std::cout << "R(" << i << "," << j << ") = " << h_A(i,j) << std::endl;
+      std::cout << "R(" << i << "," << j << ") = " << h_A(i, j) << std::endl;
     }
   }
   for (int i(0); i < minMN; ++i) {
@@ -206,8 +201,8 @@ void impl_test_geqrf(int m, int n) {
   }
 #endif
 
-  ViewTypeA Q ("Q",  m, m);
-  ViewTypeA R ("R",  m, n);
+  ViewTypeA Q("Q", m, m);
+  ViewTypeA R("R", m, n);
   ViewTypeA QR("QR", m, n);
 
   typename ViewTypeA::HostMirror h_Q  = Kokkos::create_mirror_view(Q);
@@ -216,20 +211,20 @@ void impl_test_geqrf(int m, int n) {
 
   getQR<ViewTypeA, ViewTypeTW>(m, n, h_A, h_tau, h_work, h_Q, h_R, h_QR);
 
-#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 1  // def HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < m; ++j) {
-      std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl;
+      std::cout << "Q(" << i << "," << j << ") = " << h_Q(i, j) << std::endl;
     }
   }
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
-      std::cout << "R(" << i << "," << j << ") = " << h_R(i,j) << std::endl;
+      std::cout << "R(" << i << "," << j << ") = " << h_R(i, j) << std::endl;
     }
   }
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
-      std::cout << "QR(" << i << "," << j << ") = " << h_QR(i,j) << std::endl;
+      std::cout << "QR(" << i << "," << j << ") = " << h_QR(i, j) << std::endl;
     }
   }
 #endif
@@ -258,9 +253,9 @@ void impl_test_geqrf(int m, int n) {
   //         );
 
   // Checking vs ref on CPU, this eps is about 10^-9
-  //typedef typename ats::mag_type mag_type;
-  //const mag_type eps = 3.0e7 * ats::epsilon();
-  bool test_flag     = true;
+  // typedef typename ats::mag_type mag_type;
+  // const mag_type eps = 3.0e7 * ats::epsilon();
+  bool test_flag = true;
   for (int i = 0; i < n; i++) {
 #if 0
     if (ats::abs(h_B(i) - h_X0(i)) > eps) {
@@ -284,7 +279,7 @@ void test_geqrf() {
 #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \
     (!defined(KOKKOSKERNELS_ETI_ONLY) &&      \
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
-  using view_type_a_ll = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
+  using view_type_a_ll  = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
   using view_type_tw_ll = Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device>;
 
   Test::impl_test_geqrf<view_type_a_ll, view_type_tw_ll, Device>(3, 3);

From 05c8b958e2cce1352136ea05dcac3540d2348334 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Mon, 20 May 2024 10:29:58 -0600
Subject: [PATCH 08/27] Backup

---
 lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index 7fbc5ff391..739517ae15 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -464,7 +464,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
                        Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
           Kokkos::View<SCALAR*, LAYOUT,                                        \
-                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                  \ 
+                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \ 
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType =                                                          \
         Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \

From b9fb93d3d47e4e1427634f271ef64c47f913316b Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Mon, 20 May 2024 10:39:27 -0600
Subject: [PATCH 09/27] Formatting

---
 lapack/tpls/KokkosLapack_Host_tpl.cpp | 64 +++++++++++++++------------
 1 file changed, 35 insertions(+), 29 deletions(-)

diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp
index 89085619e8..9d751f75b6 100644
--- a/lapack/tpls/KokkosLapack_Host_tpl.cpp
+++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp
@@ -87,15 +87,16 @@ void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*,
 /// Geqrf
 ///
 
-void F77_BLAS_MANGLE(sgeqrf, SGEQRF)(int*, int*, float*, int*, float*, float*, int*,
-                                   int*);
-void F77_BLAS_MANGLE(dgeqrf, DGEQRF)(int*, int*, double*, int*, double*, double*,
-                                   int*, int*);
-void F77_BLAS_MANGLE(cgeqrf, CGEQRF)(int*, int*, std::complex<float>*, int*, std::complex<float>*,
-                                   std::complex<float>*, int*, int*);
+void F77_BLAS_MANGLE(sgeqrf, SGEQRF)(int*, int*, float*, int*, float*, float*,
+                                     int*, int*);
+void F77_BLAS_MANGLE(dgeqrf, DGEQRF)(int*, int*, double*, int*, double*,
+                                     double*, int*, int*);
+void F77_BLAS_MANGLE(cgeqrf, CGEQRF)(int*, int*, std::complex<float>*, int*,
+                                     std::complex<float>*, std::complex<float>*,
+                                     int*, int*);
 void F77_BLAS_MANGLE(zgeqrf, ZGEQRF)(int*, int*, std::complex<double>*, int*,
-                                   std::complex<double>*, std::complex<double>*, int*, int*);
-
+                                     std::complex<double>*,
+                                     std::complex<double>*, int*, int*);
 }
 
 #define F77_FUNC_SGESV F77_BLAS_MANGLE(sgesv, SGESV)
@@ -192,14 +193,14 @@ int HostLapack<double>::geqrf(int m, int n, double* a, int lda, double* tau,
 ///
 
 template <>
-void HostLapack<std::complex<float> >::gesv(int n, int rhs,
-                                            std::complex<float>* a, int lda,
-                                            int* ipiv, std::complex<float>* b,
-                                            int ldb, int info) {
+void HostLapack<std::complex<float>>::gesv(int n, int rhs,
+                                           std::complex<float>* a, int lda,
+                                           int* ipiv, std::complex<float>* b,
+                                           int ldb, int info) {
   F77_FUNC_CGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info);
 }
 template <>
-void HostLapack<std::complex<float> >::gesvd(
+void HostLapack<std::complex<float>>::gesvd(
     const char jobu, const char jobvt, const int m, const int n,
     std::complex<float>* a, const int lda, float* s, std::complex<float>* u,
     const int ldu, std::complex<float>* vt, const int ldvt,
@@ -208,16 +209,18 @@ void HostLapack<std::complex<float> >::gesvd(
                   &lwork, rwork, &info);
 }
 template <>
-int HostLapack<std::complex<float> >::trtri(const char uplo, const char diag,
-                                            int n, const std::complex<float>* a,
-                                            int lda) {
+int HostLapack<std::complex<float>>::trtri(const char uplo, const char diag,
+                                           int n, const std::complex<float>* a,
+                                           int lda) {
   int info = 0;
   F77_FUNC_CTRTRI(&uplo, &diag, &n, a, &lda, &info);
   return info;
 }
 template <>
-int HostLapack<std::complex<float>>::geqrf(int m, int n, std::complex<float>* a, int lda, std::complex<float>* tau,
-                                           std::complex<float>* work, int lwork) {
+int HostLapack<std::complex<float>>::geqrf(int m, int n, std::complex<float>* a,
+                                           int lda, std::complex<float>* tau,
+                                           std::complex<float>* work,
+                                           int lwork) {
   int info = 0;
   F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info);
   return info;
@@ -228,14 +231,14 @@ int HostLapack<std::complex<float>>::geqrf(int m, int n, std::complex<float>* a,
 ///
 
 template <>
-void HostLapack<std::complex<double> >::gesv(int n, int rhs,
-                                             std::complex<double>* a, int lda,
-                                             int* ipiv, std::complex<double>* b,
-                                             int ldb, int info) {
+void HostLapack<std::complex<double>>::gesv(int n, int rhs,
+                                            std::complex<double>* a, int lda,
+                                            int* ipiv, std::complex<double>* b,
+                                            int ldb, int info) {
   F77_FUNC_ZGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info);
 }
 template <>
-void HostLapack<std::complex<double> >::gesvd(
+void HostLapack<std::complex<double>>::gesvd(
     const char jobu, const char jobvt, const int m, const int n,
     std::complex<double>* a, const int lda, double* s, std::complex<double>* u,
     const int ldu, std::complex<double>* vt, const int ldvt,
@@ -244,17 +247,20 @@ void HostLapack<std::complex<double> >::gesvd(
                   &lwork, rwork, &info);
 }
 template <>
-int HostLapack<std::complex<double> >::trtri(const char uplo, const char diag,
-                                             int n,
-                                             const std::complex<double>* a,
-                                             int lda) {
+int HostLapack<std::complex<double>>::trtri(const char uplo, const char diag,
+                                            int n,
+                                            const std::complex<double>* a,
+                                            int lda) {
   int info = 0;
   F77_FUNC_ZTRTRI(&uplo, &diag, &n, a, &lda, &info);
   return info;
 }
 template <>
-int HostLapack<std::complex<double>>::geqrf(int m, int n, std::complex<double>* a, int lda, std::complex<double>* tau,
-                                            std::complex<double>* work, int lwork) {
+int HostLapack<std::complex<double>>::geqrf(int m, int n,
+                                            std::complex<double>* a, int lda,
+                                            std::complex<double>* tau,
+                                            std::complex<double>* work,
+                                            int lwork) {
   int info = 0;
   F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info);
   return info;

From 11221186a36df4a8051c97b55e2e460b0b7c0f95 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Mon, 20 May 2024 11:20:26 -0600
Subject: [PATCH 10/27] Formatting

---
 .../geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in             | 4 ++--
 .../KokkosLapack_geqrf_eti_spec_avail.hpp.in                  | 4 ++--
 lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp              | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in
index 2015898d13..4f4ad91cb6 100644
--- a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in
+++ b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in
@@ -20,6 +20,6 @@
 
 namespace KokkosLapack {
 namespace Impl {
-@LAPACK_GEQRF_ETI_INST_BLOCK @
-}  // namespace Impl
+@LAPACK_GEQRF_ETI_INST_BLOCK@
+  }  // namespace Impl
 }  // namespace KokkosLapack
diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in
index 2726dddd80..899a8b7604 100644
--- a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in
+++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in
@@ -18,7 +18,7 @@
 #define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_
 namespace KokkosLapack {
 namespace Impl {
-@LAPACK_GEQRF_ETI_AVAIL_BLOCK @
-}  // namespace Impl
+@LAPACK_GEQRF_ETI_AVAIL_BLOCK@
+  }  // namespace Impl
 }  // namespace KokkosLapack
 #endif
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index 739517ae15..4e040cc358 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -464,7 +464,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
                        Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
           Kokkos::View<SCALAR*, LAYOUT,                                        \
-                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \ 
+                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType =                                                          \
         Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \

From f578072005288625bc5337c8a6a5cf2c8048c721 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Wed, 22 May 2024 22:12:00 -0600
Subject: [PATCH 11/27] Backup

---
 lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
index 8a1fcf618d..f291bbe2a8 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
@@ -94,6 +94,8 @@ namespace Impl {
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>, \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                 \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                 \
+      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,              \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {               \
     enum : bool { value = true };                                            \
   };
@@ -139,6 +141,8 @@ namespace Impl {
       Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,   \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,    \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
+      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,               \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {                \
     enum : bool { value = true };                                             \
   };

From e953547eebeb0c990f291e57f5ad2aceb3a13e00 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Wed, 22 May 2024 22:13:52 -0600
Subject: [PATCH 12/27] Backup

---
 lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index 4e040cc358..056eef24da 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -17,8 +17,6 @@
 #ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_
 #define KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_
 
-// AquiEEP
-
 namespace KokkosLapack {
 namespace Impl {
 template <class AViewType, class TWViewType, class RType>
@@ -248,6 +246,8 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex<double>, Kokkos::LayoutLeft,
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_MAGMA
 
+#endif // AquiEEP
+
 // CUSOLVER
 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER
 #include "KokkosLapack_cusolver.hpp"
@@ -395,6 +395,8 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER
 
+#if 0  // AquiEEP
+
 // ROCSOLVER
 #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER
 #include <KokkosBlas_tpl_spec.hpp>

From 1459f5e20417f8c3e4d2872c3ea1a4665908923f Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Thu, 23 May 2024 01:29:58 -0600
Subject: [PATCH 13/27] Backup

---
 lapack/impl/KokkosLapack_geqrf_spec.hpp       |  10 +-
 lapack/src/KokkosLapack_geqrf.hpp             |  42 +++---
 .../KokkosLapack_geqrf_tpl_spec_avail.hpp     |   6 +-
 .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 139 +++++++++---------
 4 files changed, 100 insertions(+), 97 deletions(-)

diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp
index 6970c6dd2c..5410520c1c 100644
--- a/lapack/impl/KokkosLapack_geqrf_spec.hpp
+++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp
@@ -53,7 +53,8 @@ struct geqrf_eti_spec_avail {
       Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                             \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,        \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,            \
+      Kokkos::View<int*, LAYOUT_TYPE,                                      \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,        \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {             \
     enum : bool { value = true };                                          \
   };
@@ -78,7 +79,6 @@ struct GEQRF {
 };
 
 #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY
-//! Full specialization of geqrf for multi vectors.
 // Unification layer
 template <class ExecutionSpace, class AMatrix, class TWArray, class RType>
 struct GEQRF<ExecutionSpace, AMatrix, TWArray, RType, false,
@@ -114,7 +114,8 @@ struct GEQRF<ExecutionSpace, AMatrix, TWArray, RType, false,
       Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                            \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
-      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,           \
+      Kokkos::View<int*, LAYOUT_TYPE,                                     \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
       false, true>;
 
@@ -128,7 +129,8 @@ struct GEQRF<ExecutionSpace, AMatrix, TWArray, RType, false,
       Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                            \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
-      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,           \
+      Kokkos::View<int*, LAYOUT_TYPE,                                     \
+                   Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
       false, true>;
 
diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
index 506d3c60b7..1d26747cd3 100644
--- a/lapack/src/KokkosLapack_geqrf.hpp
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -15,7 +15,7 @@
 //@HEADER
 
 /// \file KokkosLapack_geqrf.hpp
-/// \brief Local dense linear solve
+/// \brief QR factorization
 ///
 /// This file provides KokkosLapack::geqrf. This function performs a
 /// local (no MPI) QR factorization of a M-by-N matrix A.
@@ -118,31 +118,33 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
     }
   }
 
-  typedef Kokkos::View<
+  using RetArray = Kokkos::View<int*, typename TWArray::array_layout, typename TWArray::device_type>;
+  RetArray rc("rc", 1);
+
+  using AMatrix_Internal = Kokkos::View<
       typename AMatrix::non_const_value_type**, typename AMatrix::array_layout,
-      typename AMatrix::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
-      AMatrix_Internal;
-  typedef Kokkos::View<
+      typename AMatrix::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
+  using TWArray_Internal = Kokkos::View<
       typename TWArray::non_const_value_type*, typename TWArray::array_layout,
-      typename TWArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
-      TWArray_Internal;
-  AMatrix_Internal A_i    = A;
-  TWArray_Internal Tau_i  = Tau;
-  TWArray_Internal Work_i = Work;
-
-  // This is the return value type and should always reside on host
-  using RViewInternalType =
-      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
+      typename TWArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
+  using RetArray_Internal = Kokkos::View<
+      int*, typename TWArray::array_layout,
+      typename TWArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
 
-  int result;
-  RViewInternalType R = RViewInternalType(&result);
+  AMatrix_Internal  A_i    = A;
+  TWArray_Internal  Tau_i  = Tau;
+  TWArray_Internal  Work_i = Work;
+  RetArray_Internal rc_i   = rc;
 
   KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TWArray_Internal,
-                            RViewInternalType>::geqrf(space, A_i, Tau_i, Work_i,
-                                                      R);
+                            RetArray_Internal>::geqrf(space, A_i, Tau_i, Work_i,
+                                                      rc_i);
+
+  typename RetArray_Internal::HostMirror h_rc = Kokkos::create_mirror_view(rc_i);
+
+  Kokkos::deep_copy(h_rc, rc_i);
 
-  return result;
+  return h_rc[0];
 }
 
 /// \brief Computes a QR factorization of a matrix A
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
index f291bbe2a8..cc6f1e78a4 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp
@@ -36,7 +36,7 @@ struct geqrf_tpl_spec_avail {
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>,   \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,            \
+      Kokkos::View<int*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>,      \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {             \
     enum : bool { value = true };                                          \
   };
@@ -95,7 +95,7 @@ namespace Impl {
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                 \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,  \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                 \
-      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,              \
+      Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEMSPACE>,     \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {               \
     enum : bool { value = true };                                            \
   };
@@ -142,7 +142,7 @@ namespace Impl {
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,    \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                  \
-      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,               \
+      Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEMSPACE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {                \
     enum : bool { value = true };                                             \
   };
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index 056eef24da..fe25ce19a0 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -54,21 +54,21 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
   using ALayout_t = typename AViewType::array_layout;
   static_assert(std::is_same_v<ALayout_t, Kokkos::LayoutLeft>,
                 "KokkosLapack - geqrf: A needs to have a Kokkos::LayoutLeft");
-  const int M     = A.extent_int(0);
-  const int N     = A.extent_int(1);
-  const int LDA   = A.stride(1);
-  const int LWORK = static_cast<int>(Work.extent(0));
+  const int m     = A.extent_int(0);
+  const int n     = A.extent_int(1);
+  const int lda   = A.stride(1);
+  const int lwork = static_cast<int>(Work.extent(0));
 
   if constexpr (Kokkos::ArithTraits<Scalar>::is_complex) {
     using MagType = typename Kokkos::ArithTraits<Scalar>::mag_type;
 
-    R() = HostLapack<std::complex<MagType>>::geqrf(
-        M, N, reinterpret_cast<std::complex<MagType>*>(A.data()), LDA,
+    R[0] = HostLapack<std::complex<MagType>>::geqrf(
+        m, n, reinterpret_cast<std::complex<MagType>*>(A.data()), lda,
         reinterpret_cast<std::complex<MagType>*>(Tau.data()),
-        reinterpret_cast<std::complex<MagType>*>(Work.data()), LWORK);
+        reinterpret_cast<std::complex<MagType>*>(Work.data()), lwork);
   } else {
-    R() = HostLapack<Scalar>::geqrf(M, N, A.data(), LDA, Tau.data(),
-                                    Work.data(), LWORK);
+    R[0] = HostLapack<Scalar>::geqrf(m, n, A.data(), lda, Tau.data(),
+                                    Work.data(), lwork);
   }
 }
 
@@ -80,7 +80,7 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,      \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,                \
+      Kokkos::View<int*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,         \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       true,                                                                    \
       geqrf_eti_spec_avail<                                                    \
@@ -89,7 +89,7 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
           Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,  \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,            \
+          Kokkos::View<int*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,     \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType =                                                          \
         Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,   \
@@ -97,7 +97,7 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
     using TWViewType =                                                         \
         Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,    \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using RType = Kokkos::View<int, Kokkos::LayoutRight, Kokkos::HostSpace,    \
+    using RType = Kokkos::View<int*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>, \
                                Kokkos::MemoryTraits<Kokkos::Unmanaged>>;       \
                                                                                \
     static void geqrf(const EXECSPACE& /* space */, const AViewType& A,        \
@@ -255,87 +255,68 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex<double>, Kokkos::LayoutLeft,
 namespace KokkosLapack {
 namespace Impl {
 
-template <class ExecutionSpace, class AViewType, class TWViewType>
-void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
-                         const AViewType& A, const TWViewType& Tau) {
+template <class ExecutionSpace, class AViewType, class TWViewType, class RType>
+void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
+			  const TWViewType& /* Work */, const TWViewType& Tau,
+                          class RType& R) {
+
   using memory_space = typename AViewType::memory_space;
-  using Scalar       = typename TWViewType::non_const_value_type;
-  using ALayout_t    = typename AViewType::array_layout;
-  using BLayout_t    = typename TWViewType::array_layout;
+  using Scalar = typename AViewType::non_const_value_type;
 
+  using ALayout_t = typename AViewType::array_layout;
+  static_assert(std::is_same_v<ALayout_t, Kokkos::LayoutLeft>,
+                "KokkosLapack - cusolver geqrf: A needs to have a Kokkos::LayoutLeft");
   const int m   = A.extent_int(0);
   const int n   = A.extent_int(1);
-  const int lda = std::is_same_v<ALayout_t, Kokkos::LayoutRight> ? A.stride(0)
-                                                                 : A.stride(1);
-
-  (void)B;
-
-  const int nrhs = B.extent_int(1);
-  const int ldb  = std::is_same_v<BLayout_t, Kokkos::LayoutRight> ? B.stride(0)
-                                                                 : B.stride(1);
+  const int lda = A.stride(1);
   int lwork = 0;
-  Kokkos::View<int, memory_space> info("getrf info");
+
+  //Kokkos::View<int, memory_space> info("cusolver geqrf info");
 
   CudaLapackSingleton& s = CudaLapackSingleton::singleton();
   KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
       cusolverDnSetStream(s.handle, space.cuda_stream()));
   if constexpr (std::is_same_v<Scalar, float>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
-        cusolverDnSgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork));
-    Kokkos::View<float*, memory_space> Workspace("getrf workspace", lwork);
-
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgetrf(s.handle, m, n, A.data(),
-                                                    lda, Workspace.data(),
-                                                    IPIV.data(), info.data()));
+        cusolverDnSgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork));
+    Kokkos::View<float*, memory_space> Workspace("cusolver sgeqrf workspace", lwork);
 
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
-        cusolverDnSgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda,
-                         IPIV.data(), B.data(), ldb, info.data()));
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgeqrf(s.handle, m, n, A.data(),
+                                                    lda, Tau.data(),
+                                                    Workspace.data(), lwork, /*info*/R.data()));
   }
   if constexpr (std::is_same_v<Scalar, double>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
-        cusolverDnDgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork));
-    Kokkos::View<double*, memory_space> Workspace("getrf workspace", lwork);
-
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgetrf(s.handle, m, n, A.data(),
-                                                    lda, Workspace.data(),
-                                                    IPIV.data(), info.data()));
+        cusolverDnDgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork));
+    Kokkos::View<double*, memory_space> Workspace("cusolver dgeqrf workspace", lwork);
 
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
-        cusolverDnDgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda,
-                         IPIV.data(), B.data(), ldb, info.data()));
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgeqrf(s.handle, m, n, A.data(),
+                                                    lda, Tau.data(),
+                                                    Workspace.data(), lwork, /*info*/R.data()));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<float>>) {
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrf_bufferSize(
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf_bufferSize(
         s.handle, m, n, reinterpret_cast<cuComplex*>(A.data()), lda, &lwork));
-    Kokkos::View<cuComplex*, memory_space> Workspace("getrf workspace", lwork);
+    Kokkos::View<cuComplex*, memory_space> Workspace("cusolver cgeqrf workspace", lwork);
 
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
-        cusolverDnCgetrf(s.handle, m, n, reinterpret_cast<cuComplex*>(A.data()),
-                         lda, reinterpret_cast<cuComplex*>(Workspace.data()),
-                         IPIV.data(), info.data()));
-
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrs(
-        s.handle, CUBLAS_OP_N, m, nrhs, reinterpret_cast<cuComplex*>(A.data()),
-        lda, IPIV.data(), reinterpret_cast<cuComplex*>(B.data()), ldb,
-        info.data()));
+                         cusolverDnCgeqrf(s.handle, m, n, reinterpret_cast<cuComplex*>(A.data()), lda,
+                         reinterpret_cast<cuComplex*>(Tau.data()),
+                         reinterpret_cast<cuComplex*>(Workspace.data()),
+                         lwork, /*info*/R.data()));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<double>>) {
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf_bufferSize(
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf_bufferSize(
         s.handle, m, n, reinterpret_cast<cuDoubleComplex*>(A.data()), lda,
         &lwork));
-    Kokkos::View<cuDoubleComplex*, memory_space> Workspace("getrf workspace",
+    Kokkos::View<cuDoubleComplex*, memory_space> Workspace("cusolver zgeqrf workspace",
                                                            lwork);
 
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf(
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf(
         s.handle, m, n, reinterpret_cast<cuDoubleComplex*>(A.data()), lda,
-        reinterpret_cast<cuDoubleComplex*>(Workspace.data()), IPIV.data(),
-        info.data()));
-
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrs(
-        s.handle, CUBLAS_OP_N, m, nrhs,
-        reinterpret_cast<cuDoubleComplex*>(A.data()), lda, IPIV.data(),
-        reinterpret_cast<cuDoubleComplex*>(B.data()), ldb, info.data()));
+        reinterpret_cast<cuDoubleComplex*>(Tau.data()),
+        reinterpret_cast<cuDoubleComplex*>(Workspace.data()),
+        lwork, /*info*/R.data()));
   }
   KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL));
 }
@@ -348,6 +329,8 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,   \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,      \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       true,                                                                    \
       geqrf_eti_spec_avail<                                                    \
           Kokkos::Cuda,                                                        \
@@ -355,6 +338,9 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
                        Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
           Kokkos::View<SCALAR*, LAYOUT,                                        \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<int*, LAYOUT,                                           \
                        Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType = Kokkos::View<SCALAR**, LAYOUT,                           \
@@ -362,15 +348,19 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
                                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
     using TWViewType =                                                         \
         Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+    using RType =                                                              \
+        Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
                                                                                \
     static void geqrf(const Kokkos::Cuda& space, const AViewType& A,           \
-                      const TWViewType& Tau, const TWViewType& Work) {         \
+                      const TWViewType& Tau, const TWViewType& Work,           \
+                      const RType& R) {                                        \
       Kokkos::Profiling::pushRegion(                                           \
           "KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR "]");                    \
       geqrf_print_specialization<AViewType, TWViewType, RType>();              \
                                                                                \
-      cusolverGeqrfWrapper(space, IPIV, A, B);                                 \
+      cusolverGeqrfWrapper(space, A, Tau, Work, R);                            \
       Kokkos::Profiling::popRegion();                                          \
     }                                                                          \
   };
@@ -420,7 +410,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
   const rocblas_int ldb = std::is_same_v<BLayout_t, Kokkos::LayoutRight>
                               ? B.stride(0)
                               : B.stride(1);
-  Kokkos::View<rocblas_int, ExecutionSpace> info("rocsolver info");
+  Kokkos::View<rocblas_int, ExecutionSpace> info("rocsolver geqrf info");
 
   KokkosBlas::Impl::RocBlasSingleton& s =
       KokkosBlas::Impl::RocBlasSingleton::singleton();
@@ -459,6 +449,8 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,    \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
+      Kokkos::View<int*, LAYOUT, MEM_SPACE,                                    \
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       true,                                                                    \
       geqrf_eti_spec_avail<                                                    \
           Kokkos::HIP,                                                         \
@@ -467,21 +459,28 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
           Kokkos::View<SCALAR*, LAYOUT,                                        \
                        Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
+                       Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
+          Kokkos::View<int*, LAYOUT,                                           \
+                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType =                                                          \
         Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
     using TWViewType =                                                         \
         Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,  \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
+    using RType =                                                              \
+        Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,     \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
                                                                                \
     static void geqrf(const Kokkos::HIP& space, const AViewType& A,            \
-                      const TWViewType& Tau, const TWViewType& Work) {         \
+                      const TWViewType& Tau, const TWViewType& Work,           \
+                      const RType& R) {                                        \
       Kokkos::Profiling::pushRegion(                                           \
           "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]");                   \
       geqrf_print_specialization<AViewType, TWViewType, RType>();              \
                                                                                \
-      rocsolverGeqrfWrapper(space, IPIV, A, B);                                \
+      rocsolverGeqrfWrapper(space, A, Tau, Work, R);                           \
       Kokkos::Profiling::popRegion();                                          \
     }                                                                          \
   };

From ad08d09d701ab3f02a5e970eabd872464dd929a1 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Thu, 23 May 2024 02:41:11 -0600
Subject: [PATCH 14/27] Backup

---
 lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index fe25ce19a0..8841440a04 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -257,8 +257,8 @@ namespace Impl {
 
 template <class ExecutionSpace, class AViewType, class TWViewType, class RType>
 void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
-			  const TWViewType& /* Work */, const TWViewType& Tau,
-                          class RType& R) {
+			  const TWViewType& Tau, const TWViewType& /* Work */,
+                          const RType& R) {
 
   using memory_space = typename AViewType::memory_space;
   using Scalar = typename AViewType::non_const_value_type;
@@ -271,7 +271,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
   const int lda = A.stride(1);
   int lwork = 0;
 
-  //Kokkos::View<int, memory_space> info("cusolver geqrf info");
+  //Kokkos::View<int, memory_space> info("cusolver geqrf info"); // AquiEEP
 
   CudaLapackSingleton& s = CudaLapackSingleton::singleton();
   KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
@@ -283,7 +283,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
 
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgeqrf(s.handle, m, n, A.data(),
                                                     lda, Tau.data(),
-                                                    Workspace.data(), lwork, /*info*/R.data()));
+                                                    Workspace.data(), lwork, R.data()));
   }
   if constexpr (std::is_same_v<Scalar, double>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
@@ -292,7 +292,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
 
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgeqrf(s.handle, m, n, A.data(),
                                                     lda, Tau.data(),
-                                                    Workspace.data(), lwork, /*info*/R.data()));
+                                                    Workspace.data(), lwork, R.data()));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<float>>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf_bufferSize(
@@ -303,7 +303,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
                          cusolverDnCgeqrf(s.handle, m, n, reinterpret_cast<cuComplex*>(A.data()), lda,
                          reinterpret_cast<cuComplex*>(Tau.data()),
                          reinterpret_cast<cuComplex*>(Workspace.data()),
-                         lwork, /*info*/R.data()));
+                         lwork, R.data()));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<double>>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf_bufferSize(
@@ -316,9 +316,11 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
         s.handle, m, n, reinterpret_cast<cuDoubleComplex*>(A.data()), lda,
         reinterpret_cast<cuDoubleComplex*>(Tau.data()),
         reinterpret_cast<cuDoubleComplex*>(Workspace.data()),
-        lwork, /*info*/R.data()));
+        lwork, R.data()));
   }
   KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL));
+
+  //Kokkos::deep_copy(R, info); // AquiEEP
 }
 
 #define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE)                 \

From 399a18f0258c48e3785e7e946dfba879dd7302d0 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Fri, 24 May 2024 01:06:02 -0600
Subject: [PATCH 15/27] Backup

---
 lapack/src/KokkosLapack_geqrf.hpp             | 135 ++++++++----------
 .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 131 +++++++++--------
 lapack/unit_test/Test_Lapack_geqrf.hpp        |  47 +++---
 3 files changed, 149 insertions(+), 164 deletions(-)

diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
index 1d26747cd3..7a81818d79 100644
--- a/lapack/src/KokkosLapack_geqrf.hpp
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -34,7 +34,8 @@ namespace KokkosLapack {
 ///
 /// \tparam ExecutionSpace The space where the kernel will run.
 /// \tparam AMatrix        Type of matrix A, as a 2-D Kokkos::View.
-/// \tparam TWArray        Type of arrays Tau and Work, as a 1-D Kokkos::View.
+/// \tparam TArray         Type of array Tau, as a 1-D Kokkos::View.
+/// \tparam InfoArray      Type of array Info, as a 1-D Kokkos::View.
 ///
 /// \param space [in] Execution space instance used to specified how to execute
 ///                   the geqrf kernels.
@@ -51,21 +52,15 @@ namespace KokkosLapack {
 ///                   where tau is a complex scalar, and v is a complex vector
 ///                   with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on
 ///                   exit in A(i+1:M,i), and tau in Tau(i).
-/// \param Tau [out]  One-dimensional array of size min(M,N) that contains
-///                   the scalar factors of the elementary reflectors.
-/// \param Work [out] One-dimensional array of size max(1,LWORK).
-///                   If min(M,N) == 0, then LWORK must be >= 1.
-///                   If min(M,N) != 0, then LWORK must be >= N.
-///                   If the QR factorization is successful, then the first
-///                   position of Work contains the optimal LWORK.
+/// \param Tau [out]  One-dimensional array of size min(M,N) that contains the
+///                   scalar factors of the elementary reflectors.
+/// \param Info [out] One-dimensional array of integers and of size 1:
+///                   Info[0] = 0: successful exit
+///                   Info[0] < 0: if equal to '-i', the i-th argument had an
+///                                illegal value
 ///
-/// \return           = 0: successfull exit
-///                   < 0: if equal to '-i', the i-th argument had an illegal
-///                        value
-///
-template <class ExecutionSpace, class AMatrix, class TWArray>
-int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
-          const TWArray& Work) {
+template <class ExecutionSpace, class AMatrix, class TArray, class InfoArray>
+void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, const InfoArray& Info) {
   // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and
   // rocSOLVER TPLs.
   //       MAGMA/rocSOLVER TPL should be enabled to call the MAGMA/rocSOLVER GPU
@@ -77,21 +72,32 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
                                  typename AMatrix::memory_space>::accessible);
   static_assert(
       Kokkos::SpaceAccessibility<ExecutionSpace,
-                                 typename TWArray::memory_space>::accessible);
+                                 typename TArray::memory_space>::accessible);
+  static_assert(
+      Kokkos::SpaceAccessibility<ExecutionSpace,
+                                 typename InfoArray::memory_space>::accessible);
 
   static_assert(Kokkos::is_view<AMatrix>::value,
                 "KokkosLapack::geqrf: A must be a Kokkos::View.");
-  static_assert(Kokkos::is_view<TWArray>::value,
-                "KokkosLapack::geqrf: Tau and Work must be Kokkos::View.");
+  static_assert(Kokkos::is_view<TArray>::value,
+                "KokkosLapack::geqrf: Tau must be Kokkos::View.");
+  static_assert(Kokkos::is_view<InfoArray>::value,
+                "KokkosLapack::geqrf: Info must be Kokkos::View.");
+
   static_assert(static_cast<int>(AMatrix::rank) == 2,
                 "KokkosLapack::geqrf: A must have rank 2.");
-  static_assert(static_cast<int>(TWArray::rank) == 1,
-                "KokkosLapack::geqrf: Tau and Work must have rank 1.");
+  static_assert(static_cast<int>(TArray::rank) == 1,
+                "KokkosLapack::geqrf: Tau must have rank 1.");
+  static_assert(static_cast<int>(InfoArray::rank) == 1,
+                "KokkosLapack::geqrf: Info must have rank 1.");
+
+  static_assert(std::is_same_v<typename InfoArray::non_const_value_type, int>,
+                "KokkosLapack::geqrf: Info must be an array of integers.");
 
   int64_t m     = A.extent(0);
   int64_t n     = A.extent(1);
   int64_t tau0  = Tau.extent(0);
-  int64_t work0 = Work.extent(0);
+  int64_t info0 = Info.extent(0);
 
   // Check validity of dimensions
   if (tau0 != std::min(m, n)) {
@@ -100,57 +106,37 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
        << " A: " << m << " x " << n << ", Tau length = " << tau0;
     KokkosKernels::Impl::throw_runtime_exception(os.str());
   }
-  if ((m == 0) || (n == 0)) {
-    if (work0 < 1) {
-      std::ostringstream os;
-      os << "KokkosLapack::geqrf: In case min(m,n) == 0, then Work must have "
-            "length >= 1: "
-         << " A: " << m << " x " << n << ", Work length = " << work0;
-      KokkosKernels::Impl::throw_runtime_exception(os.str());
-    }
-  } else {
-    if (work0 < n) {
-      std::ostringstream os;
-      os << "KokkosLapack::geqrf: In case min(m,n) != 0, then Work must have "
-            "length >= n: "
-         << " A: " << m << " x " << n << ", Work length = " << work0;
-      KokkosKernels::Impl::throw_runtime_exception(os.str());
-    }
-  }
 
-  using RetArray = Kokkos::View<int*, typename TWArray::array_layout, typename TWArray::device_type>;
-  RetArray rc("rc", 1);
+  if (info0 == 0) {
+    std::ostringstream os;
+    os << "KokkosLapack::geqrf: length of Info must be at least 1: "
+       << " A: " << m << " x " << n << ", Info length = " << info0;
+    KokkosKernels::Impl::throw_runtime_exception(os.str());
+  }
 
   using AMatrix_Internal = Kokkos::View<
       typename AMatrix::non_const_value_type**, typename AMatrix::array_layout,
       typename AMatrix::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
-  using TWArray_Internal = Kokkos::View<
-      typename TWArray::non_const_value_type*, typename TWArray::array_layout,
-      typename TWArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
-  using RetArray_Internal = Kokkos::View<
-      int*, typename TWArray::array_layout,
-      typename TWArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
-
-  AMatrix_Internal  A_i    = A;
-  TWArray_Internal  Tau_i  = Tau;
-  TWArray_Internal  Work_i = Work;
-  RetArray_Internal rc_i   = rc;
-
-  KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TWArray_Internal,
-                            RetArray_Internal>::geqrf(space, A_i, Tau_i, Work_i,
-                                                      rc_i);
-
-  typename RetArray_Internal::HostMirror h_rc = Kokkos::create_mirror_view(rc_i);
-
-  Kokkos::deep_copy(h_rc, rc_i);
-
-  return h_rc[0];
+  using TArray_Internal = Kokkos::View<
+      typename TArray::non_const_value_type*, typename TArray::array_layout,
+      typename TArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
+  using InfoArray_Internal = Kokkos::View<
+      typename InfoArray::non_const_value_type*, typename InfoArray::array_layout,
+      typename InfoArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
+
+  AMatrix_Internal   A_i    = A;
+  TArray_Internal    Tau_i  = Tau;
+  InfoArray_Internal Info_i = Info;
+
+  KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TArray_Internal,
+                            InfoArray_Internal>::geqrf(space, A_i, Tau_i, Info_i);
 }
 
 /// \brief Computes a QR factorization of a matrix A
 ///
-/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View.
-/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View.
+/// \tparam AMatrix   Type of matrix A, as a 2-D Kokkos::View.
+/// \tparam TArray    Type of array Tau, as a 1-D Kokkos::View.
+/// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View.
 ///
 /// \param A [in,out] On entry, the M-by-N matrix to be factorized.
 ///                   On exit, the elements on and above the diagonal contain
@@ -165,22 +151,17 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
 ///                   where tau is a complex scalar, and v is a complex vector
 ///                   with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on
 ///                   exit in A(i+1:M,i), and tau in Tau(i).
-/// \param Tau [out]  One-dimensional array of size min(M,N) that contains
-///                   the scalar factors of the elementary reflectors.
-/// \param Work [out] One-dimensional array of size max(1,LWORK).
-///                   If min(M,N) == 0, then LWORK must be >= 1.
-///                   If min(M,N) != 0, then LWORK must be >= N.
-///                   If the QR factorization is successful, then the first
-///                   position of Work contains the optimal LWORK.
-///
-/// \return           = 0: successfull exit
-///                   < 0: if equal to '-i', the i-th argument had an illegal
-///                        value
+/// \param Tau [out]  One-dimensional array of size min(M,N) that contains the
+///                   scalar factors of the elementary reflectors.
+/// \param Info [out] One-dimensional array of integers and of size 1:
+///                   Info[0] = 0: successful exit
+///                   Info[0] < 0: if equal to '-i', the i-th argument had an
+///                                illegal value
 ///
-template <class AMatrix, class TWArray>
-int geqrf(const AMatrix& A, const TWArray& Tau, const TWArray& Work) {
+template <class AMatrix, class TArray, class InfoArray>
+void geqrf(const AMatrix& A, const TArray& Tau, const InfoArray& Info) {
   typename AMatrix::execution_space space{};
-  return geqrf(space, A, Tau, Work);
+  geqrf(space, A, Tau, Info);
 }
 
 }  // namespace KokkosLapack
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index 8841440a04..b10edbdac6 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -19,19 +19,19 @@
 
 namespace KokkosLapack {
 namespace Impl {
-template <class AViewType, class TWViewType, class RType>
+template <class AViewType, class TauViewType, class InfoViewType>
 inline void geqrf_print_specialization() {
 #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA
   printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n",
-         typeid(AViewType).name(), typeid(TWViewType).name(),
-         typeid(RType).name());
+         typeid(AViewType).name(), typeid(TauViewType).name(),
+         typeid(InfoViewType).name());
 #else
 #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK
   printf(
       "KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n",
-      typeid(AViewType).name(), typeid(TWViewType).name(),
-      typeid(RType).name());
+      typeid(AViewType).name(), typeid(TauViewType).name(),
+      typeid(InfoViewType).name());
 #endif
 #endif
 #endif
@@ -46,29 +46,51 @@ inline void geqrf_print_specialization() {
 namespace KokkosLapack {
 namespace Impl {
 
-template <class AViewType, class TWViewType, class RType>
-void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
-                        const TWViewType& Work, const RType& R) {
-  using Scalar = typename AViewType::non_const_value_type;
-
-  using ALayout_t = typename AViewType::array_layout;
+template <class AViewType, class TauViewType, class InfoViewType>
+void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau,
+                        const InfoViewType& Info) {
+  using memory_space = typename AViewType::memory_space;
+  using Scalar       = typename AViewType::non_const_value_type;
+  using ALayout_t    = typename AViewType::array_layout;
   static_assert(std::is_same_v<ALayout_t, Kokkos::LayoutLeft>,
                 "KokkosLapack - geqrf: A needs to have a Kokkos::LayoutLeft");
-  const int m     = A.extent_int(0);
-  const int n     = A.extent_int(1);
-  const int lda   = A.stride(1);
-  const int lwork = static_cast<int>(Work.extent(0));
+  const int m   = A.extent_int(0);
+  const int n   = A.extent_int(1);
+  const int lda = A.stride(1);
+
+  int lwork = -1;
+  Kokkos::View<Scalar*, memory_space> work("geqrf work buffer", 1);
 
   if constexpr (Kokkos::ArithTraits<Scalar>::is_complex) {
     using MagType = typename Kokkos::ArithTraits<Scalar>::mag_type;
 
-    R[0] = HostLapack<std::complex<MagType>>::geqrf(
+    Info[0] = HostLapack<std::complex<MagType>>::geqrf(
         m, n, reinterpret_cast<std::complex<MagType>*>(A.data()), lda,
         reinterpret_cast<std::complex<MagType>*>(Tau.data()),
-        reinterpret_cast<std::complex<MagType>*>(Work.data()), lwork);
+        reinterpret_cast<std::complex<MagType>*>(work.data()), lwork);
+
+    if (Info[0] < 0) return;
+    
+    lwork = static_cast<int>(work(0).real());
+
+    work = Kokkos::View<Scalar*, memory_space>("geqrf work buffer", lwork);
+
+    Info[0] = HostLapack<std::complex<MagType>>::geqrf(
+        m, n, reinterpret_cast<std::complex<MagType>*>(A.data()), lda,
+        reinterpret_cast<std::complex<MagType>*>(Tau.data()),
+        reinterpret_cast<std::complex<MagType>*>(work.data()), lwork);
   } else {
-    R[0] = HostLapack<Scalar>::geqrf(m, n, A.data(), lda, Tau.data(),
-                                    Work.data(), lwork);
+    Info[0] = HostLapack<Scalar>::geqrf(m, n, A.data(), lda, Tau.data(),
+                                    work.data(), lwork);
+
+    if (Info[0] < 0) return;
+
+    lwork = static_cast<int>(work(0));
+
+    work = Kokkos::View<Scalar*, memory_space>("geqrf work buffer", lwork);
+
+    Info[0] = HostLapack<Scalar>::geqrf(m, n, A.data(), lda, Tau.data(),
+                                    work.data(), lwork);
   }
 }
 
@@ -94,19 +116,18 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau,
     using AViewType =                                                          \
         Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,   \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using TWViewType =                                                         \
+    using TauViewType =                                                        \
         Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,    \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using RType = Kokkos::View<int*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>, \
+    using InfoViewType = Kokkos::View<int*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>, \
                                Kokkos::MemoryTraits<Kokkos::Unmanaged>>;       \
                                                                                \
     static void geqrf(const EXECSPACE& /* space */, const AViewType& A,        \
-                      const TWViewType& Tau, const TWViewType& Work,           \
-                      const RType& R) {                                        \
+                      const TauViewType& Tau, const InfoViewType& Info) {      \
       Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR  \
                                     "]");                                      \
-      geqrf_print_specialization<AViewType, TWViewType, RType>();              \
-      lapackGeqrfWrapper(A, Tau, Work, R);                                     \
+      geqrf_print_specialization<AViewType, TauViewType, InfoViewType>();      \
+      lapackGeqrfWrapper(A, Tau, Info);                                        \
       Kokkos::Profiling::popRegion();                                          \
     }                                                                          \
   };
@@ -157,14 +178,14 @@ KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<double>, Kokkos::LayoutLeft,
 namespace KokkosLapack {
 namespace Impl {
 
-template <class ExecSpace, class AViewType, class TWViewType>
+template <class ExecSpace, class AViewType, class TauViewType>
 void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A,
-                      const TWViewType& Tau, const TWViewType& Work) {
+                      const TauViewType& Tau) {
   using scalar_type = typename AViewType::non_const_value_type;
 
   Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," +
                                 Kokkos::ArithTraits<scalar_type>::name() + "]");
-  geqrf_print_specialization<AViewType, TWViewType, RType>();
+  geqrf_print_specialization<AViewType, TauViewType, InfoViewType>();
 
   magma_int_t N    = static_cast<magma_int_t>(A.extent(1));
   magma_int_t AST  = static_cast<magma_int_t>(A.stride(1));
@@ -225,13 +246,13 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A,
     using AViewType = Kokkos::View<SCALAR**, LAYOUT,                           \
                                    Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
                                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
-    using TWViewType =                                                         \
+    using TauViewType =                                                        \
         Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
                                                                                \
     static void geqrf(const Kokkos::Cuda& space, const AViewType& A,           \
-                      const TWViewType& Tau, const TWViewType& Work) {         \
-      magmaGeqrfWrapper(space, A, Tau, Work);                                  \
+                      const TauViewType& Tau) {                                \
+      magmaGeqrfWrapper(space, A, Tau);                                        \
     }                                                                          \
   };
 
@@ -255,10 +276,9 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex<double>, Kokkos::LayoutLeft,
 namespace KokkosLapack {
 namespace Impl {
 
-template <class ExecutionSpace, class AViewType, class TWViewType, class RType>
+template <class ExecutionSpace, class AViewType, class TauViewType, class InfoViewType>
 void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
-			  const TWViewType& Tau, const TWViewType& /* Work */,
-                          const RType& R) {
+			  const TauViewType& Tau, const InfoViewType& Info) {
 
   using memory_space = typename AViewType::memory_space;
   using Scalar = typename AViewType::non_const_value_type;
@@ -271,8 +291,6 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
   const int lda = A.stride(1);
   int lwork = 0;
 
-  //Kokkos::View<int, memory_space> info("cusolver geqrf info"); // AquiEEP
-
   CudaLapackSingleton& s = CudaLapackSingleton::singleton();
   KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
       cusolverDnSetStream(s.handle, space.cuda_stream()));
@@ -283,7 +301,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
 
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgeqrf(s.handle, m, n, A.data(),
                                                     lda, Tau.data(),
-                                                    Workspace.data(), lwork, R.data()));
+                                                    Workspace.data(), lwork, Info.data()));
   }
   if constexpr (std::is_same_v<Scalar, double>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
@@ -292,7 +310,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
 
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgeqrf(s.handle, m, n, A.data(),
                                                     lda, Tau.data(),
-                                                    Workspace.data(), lwork, R.data()));
+                                                    Workspace.data(), lwork, Info.data()));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<float>>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf_bufferSize(
@@ -303,7 +321,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
                          cusolverDnCgeqrf(s.handle, m, n, reinterpret_cast<cuComplex*>(A.data()), lda,
                          reinterpret_cast<cuComplex*>(Tau.data()),
                          reinterpret_cast<cuComplex*>(Workspace.data()),
-                         lwork, R.data()));
+                         lwork, Info.data()));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<double>>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf_bufferSize(
@@ -316,11 +334,9 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
         s.handle, m, n, reinterpret_cast<cuDoubleComplex*>(A.data()), lda,
         reinterpret_cast<cuDoubleComplex*>(Tau.data()),
         reinterpret_cast<cuDoubleComplex*>(Workspace.data()),
-        lwork, R.data()));
+        lwork, Info.data()));
   }
   KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL));
-
-  //Kokkos::deep_copy(R, info); // AquiEEP
 }
 
 #define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE)                 \
@@ -348,21 +364,20 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
     using AViewType = Kokkos::View<SCALAR**, LAYOUT,                           \
                                    Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
                                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>;   \
-    using TWViewType =                                                         \
+    using TauViewType =                                                        \
         Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>, \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using RType =                                                              \
+    using InfoViewType =                                                       \
         Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
                                                                                \
     static void geqrf(const Kokkos::Cuda& space, const AViewType& A,           \
-                      const TWViewType& Tau, const TWViewType& Work,           \
-                      const RType& R) {                                        \
+                      const TauViewType& Tau, const InfoViewType& Info) {      \
       Kokkos::Profiling::pushRegion(                                           \
           "KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR "]");                    \
-      geqrf_print_specialization<AViewType, TWViewType, RType>();              \
+      geqrf_print_specialization<AViewType, TauViewType, InfoViewType>();      \
                                                                                \
-      cusolverGeqrfWrapper(space, A, Tau, Work, R);                            \
+      cusolverGeqrfWrapper(space, A, Tau, Info);                               \
       Kokkos::Profiling::popRegion();                                          \
     }                                                                          \
   };
@@ -397,12 +412,11 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
 namespace KokkosLapack {
 namespace Impl {
 
-template <class ExecutionSpace, class AViewType, class TWViewType>
-void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
-                          const AViewType& A, const TWViewType& Tau) {
-  using Scalar    = typename TWViewType::non_const_value_type;
+template <class ExecutionSpace, class AViewType, class TauViewType>
+void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau) {
+  using Scalar    = typename TauViewType::non_const_value_type;
   using ALayout_t = typename AViewType::array_layout;
-  using BLayout_t = typename TWViewType::array_layout;
+  using BLayout_t = typename TauViewType::array_layout;
 
   const rocblas_int N    = static_cast<rocblas_int>(A.extent(0));
   const rocblas_int nrhs = static_cast<rocblas_int>(B.extent(1));
@@ -468,21 +482,20 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work,
     using AViewType =                                                          \
         Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using TWViewType =                                                         \
+    using TauViewType =                                                        \
         Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,  \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using RType =                                                              \
+    using InfoViewType =                                                       \
         Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,     \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
                                                                                \
     static void geqrf(const Kokkos::HIP& space, const AViewType& A,            \
-                      const TWViewType& Tau, const TWViewType& Work,           \
-                      const RType& R) {                                        \
+                      const TauViewType& Tau, const InfoViewType& Info) {      \
       Kokkos::Profiling::pushRegion(                                           \
           "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]");                   \
-      geqrf_print_specialization<AViewType, TWViewType, RType>();              \
+      geqrf_print_specialization<AViewType, TauViewType, InfoViewType>();      \
                                                                                \
-      rocsolverGeqrfWrapper(space, A, Tau, Work, R);                           \
+      rocsolverGeqrfWrapper(space, A, Tau, Info);		               \
       Kokkos::Profiling::popRegion();                                          \
     }                                                                          \
   };
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index de662365ac..3aedb0b984 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -37,13 +37,11 @@
 
 namespace Test {
 
-template <class ViewTypeA, class ViewTypeTW>
+template <class ViewTypeA, class ViewTypeTau>
 void getQR(int const m, int const n,
            typename ViewTypeA::HostMirror const&  // h_A
            ,
-           typename ViewTypeTW::HostMirror const&  // h_tau
-           ,
-           typename ViewTypeTW::HostMirror const&  // h_work
+           typename ViewTypeTau::HostMirror const&  // h_tau
            ,
            typename ViewTypeA::HostMirror&  // h_Q
            ,
@@ -85,8 +83,9 @@ void getQR(int const m, int const n,
   }
 }
 
-template <class ViewTypeA, class ViewTypeTW, class Device>
+template <class ViewTypeA, class ViewTypeTau, class Device>
 void impl_test_geqrf(int m, int n) {
+  using ViewTypeInfo = Kokkos::View<int*, Kokkos::LayoutLeft, Device>;
   using execution_space = typename Device::execution_space;
   using ScalarA         = typename ViewTypeA::value_type;
   // using ats             = Kokkos::ArithTraits<ScalarA>;
@@ -96,21 +95,17 @@ void impl_test_geqrf(int m, int n) {
   Kokkos::Random_XorShift64_Pool<execution_space> rand_pool(13718);
 
   int minMN(std::min(m, n));
-  int lwork(1);
-  if (minMN != 0) {
-    lwork = n;
-  }
 
   // Create device views
-  ViewTypeA A("A", m, n);
-  ViewTypeTW Tau("Tau", minMN);
-  ViewTypeTW Work("Work", lwork);
+  ViewTypeA    A   ("A", m, n);
+  ViewTypeTau  Tau ("Tau", minMN);
+  ViewTypeInfo Info("Info", 1);
 
   // Create host mirrors of device views.
-  typename ViewTypeA::HostMirror h_A     = Kokkos::create_mirror_view(A);
-  typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A);
-  typename ViewTypeTW::HostMirror h_tau  = Kokkos::create_mirror_view(Tau);
-  typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work);
+  typename ViewTypeA::HostMirror    h_A     = Kokkos::create_mirror_view(A);
+  typename ViewTypeA::HostMirror    h_Aorig = Kokkos::create_mirror_view(A);
+  typename ViewTypeTau::HostMirror  h_tau   = Kokkos::create_mirror_view(Tau);
+  typename ViewTypeInfo::HostMirror h_info  = Kokkos::create_mirror_view(Info);
 
   // Initialize data.
   if ((m == 3) && (n == 3)) {
@@ -167,9 +162,8 @@ void impl_test_geqrf(int m, int n) {
   Kokkos::fence();
 
   // Perform the QR factorization
-  int rc(0);
   try {
-    rc = KokkosLapack::geqrf(space, A, Tau, Work);
+    KokkosLapack::geqrf(space, A, Tau, Info);
   } catch (const std::runtime_error& e) {
     std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'"
               << std::endl;
@@ -179,15 +173,15 @@ void impl_test_geqrf(int m, int n) {
 
   Kokkos::fence();
 
-  EXPECT_EQ(rc, 0) << "Failed geqrf() test: rc = " << rc;
+  Kokkos::deep_copy(h_info, Info);
+  EXPECT_EQ(h_info[0], 0) << "Failed geqrf() test: Info[0] = " << h_info[0];
 
   // Get the results
   Kokkos::deep_copy(h_A, A);
   Kokkos::deep_copy(h_tau, Tau);
-  Kokkos::deep_copy(h_work, Work);
 
 #if 1  // def HAVE_KOKKOSKERNELS_DEBUG
-  std::cout << "rc = " << rc << std::endl;
+  std::cout << "info[0] = " << h_info[0] << std::endl;
   for (int i(0); i < minMN; ++i) {
     for (int j(0); j < n; ++j) {
       std::cout << "R(" << i << "," << j << ") = " << h_A(i, j) << std::endl;
@@ -196,9 +190,6 @@ void impl_test_geqrf(int m, int n) {
   for (int i(0); i < minMN; ++i) {
     std::cout << "tau(" << i << ") = " << h_tau[i] << std::endl;
   }
-  for (int i(0); i < lwork; ++i) {
-    std::cout << "work(" << i << ") = " << h_work[i] << std::endl;
-  }
 #endif
 
   ViewTypeA Q("Q", m, m);
@@ -209,7 +200,7 @@ void impl_test_geqrf(int m, int n) {
   typename ViewTypeA::HostMirror h_R  = Kokkos::create_mirror_view(R);
   typename ViewTypeA::HostMirror h_QR = Kokkos::create_mirror_view(QR);
 
-  getQR<ViewTypeA, ViewTypeTW>(m, n, h_A, h_tau, h_work, h_Q, h_R, h_QR);
+  getQR<ViewTypeA, ViewTypeTau>(m, n, h_A, h_tau, h_Q, h_R, h_QR);
 
 #if 1  // def HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
@@ -279,10 +270,10 @@ void test_geqrf() {
 #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \
     (!defined(KOKKOSKERNELS_ETI_ONLY) &&      \
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
-  using view_type_a_ll  = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
-  using view_type_tw_ll = Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device>;
+  using view_type_a_ll   = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
+  using view_type_tau_ll = Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device>;
 
-  Test::impl_test_geqrf<view_type_a_ll, view_type_tw_ll, Device>(3, 3);
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(3, 3);
 #endif
 }
 

From ec1115902ec59c6d3cbb900c3a940fc6c44cd85b Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Fri, 24 May 2024 01:32:20 -0600
Subject: [PATCH 16/27] geqrf: report LAPACK info via int* out-parameter in HostLapack

---
 lapack/tpls/KokkosLapack_Host_tpl.cpp         | 32 +++++++------------
 lapack/tpls/KokkosLapack_Host_tpl.hpp         |  3 +-
 .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 18 ++++++-----
 3 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp
index 9d751f75b6..17bc2915a4 100644
--- a/lapack/tpls/KokkosLapack_Host_tpl.cpp
+++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp
@@ -148,11 +148,9 @@ int HostLapack<float>::trtri(const char uplo, const char diag, int n,
   return info;
 }
 template <>
-int HostLapack<float>::geqrf(int m, int n, float* a, int lda, float* tau,
-                             float* work, int lwork) {
-  int info = 0;
-  F77_FUNC_SGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info);
-  return info;
+void HostLapack<float>::geqrf(int m, int n, float* a, int lda, float* tau,
+			      float* work, int lwork, int *info) {
+  F77_FUNC_SGEQRF(&m, &n, a, &lda, tau, work, &lwork, info);
 }
 
 ///
@@ -181,11 +179,9 @@ int HostLapack<double>::trtri(const char uplo, const char diag, int n,
   return info;
 }
 template <>
-int HostLapack<double>::geqrf(int m, int n, double* a, int lda, double* tau,
-                              double* work, int lwork) {
-  int info = 0;
-  F77_FUNC_DGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info);
-  return info;
+void HostLapack<double>::geqrf(int m, int n, double* a, int lda, double* tau,
+                              double* work, int lwork, int *info) {
+  F77_FUNC_DGEQRF(&m, &n, a, &lda, tau, work, &lwork, info);
 }
 
 ///
@@ -217,13 +213,11 @@ int HostLapack<std::complex<float>>::trtri(const char uplo, const char diag,
   return info;
 }
 template <>
-int HostLapack<std::complex<float>>::geqrf(int m, int n, std::complex<float>* a,
+void HostLapack<std::complex<float>>::geqrf(int m, int n, std::complex<float>* a,
                                            int lda, std::complex<float>* tau,
                                            std::complex<float>* work,
-                                           int lwork) {
-  int info = 0;
-  F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info);
-  return info;
+                                           int lwork, int *info) {
+  F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, info);
 }
 
 ///
@@ -256,14 +250,12 @@ int HostLapack<std::complex<double>>::trtri(const char uplo, const char diag,
   return info;
 }
 template <>
-int HostLapack<std::complex<double>>::geqrf(int m, int n,
+void HostLapack<std::complex<double>>::geqrf(int m, int n,
                                             std::complex<double>* a, int lda,
                                             std::complex<double>* tau,
                                             std::complex<double>* work,
-                                            int lwork) {
-  int info = 0;
-  F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info);
-  return info;
+                                            int lwork, int *info) {
+  F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, info);
 }
 
 }  // namespace Impl
diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp
index d651c9ca52..8797d2006c 100644
--- a/lapack/tpls/KokkosLapack_Host_tpl.hpp
+++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp
@@ -42,7 +42,8 @@ struct HostLapack {
   static int trtri(const char uplo, const char diag, int n, const T *a,
                    int lda);
 
-  static int geqrf(int m, int n, T *a, int lda, T *tau, T *work, int lwork);
+  static void geqrf(int m, int n, T *a, int lda, T *tau, T *work, int lwork,
+		    int *info);
 };
 }  // namespace Impl
 }  // namespace KokkosLapack
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index b10edbdac6..415dfca32c 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -64,10 +64,11 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau,
   if constexpr (Kokkos::ArithTraits<Scalar>::is_complex) {
     using MagType = typename Kokkos::ArithTraits<Scalar>::mag_type;
 
-    Info[0] = HostLapack<std::complex<MagType>>::geqrf(
+    HostLapack<std::complex<MagType>>::geqrf(
         m, n, reinterpret_cast<std::complex<MagType>*>(A.data()), lda,
         reinterpret_cast<std::complex<MagType>*>(Tau.data()),
-        reinterpret_cast<std::complex<MagType>*>(work.data()), lwork);
+        reinterpret_cast<std::complex<MagType>*>(work.data()), lwork,
+	Info.data());
 
     if (Info[0] < 0) return;
     
@@ -75,13 +76,14 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau,
 
     work = Kokkos::View<Scalar*, memory_space>("geqrf work buffer", lwork);
 
-    Info[0] = HostLapack<std::complex<MagType>>::geqrf(
+    HostLapack<std::complex<MagType>>::geqrf(
         m, n, reinterpret_cast<std::complex<MagType>*>(A.data()), lda,
         reinterpret_cast<std::complex<MagType>*>(Tau.data()),
-        reinterpret_cast<std::complex<MagType>*>(work.data()), lwork);
+        reinterpret_cast<std::complex<MagType>*>(work.data()), lwork,
+	Info.data());
   } else {
-    Info[0] = HostLapack<Scalar>::geqrf(m, n, A.data(), lda, Tau.data(),
-                                    work.data(), lwork);
+    HostLapack<Scalar>::geqrf(m, n, A.data(), lda, Tau.data(),
+                                    work.data(), lwork,	Info.data());
 
     if (Info[0] < 0) return;
 
@@ -89,8 +91,8 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau,
 
     work = Kokkos::View<Scalar*, memory_space>("geqrf work buffer", lwork);
 
-    Info[0] = HostLapack<Scalar>::geqrf(m, n, A.data(), lda, Tau.data(),
-                                    work.data(), lwork);
+    HostLapack<Scalar>::geqrf(m, n, A.data(), lda, Tau.data(), work.data(),
+			      lwork, Info.data());
   }
 }
 

From 2c03206cd04186502888c7027afe6cd2d6e0de9c Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Fri, 24 May 2024 03:08:11 -0600
Subject: [PATCH 17/27] geqrf: enable rocSOLVER TPL path, pass Info view to GPU wrappers

---
 .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 63 ++++++++-----------
 1 file changed, 26 insertions(+), 37 deletions(-)

diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index 415dfca32c..db28ab541d 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -182,7 +182,7 @@ namespace Impl {
 
 template <class ExecSpace, class AViewType, class TauViewType>
 void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A,
-                      const TauViewType& Tau) {
+                      const TauViewType& Tau, const InfoViewType& Info) {
   using scalar_type = typename AViewType::non_const_value_type;
 
   Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," +
@@ -253,8 +253,8 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A,
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
                                                                                \
     static void geqrf(const Kokkos::Cuda& space, const AViewType& A,           \
-                      const TauViewType& Tau) {                                \
-      magmaGeqrfWrapper(space, A, Tau);                                        \
+                      const TauViewType& Tau, const InfoViewType& Info) {      \
+      magmaGeqrfWrapper(space, A, Tau, Info);                                  \
     }                                                                          \
   };
 
@@ -281,7 +281,6 @@ namespace Impl {
 template <class ExecutionSpace, class AViewType, class TauViewType, class InfoViewType>
 void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
 			  const TauViewType& Tau, const InfoViewType& Info) {
-
   using memory_space = typename AViewType::memory_space;
   using Scalar = typename AViewType::non_const_value_type;
 
@@ -404,8 +403,6 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER
 
-#if 0  // AquiEEP
-
 // ROCSOLVER
 #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER
 #include <KokkosBlas_tpl_spec.hpp>
@@ -415,47 +412,41 @@ namespace KokkosLapack {
 namespace Impl {
 
 template <class ExecutionSpace, class AViewType, class TauViewType>
-void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau) {
-  using Scalar    = typename TauViewType::non_const_value_type;
+void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau,
+                        const InfoViewType& Info) {
+  using Scalar = typename AViewType::non_const_value_type;
+
   using ALayout_t = typename AViewType::array_layout;
-  using BLayout_t = typename TauViewType::array_layout;
-
-  const rocblas_int N    = static_cast<rocblas_int>(A.extent(0));
-  const rocblas_int nrhs = static_cast<rocblas_int>(B.extent(1));
-  const rocblas_int lda  = std::is_same_v<ALayout_t, Kokkos::LayoutRight>
-                              ? A.stride(0)
-                              : A.stride(1);
-  const rocblas_int ldb = std::is_same_v<BLayout_t, Kokkos::LayoutRight>
-                              ? B.stride(0)
-                              : B.stride(1);
-  Kokkos::View<rocblas_int, ExecutionSpace> info("rocsolver geqrf info");
+  static_assert(std::is_same_v<ALayout_t, Kokkos::LayoutLeft>,
+                "KokkosLapack - rocsolver geqrf: A needs to have a Kokkos::LayoutLeft");
+  const rocblas_int m   = static_cast<rocblas_int>(A.extent(0));
+  const rocblas_int n   = static_cast<rocblas_int>(A.extent(1));
+  const rocblas_int lda = static_cast<rocblas_int>(A.stride(1));
+  rocblas_status rc = rocblas_status_success;
 
   KokkosBlas::Impl::RocBlasSingleton& s =
       KokkosBlas::Impl::RocBlasSingleton::singleton();
   KOKKOS_ROCBLAS_SAFE_CALL_IMPL(
       rocblas_set_stream(s.handle, space.hip_stream()));
   if constexpr (std::is_same_v<Scalar, float>) {
-    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgeqrf(s.handle, N, nrhs, A.data(),
-                                                  lda, IPIV.data(), B.data(),
-                                                  ldb, info.data()));
+    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgeqrf(s.handle, m, n, A.data(),
+                                                  lda, Tau.data()));
   }
   if constexpr (std::is_same_v<Scalar, double>) {
-    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgeqrf(s.handle, N, nrhs, A.data(),
-                                                  lda, IPIV.data(), B.data(),
-                                                  ldb, info.data()));
+    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgeqrf(s.handle, m, n, A.data(),
+                                                  lda, Tau.data()));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<float>>) {
-    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf(
-        s.handle, N, nrhs, reinterpret_cast<rocblas_float_complex*>(A.data()),
-        lda, IPIV.data(), reinterpret_cast<rocblas_float_complex*>(B.data()),
-        ldb, info.data()));
+    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf(
+        s.handle, m, n, reinterpret_cast<rocblas_float_complex*>(A.data()),
+        lda, reinterpret_cast<rocblas_float_complex*>(Tau.data())));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<double>>) {
-    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf(
-        s.handle, N, nrhs, reinterpret_cast<rocblas_double_complex*>(A.data()),
-        lda, IPIV.data(), reinterpret_cast<rocblas_double_complex*>(B.data()),
-        ldb, info.data()));
+    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf(
+        s.handle, m, n, reinterpret_cast<rocblas_double_complex*>(A.data()),
+        lda, reinterpret_cast<rocblas_double_complex*>(Tau.data())));
   }
+  Info[0] = static_cast<int>(rc);
   KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL));
 }
 
@@ -467,7 +458,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cons
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,    \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
-      Kokkos::View<int*, LAYOUT, MEM_SPACE,                                    \
+      Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,                   \
       true,                                                                    \
       geqrf_eti_spec_avail<                                                    \
@@ -479,7 +470,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cons
                        Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
           Kokkos::View<int*, LAYOUT,                                           \
-                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
+                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType =                                                          \
         Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
@@ -513,6 +504,4 @@ KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER
 
-#endif  // AquiEEP
-
 #endif

From 145fe1032aaa6680eb0f61c589a291ba120ac4cf Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Fri, 24 May 2024 03:14:14 -0600
Subject: [PATCH 18/27] Formatting

---
 lapack/impl/KokkosLapack_geqrf_spec.hpp       |   6 +-
 lapack/src/KokkosLapack_geqrf.hpp             |  27 +++--
 .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 114 +++++++++---------
 lapack/unit_test/Test_Lapack_geqrf.hpp        |  14 +--
 4 files changed, 85 insertions(+), 76 deletions(-)

diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp
index 5410520c1c..89a253b796 100644
--- a/lapack/impl/KokkosLapack_geqrf_spec.hpp
+++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp
@@ -53,7 +53,7 @@ struct geqrf_eti_spec_avail {
       Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                             \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,        \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-      Kokkos::View<int*, LAYOUT_TYPE,                                      \
+      Kokkos::View<int *, LAYOUT_TYPE,                                     \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,        \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>> {             \
     enum : bool { value = true };                                          \
@@ -114,7 +114,7 @@ struct GEQRF<ExecutionSpace, AMatrix, TWArray, RType, false,
       Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                            \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
-      Kokkos::View<int*, LAYOUT_TYPE,                                     \
+      Kokkos::View<int *, LAYOUT_TYPE,                                    \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
       false, true>;
@@ -129,7 +129,7 @@ struct GEQRF<ExecutionSpace, AMatrix, TWArray, RType, false,
       Kokkos::View<SCALAR_TYPE *, LAYOUT_TYPE,                            \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
-      Kokkos::View<int*, LAYOUT_TYPE,                                     \
+      Kokkos::View<int *, LAYOUT_TYPE,                                    \
                    Kokkos::Device<EXEC_SPACE_TYPE, MEM_SPACE_TYPE>,       \
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>,              \
       false, true>;
diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
index 7a81818d79..4c920e9a74 100644
--- a/lapack/src/KokkosLapack_geqrf.hpp
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -60,7 +60,8 @@ namespace KokkosLapack {
 ///                                illegal value
 ///
 template <class ExecutionSpace, class AMatrix, class TArray, class InfoArray>
-void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, const InfoArray& Info) {
+void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau,
+           const InfoArray& Info) {
   // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and
   // rocSOLVER TPLs.
   //       MAGMA/rocSOLVER TPL should be enabled to call the MAGMA/rocSOLVER GPU
@@ -117,19 +118,23 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, con
   using AMatrix_Internal = Kokkos::View<
       typename AMatrix::non_const_value_type**, typename AMatrix::array_layout,
       typename AMatrix::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
-  using TArray_Internal = Kokkos::View<
-      typename TArray::non_const_value_type*, typename TArray::array_layout,
-      typename TArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
-  using InfoArray_Internal = Kokkos::View<
-      typename InfoArray::non_const_value_type*, typename InfoArray::array_layout,
-      typename InfoArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
-
-  AMatrix_Internal   A_i    = A;
-  TArray_Internal    Tau_i  = Tau;
+  using TArray_Internal =
+      Kokkos::View<typename TArray::non_const_value_type*,
+                   typename TArray::array_layout, typename TArray::device_type,
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
+  using InfoArray_Internal =
+      Kokkos::View<typename InfoArray::non_const_value_type*,
+                   typename InfoArray::array_layout,
+                   typename InfoArray::device_type,
+                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
+
+  AMatrix_Internal A_i      = A;
+  TArray_Internal Tau_i     = Tau;
   InfoArray_Internal Info_i = Info;
 
   KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TArray_Internal,
-                            InfoArray_Internal>::geqrf(space, A_i, Tau_i, Info_i);
+                            InfoArray_Internal>::geqrf(space, A_i, Tau_i,
+                                                       Info_i);
 }
 
 /// \brief Computes a QR factorization of a matrix A
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index db28ab541d..f3d3be4506 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -68,10 +68,10 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau,
         m, n, reinterpret_cast<std::complex<MagType>*>(A.data()), lda,
         reinterpret_cast<std::complex<MagType>*>(Tau.data()),
         reinterpret_cast<std::complex<MagType>*>(work.data()), lwork,
-	Info.data());
+        Info.data());
 
     if (Info[0] < 0) return;
-    
+
     lwork = static_cast<int>(work(0).real());
 
     work = Kokkos::View<Scalar*, memory_space>("geqrf work buffer", lwork);
@@ -80,10 +80,10 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau,
         m, n, reinterpret_cast<std::complex<MagType>*>(A.data()), lda,
         reinterpret_cast<std::complex<MagType>*>(Tau.data()),
         reinterpret_cast<std::complex<MagType>*>(work.data()), lwork,
-	Info.data());
+        Info.data());
   } else {
-    HostLapack<Scalar>::geqrf(m, n, A.data(), lda, Tau.data(),
-                                    work.data(), lwork,	Info.data());
+    HostLapack<Scalar>::geqrf(m, n, A.data(), lda, Tau.data(), work.data(),
+                              lwork, Info.data());
 
     if (Info[0] < 0) return;
 
@@ -92,7 +92,7 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau,
     work = Kokkos::View<Scalar*, memory_space>("geqrf work buffer", lwork);
 
     HostLapack<Scalar>::geqrf(m, n, A.data(), lda, Tau.data(), work.data(),
-			      lwork, Info.data());
+                              lwork, Info.data());
   }
 }
 
@@ -121,8 +121,9 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau,
     using TauViewType =                                                        \
         Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,    \
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
-    using InfoViewType = Kokkos::View<int*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>, \
-                               Kokkos::MemoryTraits<Kokkos::Unmanaged>>;       \
+    using InfoViewType =                                                       \
+        Kokkos::View<int*, LAYOUT, Kokkos::Device<EXECSPACE, MEM_SPACE>,       \
+                     Kokkos::MemoryTraits<Kokkos::Unmanaged>>;                 \
                                                                                \
     static void geqrf(const EXECSPACE& /* space */, const AViewType& A,        \
                       const TauViewType& Tau, const InfoViewType& Info) {      \
@@ -269,7 +270,7 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex<double>, Kokkos::LayoutLeft,
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_MAGMA
 
-#endif // AquiEEP
+#endif  // AquiEEP
 
 // CUSOLVER
 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER
@@ -278,19 +279,21 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex<double>, Kokkos::LayoutLeft,
 namespace KokkosLapack {
 namespace Impl {
 
-template <class ExecutionSpace, class AViewType, class TauViewType, class InfoViewType>
+template <class ExecutionSpace, class AViewType, class TauViewType,
+          class InfoViewType>
 void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
-			  const TauViewType& Tau, const InfoViewType& Info) {
+                          const TauViewType& Tau, const InfoViewType& Info) {
   using memory_space = typename AViewType::memory_space;
-  using Scalar = typename AViewType::non_const_value_type;
+  using Scalar       = typename AViewType::non_const_value_type;
 
   using ALayout_t = typename AViewType::array_layout;
-  static_assert(std::is_same_v<ALayout_t, Kokkos::LayoutLeft>,
-                "KokkosLapack - cusolver geqrf: A needs to have a Kokkos::LayoutLeft");
+  static_assert(
+      std::is_same_v<ALayout_t, Kokkos::LayoutLeft>,
+      "KokkosLapack - cusolver geqrf: A needs to have a Kokkos::LayoutLeft");
   const int m   = A.extent_int(0);
   const int n   = A.extent_int(1);
   const int lda = A.stride(1);
-  int lwork = 0;
+  int lwork     = 0;
 
   CudaLapackSingleton& s = CudaLapackSingleton::singleton();
   KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
@@ -298,44 +301,46 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
   if constexpr (std::is_same_v<Scalar, float>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
         cusolverDnSgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork));
-    Kokkos::View<float*, memory_space> Workspace("cusolver sgeqrf workspace", lwork);
+    Kokkos::View<float*, memory_space> Workspace("cusolver sgeqrf workspace",
+                                                 lwork);
 
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgeqrf(s.handle, m, n, A.data(),
-                                                    lda, Tau.data(),
-                                                    Workspace.data(), lwork, Info.data()));
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
+        cusolverDnSgeqrf(s.handle, m, n, A.data(), lda, Tau.data(),
+                         Workspace.data(), lwork, Info.data()));
   }
   if constexpr (std::is_same_v<Scalar, double>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
         cusolverDnDgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork));
-    Kokkos::View<double*, memory_space> Workspace("cusolver dgeqrf workspace", lwork);
+    Kokkos::View<double*, memory_space> Workspace("cusolver dgeqrf workspace",
+                                                  lwork);
 
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgeqrf(s.handle, m, n, A.data(),
-                                                    lda, Tau.data(),
-                                                    Workspace.data(), lwork, Info.data()));
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
+        cusolverDnDgeqrf(s.handle, m, n, A.data(), lda, Tau.data(),
+                         Workspace.data(), lwork, Info.data()));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<float>>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf_bufferSize(
         s.handle, m, n, reinterpret_cast<cuComplex*>(A.data()), lda, &lwork));
-    Kokkos::View<cuComplex*, memory_space> Workspace("cusolver cgeqrf workspace", lwork);
+    Kokkos::View<cuComplex*, memory_space> Workspace(
+        "cusolver cgeqrf workspace", lwork);
 
-    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(
-                         cusolverDnCgeqrf(s.handle, m, n, reinterpret_cast<cuComplex*>(A.data()), lda,
-                         reinterpret_cast<cuComplex*>(Tau.data()),
-                         reinterpret_cast<cuComplex*>(Workspace.data()),
-                         lwork, Info.data()));
+    KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf(
+        s.handle, m, n, reinterpret_cast<cuComplex*>(A.data()), lda,
+        reinterpret_cast<cuComplex*>(Tau.data()),
+        reinterpret_cast<cuComplex*>(Workspace.data()), lwork, Info.data()));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<double>>) {
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf_bufferSize(
         s.handle, m, n, reinterpret_cast<cuDoubleComplex*>(A.data()), lda,
         &lwork));
-    Kokkos::View<cuDoubleComplex*, memory_space> Workspace("cusolver zgeqrf workspace",
-                                                           lwork);
+    Kokkos::View<cuDoubleComplex*, memory_space> Workspace(
+        "cusolver zgeqrf workspace", lwork);
 
     KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf(
         s.handle, m, n, reinterpret_cast<cuDoubleComplex*>(A.data()), lda,
         reinterpret_cast<cuDoubleComplex*>(Tau.data()),
-        reinterpret_cast<cuDoubleComplex*>(Workspace.data()),
-        lwork, Info.data()));
+        reinterpret_cast<cuDoubleComplex*>(Workspace.data()), lwork,
+        Info.data()));
   }
   KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL));
 }
@@ -359,8 +364,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
           Kokkos::View<SCALAR*, LAYOUT,                                        \
                        Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<int*, LAYOUT,                                           \
-                       Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,                \
+          Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,  \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType = Kokkos::View<SCALAR**, LAYOUT,                           \
                                    Kokkos::Device<Kokkos::Cuda, MEM_SPACE>,    \
@@ -386,17 +390,17 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
 KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace)
 KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace)
 KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<float>, Kokkos::LayoutLeft,
-                           Kokkos::CudaSpace)
+                            Kokkos::CudaSpace)
 KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
-                           Kokkos::CudaSpace)
+                            Kokkos::CudaSpace)
 
 #if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE)
 KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace)
 KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace)
 KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<float>, Kokkos::LayoutLeft,
-                           Kokkos::CudaUVMSpace)
+                            Kokkos::CudaUVMSpace)
 KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
-                           Kokkos::CudaUVMSpace)
+                            Kokkos::CudaUVMSpace)
 #endif
 
 }  // namespace Impl
@@ -412,34 +416,35 @@ namespace KokkosLapack {
 namespace Impl {
 
 template <class ExecutionSpace, class AViewType, class TauViewType>
-void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau,
-                        const InfoViewType& Info) {
+void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
+                           const TauViewType& Tau, const InfoViewType& Info) {
   using Scalar = typename AViewType::non_const_value_type;
 
   using ALayout_t = typename AViewType::array_layout;
-  static_assert(std::is_same_v<ALayout_t, Kokkos::LayoutLeft>,
-                "KokkosLapack - rocsolver geqrf: A needs to have a Kokkos::LayoutLeft");
+  static_assert(
+      std::is_same_v<ALayout_t, Kokkos::LayoutLeft>,
+      "KokkosLapack - rocsolver geqrf: A needs to have a Kokkos::LayoutLeft");
   const rocblas_int m   = static_cast<rocblas_int>(A.extent(0));
   const rocblas_int n   = static_cast<rocblas_int>(A.extent(1));
   const rocblas_int lda = static_cast<rocblas_int>(A.stride(1));
-  rocblas_status rc = rocblas_status_success;
+  rocblas_status rc     = rocblas_status_success;
 
   KokkosBlas::Impl::RocBlasSingleton& s =
       KokkosBlas::Impl::RocBlasSingleton::singleton();
   KOKKOS_ROCBLAS_SAFE_CALL_IMPL(
       rocblas_set_stream(s.handle, space.hip_stream()));
   if constexpr (std::is_same_v<Scalar, float>) {
-    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgeqrf(s.handle, m, n, A.data(),
-                                                  lda, Tau.data()));
+    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(
+        rocsolver_sgeqrf(s.handle, m, n, A.data(), lda, Tau.data()));
   }
   if constexpr (std::is_same_v<Scalar, double>) {
-    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgeqrf(s.handle, m, n, A.data(),
-                                                  lda, Tau.data()));
+    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(
+        rocsolver_dgeqrf(s.handle, m, n, A.data(), lda, Tau.data()));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<float>>) {
     rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf(
-        s.handle, m, n, reinterpret_cast<rocblas_float_complex*>(A.data()),
-        lda, reinterpret_cast<rocblas_float_complex*>(Tau.data())));
+        s.handle, m, n, reinterpret_cast<rocblas_float_complex*>(A.data()), lda,
+        reinterpret_cast<rocblas_float_complex*>(Tau.data())));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<double>>) {
     rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf(
@@ -469,8 +474,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cons
           Kokkos::View<SCALAR*, LAYOUT,                                        \
                        Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>,               \
-          Kokkos::View<int*, LAYOUT,                                           \
-                       Kokkos::Device<Kokkos::HIP, MEM_SPACE>,                 \
+          Kokkos::View<int*, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>,   \
                        Kokkos::MemoryTraits<Kokkos::Unmanaged>>>::value> {     \
     using AViewType =                                                          \
         Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<Kokkos::HIP, MEM_SPACE>, \
@@ -488,7 +492,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cons
           "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]");                   \
       geqrf_print_specialization<AViewType, TauViewType, InfoViewType>();      \
                                                                                \
-      rocsolverGeqrfWrapper(space, A, Tau, Info);		               \
+      rocsolverGeqrfWrapper(space, A, Tau, Info);                              \
       Kokkos::Profiling::popRegion();                                          \
     }                                                                          \
   };
@@ -496,9 +500,9 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cons
 KOKKOSLAPACK_GEQRF_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace)
 KOKKOSLAPACK_GEQRF_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace)
 KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex<float>, Kokkos::LayoutLeft,
-                            Kokkos::HIPSpace)
+                             Kokkos::HIPSpace)
 KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
-                            Kokkos::HIPSpace)
+                             Kokkos::HIPSpace)
 
 }  // namespace Impl
 }  // namespace KokkosLapack
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index 3aedb0b984..f619c8fba3 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -85,7 +85,7 @@ void getQR(int const m, int const n,
 
 template <class ViewTypeA, class ViewTypeTau, class Device>
 void impl_test_geqrf(int m, int n) {
-  using ViewTypeInfo = Kokkos::View<int*, Kokkos::LayoutLeft, Device>;
+  using ViewTypeInfo    = Kokkos::View<int*, Kokkos::LayoutLeft, Device>;
   using execution_space = typename Device::execution_space;
   using ScalarA         = typename ViewTypeA::value_type;
   // using ats             = Kokkos::ArithTraits<ScalarA>;
@@ -97,15 +97,15 @@ void impl_test_geqrf(int m, int n) {
   int minMN(std::min(m, n));
 
   // Create device views
-  ViewTypeA    A   ("A", m, n);
-  ViewTypeTau  Tau ("Tau", minMN);
+  ViewTypeA A("A", m, n);
+  ViewTypeTau Tau("Tau", minMN);
   ViewTypeInfo Info("Info", 1);
 
   // Create host mirrors of device views.
-  typename ViewTypeA::HostMirror    h_A     = Kokkos::create_mirror_view(A);
-  typename ViewTypeA::HostMirror    h_Aorig = Kokkos::create_mirror_view(A);
-  typename ViewTypeTau::HostMirror  h_tau   = Kokkos::create_mirror_view(Tau);
-  typename ViewTypeInfo::HostMirror h_info  = Kokkos::create_mirror_view(Info);
+  typename ViewTypeA::HostMirror h_A       = Kokkos::create_mirror_view(A);
+  typename ViewTypeA::HostMirror h_Aorig   = Kokkos::create_mirror_view(A);
+  typename ViewTypeTau::HostMirror h_tau   = Kokkos::create_mirror_view(Tau);
+  typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info);
 
   // Initialize data.
   if ((m == 3) && (n == 3)) {

From 1608cf6c7723bf187b3a2a3d542e810e94237f2c Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Fri, 24 May 2024 04:16:27 -0600
Subject: [PATCH 19/27] Backup

---
 lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index f3d3be4506..fa42f81591 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -415,7 +415,7 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
 namespace KokkosLapack {
 namespace Impl {
 
-template <class ExecutionSpace, class AViewType, class TauViewType>
+  template <class ExecutionSpace, class AViewType, class TauViewType, class InfoViewType>
 void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
                            const TauViewType& Tau, const InfoViewType& Info) {
   using Scalar = typename AViewType::non_const_value_type;
@@ -427,31 +427,30 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
   const rocblas_int m   = static_cast<rocblas_int>(A.extent(0));
   const rocblas_int n   = static_cast<rocblas_int>(A.extent(1));
   const rocblas_int lda = static_cast<rocblas_int>(A.stride(1));
-  rocblas_status rc     = rocblas_status_success;
 
   KokkosBlas::Impl::RocBlasSingleton& s =
       KokkosBlas::Impl::RocBlasSingleton::singleton();
   KOKKOS_ROCBLAS_SAFE_CALL_IMPL(
       rocblas_set_stream(s.handle, space.hip_stream()));
   if constexpr (std::is_same_v<Scalar, float>) {
-    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(
+    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(
         rocsolver_sgeqrf(s.handle, m, n, A.data(), lda, Tau.data()));
   }
   if constexpr (std::is_same_v<Scalar, double>) {
-    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(
+    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(
         rocsolver_dgeqrf(s.handle, m, n, A.data(), lda, Tau.data()));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<float>>) {
-    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf(
+    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf(
         s.handle, m, n, reinterpret_cast<rocblas_float_complex*>(A.data()), lda,
         reinterpret_cast<rocblas_float_complex*>(Tau.data())));
   }
   if constexpr (std::is_same_v<Scalar, Kokkos::complex<double>>) {
-    rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf(
+    KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf(
         s.handle, m, n, reinterpret_cast<rocblas_double_complex*>(A.data()),
         lda, reinterpret_cast<rocblas_double_complex*>(Tau.data())));
   }
-  Info[0] = static_cast<int>(rc);
+  Info[0] = 0; // success
   KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL));
 }
 

From df805105c7fa318f9a9db20cad8fbebcf0d57898 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Fri, 24 May 2024 04:18:06 -0600
Subject: [PATCH 20/27] Formatting

---
 lapack/tpls/KokkosLapack_Host_tpl.cpp | 21 +++++++++++----------
 lapack/tpls/KokkosLapack_Host_tpl.hpp |  2 +-
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp
index 17bc2915a4..f72d781e5b 100644
--- a/lapack/tpls/KokkosLapack_Host_tpl.cpp
+++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp
@@ -149,7 +149,7 @@ int HostLapack<float>::trtri(const char uplo, const char diag, int n,
 }
 template <>
 void HostLapack<float>::geqrf(int m, int n, float* a, int lda, float* tau,
-			      float* work, int lwork, int *info) {
+                              float* work, int lwork, int* info) {
   F77_FUNC_SGEQRF(&m, &n, a, &lda, tau, work, &lwork, info);
 }
 
@@ -180,7 +180,7 @@ int HostLapack<double>::trtri(const char uplo, const char diag, int n,
 }
 template <>
 void HostLapack<double>::geqrf(int m, int n, double* a, int lda, double* tau,
-                              double* work, int lwork, int *info) {
+                               double* work, int lwork, int* info) {
   F77_FUNC_DGEQRF(&m, &n, a, &lda, tau, work, &lwork, info);
 }
 
@@ -213,10 +213,11 @@ int HostLapack<std::complex<float>>::trtri(const char uplo, const char diag,
   return info;
 }
 template <>
-void HostLapack<std::complex<float>>::geqrf(int m, int n, std::complex<float>* a,
-                                           int lda, std::complex<float>* tau,
-                                           std::complex<float>* work,
-                                           int lwork, int *info) {
+void HostLapack<std::complex<float>>::geqrf(int m, int n,
+                                            std::complex<float>* a, int lda,
+                                            std::complex<float>* tau,
+                                            std::complex<float>* work,
+                                            int lwork, int* info) {
   F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, info);
 }
 
@@ -251,10 +252,10 @@ int HostLapack<std::complex<double>>::trtri(const char uplo, const char diag,
 }
 template <>
 void HostLapack<std::complex<double>>::geqrf(int m, int n,
-                                            std::complex<double>* a, int lda,
-                                            std::complex<double>* tau,
-                                            std::complex<double>* work,
-                                            int lwork, int *info) {
+                                             std::complex<double>* a, int lda,
+                                             std::complex<double>* tau,
+                                             std::complex<double>* work,
+                                             int lwork, int* info) {
   F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, info);
 }
 
diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp
index 8797d2006c..23f6dbc3d6 100644
--- a/lapack/tpls/KokkosLapack_Host_tpl.hpp
+++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp
@@ -43,7 +43,7 @@ struct HostLapack {
                    int lda);
 
   static void geqrf(int m, int n, T *a, int lda, T *tau, T *work, int lwork,
-		    int *info);
+                    int *info);
 };
 }  // namespace Impl
 }  // namespace KokkosLapack

From 29472f54233fb941062393492fd0c17997fa0948 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Fri, 24 May 2024 04:19:07 -0600
Subject: [PATCH 21/27] Formatting

---
 lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index fa42f81591..7c54a358ff 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -415,7 +415,8 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex<double>, Kokkos::LayoutLeft,
 namespace KokkosLapack {
 namespace Impl {
 
-  template <class ExecutionSpace, class AViewType, class TauViewType, class InfoViewType>
+template <class ExecutionSpace, class AViewType, class TauViewType,
+          class InfoViewType>
 void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
                            const TauViewType& Tau, const InfoViewType& Info) {
   using Scalar = typename AViewType::non_const_value_type;
@@ -450,7 +451,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
         s.handle, m, n, reinterpret_cast<rocblas_double_complex*>(A.data()),
         lda, reinterpret_cast<rocblas_double_complex*>(Tau.data())));
   }
-  Info[0] = 0; // success
+  Info[0] = 0;  // success
   KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL));
 }
 

From 746fa3c1050dc997589a6acabe5ea73c5405419c Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Fri, 24 May 2024 12:40:10 -0600
Subject: [PATCH 22/27] Backup

---
 lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index 7c54a358ff..d9f88549aa 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -451,7 +451,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A,
         s.handle, m, n, reinterpret_cast<rocblas_double_complex*>(A.data()),
         lda, reinterpret_cast<rocblas_double_complex*>(Tau.data())));
   }
-  Info[0] = 0;  // success
+  Kokkos::deep_copy(Info, 0);  // Success
   KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL));
 }
 

From 8f0a9079e4daf6100b69051a4a4db706e3b3c577 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Sat, 25 May 2024 17:00:11 -0600
Subject: [PATCH 23/27] Backup

---
 lapack/src/KokkosLapack_geqrf.hpp             |  44 +--
 .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp |   4 +-
 lapack/unit_test/Test_Lapack_geqrf.hpp        | 370 ++++++++++++++----
 3 files changed, 311 insertions(+), 107 deletions(-)

diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
index 4c920e9a74..a81ae2a436 100644
--- a/lapack/src/KokkosLapack_geqrf.hpp
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -34,7 +34,7 @@ namespace KokkosLapack {
 ///
 /// \tparam ExecutionSpace The space where the kernel will run.
 /// \tparam AMatrix        Type of matrix A, as a 2-D Kokkos::View.
-/// \tparam TArray         Type of array Tau, as a 1-D Kokkos::View.
+/// \tparam TauArray       Type of array Tau, as a 1-D Kokkos::View.
 /// \tparam InfoArray      Type of array Info, as a 1-D Kokkos::View.
 ///
 /// \param space [in] Execution space instance used to specified how to execute
@@ -48,10 +48,9 @@ namespace KokkosLapack {
 ///                   is represented as a product of elementary reflectors
 ///                     Q = H(1) H(2) . . . H(k), where k = min(M,N).
 ///                   Each H(i) has the form
-///                     H(i) = I - Tau * v * v**H
-///                   where tau is a complex scalar, and v is a complex vector
-///                   with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on
-///                   exit in A(i+1:M,i), and tau in Tau(i).
+///                     H(i) = I - Tau(i) * v * v**H,
+///                   where v is a vector with v(1:i-1) = 0 and v(i) = 1;
+///                   v(i+1:M) is stored on exit in A(i+1:M,i).
 /// \param Tau [out]  One-dimensional array of size min(M,N) that contains the
 ///                   scalar factors of the elementary reflectors.
 /// \param Info [out] One-dimensional array of integers and of size 1:
@@ -59,8 +58,8 @@ namespace KokkosLapack {
 ///                   Info[0] < 0: if equal to '-i', the i-th argument had an
 ///                                illegal value
 ///
-template <class ExecutionSpace, class AMatrix, class TArray, class InfoArray>
-void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau,
+template <class ExecutionSpace, class AMatrix, class TauArray, class InfoArray>
+void geqrf(const ExecutionSpace& space, const AMatrix& A, const TauArray& Tau,
            const InfoArray& Info) {
   // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and
   // rocSOLVER TPLs.
@@ -73,21 +72,21 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau,
                                  typename AMatrix::memory_space>::accessible);
   static_assert(
       Kokkos::SpaceAccessibility<ExecutionSpace,
-                                 typename TArray::memory_space>::accessible);
+                                 typename TauArray::memory_space>::accessible);
   static_assert(
       Kokkos::SpaceAccessibility<ExecutionSpace,
                                  typename InfoArray::memory_space>::accessible);
 
   static_assert(Kokkos::is_view<AMatrix>::value,
                 "KokkosLapack::geqrf: A must be a Kokkos::View.");
-  static_assert(Kokkos::is_view<TArray>::value,
+  static_assert(Kokkos::is_view<TauArray>::value,
                 "KokkosLapack::geqrf: Tau must be Kokkos::View.");
   static_assert(Kokkos::is_view<InfoArray>::value,
                 "KokkosLapack::geqrf: Info must be Kokkos::View.");
 
   static_assert(static_cast<int>(AMatrix::rank) == 2,
                 "KokkosLapack::geqrf: A must have rank 2.");
-  static_assert(static_cast<int>(TArray::rank) == 1,
+  static_assert(static_cast<int>(TauArray::rank) == 1,
                 "KokkosLapack::geqrf: Tau must have rank 1.");
   static_assert(static_cast<int>(InfoArray::rank) == 1,
                 "KokkosLapack::geqrf: Info must have rank 1.");
@@ -118,9 +117,9 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau,
   using AMatrix_Internal = Kokkos::View<
       typename AMatrix::non_const_value_type**, typename AMatrix::array_layout,
       typename AMatrix::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
-  using TArray_Internal =
-      Kokkos::View<typename TArray::non_const_value_type*,
-                   typename TArray::array_layout, typename TArray::device_type,
+  using TauArray_Internal =
+      Kokkos::View<typename TauArray::non_const_value_type*,
+                   typename TauArray::array_layout, typename TauArray::device_type,
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
   using InfoArray_Internal =
       Kokkos::View<typename InfoArray::non_const_value_type*,
@@ -128,11 +127,11 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau,
                    typename InfoArray::device_type,
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
 
-  AMatrix_Internal A_i      = A;
-  TArray_Internal Tau_i     = Tau;
+  AMatrix_Internal   A_i    = A;
+  TauArray_Internal  Tau_i  = Tau;
   InfoArray_Internal Info_i = Info;
 
-  KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TArray_Internal,
+  KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TauArray_Internal,
                             InfoArray_Internal>::geqrf(space, A_i, Tau_i,
                                                        Info_i);
 }
@@ -140,7 +139,7 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau,
 /// \brief Computes a QR factorization of a matrix A
 ///
 /// \tparam AMatrix   Type of matrix A, as a 2-D Kokkos::View.
-/// \tparam TArray    Type of array Tau, as a 1-D Kokkos::View.
+/// \tparam TauArray  Type of array Tau, as a 1-D Kokkos::View.
 /// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View.
 ///
 /// \param A [in,out] On entry, the M-by-N matrix to be factorized.
@@ -152,10 +151,9 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau,
 ///                   is represented as a product of elementary reflectors
 ///                     Q = H(1) H(2) . . . H(k), where k = min(M,N).
 ///                   Each H(i) has the form
-///                     H(i) = I - Tau * v * v**H
-///                   where tau is a complex scalar, and v is a complex vector
-///                   with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on
-///                   exit in A(i+1:M,i), and tau in Tau(i).
+///                     H(i) = I - Tau(i) * v * v**H,
+///                   where v is a vector with v(1:i-1) = 0 and v(i) = 1;
+///                   v(i+1:M) is stored on exit in A(i+1:M,i).
 /// \param Tau [out]  One-dimensional array of size min(M,N) that contains the
 ///                   scalar factors of the elementary reflectors.
 /// \param Info [out] One-dimensional array of integers and of size 1:
@@ -163,8 +161,8 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau,
 ///                   Info[0] < 0: if equal to '-i', the i-th argument had an
 ///                                illegal value
 ///
-template <class AMatrix, class TArray, class InfoArray>
-void geqrf(const AMatrix& A, const TArray& Tau, const InfoArray& Info) {
+template <class AMatrix, class TauArray, class InfoArray>
+void geqrf(const AMatrix& A, const TauArray& Tau, const InfoArray& Info) {
   typename AMatrix::execution_space space{};
   geqrf(space, A, Tau, Info);
 }
diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
index d9f88549aa..c7630cc783 100644
--- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
+++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp
@@ -172,7 +172,7 @@ KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex<double>, Kokkos::LayoutLeft,
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_LAPACK
 
-#if 0  // AquiEEP
+#if 0  // TO DO
 
 // MAGMA
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA
@@ -270,7 +270,7 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex<double>, Kokkos::LayoutLeft,
 }  // namespace KokkosLapack
 #endif  // KOKKOSKERNELS_ENABLE_TPL_MAGMA
 
-#endif  // AquiEEP
+#endif  // TO DO
 
 // CUSOLVER
 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index f619c8fba3..2a4533b8bc 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -24,63 +24,127 @@
      (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \
       defined(TEST_THREADS_LAPACK_CPP)))
 
-// AquiEEP
-
 #include <gtest/gtest.h>
 #include <Kokkos_Core.hpp>
 #include <Kokkos_Random.hpp>
 
+#include <KokkosBlas2_ger.hpp>
+#include <KokkosBlas3_gemm.hpp>
 #include <KokkosLapack_geqrf.hpp>
-//#include <KokkosBlas2_gemv.hpp>
-//#include <KokkosBlas3_gemm.hpp>
 #include <KokkosKernels_TestUtils.hpp>
 
 namespace Test {
 
 template <class ViewTypeA, class ViewTypeTau>
 void getQR(int const m, int const n,
-           typename ViewTypeA::HostMirror const&  // h_A
-           ,
-           typename ViewTypeTau::HostMirror const&  // h_tau
-           ,
-           typename ViewTypeA::HostMirror&  // h_Q
-           ,
+           typename ViewTypeA::HostMirror const& h_A,
+           typename ViewTypeTau::HostMirror const& h_tau,
+           typename ViewTypeA::HostMirror& h_Q,
            typename ViewTypeA::HostMirror& h_R,
-           typename ViewTypeA::HostMirror&  // h_QR
+           typename ViewTypeA::HostMirror& h_QR
 ) {
   using ScalarA = typename ViewTypeA::value_type;
 
+  // Populate h_R
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
-      if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
-        h_R(i, j).real() = 0.;
-        h_R(i, j).imag() = 0.;
-      } else {
-        h_R(i, j) = 0.;
+      if ((i <= j) && (i < n)) {
+        h_R(i,j) = h_A(i,j);
+      }
+      else {
+        h_R(i,j) = Kokkos::ArithTraits<ScalarA>::zero();
       }
     }
   }
 
+  // Instantiate the identity matrix
   ViewTypeA I("I", m, m);
   typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I);
+  Kokkos::deep_copy(h_I,Kokkos::ArithTraits<ScalarA>::zero());
   for (int i(0); i < m; ++i) {
-    for (int j(0); j < m; ++j) {
-      if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
-        if (i == j) {
-          h_I(i, j).real() = 1.;
-        } else {
-          h_I(i, j).real() = 0.;
-        }
-        h_I(i, j).imag() = 0.;
-      } else {
-        if (i == j) {
-          h_I(i, j) = 1.;
-        } else {
-          h_I(i, j) = 0.;
-        }
-      }
+    if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
+      h_I(i,i).real() = 1.;
+    } else {
+      h_I(i,i) = 1.;
     }
   }
+
+  // Populate h_Q
+  int minMN(std::min(m, n));
+  ViewTypeTau v("v", m);
+  typename ViewTypeTau::HostMirror h_v = Kokkos::create_mirror_view(v);
+
+  ViewTypeA Qk("Qk", m, m);
+  typename ViewTypeA::HostMirror h_Qk = Kokkos::create_mirror_view(Qk);
+
+  ViewTypeA auxM("auxM", m, m);
+  typename ViewTypeA::HostMirror h_auxM = Kokkos::create_mirror_view(auxM);
+
+  // Q = H(0) H(1) . . . H(min(M,N)-1), where for k=0,1,...,min(m,n)-1:
+  //   H(k) = I - Tau(k) * v * v**H, and
+  //   v is a vector of size m with:
+  //     v(0:k-1) = 0,
+  //     v(k)     = 1,
+  //     v(k+1:m-1) = A(k+1:m-1,k).
+  for (int k(0); k < minMN; ++k) {
+    Kokkos::deep_copy(h_v,Kokkos::ArithTraits<ScalarA>::zero());
+    h_v[k] = 1.;
+    for (int index(k+1); index < minMN; ++index) {
+      h_v[index] = h_A(index,k);
+    }
+
+    // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}.
+    // void ger( const char                                   trans[]
+    //         , const typename AViewType::const_value_type & alpha
+    //         , const XViewType                            & x
+    //         , const YViewType                            & y
+    //         , const AViewType                            & A
+    //         );
+    Kokkos::deep_copy(h_Qk, h_I);
+    KokkosBlas::ger( "H"
+                   , -h_tau[k]
+                   , h_v
+                   , h_v
+                   , h_Qk
+                   );
+
+    // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B).
+    // void gemm( const char                             transA[]
+    //          , const char                             transB[]
+    //          , typename AViewType::const_value_type & alpha
+    //          , const AViewType                      & A
+    //          , const BViewType                      & B
+    //          , typename CViewType::const_value_type & beta
+    //          , const CViewType                      & C
+    //          );
+    if (k == 0) {
+      Kokkos::deep_copy(h_Q, h_Qk);
+    }
+    else {
+      Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits<ScalarA>::zero());
+      KokkosBlas::gemm( "N"
+                      , "N"
+                      , 1.
+                      , h_Q
+                      , h_Qk
+                      , 0.
+                      , h_auxM
+                      );
+      Kokkos::deep_copy(h_Q, h_auxM);
+    }
+  } // for k
+
+  Kokkos::deep_copy(h_QR, Kokkos::ArithTraits<ScalarA>::zero());
+  KokkosBlas::gemm( "N"
+                  , "N"
+                  , 1.
+                  , h_Q
+                  , h_R
+                  , 0.
+                  , h_QR
+                  );
+
+  // AquiEEP: test Q^H Q = I
 }
 
 template <class ViewTypeA, class ViewTypeTau, class Device>
@@ -88,7 +152,7 @@ void impl_test_geqrf(int m, int n) {
   using ViewTypeInfo    = Kokkos::View<int*, Kokkos::LayoutLeft, Device>;
   using execution_space = typename Device::execution_space;
   using ScalarA         = typename ViewTypeA::value_type;
-  // using ats             = Kokkos::ArithTraits<ScalarA>;
+  using ats             = Kokkos::ArithTraits<ScalarA>;
 
   execution_space space{};
 
@@ -97,13 +161,14 @@ void impl_test_geqrf(int m, int n) {
   int minMN(std::min(m, n));
 
   // Create device views
-  ViewTypeA A("A", m, n);
-  ViewTypeTau Tau("Tau", minMN);
-  ViewTypeInfo Info("Info", 1);
+  ViewTypeA    A    ("A", m, n);
+  ViewTypeA    Aorig("Aorig", m, n);
+  ViewTypeTau  Tau  ("Tau", minMN);
+  ViewTypeInfo Info ("Info", 1);
 
   // Create host mirrors of device views.
   typename ViewTypeA::HostMirror h_A       = Kokkos::create_mirror_view(A);
-  typename ViewTypeA::HostMirror h_Aorig   = Kokkos::create_mirror_view(A);
+  typename ViewTypeA::HostMirror h_Aorig   = Kokkos::create_mirror_view(Aorig);
   typename ViewTypeTau::HostMirror h_tau   = Kokkos::create_mirror_view(Tau);
   typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info);
 
@@ -124,7 +189,7 @@ void impl_test_geqrf(int m, int n) {
 
       for (int i(0); i < m; ++i) {
         for (int j(0); j < n; ++j) {
-          h_A(i, j).imag() = 0.;
+          h_A(i,j).imag() = 0.;
         }
       }
     } else {
@@ -151,10 +216,10 @@ void impl_test_geqrf(int m, int n) {
 
   Kokkos::deep_copy(h_Aorig, h_A);
 
-#if 1  // def HAVE_KOKKOSKERNELS_DEBUG
+#ifdef HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
-      std::cout << "A(" << i << "," << j << ") = " << h_A(i, j) << std::endl;
+      std::cout << "Aorig(" << i << "," << j << ") = " << h_A(i,j) << std::endl;
     }
   }
 #endif
@@ -180,18 +245,86 @@ void impl_test_geqrf(int m, int n) {
   Kokkos::deep_copy(h_A, A);
   Kokkos::deep_copy(h_tau, Tau);
 
-#if 1  // def HAVE_KOKKOSKERNELS_DEBUG
+#ifdef HAVE_KOKKOSKERNELS_DEBUG
   std::cout << "info[0] = " << h_info[0] << std::endl;
   for (int i(0); i < minMN; ++i) {
     for (int j(0); j < n; ++j) {
-      std::cout << "R(" << i << "," << j << ") = " << h_A(i, j) << std::endl;
+      std::cout << "Aoutput(" << i << "," << j << ") = " << std::setprecision(16) << h_A(i,j) << std::endl;
     }
   }
   for (int i(0); i < minMN; ++i) {
-    std::cout << "tau(" << i << ") = " << h_tau[i] << std::endl;
+    std::cout << "tau(" << i << ") = " << h_tau[i] << std::setprecision(16) << std::endl;
   }
 #endif
 
+  const typename Kokkos::ArithTraits<typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8);
+
+  if ((m == 3) && (n == 3)) {
+    std::vector<std::vector<ScalarA>> refMatrix(m);
+    for (int i(0); i < m; ++i) {
+      refMatrix[i].resize(n,Kokkos::ArithTraits<ScalarA>::zero());
+    }
+
+    std::vector<ScalarA> refTau(m,Kokkos::ArithTraits<ScalarA>::zero());
+
+    if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
+      refMatrix[0][0].real() = -14.;
+      refMatrix[0][1].real() = -21.;
+      refMatrix[0][2].real() = 14.;
+
+      refMatrix[1][0].real() = 0.2307692307692308;
+      refMatrix[1][1].real() = -175.;
+      refMatrix[1][2].real() = 70.;
+
+      refMatrix[2][0].real() = -0.1538461538461539;
+      refMatrix[2][1].real() = 1./18.;
+      refMatrix[2][2].real() = -35.;
+
+      refTau[0].real() = 1.857142857142857;
+      refTau[1].real() = 1.993846153846154;
+      refTau[2].real() = 0.;
+    }
+    else {
+      refMatrix[0][0] = -14.;
+      refMatrix[0][1] = -21.;
+      refMatrix[0][2] = 14.;
+
+      refMatrix[1][0] = 0.2307692307692308;
+      refMatrix[1][1] = -175.;
+      refMatrix[1][2] = 70.;
+
+      refMatrix[2][0] = -0.1538461538461539;
+      refMatrix[2][1] = 1./18.;
+      refMatrix[2][2] = -35.;
+
+      refTau[0] = 1.857142857142857;
+      refTau[1] = 1.993846153846154;
+      refTau[2] = 0.;
+    }
+
+    {
+      bool test_flag_A = true;
+      for (int i(0); (i < m) && test_flag_A; ++i) {
+        for (int j(0); (j < n) && test_flag_A; ++j) {
+          if (ats::abs(h_A(i,j) - refMatrix[i][j]) > absTol) {
+            test_flag_A = false;
+          }
+        }
+      }
+      ASSERT_EQ(test_flag_A, true);
+    }
+
+    {
+      bool test_flag_tau = true;
+      for (int i(0); (i < m) && test_flag_tau; ++i) {
+        if (ats::abs(h_tau[i] - refTau[i]) > absTol) {
+          test_flag_tau = false;
+        }
+      }
+      ASSERT_EQ(test_flag_tau, true);
+    }
+  }
+
   ViewTypeA Q("Q", m, m);
   ViewTypeA R("R", m, n);
   ViewTypeA QR("QR", m, n);
@@ -202,65 +335,135 @@ void impl_test_geqrf(int m, int n) {
 
   getQR<ViewTypeA, ViewTypeTau>(m, n, h_A, h_tau, h_Q, h_R, h_QR);
 
-#if 1  // def HAVE_KOKKOSKERNELS_DEBUG
+#ifdef HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < m; ++j) {
-      std::cout << "Q(" << i << "," << j << ") = " << h_Q(i, j) << std::endl;
+      std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl;
     }
   }
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
-      std::cout << "R(" << i << "," << j << ") = " << h_R(i, j) << std::endl;
+      std::cout << "R(" << i << "," << j << ") = " << h_R(i,j) << std::endl;
     }
   }
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
-      std::cout << "QR(" << i << "," << j << ") = " << h_QR(i, j) << std::endl;
+      std::cout << "QR(" << i << "," << j << ") = " << h_QR(i,j) << std::endl;
     }
   }
 #endif
 
   if ((m == 3) && (n == 3)) {
-  }
+    std::vector<std::vector<ScalarA>> refQ(m);
+    for (int i(0); i < m; ++i) {
+      refQ[i].resize(n,Kokkos::ArithTraits<ScalarA>::zero());
+    }
 
-  // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B).
-  // void gemm( const execution_space                & space
-  //          , const char                             transA[]
-  //          , const char                             transB[]
-  //          , typename AViewType::const_value_type & alpha
-  //          , const AViewType                      & A
-  //          , const BViewType                      & B
-  //          , typename CViewType::const_value_type & beta
-  //          , const CViewType                      & C
-  //          );
-
-  // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}.
-  // void ger( const ExecutionSpace                       & space
-  //         , const char                                   trans[]
-  //         , const typename AViewType::const_value_type & alpha
-  //         , const XViewType                            & x
-  //         , const YViewType                            & y
-  //         , const AViewType                            & A
-  //         );
-
-  // Checking vs ref on CPU, this eps is about 10^-9
-  // typedef typename ats::mag_type mag_type;
-  // const mag_type eps = 3.0e7 * ats::epsilon();
-  bool test_flag = true;
-  for (int i = 0; i < n; i++) {
-#if 0
-    if (ats::abs(h_B(i) - h_X0(i)) > eps) {
-      test_flag = false;
-      printf(
-          "    Error %d, pivot %c, padding %c: result( %.15lf ) !="
-          "solution( %.15lf ) at (%d), error=%.15e, eps=%.15e\n",
-          N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), int(i),
-          ats::abs(h_B(i) - h_X0(i)), eps);
-      break;
+    std::vector<std::vector<ScalarA>> refR(m);
+    for (int i(0); i < m; ++i) {
+      refR[i].resize(n,Kokkos::ArithTraits<ScalarA>::zero());
     }
+
+#if 0
+    Q = [ -6/7     69/175   58/175
+          -3/7   -158/175   -6/175
+           2/7     -6/35    33/35 ]
+
+    R = [ -14   -21   14
+           0   -175   70
+           0      0  -35 ]
 #endif
+
+    if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
+      refQ[0][0].real() = -6./7.;
+      refQ[0][1].real() = 69./175.;
+      refQ[0][2].real() = 58./175.;
+
+      refQ[1][0].real() = -3./7.;
+      refQ[1][1].real() = -158./175.;
+      refQ[1][2].real() = -6./175.;
+
+      refQ[2][0].real() = 2./7.;
+      refQ[2][1].real() = -6./35.;
+      refQ[2][2].real() = 33./35.;
+
+      refR[0][0].real() = -14.;
+      refR[0][1].real() = -21.;
+      refR[0][2].real() = 14.;
+
+      refR[1][1].real() = -175.;
+      refR[1][2].real() = 70.;
+
+      refR[2][2].real() = -35.;
+    }
+    else {
+      refQ[0][0] = -6./7.;
+      refQ[0][1] = 69./175.;
+      refQ[0][2] = 58./175.;
+
+      refQ[1][0] = -3./7.;
+      refQ[1][1] = -158./175.;
+      refQ[1][2] = -6./175.;
+
+      refQ[2][0] = 2./7.;
+      refQ[2][1] = -6./35.;
+      refQ[2][2] = 33./35.;
+
+      refR[0][0] = -14.;
+      refR[0][1] = -21.;
+      refR[0][2] = 14.;
+
+      refR[1][1] = -175.;
+      refR[1][2] = 70.;
+
+      refR[2][2] = -35.;
+    }
+
+    {
+      bool test_flag_Q = true;
+      for (int i(0); (i < m) && test_flag_Q; ++i) {
+        for (int j(0); (j < n) && test_flag_Q; ++j) {
+          if (ats::abs(h_Q(i,j) - refQ[i][j]) > absTol) {
+            test_flag_Q = false;
+          }
+        }
+      }
+      ASSERT_EQ(test_flag_Q, true);
+    }
+
+    {
+      bool test_flag_R = true;
+      for (int i(0); (i < m) && test_flag_R; ++i) {
+        for (int j(0); (j < n) && test_flag_R; ++j) {
+          if (ats::abs(h_R(i,j) - refR[i][j]) > absTol) {
+            test_flag_R = false;
+          }
+        }
+      }
+      ASSERT_EQ(test_flag_R, true);
+    }
+  }
+
+  {
+    bool test_flag_QR = true;
+    for (int i(0); (i < m) && test_flag_QR; ++i) {
+      for (int j(0); (j < n) && test_flag_QR; ++j) {
+        if (ats::abs(h_QR(i,j) - h_Aorig(i,j)) > absTol) {
+          std::cout << "m = " << m
+                    << ", n = " << n
+                    << ", i = " << i
+                    << ", j = " << j
+                    << ", h_Aorig(i,j) = " << std::setprecision(16) << h_Aorig(i,j)
+                    << ", h_QR(i,j) = "    << std::setprecision(16) << h_QR(i,j)
+                    << ", |diff| = "       << std::setprecision(16) << ats::abs(h_QR(i,j) - h_Aorig(i,j))
+                    << ", absTol = "       << std::setprecision(16) << absTol
+                    << std::endl;
+          test_flag_QR = false;
+        }
+      }
+    }
+    ASSERT_EQ(test_flag_QR, true);
   }
-  ASSERT_EQ(test_flag, true);
 }
 
 }  // namespace Test
@@ -274,6 +477,9 @@ void test_geqrf() {
   using view_type_tau_ll = Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device>;
 
   Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(3, 3);
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(100, 100);
+  //Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(100, 70); // AquiEEP
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(70, 100);
 #endif
 }
 

From fad256076753c6f53165359d83b68ddc5e49029e Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Sat, 25 May 2024 18:01:51 -0600
Subject: [PATCH 24/27] Backup

---
 lapack/unit_test/Test_Lapack_geqrf.hpp | 138 +++++++++++++++++++++----
 1 file changed, 119 insertions(+), 19 deletions(-)

diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index 2a4533b8bc..240fde00bc 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -48,7 +48,7 @@ void getQR(int const m, int const n,
   // Populate h_R
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
-      if ((i <= j) && (i < n)) {
+      if (i <= j) { // && (i < n)) { // Aqui
         h_R(i,j) = h_A(i,j);
       }
       else {
@@ -57,7 +57,7 @@ void getQR(int const m, int const n,
     }
   }
 
-  // Instantiate the identity matrix
+  // Instantiate the m x m identity matrix
   ViewTypeA I("I", m, m);
   typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I);
   Kokkos::deep_copy(h_I,Kokkos::ArithTraits<ScalarA>::zero());
@@ -69,7 +69,7 @@ void getQR(int const m, int const n,
     }
   }
 
-  // Populate h_Q
+  // Compute h_Q
   int minMN(std::min(m, n));
   ViewTypeTau v("v", m);
   typename ViewTypeTau::HostMirror h_v = Kokkos::create_mirror_view(v);
@@ -92,6 +92,11 @@ void getQR(int const m, int const n,
     for (int index(k+1); index < minMN; ++index) {
       h_v[index] = h_A(index,k);
     }
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+    for (int i(0); i < m; ++i) {
+      std::cout << "k = " << k << ", h_v[" << i << "] = " << std::setprecision(16) << h_v[i] << std::endl;
+    }
+#endif
 
     // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}.
     // void ger( const char                                   trans[]
@@ -108,6 +113,14 @@ void getQR(int const m, int const n,
                    , h_Qk
                    );
 
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+    for (int i(0); i < m; ++i) {
+      for (int j(0); j < m; ++j) {
+        std::cout << "k = " << k << ", hQk(" << i << "," << j << ") = " << h_Qk(i,j) << std::endl;
+      }
+    }
+#endif
+
     // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B).
     // void gemm( const char                             transA[]
     //          , const char                             transB[]
@@ -132,8 +145,17 @@ void getQR(int const m, int const n,
                       );
       Kokkos::deep_copy(h_Q, h_auxM);
     }
+
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+    for (int i(0); i < m; ++i) {
+      for (int j(0); j < m; ++j) {
+        std::cout << "k = " << k << ", hQ(" << i << "," << j << ") = " << h_Q(i,j) << std::endl;
+      }
+    }
+#endif
   } // for k
 
+  // Compute h_QR
   Kokkos::deep_copy(h_QR, Kokkos::ArithTraits<ScalarA>::zero());
   KokkosBlas::gemm( "N"
                   , "N"
@@ -144,7 +166,43 @@ void getQR(int const m, int const n,
                   , h_QR
                   );
 
-  // AquiEEP: test Q^H Q = I
+  // Check that Q^H Q = I
+  {
+    Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits<ScalarA>::zero());
+    KokkosBlas::gemm( "C"
+                    , "N"
+                    , 1.
+                    , h_Q
+                    , h_Q
+                    , 0.
+                    , h_auxM
+                    );
+
+    typename Kokkos::ArithTraits<typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8);
+    if constexpr (std::is_same_v<typename Kokkos::ArithTraits<typename ViewTypeA::non_const_value_type>::mag_type,float>) {
+      absTol = 5.e-5;
+    }
+
+    using ats = Kokkos::ArithTraits<ScalarA>;
+    bool test_flag_QHQ = true;
+    for (int i(0); (i < m) && test_flag_QHQ; ++i) {
+      for (int j(0); (j < m) && test_flag_QHQ; ++j) {
+        if (ats::abs(h_auxM(i,j) - h_I(i,j)) > absTol) {
+          std::cout << "m = " << m
+                    << ", n = " << n
+                    << ", i = " << i
+                    << ", j = " << j
+                    << ", h_auxM(i,j) = " << std::setprecision(16) << h_auxM(i,j)
+                    << ", h_I(i,j) = "    << std::setprecision(16) << h_I(i,j)
+                    << ", |diff| = "      << std::setprecision(16) << ats::abs(h_auxM(i,j) - h_I(i,j))
+                    << ", absTol = "      << std::setprecision(16) << absTol
+                    << std::endl;
+          test_flag_QHQ = false;
+        }
+      }
+    }
+    ASSERT_EQ(test_flag_QHQ, true);
+  }
 }
 
 template <class ViewTypeA, class ViewTypeTau, class Device>
@@ -216,7 +274,7 @@ void impl_test_geqrf(int m, int n) {
 
   Kokkos::deep_copy(h_Aorig, h_A);
 
-#ifdef HAVE_KOKKOSKERNELS_DEBUG
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
       std::cout << "Aorig(" << i << "," << j << ") = " << h_A(i,j) << std::endl;
@@ -245,7 +303,12 @@ void impl_test_geqrf(int m, int n) {
   Kokkos::deep_copy(h_A, A);
   Kokkos::deep_copy(h_tau, Tau);
 
-#ifdef HAVE_KOKKOSKERNELS_DEBUG
+  typename Kokkos::ArithTraits<typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8);
+  if constexpr (std::is_same_v<typename Kokkos::ArithTraits<typename ViewTypeA::non_const_value_type>::mag_type,float>) {
+    absTol = 5.e-5;
+  }
+
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
   std::cout << "info[0] = " << h_info[0] << std::endl;
   for (int i(0); i < minMN; ++i) {
     for (int j(0); j < n; ++j) {
@@ -255,9 +318,9 @@ void impl_test_geqrf(int m, int n) {
   for (int i(0); i < minMN; ++i) {
     std::cout << "tau(" << i << ") = " << h_tau[i] << std::setprecision(16) << std::endl;
   }
+  std::cout << "absTol = " << absTol << std::endl;
 #endif
-
-  const typename Kokkos::ArithTraits<typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8);
+  std::cout << "absTol = " << absTol << std::endl; // Aqui
 
   if ((m == 3) && (n == 3)) {
     std::vector<std::vector<ScalarA>> refMatrix(m);
@@ -307,6 +370,15 @@ void impl_test_geqrf(int m, int n) {
       for (int i(0); (i < m) && test_flag_A; ++i) {
         for (int j(0); (j < n) && test_flag_A; ++j) {
           if (ats::abs(h_A(i,j) - refMatrix[i][j]) > absTol) {
+            std::cout << "m = " << m
+                      << ", n = " << n
+                      << ", i = " << i
+                      << ", j = " << j
+                      << ", h_Aoutput(i,j) = " << std::setprecision(16) << h_A(i,j)
+                      << ", refMatrix(i,j) = " << std::setprecision(16) << refMatrix[i][j]
+                      << ", |diff| = "         << std::setprecision(16) << ats::abs(h_A(i,j) - refMatrix[i][j])
+                      << ", absTol = "         << std::setprecision(16) << absTol
+                      << std::endl;
             test_flag_A = false;
           }
         }
@@ -318,6 +390,14 @@ void impl_test_geqrf(int m, int n) {
       bool test_flag_tau = true;
       for (int i(0); (i < m) && test_flag_tau; ++i) {
         if (ats::abs(h_tau[i] - refTau[i]) > absTol) {
+          std::cout << "m = " << m
+                    << ", n = " << n
+                    << ", i = " << i
+                    << ", h_tau(i,j) = "  << std::setprecision(16) << h_tau[i]
+                    << ", refTau(i,j) = " << std::setprecision(16) << refTau[i]
+                    << ", |diff| = "      << std::setprecision(16) << ats::abs(h_tau[i] - refTau[i])
+                    << ", absTol = "      << std::setprecision(16) << absTol
+                    << std::endl;
           test_flag_tau = false;
         }
       }
@@ -335,7 +415,7 @@ void impl_test_geqrf(int m, int n) {
 
   getQR<ViewTypeA, ViewTypeTau>(m, n, h_A, h_tau, h_Q, h_R, h_QR);
 
-#ifdef HAVE_KOKKOSKERNELS_DEBUG
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < m; ++j) {
       std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl;
@@ -367,11 +447,11 @@ void impl_test_geqrf(int m, int n) {
 #if 0
     Q = [ -6/7     69/175   58/175
           -3/7   -158/175   -6/175
-           2/7     -6/35    33/35 ]
+           2/7     -6/35    33/35  ]
 
-    R = [ -14   -21   14
-           0   -175   70
-           0      0  -35 ]
+    R = [  -14        -21       14
+             0       -175       70
+             0          0      -35 ]
 #endif
 
     if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
@@ -424,6 +504,15 @@ void impl_test_geqrf(int m, int n) {
       for (int i(0); (i < m) && test_flag_Q; ++i) {
         for (int j(0); (j < n) && test_flag_Q; ++j) {
           if (ats::abs(h_Q(i,j) - refQ[i][j]) > absTol) {
+            std::cout << "m = " << m
+                      << ", n = " << n
+                      << ", i = " << i
+                      << ", j = " << j
+                      << ", h_Q(i,j) = "  << std::setprecision(16) << h_Q(i,j)
+                      << ", refQ(i,j) = " << std::setprecision(16) << refQ[i][j]
+                      << ", |diff| = "    << std::setprecision(16) << ats::abs(h_Q(i,j) - refQ[i][j])
+                      << ", absTol = "    << std::setprecision(16) << absTol
+                      << std::endl;
             test_flag_Q = false;
           }
         }
@@ -436,6 +525,15 @@ void impl_test_geqrf(int m, int n) {
       for (int i(0); (i < m) && test_flag_R; ++i) {
         for (int j(0); (j < n) && test_flag_R; ++j) {
           if (ats::abs(h_R(i,j) - refR[i][j]) > absTol) {
+            std::cout << "m = " << m
+                      << ", n = " << n
+                      << ", i = " << i
+                      << ", j = " << j
+                      << ", h_R(i,j) = "  << std::setprecision(16) << h_R(i,j)
+                      << ", refR(i,j) = " << std::setprecision(16) << refR[i][j]
+                      << ", |diff| = "    << std::setprecision(16) << ats::abs(h_R(i,j) - refR[i][j])
+                      << ", absTol = "    << std::setprecision(16) << absTol
+                      << std::endl;
             test_flag_R = false;
           }
         }
@@ -444,6 +542,7 @@ void impl_test_geqrf(int m, int n) {
     }
   }
 
+  // Check that A = QR
   {
     bool test_flag_QR = true;
     for (int i(0); (i < m) && test_flag_QR; ++i) {
@@ -476,10 +575,11 @@ void test_geqrf() {
   using view_type_a_ll   = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
   using view_type_tau_ll = Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device>;
 
-  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(3, 3);
-  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(100, 100);
+  //Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(3, 3);
+  //Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(100, 100);
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(4, 3); // AquiEEP
   //Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(100, 70); // AquiEEP
-  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(70, 100);
+  //Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(70, 100);
 #endif
 }
 
@@ -488,7 +588,7 @@ void test_geqrf() {
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
 TEST_F(TestCategory, geqrf_float) {
   Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_float");
-  test_geqrf<float, TestDevice>();
+  //test_geqrf<float, TestDevice>();
   Kokkos::Profiling::popRegion();
 }
 #endif
@@ -508,7 +608,7 @@ TEST_F(TestCategory, geqrf_double) {
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
 TEST_F(TestCategory, geqrf_complex_double) {
   Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double");
-  test_geqrf<Kokkos::complex<double>, TestDevice>();
+  //test_geqrf<Kokkos::complex<double>, TestDevice>();
   Kokkos::Profiling::popRegion();
 }
 #endif
@@ -518,7 +618,7 @@ TEST_F(TestCategory, geqrf_complex_double) {
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
 TEST_F(TestCategory, geqrf_complex_float) {
   Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float");
-  test_geqrf<Kokkos::complex<float>, TestDevice>();
+  //test_geqrf<Kokkos::complex<float>, TestDevice>();
   Kokkos::Profiling::popRegion();
 }
 #endif

From 51a27b2240fd1051734c998029686fa838435857 Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Sat, 25 May 2024 18:32:10 -0600
Subject: [PATCH 25/27] Backup

---
 lapack/unit_test/Test_Lapack_geqrf.hpp | 136 ++++++++++++++++---------
 1 file changed, 89 insertions(+), 47 deletions(-)

diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index 240fde00bc..d3c80b122c 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -45,10 +45,12 @@ void getQR(int const m, int const n,
 ) {
   using ScalarA = typename ViewTypeA::value_type;
 
+  // ********************************************************************
   // Populate h_R
+  // ********************************************************************
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
-      if (i <= j) { // && (i < n)) { // Aqui
+      if (i <= j) {
         h_R(i,j) = h_A(i,j);
       }
       else {
@@ -57,7 +59,9 @@ void getQR(int const m, int const n,
     }
   }
 
-  // Instantiate the m x m identity matrix
+  // ********************************************************************
+  // Instantiate the m x m identity matrix h_I
+  // ********************************************************************
   ViewTypeA I("I", m, m);
   typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I);
   Kokkos::deep_copy(h_I,Kokkos::ArithTraits<ScalarA>::zero());
@@ -69,7 +73,9 @@ void getQR(int const m, int const n,
     }
   }
 
+  // ********************************************************************
   // Compute h_Q
+  // ********************************************************************
   int minMN(std::min(m, n));
   ViewTypeTau v("v", m);
   typename ViewTypeTau::HostMirror h_v = Kokkos::create_mirror_view(v);
@@ -89,10 +95,10 @@ void getQR(int const m, int const n,
   for (int k(0); k < minMN; ++k) {
     Kokkos::deep_copy(h_v,Kokkos::ArithTraits<ScalarA>::zero());
     h_v[k] = 1.;
-    for (int index(k+1); index < minMN; ++index) {
+    for (int index(k+1); index < m; ++index) {
       h_v[index] = h_A(index,k);
     }
-#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
     for (int i(0); i < m; ++i) {
       std::cout << "k = " << k << ", h_v[" << i << "] = " << std::setprecision(16) << h_v[i] << std::endl;
     }
@@ -113,7 +119,7 @@ void getQR(int const m, int const n,
                    , h_Qk
                    );
 
-#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
     for (int i(0); i < m; ++i) {
       for (int j(0); j < m; ++j) {
         std::cout << "k = " << k << ", hQk(" << i << "," << j << ") = " << h_Qk(i,j) << std::endl;
@@ -146,7 +152,7 @@ void getQR(int const m, int const n,
       Kokkos::deep_copy(h_Q, h_auxM);
     }
 
-#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
     for (int i(0); i < m; ++i) {
       for (int j(0); j < m; ++j) {
         std::cout << "k = " << k << ", hQ(" << i << "," << j << ") = " << h_Q(i,j) << std::endl;
@@ -155,18 +161,9 @@ void getQR(int const m, int const n,
 #endif
   } // for k
 
-  // Compute h_QR
-  Kokkos::deep_copy(h_QR, Kokkos::ArithTraits<ScalarA>::zero());
-  KokkosBlas::gemm( "N"
-                  , "N"
-                  , 1.
-                  , h_Q
-                  , h_R
-                  , 0.
-                  , h_QR
-                  );
-
+  // ********************************************************************
   // Check that Q^H Q = I
+  // ********************************************************************
   {
     Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits<ScalarA>::zero());
     KokkosBlas::gemm( "C"
@@ -188,7 +185,8 @@ void getQR(int const m, int const n,
     for (int i(0); (i < m) && test_flag_QHQ; ++i) {
       for (int j(0); (j < m) && test_flag_QHQ; ++j) {
         if (ats::abs(h_auxM(i,j) - h_I(i,j)) > absTol) {
-          std::cout << "m = " << m
+          std::cout << "QHQ checking"
+                    << ", m = " << m
                     << ", n = " << n
                     << ", i = " << i
                     << ", j = " << j
@@ -203,6 +201,19 @@ void getQR(int const m, int const n,
     }
     ASSERT_EQ(test_flag_QHQ, true);
   }
+
+  // ********************************************************************
+  // Compute h_QR
+  // ********************************************************************
+  Kokkos::deep_copy(h_QR, Kokkos::ArithTraits<ScalarA>::zero());
+  KokkosBlas::gemm( "N"
+                  , "N"
+                  , 1.
+                  , h_Q
+                  , h_R
+                  , 0.
+                  , h_QR
+                  );
 }
 
 template <class ViewTypeA, class ViewTypeTau, class Device>
@@ -212,25 +223,29 @@ void impl_test_geqrf(int m, int n) {
   using ScalarA         = typename ViewTypeA::value_type;
   using ats             = Kokkos::ArithTraits<ScalarA>;
 
-  execution_space space{};
-
   Kokkos::Random_XorShift64_Pool<execution_space> rand_pool(13718);
 
   int minMN(std::min(m, n));
 
+  // ********************************************************************
   // Create device views
+  // ********************************************************************
   ViewTypeA    A    ("A", m, n);
   ViewTypeA    Aorig("Aorig", m, n);
   ViewTypeTau  Tau  ("Tau", minMN);
   ViewTypeInfo Info ("Info", 1);
 
-  // Create host mirrors of device views.
+  // ********************************************************************
+  // Create host mirrors of device views
+  // ********************************************************************
   typename ViewTypeA::HostMirror h_A       = Kokkos::create_mirror_view(A);
   typename ViewTypeA::HostMirror h_Aorig   = Kokkos::create_mirror_view(Aorig);
   typename ViewTypeTau::HostMirror h_tau   = Kokkos::create_mirror_view(Tau);
   typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info);
 
-  // Initialize data.
+  // ********************************************************************
+  // Initialize data
+  // ********************************************************************
   if ((m == 3) && (n == 3)) {
     if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
       h_A(0, 0).real() = 12.;
@@ -274,7 +289,7 @@ void impl_test_geqrf(int m, int n) {
 
   Kokkos::deep_copy(h_Aorig, h_A);
 
-#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
       std::cout << "Aorig(" << i << "," << j << ") = " << h_A(i,j) << std::endl;
@@ -284,8 +299,11 @@ void impl_test_geqrf(int m, int n) {
 
   Kokkos::fence();
 
+  // ********************************************************************
   // Perform the QR factorization
+  // ********************************************************************
   try {
+    execution_space space{};
     KokkosLapack::geqrf(space, A, Tau, Info);
   } catch (const std::runtime_error& e) {
     std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'"
@@ -299,7 +317,9 @@ void impl_test_geqrf(int m, int n) {
   Kokkos::deep_copy(h_info, Info);
   EXPECT_EQ(h_info[0], 0) << "Failed geqrf() test: Info[0] = " << h_info[0];
 
+  // ********************************************************************
   // Get the results
+  // ********************************************************************
   Kokkos::deep_copy(h_A, A);
   Kokkos::deep_copy(h_tau, Tau);
 
@@ -308,7 +328,7 @@ void impl_test_geqrf(int m, int n) {
     absTol = 5.e-5;
   }
 
-#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
   std::cout << "info[0] = " << h_info[0] << std::endl;
   for (int i(0); i < minMN; ++i) {
     for (int j(0); j < n; ++j) {
@@ -320,8 +340,10 @@ void impl_test_geqrf(int m, int n) {
   }
   std::cout << "absTol = " << absTol << std::endl;
 #endif
-  std::cout << "absTol = " << absTol << std::endl; // Aqui
 
+  // ********************************************************************
+  // Check outputs h_A and h_tau
+  // ********************************************************************
   if ((m == 3) && (n == 3)) {
     std::vector<std::vector<ScalarA>> refMatrix(m);
     for (int i(0); i < m; ++i) {
@@ -370,7 +392,8 @@ void impl_test_geqrf(int m, int n) {
       for (int i(0); (i < m) && test_flag_A; ++i) {
         for (int j(0); (j < n) && test_flag_A; ++j) {
           if (ats::abs(h_A(i,j) - refMatrix[i][j]) > absTol) {
-            std::cout << "m = " << m
+            std::cout << "h_Aoutput checking"
+                      << ", m = " << m
                       << ", n = " << n
                       << ", i = " << i
                       << ", j = " << j
@@ -390,7 +413,8 @@ void impl_test_geqrf(int m, int n) {
       bool test_flag_tau = true;
       for (int i(0); (i < m) && test_flag_tau; ++i) {
         if (ats::abs(h_tau[i] - refTau[i]) > absTol) {
-          std::cout << "m = " << m
+          std::cout << "tau checking"
+                    << ", m = " << m
                     << ", n = " << n
                     << ", i = " << i
                     << ", h_tau(i,j) = "  << std::setprecision(16) << h_tau[i]
@@ -405,6 +429,9 @@ void impl_test_geqrf(int m, int n) {
     }
   }
 
+  // ********************************************************************
+  // Compute Q, R, and QR
+  // ********************************************************************
   ViewTypeA Q("Q", m, m);
   ViewTypeA R("R", m, n);
   ViewTypeA QR("QR", m, n);
@@ -415,7 +442,7 @@ void impl_test_geqrf(int m, int n) {
 
   getQR<ViewTypeA, ViewTypeTau>(m, n, h_A, h_tau, h_Q, h_R, h_QR);
 
-#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < m; ++j) {
       std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl;
@@ -433,6 +460,9 @@ void impl_test_geqrf(int m, int n) {
   }
 #endif
 
+  // ********************************************************************
+  // Check Q, R, and QR
+  // ********************************************************************
   if ((m == 3) && (n == 3)) {
     std::vector<std::vector<ScalarA>> refQ(m);
     for (int i(0); i < m; ++i) {
@@ -504,7 +534,8 @@ void impl_test_geqrf(int m, int n) {
       for (int i(0); (i < m) && test_flag_Q; ++i) {
         for (int j(0); (j < n) && test_flag_Q; ++j) {
           if (ats::abs(h_Q(i,j) - refQ[i][j]) > absTol) {
-            std::cout << "m = " << m
+            std::cout << "Q checking"
+                      << ", m = " << m
                       << ", n = " << n
                       << ", i = " << i
                       << ", j = " << j
@@ -525,7 +556,8 @@ void impl_test_geqrf(int m, int n) {
       for (int i(0); (i < m) && test_flag_R; ++i) {
         for (int j(0); (j < n) && test_flag_R; ++j) {
           if (ats::abs(h_R(i,j) - refR[i][j]) > absTol) {
-            std::cout << "m = " << m
+            std::cout << "R checking"
+                      << ", m = " << m
                       << ", n = " << n
                       << ", i = " << i
                       << ", j = " << j
@@ -542,13 +574,16 @@ void impl_test_geqrf(int m, int n) {
     }
   }
 
+  // ********************************************************************
   // Check that A = QR
+  // ********************************************************************
   {
     bool test_flag_QR = true;
     for (int i(0); (i < m) && test_flag_QR; ++i) {
       for (int j(0); (j < n) && test_flag_QR; ++j) {
         if (ats::abs(h_QR(i,j) - h_Aorig(i,j)) > absTol) {
-          std::cout << "m = " << m
+          std::cout << "QR checking"
+                    << ", m = " << m
                     << ", n = " << n
                     << ", i = " << i
                     << ", j = " << j
@@ -575,11 +610,18 @@ void test_geqrf() {
   using view_type_a_ll   = Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device>;
   using view_type_tau_ll = Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device>;
 
-  //Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(3, 3);
-  //Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(100, 100);
-  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(4, 3); // AquiEEP
-  //Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(100, 70); // AquiEEP
-  //Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(70, 100);
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(1, 1);
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(2, 1);
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(2, 2);
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(3, 1);
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(3, 2);
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(3, 3);
+
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(100, 100);
+
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(100, 70);
+
+  Test::impl_test_geqrf<view_type_a_ll, view_type_tau_ll, Device>(70, 100);
 #endif
 }
 
@@ -588,7 +630,7 @@ void test_geqrf() {
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
 TEST_F(TestCategory, geqrf_float) {
   Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_float");
-  //test_geqrf<float, TestDevice>();
+  test_geqrf<float, TestDevice>();
   Kokkos::Profiling::popRegion();
 }
 #endif
@@ -603,22 +645,22 @@ TEST_F(TestCategory, geqrf_double) {
 }
 #endif
 
-#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \
-    (!defined(KOKKOSKERNELS_ETI_ONLY) &&          \
+#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \
+    (!defined(KOKKOSKERNELS_ETI_ONLY) &&         \
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
-TEST_F(TestCategory, geqrf_complex_double) {
-  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double");
-  //test_geqrf<Kokkos::complex<double>, TestDevice>();
+TEST_F(TestCategory, geqrf_complex_float) {
+  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float");
+  test_geqrf<Kokkos::complex<float>, TestDevice>();
   Kokkos::Profiling::popRegion();
 }
 #endif
 
-#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \
-    (!defined(KOKKOSKERNELS_ETI_ONLY) &&         \
+#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \
+    (!defined(KOKKOSKERNELS_ETI_ONLY) &&          \
      !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS))
-TEST_F(TestCategory, geqrf_complex_float) {
-  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float");
-  //test_geqrf<Kokkos::complex<float>, TestDevice>();
+TEST_F(TestCategory, geqrf_complex_double) {
+  Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double");
+  test_geqrf<Kokkos::complex<double>, TestDevice>();
   Kokkos::Profiling::popRegion();
 }
 #endif

From bd22118cc964d4a711e9f188a8a5b9dd4c319ebb Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Sat, 25 May 2024 18:35:00 -0600
Subject: [PATCH 26/27] Formatting

---
 lapack/src/KokkosLapack_geqrf.hpp      |  11 +-
 lapack/unit_test/Test_Lapack_geqrf.hpp | 247 +++++++++++--------------
 2 files changed, 115 insertions(+), 143 deletions(-)

diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
index a81ae2a436..c680120ae3 100644
--- a/lapack/src/KokkosLapack_geqrf.hpp
+++ b/lapack/src/KokkosLapack_geqrf.hpp
@@ -117,18 +117,17 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TauArray& Tau,
   using AMatrix_Internal = Kokkos::View<
       typename AMatrix::non_const_value_type**, typename AMatrix::array_layout,
       typename AMatrix::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
-  using TauArray_Internal =
-      Kokkos::View<typename TauArray::non_const_value_type*,
-                   typename TauArray::array_layout, typename TauArray::device_type,
-                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
+  using TauArray_Internal = Kokkos::View<
+      typename TauArray::non_const_value_type*, typename TauArray::array_layout,
+      typename TauArray::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
   using InfoArray_Internal =
       Kokkos::View<typename InfoArray::non_const_value_type*,
                    typename InfoArray::array_layout,
                    typename InfoArray::device_type,
                    Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
 
-  AMatrix_Internal   A_i    = A;
-  TauArray_Internal  Tau_i  = Tau;
+  AMatrix_Internal A_i      = A;
+  TauArray_Internal Tau_i   = Tau;
   InfoArray_Internal Info_i = Info;
 
   KokkosLapack::Impl::GEQRF<ExecutionSpace, AMatrix_Internal, TauArray_Internal,
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index d3c80b122c..7453e2fb3e 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -36,13 +36,11 @@
 namespace Test {
 
 template <class ViewTypeA, class ViewTypeTau>
-void getQR(int const m, int const n,
-           typename ViewTypeA::HostMirror const& h_A,
+void getQR(int const m, int const n, typename ViewTypeA::HostMirror const& h_A,
            typename ViewTypeTau::HostMirror const& h_tau,
            typename ViewTypeA::HostMirror& h_Q,
            typename ViewTypeA::HostMirror& h_R,
-           typename ViewTypeA::HostMirror& h_QR
-) {
+           typename ViewTypeA::HostMirror& h_QR) {
   using ScalarA = typename ViewTypeA::value_type;
 
   // ********************************************************************
@@ -51,10 +49,9 @@ void getQR(int const m, int const n,
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
       if (i <= j) {
-        h_R(i,j) = h_A(i,j);
-      }
-      else {
-        h_R(i,j) = Kokkos::ArithTraits<ScalarA>::zero();
+        h_R(i, j) = h_A(i, j);
+      } else {
+        h_R(i, j) = Kokkos::ArithTraits<ScalarA>::zero();
       }
     }
   }
@@ -64,12 +61,12 @@ void getQR(int const m, int const n,
   // ********************************************************************
   ViewTypeA I("I", m, m);
   typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I);
-  Kokkos::deep_copy(h_I,Kokkos::ArithTraits<ScalarA>::zero());
+  Kokkos::deep_copy(h_I, Kokkos::ArithTraits<ScalarA>::zero());
   for (int i(0); i < m; ++i) {
     if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
-      h_I(i,i).real() = 1.;
+      h_I(i, i).real() = 1.;
     } else {
-      h_I(i,i) = 1.;
+      h_I(i, i) = 1.;
     }
   }
 
@@ -93,12 +90,12 @@ void getQR(int const m, int const n,
   //     v(k)     = 1,
   //     v(k+1:m-1) = A(k+1:m-1,k).
   for (int k(0); k < minMN; ++k) {
-    Kokkos::deep_copy(h_v,Kokkos::ArithTraits<ScalarA>::zero());
+    Kokkos::deep_copy(h_v, Kokkos::ArithTraits<ScalarA>::zero());
     h_v[k] = 1.;
-    for (int index(k+1); index < m; ++index) {
-      h_v[index] = h_A(index,k);
+    for (int index(k + 1); index < m; ++index) {
+      h_v[index] = h_A(index, k);
     }
-#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 0  // def HAVE_KOKKOSKERNELS_DEBUG
     for (int i(0); i < m; ++i) {
       std::cout << "k = " << k << ", h_v[" << i << "] = " << std::setprecision(16) << h_v[i] << std::endl;
     }
@@ -112,14 +109,9 @@ void getQR(int const m, int const n,
     //         , const AViewType                            & A
     //         );
     Kokkos::deep_copy(h_Qk, h_I);
-    KokkosBlas::ger( "H"
-                   , -h_tau[k]
-                   , h_v
-                   , h_v
-                   , h_Qk
-                   );
-
-#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
+    KokkosBlas::ger("H", -h_tau[k], h_v, h_v, h_Qk);
+
+#if 0  // def HAVE_KOKKOSKERNELS_DEBUG
     for (int i(0); i < m; ++i) {
       for (int j(0); j < m; ++j) {
         std::cout << "k = " << k << ", hQk(" << i << "," << j << ") = " << h_Qk(i,j) << std::endl;
@@ -138,62 +130,50 @@ void getQR(int const m, int const n,
     //          );
     if (k == 0) {
       Kokkos::deep_copy(h_Q, h_Qk);
-    }
-    else {
+    } else {
       Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits<ScalarA>::zero());
-      KokkosBlas::gemm( "N"
-                      , "N"
-                      , 1.
-                      , h_Q
-                      , h_Qk
-                      , 0.
-                      , h_auxM
-                      );
+      KokkosBlas::gemm("N", "N", 1., h_Q, h_Qk, 0., h_auxM);
       Kokkos::deep_copy(h_Q, h_auxM);
     }
 
-#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 0  // def HAVE_KOKKOSKERNELS_DEBUG
     for (int i(0); i < m; ++i) {
       for (int j(0); j < m; ++j) {
         std::cout << "k = " << k << ", hQ(" << i << "," << j << ") = " << h_Q(i,j) << std::endl;
       }
     }
 #endif
-  } // for k
+  }  // for k
 
   // ********************************************************************
   // Check that Q^H Q = I
   // ********************************************************************
   {
     Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits<ScalarA>::zero());
-    KokkosBlas::gemm( "C"
-                    , "N"
-                    , 1.
-                    , h_Q
-                    , h_Q
-                    , 0.
-                    , h_auxM
-                    );
-
-    typename Kokkos::ArithTraits<typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8);
-    if constexpr (std::is_same_v<typename Kokkos::ArithTraits<typename ViewTypeA::non_const_value_type>::mag_type,float>) {
+    KokkosBlas::gemm("C", "N", 1., h_Q, h_Q, 0., h_auxM);
+
+    typename Kokkos::ArithTraits<
+        typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8);
+    if constexpr (std::is_same_v<
+                      typename Kokkos::ArithTraits<
+                          typename ViewTypeA::non_const_value_type>::mag_type,
+                      float>) {
       absTol = 5.e-5;
     }
 
-    using ats = Kokkos::ArithTraits<ScalarA>;
+    using ats          = Kokkos::ArithTraits<ScalarA>;
     bool test_flag_QHQ = true;
     for (int i(0); (i < m) && test_flag_QHQ; ++i) {
       for (int j(0); (j < m) && test_flag_QHQ; ++j) {
-        if (ats::abs(h_auxM(i,j) - h_I(i,j)) > absTol) {
+        if (ats::abs(h_auxM(i, j) - h_I(i, j)) > absTol) {
           std::cout << "QHQ checking"
-                    << ", m = " << m
-                    << ", n = " << n
-                    << ", i = " << i
+                    << ", m = " << m << ", n = " << n << ", i = " << i
                     << ", j = " << j
-                    << ", h_auxM(i,j) = " << std::setprecision(16) << h_auxM(i,j)
-                    << ", h_I(i,j) = "    << std::setprecision(16) << h_I(i,j)
-                    << ", |diff| = "      << std::setprecision(16) << ats::abs(h_auxM(i,j) - h_I(i,j))
-                    << ", absTol = "      << std::setprecision(16) << absTol
+                    << ", h_auxM(i,j) = " << std::setprecision(16)
+                    << h_auxM(i, j) << ", h_I(i,j) = " << std::setprecision(16)
+                    << h_I(i, j) << ", |diff| = " << std::setprecision(16)
+                    << ats::abs(h_auxM(i, j) - h_I(i, j))
+                    << ", absTol = " << std::setprecision(16) << absTol
                     << std::endl;
           test_flag_QHQ = false;
         }
@@ -206,14 +186,7 @@ void getQR(int const m, int const n,
   // Compute h_QR
   // ********************************************************************
   Kokkos::deep_copy(h_QR, Kokkos::ArithTraits<ScalarA>::zero());
-  KokkosBlas::gemm( "N"
-                  , "N"
-                  , 1.
-                  , h_Q
-                  , h_R
-                  , 0.
-                  , h_QR
-                  );
+  KokkosBlas::gemm("N", "N", 1., h_Q, h_R, 0., h_QR);
 }
 
 template <class ViewTypeA, class ViewTypeTau, class Device>
@@ -230,10 +203,10 @@ void impl_test_geqrf(int m, int n) {
   // ********************************************************************
   // Create device views
   // ********************************************************************
-  ViewTypeA    A    ("A", m, n);
-  ViewTypeA    Aorig("Aorig", m, n);
-  ViewTypeTau  Tau  ("Tau", minMN);
-  ViewTypeInfo Info ("Info", 1);
+  ViewTypeA A("A", m, n);
+  ViewTypeA Aorig("Aorig", m, n);
+  ViewTypeTau Tau("Tau", minMN);
+  ViewTypeInfo Info("Info", 1);
 
   // ********************************************************************
   // Create host mirrors of device views
@@ -262,7 +235,7 @@ void impl_test_geqrf(int m, int n) {
 
       for (int i(0); i < m; ++i) {
         for (int j(0); j < n; ++j) {
-          h_A(i,j).imag() = 0.;
+          h_A(i, j).imag() = 0.;
         }
       }
     } else {
@@ -289,7 +262,7 @@ void impl_test_geqrf(int m, int n) {
 
   Kokkos::deep_copy(h_Aorig, h_A);
 
-#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 0  // def HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
       std::cout << "Aorig(" << i << "," << j << ") = " << h_A(i,j) << std::endl;
@@ -323,12 +296,16 @@ void impl_test_geqrf(int m, int n) {
   Kokkos::deep_copy(h_A, A);
   Kokkos::deep_copy(h_tau, Tau);
 
-  typename Kokkos::ArithTraits<typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8);
-  if constexpr (std::is_same_v<typename Kokkos::ArithTraits<typename ViewTypeA::non_const_value_type>::mag_type,float>) {
+  typename Kokkos::ArithTraits<
+      typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8);
+  if constexpr (std::is_same_v<
+                    typename Kokkos::ArithTraits<
+                        typename ViewTypeA::non_const_value_type>::mag_type,
+                    float>) {
     absTol = 5.e-5;
   }
 
-#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 0  // def HAVE_KOKKOSKERNELS_DEBUG
   std::cout << "info[0] = " << h_info[0] << std::endl;
   for (int i(0); i < minMN; ++i) {
     for (int j(0); j < n; ++j) {
@@ -347,10 +324,10 @@ void impl_test_geqrf(int m, int n) {
   if ((m == 3) && (n == 3)) {
     std::vector<std::vector<ScalarA>> refMatrix(m);
     for (int i(0); i < m; ++i) {
-      refMatrix[i].resize(n,Kokkos::ArithTraits<ScalarA>::zero());
+      refMatrix[i].resize(n, Kokkos::ArithTraits<ScalarA>::zero());
     }
 
-    std::vector<ScalarA> refTau(m,Kokkos::ArithTraits<ScalarA>::zero());
+    std::vector<ScalarA> refTau(m, Kokkos::ArithTraits<ScalarA>::zero());
 
     if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
       refMatrix[0][0].real() = -14.;
@@ -362,14 +339,13 @@ void impl_test_geqrf(int m, int n) {
       refMatrix[1][2].real() = 70.;
 
       refMatrix[2][0].real() = -0.1538461538461539;
-      refMatrix[2][1].real() = 1./18.;
+      refMatrix[2][1].real() = 1. / 18.;
       refMatrix[2][2].real() = -35.;
 
       refTau[0].real() = 1.857142857142857;
       refTau[1].real() = 1.993846153846154;
       refTau[2].real() = 0.;
-    }
-    else {
+    } else {
       refMatrix[0][0] = -14.;
       refMatrix[0][1] = -21.;
       refMatrix[0][2] = 14.;
@@ -379,7 +355,7 @@ void impl_test_geqrf(int m, int n) {
       refMatrix[1][2] = 70.;
 
       refMatrix[2][0] = -0.1538461538461539;
-      refMatrix[2][1] = 1./18.;
+      refMatrix[2][1] = 1. / 18.;
       refMatrix[2][2] = -35.;
 
       refTau[0] = 1.857142857142857;
@@ -391,16 +367,17 @@ void impl_test_geqrf(int m, int n) {
       bool test_flag_A = true;
       for (int i(0); (i < m) && test_flag_A; ++i) {
         for (int j(0); (j < n) && test_flag_A; ++j) {
-          if (ats::abs(h_A(i,j) - refMatrix[i][j]) > absTol) {
+          if (ats::abs(h_A(i, j) - refMatrix[i][j]) > absTol) {
             std::cout << "h_Aoutput checking"
-                      << ", m = " << m
-                      << ", n = " << n
-                      << ", i = " << i
+                      << ", m = " << m << ", n = " << n << ", i = " << i
                       << ", j = " << j
-                      << ", h_Aoutput(i,j) = " << std::setprecision(16) << h_A(i,j)
-                      << ", refMatrix(i,j) = " << std::setprecision(16) << refMatrix[i][j]
-                      << ", |diff| = "         << std::setprecision(16) << ats::abs(h_A(i,j) - refMatrix[i][j])
-                      << ", absTol = "         << std::setprecision(16) << absTol
+                      << ", h_Aoutput(i,j) = " << std::setprecision(16)
+                      << h_A(i, j)
+                      << ", refMatrix(i,j) = " << std::setprecision(16)
+                      << refMatrix[i][j]
+                      << ", |diff| = " << std::setprecision(16)
+                      << ats::abs(h_A(i, j) - refMatrix[i][j])
+                      << ", absTol = " << std::setprecision(16) << absTol
                       << std::endl;
             test_flag_A = false;
           }
@@ -414,13 +391,12 @@ void impl_test_geqrf(int m, int n) {
       for (int i(0); (i < m) && test_flag_tau; ++i) {
         if (ats::abs(h_tau[i] - refTau[i]) > absTol) {
           std::cout << "tau checking"
-                    << ", m = " << m
-                    << ", n = " << n
-                    << ", i = " << i
-                    << ", h_tau(i,j) = "  << std::setprecision(16) << h_tau[i]
+                    << ", m = " << m << ", n = " << n << ", i = " << i
+                    << ", h_tau(i,j) = " << std::setprecision(16) << h_tau[i]
                     << ", refTau(i,j) = " << std::setprecision(16) << refTau[i]
-                    << ", |diff| = "      << std::setprecision(16) << ats::abs(h_tau[i] - refTau[i])
-                    << ", absTol = "      << std::setprecision(16) << absTol
+                    << ", |diff| = " << std::setprecision(16)
+                    << ats::abs(h_tau[i] - refTau[i])
+                    << ", absTol = " << std::setprecision(16) << absTol
                     << std::endl;
           test_flag_tau = false;
         }
@@ -442,7 +418,7 @@ void impl_test_geqrf(int m, int n) {
 
   getQR<ViewTypeA, ViewTypeTau>(m, n, h_A, h_tau, h_Q, h_R, h_QR);
 
-#if 0 // def HAVE_KOKKOSKERNELS_DEBUG
+#if 0  // def HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < m; ++j) {
       std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl;
@@ -466,12 +442,12 @@ void impl_test_geqrf(int m, int n) {
   if ((m == 3) && (n == 3)) {
     std::vector<std::vector<ScalarA>> refQ(m);
     for (int i(0); i < m; ++i) {
-      refQ[i].resize(n,Kokkos::ArithTraits<ScalarA>::zero());
+      refQ[i].resize(n, Kokkos::ArithTraits<ScalarA>::zero());
     }
 
     std::vector<std::vector<ScalarA>> refR(m);
     for (int i(0); i < m; ++i) {
-      refR[i].resize(n,Kokkos::ArithTraits<ScalarA>::zero());
+      refR[i].resize(n, Kokkos::ArithTraits<ScalarA>::zero());
     }
 
 #if 0
@@ -485,17 +461,17 @@ void impl_test_geqrf(int m, int n) {
 #endif
 
     if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
-      refQ[0][0].real() = -6./7.;
-      refQ[0][1].real() = 69./175.;
-      refQ[0][2].real() = 58./175.;
+      refQ[0][0].real() = -6. / 7.;
+      refQ[0][1].real() = 69. / 175.;
+      refQ[0][2].real() = 58. / 175.;
 
-      refQ[1][0].real() = -3./7.;
-      refQ[1][1].real() = -158./175.;
-      refQ[1][2].real() = -6./175.;
+      refQ[1][0].real() = -3. / 7.;
+      refQ[1][1].real() = -158. / 175.;
+      refQ[1][2].real() = -6. / 175.;
 
-      refQ[2][0].real() = 2./7.;
-      refQ[2][1].real() = -6./35.;
-      refQ[2][2].real() = 33./35.;
+      refQ[2][0].real() = 2. / 7.;
+      refQ[2][1].real() = -6. / 35.;
+      refQ[2][2].real() = 33. / 35.;
 
       refR[0][0].real() = -14.;
       refR[0][1].real() = -21.;
@@ -505,19 +481,18 @@ void impl_test_geqrf(int m, int n) {
       refR[1][2].real() = 70.;
 
       refR[2][2].real() = -35.;
-    }
-    else {
-      refQ[0][0] = -6./7.;
-      refQ[0][1] = 69./175.;
-      refQ[0][2] = 58./175.;
+    } else {
+      refQ[0][0] = -6. / 7.;
+      refQ[0][1] = 69. / 175.;
+      refQ[0][2] = 58. / 175.;
 
-      refQ[1][0] = -3./7.;
-      refQ[1][1] = -158./175.;
-      refQ[1][2] = -6./175.;
+      refQ[1][0] = -3. / 7.;
+      refQ[1][1] = -158. / 175.;
+      refQ[1][2] = -6. / 175.;
 
-      refQ[2][0] = 2./7.;
-      refQ[2][1] = -6./35.;
-      refQ[2][2] = 33./35.;
+      refQ[2][0] = 2. / 7.;
+      refQ[2][1] = -6. / 35.;
+      refQ[2][2] = 33. / 35.;
 
       refR[0][0] = -14.;
       refR[0][1] = -21.;
@@ -533,16 +508,15 @@ void impl_test_geqrf(int m, int n) {
       bool test_flag_Q = true;
       for (int i(0); (i < m) && test_flag_Q; ++i) {
         for (int j(0); (j < n) && test_flag_Q; ++j) {
-          if (ats::abs(h_Q(i,j) - refQ[i][j]) > absTol) {
+          if (ats::abs(h_Q(i, j) - refQ[i][j]) > absTol) {
             std::cout << "Q checking"
-                      << ", m = " << m
-                      << ", n = " << n
-                      << ", i = " << i
+                      << ", m = " << m << ", n = " << n << ", i = " << i
                       << ", j = " << j
-                      << ", h_Q(i,j) = "  << std::setprecision(16) << h_Q(i,j)
+                      << ", h_Q(i,j) = " << std::setprecision(16) << h_Q(i, j)
                       << ", refQ(i,j) = " << std::setprecision(16) << refQ[i][j]
-                      << ", |diff| = "    << std::setprecision(16) << ats::abs(h_Q(i,j) - refQ[i][j])
-                      << ", absTol = "    << std::setprecision(16) << absTol
+                      << ", |diff| = " << std::setprecision(16)
+                      << ats::abs(h_Q(i, j) - refQ[i][j])
+                      << ", absTol = " << std::setprecision(16) << absTol
                       << std::endl;
             test_flag_Q = false;
           }
@@ -555,16 +529,15 @@ void impl_test_geqrf(int m, int n) {
       bool test_flag_R = true;
       for (int i(0); (i < m) && test_flag_R; ++i) {
         for (int j(0); (j < n) && test_flag_R; ++j) {
-          if (ats::abs(h_R(i,j) - refR[i][j]) > absTol) {
+          if (ats::abs(h_R(i, j) - refR[i][j]) > absTol) {
             std::cout << "R checking"
-                      << ", m = " << m
-                      << ", n = " << n
-                      << ", i = " << i
+                      << ", m = " << m << ", n = " << n << ", i = " << i
                       << ", j = " << j
-                      << ", h_R(i,j) = "  << std::setprecision(16) << h_R(i,j)
+                      << ", h_R(i,j) = " << std::setprecision(16) << h_R(i, j)
                       << ", refR(i,j) = " << std::setprecision(16) << refR[i][j]
-                      << ", |diff| = "    << std::setprecision(16) << ats::abs(h_R(i,j) - refR[i][j])
-                      << ", absTol = "    << std::setprecision(16) << absTol
+                      << ", |diff| = " << std::setprecision(16)
+                      << ats::abs(h_R(i, j) - refR[i][j])
+                      << ", absTol = " << std::setprecision(16) << absTol
                       << std::endl;
             test_flag_R = false;
           }
@@ -581,16 +554,16 @@ void impl_test_geqrf(int m, int n) {
     bool test_flag_QR = true;
     for (int i(0); (i < m) && test_flag_QR; ++i) {
       for (int j(0); (j < n) && test_flag_QR; ++j) {
-        if (ats::abs(h_QR(i,j) - h_Aorig(i,j)) > absTol) {
+        if (ats::abs(h_QR(i, j) - h_Aorig(i, j)) > absTol) {
           std::cout << "QR checking"
-                    << ", m = " << m
-                    << ", n = " << n
-                    << ", i = " << i
+                    << ", m = " << m << ", n = " << n << ", i = " << i
                     << ", j = " << j
-                    << ", h_Aorig(i,j) = " << std::setprecision(16) << h_Aorig(i,j)
-                    << ", h_QR(i,j) = "    << std::setprecision(16) << h_QR(i,j)
-                    << ", |diff| = "       << std::setprecision(16) << ats::abs(h_QR(i,j) - h_Aorig(i,j))
-                    << ", absTol = "       << std::setprecision(16) << absTol
+                    << ", h_Aorig(i,j) = " << std::setprecision(16)
+                    << h_Aorig(i, j)
+                    << ", h_QR(i,j) = " << std::setprecision(16) << h_QR(i, j)
+                    << ", |diff| = " << std::setprecision(16)
+                    << ats::abs(h_QR(i, j) - h_Aorig(i, j))
+                    << ", absTol = " << std::setprecision(16) << absTol
                     << std::endl;
           test_flag_QR = false;
         }

From 89bb40c6ad797aa4391bfb411da1c2ea7d6232ac Mon Sep 17 00:00:00 2001
From: Ernesto Prudencio <eeprude@sandia.gov>
Date: Sat, 25 May 2024 18:41:47 -0600
Subject: [PATCH 27/27] Backup

---
 lapack/unit_test/Test_Lapack_geqrf.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
index 7453e2fb3e..0ec9d2679a 100644
--- a/lapack/unit_test/Test_Lapack_geqrf.hpp
+++ b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -191,7 +191,8 @@ void getQR(int const m, int const n, typename ViewTypeA::HostMirror const& h_A,
 
 template <class ViewTypeA, class ViewTypeTau, class Device>
 void impl_test_geqrf(int m, int n) {
-  using ViewTypeInfo    = Kokkos::View<int*, Kokkos::LayoutLeft, Device>;
+  using ALayout_t       = typename ViewTypeA::array_layout;
+  using ViewTypeInfo    = Kokkos::View<int*, ALayout_t, Device>;
   using execution_space = typename Device::execution_space;
   using ScalarA         = typename ViewTypeA::value_type;
   using ats             = Kokkos::ArithTraits<ScalarA>;