From f338e5f79efbaf5ee5cbc9e9f018d32a7f54f6f0 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Tue, 7 May 2024 04:21:53 -0600 Subject: [PATCH 01/27] Backup --- .../KokkosLapack_geqrf_eti_spec_inst.cpp.in | 26 + .../KokkosLapack_geqrf_eti_spec_avail.hpp.in | 24 + lapack/impl/KokkosLapack_geqrf_impl.hpp | 34 ++ lapack/impl/KokkosLapack_geqrf_spec.hpp | 140 +++++ lapack/src/KokkosLapack_geqrf.hpp | 198 +++++++ .../KokkosLapack_geqrf_tpl_spec_avail.hpp | 163 +++++ .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 557 ++++++++++++++++++ lapack/unit_test/Test_Lapack_geqrf.hpp | 444 ++++++++++++++ 8 files changed, 1586 insertions(+) create mode 100644 lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in create mode 100644 lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in create mode 100644 lapack/impl/KokkosLapack_geqrf_impl.hpp create mode 100644 lapack/impl/KokkosLapack_geqrf_spec.hpp create mode 100644 lapack/src/KokkosLapack_geqrf.hpp create mode 100644 lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp create mode 100644 lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp create mode 100644 lapack/unit_test/Test_Lapack_geqrf.hpp diff --git a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..9558d0f6cc --- /dev/null +++ b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in @@ -0,0 +1,26 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosLapack_geqrf_spec.hpp" + +namespace KokkosLapack { +namespace Impl { +@LAPACK_GEQRF_ETI_INST_BLOCK@ + } //IMPL +} //Kokkos diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..c4619b9c07 --- /dev/null +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in @@ -0,0 +1,24 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_ +namespace KokkosLapack { +namespace Impl { +@LAPACK_GEQRF_ETI_AVAIL_BLOCK@ + } //IMPL +} //Kokkos +#endif diff --git a/lapack/impl/KokkosLapack_geqrf_impl.hpp b/lapack/impl/KokkosLapack_geqrf_impl.hpp new file mode 100644 index 0000000000..ea20018073 --- /dev/null +++ b/lapack/impl/KokkosLapack_geqrf_impl.hpp @@ -0,0 +1,34 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_IMPL_GEQRF_HPP_ +#define KOKKOSLAPACK_IMPL_GEQRF_HPP_ + +/// \file KokkosLapack_geqrf_impl.hpp +/// \brief Implementation(s) of QR factorization (geqrf). + +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// NOTE: Might add the implementation of KokkosLapack::geqrf later + +} // namespace Impl +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_IMPL_GEQRF_HPP_ diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp new file mode 100644 index 0000000000..d0083cb151 --- /dev/null +++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp @@ -0,0 +1,140 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_ +#define KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_ + +#include +#include +#include + +// Include the actual functors +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include +#endif + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct geqrf_eti_spec_avail { + enum : bool { value = false }; +}; +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization availability +// KokkosLapack::Impl::GEQRF. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _INST macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct geqrf_eti_spec_avail< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +// Include the actual specialization declarations +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// Unification layer +/// \brief Implementation of KokkosLapack::geqrf. + +template ::value, + bool eti_spec_avail = + geqrf_eti_spec_avail::value> +struct GEQRF { + static void geqrf(const ExecutionSpace &space, const AMatrix &A, const TWArray &Tau, + const TWArray &Work); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +//! Fallback specialization of GEQRF: no native implementation, always throws. 
+// Unification layer +template +struct GEQRF { + static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */, + const TWArray & /* Tau */, const TWArray & /* Work */) { + // NOTE: Might add the implementation of KokkosLapack::geqrf later + throw std::runtime_error( + "No fallback implementation of GEQRF (general QR factorization) " + "exists. Enable LAPACK, CUSOLVER, ROCSOLVER or MAGMA TPL."); + } +}; + +#endif +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization of +// KokkosLapack::Impl::GEQRF. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _INST macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct GEQRF< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct GEQRF< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#include + +#endif // KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_ diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp new file mode 100644 index 0000000000..ba360ad830 --- /dev/null +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -0,0 +1,198 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/// \file KokkosLapack_geqrf.hpp +/// \brief Local dense QR factorization +/// +/// This file provides KokkosLapack::geqrf. This function performs a +/// local (no MPI) QR factorization of an M-by-N matrix A. + +#ifndef KOKKOSLAPACK_GEQRF_HPP_ +#define KOKKOSLAPACK_GEQRF_HPP_ + +#include + +#include "KokkosLapack_geqrf_spec.hpp" +#include "KokkosKernels_Error.hpp" + +namespace KokkosLapack { + +/// \brief Computes a QR factorization of a matrix A +/// +/// \tparam ExecutionSpace the space where the kernel will run. +/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. +/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View. +/// +/// \param space [in] Execution space instance used to specify how to execute +/// the geqrf kernels. +/// \param A [in,out] On entry, the M-by-N matrix to be factorized. +/// On exit, the elements on and above the diagonal contain +/// the min(M,N)-by-N upper trapezoidal matrix R (R is +/// upper triangular if M >= N); the elements below the +/// diagonal, with the array Tau, represent the unitary +/// matrix Q as a product of min(M,N) elementary reflectors. +/// \param Tau [out] One-dimensional array of size min(M,N) that contains +/// the scalar factors of the elementary reflectors. +/// \param Work [out] One-dimensional array of size max(1,LWORK). +/// If min(M,N) == 0, then LWORK must be >= 1. +/// If min(M,N) != 0, then LWORK must be >= N. +/// If the QR factorization is successful, then the first +/// position of Work contains the optimal LWORK. +/// +template +void geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, + const TWArray& Work) { + // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and + // rocSOLVER TPLs. 
+ // MAGMA/rocSOLVER TPL should be enabled to call the MAGMA/rocSOLVER GPU + // interface for device views. LAPACK TPL should be enabled to call the + // LAPACK interface for host views. + + static_assert( + Kokkos::SpaceAccessibility::accessible); + static_assert( + Kokkos::SpaceAccessibility::accessible); +#if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) + if constexpr (!std::is_same_v) { + static_assert( + Kokkos::SpaceAccessibility::accessible); + } +#else + static_assert( + Kokkos::SpaceAccessibility::accessible); +#endif + static_assert(Kokkos::is_view::value, + "KokkosLapack::geqrf: A must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::geqrf: B must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::geqrf: IPIV must be a Kokkos::View."); + static_assert(static_cast(AMatrix::rank) == 2, + "KokkosLapack::geqrf: A must have rank 2."); + static_assert( + static_cast(BXMV::rank) == 1 || static_cast(BXMV::rank) == 2, + "KokkosLapack::geqrf: B must have either rank 1 or rank 2."); + static_assert(static_cast(IPIVV::rank) == 1, + "KokkosLapack::geqrf: IPIV must have rank 1."); + + int64_t IPIV0 = IPIV.extent(0); + int64_t A0 = A.extent(0); + int64_t A1 = A.extent(1); + int64_t B0 = B.extent(0); + + // Check validity of pivot argument + bool valid_pivot = + (IPIV0 == A1) || ((IPIV0 == 0) && (IPIV.data() == nullptr)); + if (!(valid_pivot)) { + std::ostringstream os; + os << "KokkosLapack::geqrf: IPIV: " << IPIV0 << ". " + << "Valid options include zero-extent 1-D view (no pivoting), or 1-D " + "View with size of " + << A0 << " (partial pivoting)."; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + // Check for no pivoting case. 
Only MAGMA supports no pivoting interface +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL + if ((!std::is_same::value) && + (IPIV0 == 0) && (IPIV.data() == nullptr)) { + std::ostringstream os; + os << "KokkosLapack::geqrf: IPIV: " << IPIV0 << ". " + << "LAPACK TPL does not support no pivoting."; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } +#endif +#else // not have MAGMA TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL + if ((IPIV0 == 0) && (IPIV.data() == nullptr)) { + std::ostringstream os; + os << "KokkosLapack::geqrf: IPIV: " << IPIV0 << ". " + << "LAPACK TPL does not support no pivoting."; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } +#endif +#endif + + // Check compatibility of dimensions at run time. + if ((A0 < A1) || (A0 != B0)) { + std::ostringstream os; + os << "KokkosLapack::geqrf: Dimensions of A, and B do not match: " + << " A: " << A.extent(0) << " x " << A.extent(1) << " B: " << B.extent(0) + << " x " << B.extent(1); + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + typedef Kokkos::View< + typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, + typename AMatrix::device_type, Kokkos::MemoryTraits > + AMatrix_Internal; + typedef Kokkos::View > + BXMV_Internal; + typedef Kokkos::View< + typename IPIVV::non_const_value_type*, typename IPIVV::array_layout, + typename IPIVV::device_type, Kokkos::MemoryTraits > + IPIVV_Internal; + AMatrix_Internal A_i = A; + // BXMV_Internal B_i = B; + IPIVV_Internal IPIV_i = IPIV; + + if (BXMV::rank == 1) { + auto B_i = BXMV_Internal(B.data(), B.extent(0), 1); + KokkosLapack::Impl::GEQRF::geqrf(space, A_i, B_i, IPIV_i); + } else { // BXMV::rank == 2 + auto B_i = BXMV_Internal(B.data(), B.extent(0), B.extent(1)); + KokkosLapack::Impl::GEQRF::geqrf(space, A_i, B_i, IPIV_i); + } +} + +/// \brief Computes a QR factorization of a matrix A +/// +/// \tparam AMatrix Type 
of matrix A, as a 2-D Kokkos::View. +/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View. +/// +/// \param A [in,out] On entry, the M-by-N matrix to be factorized. +/// On exit, the elements on and above the diagonal contain +/// the min(M,N)-by-N upper trapezoidal matrix R (R is +/// upper triangular if M >= N); the elements below the +/// diagonal, with the array Tau, represent the unitary +/// matrix Q as a product of min(M,N) elementary reflectors. +/// \param Tau [out] One-dimensional array of size min(M,N) that contains +/// the scalar factors of the elementary reflectors. +/// \param Work [out] One-dimensional array of size max(1,LWORK). +/// If min(M,N) == 0, then LWORK must be >= 1. +/// If min(M,N) != 0, then LWORK must be >= N. +/// If the QR factorization is successful, then the first +/// position of Work contains the optimal LWORK. +/// +template +void geqrf(const AMatrix& A, const TWArray& Tau, const TWArray& Work) { + typename AMatrix::execution_space space{}; + geqrf(space, A, Tau, Work); +} + +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_GEQRF_HPP_ diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp new file mode 100644 index 0000000000..733f0510e0 --- /dev/null +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp @@ -0,0 +1,163 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct geqrf_tpl_spec_avail { + enum : bool { value = false }; +}; + +// Generic Host side LAPACK (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK + +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct geqrf_tpl_spec_avail< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +} // namespace Impl +} // namespace KokkosLapack + +// MAGMA +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include "magma_v2.h" + +namespace KokkosLapack { +namespace Impl { +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct geqrf_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) 
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +namespace KokkosLapack { +namespace Impl { + +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct geqrf_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // CUSOLVER + +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#include + +namespace KokkosLapack { +namespace Impl { + +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct geqrf_tpl_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + 
+KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIPSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER + +#endif diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp new file mode 100644 index 0000000000..8a5b37812d --- /dev/null +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -0,0 +1,557 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ +#define KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ + +namespace KokkosLapack { +namespace Impl { +template +inline void geqrf_print_specialization() { +#ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA + printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n", + typeid(AViewType).name(), typeid(BViewType).name(), + typeid(PViewType).name()); +#else +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK + printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n", + typeid(AViewType).name(), typeid(BViewType).name(), + typeid(PViewType).name()); +#endif +#endif +#endif +} +} // namespace Impl +} // namespace KokkosLapack + +// Generic Host side LAPACK (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK +#include + +namespace KokkosLapack { +namespace Impl { + +template +void lapackGeqrfWrapper(const AViewType& A, const BViewType& B, + const IPIVViewType& IPIV) { + using Scalar = typename AViewType::non_const_value_type; + + const bool with_pivot = !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); + + const int N = static_cast(A.extent(1)); + const int AST = static_cast(A.stride(1)); + const int LDA = (AST == 0) ? 1 : AST; + const int BST = static_cast(B.stride(1)); + const int LDB = (BST == 0) ? 
1 : BST; + const int NRHS = static_cast(B.extent(1)); + + int info = 0; + + if (with_pivot) { + if constexpr (Kokkos::ArithTraits::is_complex) { + using MagType = typename Kokkos::ArithTraits::mag_type; + + HostLapack>::geqrf( + N, NRHS, reinterpret_cast*>(A.data()), LDA, + IPIV.data(), reinterpret_cast*>(B.data()), LDB, + info); + } else { + HostLapack::geqrf(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), + LDB, info); + } + } +} + +#define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using BViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void geqrf(const EXECSPACE& /* space */, const AViewType& A, \ + const BViewType& B, const PViewType& IPIV) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR \ + "]"); \ + geqrf_print_specialization(); \ + lapackGeqrfWrapper(A, B, IPIV); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#if defined(KOKKOS_ENABLE_SERIAL) +KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Serial, Kokkos::HostSpace) +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP, + Kokkos::HostSpace) 
+KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP, Kokkos::HostSpace) +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads, + Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads, Kokkos::HostSpace) +KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads, Kokkos::HostSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK + +// MAGMA +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include + +namespace KokkosLapack { +namespace Impl { + +template +void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, + const BViewType& B, const IPIVViewType& IPIV) { + using scalar_type = typename AViewType::non_const_value_type; + + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," + + Kokkos::ArithTraits::name() + "]"); + geqrf_print_specialization(); + + const bool with_pivot = !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); + + magma_int_t N = static_cast(A.extent(1)); + magma_int_t AST = static_cast(A.stride(1)); + magma_int_t LDA = (AST == 0) ? 1 : AST; + magma_int_t BST = static_cast(B.stride(1)); + magma_int_t LDB = (BST == 0) ? 
1 : BST; + magma_int_t NRHS = static_cast(B.extent(1)); + + KokkosLapack::Impl::MagmaSingleton& s = + KokkosLapack::Impl::MagmaSingleton::singleton(); + magma_int_t info = 0; + + space.fence(); + if constexpr (std::is_same_v) { + if (with_pivot) { + magma_sgeqrf_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, + IPIV.data(), reinterpret_cast(B.data()), + LDB, &info); + } else { + magma_sgeqrf_nopiv_gpu(N, NRHS, reinterpret_cast(A.data()), + LDA, reinterpret_cast(B.data()), + LDB, &info); + } + } + + if constexpr (std::is_same_v) { + if (with_pivot) { + magma_dgeqrf_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, + IPIV.data(), reinterpret_cast(B.data()), + LDB, &info); + } else { + magma_dgeqrf_nopiv_gpu( + N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); + } + } + + if constexpr (std::is_same_v>) { + if (with_pivot) { + magma_cgeqrf_gpu( + N, NRHS, reinterpret_cast(A.data()), LDA, + IPIV.data(), reinterpret_cast(B.data()), LDB, + &info); + } else { + magma_cgeqrf_nopiv_gpu( + N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); + } + } + + if constexpr (std::is_same_v>) { + if (with_pivot) { + magma_zgeqrf_gpu( + N, NRHS, reinterpret_cast(A.data()), LDA, + IPIV.data(), reinterpret_cast(B.data()), LDB, + &info); + } else { + magma_zgeqrf_nopiv_gpu( + N, NRHS, reinterpret_cast(A.data()), LDA, + reinterpret_cast(B.data()), LDB, &info); + } + } + ExecSpace().fence(); + Kokkos::Profiling::popRegion(); +} + +#define KOKKOSLAPACK_GEQRF_MAGMA(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; 
\ + using BViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PViewType = Kokkos::View< \ + magma_int_t*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + \ + static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ + const BViewType& B, const PViewType& IPIV) { \ + magmaGeqrfWrapper(space, A, B, IPIV); \ + } \ + }; + +KOKKOSLAPACK_GEQRF_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include "KokkosLapack_cusolver.hpp" + +namespace KokkosLapack { +namespace Impl { + +template +void cusolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, + const AViewType& A, const BViewType& B) { + using memory_space = typename AViewType::memory_space; + using Scalar = typename BViewType::non_const_value_type; + using ALayout_t = typename AViewType::array_layout; + using BLayout_t = typename BViewType::array_layout; + + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = std::is_same_v ? A.stride(0) + : A.stride(1); + + (void)B; + + const int nrhs = B.extent_int(1); + const int ldb = std::is_same_v ? 
B.stride(0) + : B.stride(1); + int lwork = 0; + Kokkos::View info("getrf info"); + + CudaLapackSingleton& s = CudaLapackSingleton::singleton(); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSetStream(s.handle, space.cuda_stream())); + if constexpr (std::is_same_v) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + Kokkos::View Workspace("getrf workspace", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgetrf(s.handle, m, n, A.data(), + lda, Workspace.data(), + IPIV.data(), info.data())); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda, + IPIV.data(), B.data(), ldb, info.data())); + } + if constexpr (std::is_same_v) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnDgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + Kokkos::View Workspace("getrf workspace", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgetrf(s.handle, m, n, A.data(), + lda, Workspace.data(), + IPIV.data(), info.data())); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnDgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda, + IPIV.data(), B.data(), ldb, info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrf_bufferSize( + s.handle, m, n, reinterpret_cast(A.data()), lda, &lwork)); + Kokkos::View Workspace("getrf workspace", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnCgetrf(s.handle, m, n, reinterpret_cast(A.data()), + lda, reinterpret_cast(Workspace.data()), + IPIV.data(), info.data())); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrs( + s.handle, CUBLAS_OP_N, m, nrhs, reinterpret_cast(A.data()), + lda, IPIV.data(), reinterpret_cast(B.data()), ldb, + info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf_bufferSize( + s.handle, m, n, reinterpret_cast(A.data()), lda, + &lwork)); + Kokkos::View Workspace("getrf workspace", + lwork); + + 
KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf( + s.handle, m, n, reinterpret_cast(A.data()), lda, + reinterpret_cast(Workspace.data()), IPIV.data(), + info.data())); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrs( + s.handle, CUBLAS_OP_N, m, nrhs, + reinterpret_cast(A.data()), lda, IPIV.data(), + reinterpret_cast(B.data()), ldb, info.data())); + } + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using BViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ + const BViewType& B, const PViewType& IPIV) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR \ + "]"); \ + geqrf_print_specialization(); \ + \ + cusolverGeqrfWrapper(space, IPIV, A, B); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) 
+KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + +// ROCSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#include +#include + +namespace KokkosLapack { +namespace Impl { + +template +void rocsolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, + const AViewType& A, const BViewType& B) { + using Scalar = typename BViewType::non_const_value_type; + using ALayout_t = typename AViewType::array_layout; + using BLayout_t = typename BViewType::array_layout; + + const rocblas_int N = static_cast(A.extent(0)); + const rocblas_int nrhs = static_cast(B.extent(1)); + const rocblas_int lda = std::is_same_v + ? A.stride(0) + : A.stride(1); + const rocblas_int ldb = std::is_same_v + ? B.stride(0) + : B.stride(1); + Kokkos::View info("rocsolver info"); + + KokkosBlas::Impl::RocBlasSingleton& s = + KokkosBlas::Impl::RocBlasSingleton::singleton(); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocblas_set_stream(s.handle, space.hip_stream())); + if constexpr (std::is_same_v) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgeqrf(s.handle, N, nrhs, A.data(), + lda, IPIV.data(), B.data(), + ldb, info.data())); + } + if constexpr (std::is_same_v) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgeqrf(s.handle, N, nrhs, A.data(), + lda, IPIV.data(), B.data(), + ldb, info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf( + s.handle, N, nrhs, reinterpret_cast(A.data()), + lda, IPIV.data(), reinterpret_cast(B.data()), + ldb, info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf( + s.handle, N, nrhs, reinterpret_cast(A.data()), + lda, IPIV.data(), reinterpret_cast(B.data()), + ldb, info.data())); + } + 
KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_GEQRF_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GEQRF< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using BViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void geqrf(const Kokkos::HIP& space, const AViewType& A, \ + const BViewType& B, const PViewType& IPIV) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]"); \ + geqrf_print_specialization(); \ + \ + rocsolverGeqrfWrapper(space, IPIV, A, B); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_GEQRF_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER + +#endif diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp new file mode 100644 index 0000000000..f9e93180b1 --- /dev/null +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -0,0 +1,444 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +// only enable this test where KokkosLapack supports geqrf: +// CUDA+(MAGMA or CUSOLVER), HIP+ROCSOLVER and HOST+LAPACK +#if (defined(TEST_CUDA_LAPACK_CPP) && \ + (defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) || \ + defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER))) || \ + (defined(TEST_HIP_LAPACK_CPP) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ + defined(TEST_THREADS_LAPACK_CPP))) + +#include +#include +#include + +#include +#include +#include +#include + +namespace Test { + +template +void impl_test_geqrf(const char* mode, const char* padding, int N) { + using execution_space = typename Device::execution_space; + using ScalarA = typename ViewTypeA::value_type; + using ats = Kokkos::ArithTraits; + + execution_space space{}; + + Kokkos::Random_XorShift64_Pool rand_pool(13718); + + int ldda, lddb; + + if (padding[0] == 'Y') { // rounded up to multiple of 32 + ldda = ((N + 32 - 1) / 32) * 32; + lddb = ldda; + } else { + ldda = N; + lddb = N; + } + + // Create device views + ViewTypeA A("A", ldda, N); + ViewTypeB X0("X0", N); + ViewTypeB B("B", lddb); + + // Create host mirrors of device views. + typename ViewTypeB::HostMirror h_X0 = Kokkos::create_mirror_view(X0); + typename ViewTypeB::HostMirror h_B = Kokkos::create_mirror(B); + + // Initialize data. + Kokkos::fill_random( + A, rand_pool, + Kokkos::rand, ScalarA>::max()); + Kokkos::fill_random( + X0, rand_pool, + Kokkos::rand, ScalarA>::max()); + + // Generate RHS B = A*X0. + ScalarA alpha = 1.0; + ScalarA beta = 0.0; + + KokkosBlas::gemv("N", alpha, A, X0, beta, B); + Kokkos::fence(); + + // Deep copy device view to host view. 
+ Kokkos::deep_copy(h_X0, X0); + + // Allocate IPIV view on host + using ViewTypeP = typename std::conditional< + MAGMA, Kokkos::View, + Kokkos::View>::type; + ViewTypeP ipiv; + int Nt = 0; + if (mode[0] == 'Y') { + Nt = N; + ipiv = ViewTypeP("IPIV", Nt); + } + + // Solve. + try { + KokkosLapack::geqrf(space, A, B, ipiv); + } catch (const std::runtime_error& error) { + // Check for expected runtime errors due to: + // no-pivoting case (note: only MAGMA supports no-pivoting interface) + // and no-tpl case + bool nopivot_runtime_err = false; + bool notpl_runtime_err = false; +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL + nopivot_runtime_err = (!std::is_same::value) && + (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); + notpl_runtime_err = false; +#else + notpl_runtime_err = true; +#endif +#else // not have MAGMA TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL + nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); + notpl_runtime_err = false; +#else + notpl_runtime_err = true; +#endif +#endif + if (!nopivot_runtime_err && !notpl_runtime_err) FAIL(); + return; + } + Kokkos::fence(); + + // Get the solution vector. 
+ Kokkos::deep_copy(h_B, B); + + // Checking vs ref on CPU, this eps is about 10^-9 + typedef typename ats::mag_type mag_type; + const mag_type eps = 3.0e7 * ats::epsilon(); + bool test_flag = true; + for (int i = 0; i < N; i++) { + if (ats::abs(h_B(i) - h_X0(i)) > eps) { + test_flag = false; + printf( + " Error %d, pivot %c, padding %c: result( %.15lf ) !=" + "solution( %.15lf ) at (%d), error=%.15e, eps=%.15e\n", + N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), int(i), + ats::abs(h_B(i) - h_X0(i)), eps); + break; + } + } + ASSERT_EQ(test_flag, true); +} + +template +void impl_test_geqrf_mrhs(const char* mode, const char* padding, int N, + int nrhs) { + using execution_space = typename Device::execution_space; + using ScalarA = typename ViewTypeA::value_type; + using ats = Kokkos::ArithTraits; + + execution_space space{}; + + Kokkos::Random_XorShift64_Pool rand_pool(13718); + + int ldda, lddb; + + if (padding[0] == 'Y') { // rounded up to multiple of 32 + ldda = ((N + 32 - 1) / 32) * 32; + lddb = ldda; + } else { + ldda = N; + lddb = N; + } + + // Create device views + ViewTypeA A("A", ldda, N); + ViewTypeB X0("X0", N, nrhs); + ViewTypeB B("B", lddb, nrhs); + + // Create host mirrors of device views. + typename ViewTypeB::HostMirror h_X0 = Kokkos::create_mirror_view(X0); + typename ViewTypeB::HostMirror h_B = Kokkos::create_mirror(B); + + // Initialize data. + Kokkos::fill_random( + A, rand_pool, + Kokkos::rand, ScalarA>::max()); + Kokkos::fill_random( + X0, rand_pool, + Kokkos::rand, ScalarA>::max()); + + // Generate RHS B = A*X0. + ScalarA alpha = 1.0; + ScalarA beta = 0.0; + + KokkosBlas::gemm("N", "N", alpha, A, X0, beta, B); + Kokkos::fence(); + + // Deep copy device view to host view. 
+ Kokkos::deep_copy(h_X0, X0); + + // Allocate IPIV view on host + using ViewTypeP = typename std::conditional< + MAGMA, Kokkos::View, + Kokkos::View>::type; + ViewTypeP ipiv; + int Nt = 0; + if (mode[0] == 'Y') { + Nt = N; + ipiv = ViewTypeP("IPIV", Nt); + } + + // Solve. + try { + KokkosLapack::geqrf(space, A, B, ipiv); + } catch (const std::runtime_error& error) { + // Check for expected runtime errors due to: + // no-pivoting case (note: only MAGMA supports no-pivoting interface) + // and no-tpl case + bool nopivot_runtime_err = false; + bool notpl_runtime_err = false; +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL + nopivot_runtime_err = (!std::is_same::value) && + (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); + notpl_runtime_err = false; +#else + notpl_runtime_err = true; +#endif +#else // not have MAGMA TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL + nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); + notpl_runtime_err = false; +#else + notpl_runtime_err = true; +#endif +#endif + if (!nopivot_runtime_err && !notpl_runtime_err) FAIL(); + return; + } + Kokkos::fence(); + + // Get the solution vector. 
+ Kokkos::deep_copy(h_B, B); + + // Checking vs ref on CPU, this eps is about 10^-9 + typedef typename ats::mag_type mag_type; + const mag_type eps = 1.0e7 * ats::epsilon(); + bool test_flag = true; + for (int j = 0; j < nrhs; j++) { + for (int i = 0; i < N; i++) { + if (ats::abs(h_B(i, j) - h_X0(i, j)) > eps) { + test_flag = false; + // printf( " Error %d, pivot %c, padding %c: result( %.15lf ) != + // solution( %.15lf ) at (%ld) at rhs %d\n", N, mode[0], padding[0], + // ats::abs(h_B(i,j)), ats::abs(h_X0(i,j)), i, j ); + break; + } + } + if (test_flag == false) break; + } + ASSERT_EQ(test_flag, true); +} + +} // namespace Test + +template +int test_geqrf(const char* mode) { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_ll = Kokkos::View; + using view_type_b_ll = Kokkos::View; + +#if (defined(TEST_CUDA_LAPACK_CPP) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ + (defined(TEST_HIP_LAPACK_CPP) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ + defined(TEST_THREADS_LAPACK_CPP))) + Test::impl_test_geqrf( + &mode[0], "N", 2); // no padding + Test::impl_test_geqrf( + &mode[0], "N", 13); // no padding + Test::impl_test_geqrf( + &mode[0], "N", 179); // no padding + Test::impl_test_geqrf( + &mode[0], "N", 64); // no padding + Test::impl_test_geqrf( + &mode[0], "N", 1024); // no padding + +#elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_CUDA) + if constexpr (std::is_same_v) { + Test::impl_test_geqrf( + &mode[0], "N", 2); // no padding + Test::impl_test_geqrf( + &mode[0], "N", 13); // no padding + Test::impl_test_geqrf( + &mode[0], "N", 179); // no padding + Test::impl_test_geqrf( + &mode[0], "N", 64); // no padding + Test::impl_test_geqrf( + &mode[0], "N", 1024); // no padding + + Test::impl_test_geqrf( 
+ &mode[0], "Y", + 13); // padding + Test::impl_test_geqrf( + &mode[0], "Y", + 179); // padding + } +#endif +#endif + + // Supress unused parameters on CUDA10 + (void)mode; + return 1; +} + +template +int test_geqrf_mrhs(const char* mode) { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_ll = Kokkos::View; + using view_type_b_ll = Kokkos::View; + +#if (defined(TEST_CUDA_LAPACK_CPP) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ + (defined(TEST_HIP_LAPACK_CPP) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ + defined(TEST_THREADS_LAPACK_CPP))) + Test::impl_test_geqrf_mrhs( + &mode[0], "N", 2, 5); // no padding + Test::impl_test_geqrf_mrhs( + &mode[0], "N", 13, 5); // no padding + Test::impl_test_geqrf_mrhs( + &mode[0], "N", 179, 5); // no padding + Test::impl_test_geqrf_mrhs( + &mode[0], "N", 64, 5); // no padding + Test::impl_test_geqrf_mrhs( + &mode[0], "N", 1024, 5); // no padding + +// When appropriate run MAGMA specific tests +#elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_CUDA) + if constexpr (std::is_same_v) { + Test::impl_test_geqrf_mrhs( + &mode[0], "N", 2, 5); // no padding + Test::impl_test_geqrf_mrhs( + &mode[0], "N", 13, 5); // no padding + Test::impl_test_geqrf_mrhs( + &mode[0], "N", 179, 5); // no padding + Test::impl_test_geqrf_mrhs( + &mode[0], "N", 64, 5); // no padding + Test::impl_test_geqrf_mrhs( + &mode[0], "N", 1024, 5); // no padding + + Test::impl_test_geqrf_mrhs( + &mode[0], "Y", 13, 5); // padding + Test::impl_test_geqrf_mrhs( + &mode[0], "Y", 179, 5); // padding + } +#endif +#endif + + // Supress unused parameters on CUDA10 + (void)mode; + return 1; +} + +#if defined(KOKKOSKERNELS_INST_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + 
!defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_float"); + test_geqrf("N"); // No pivoting + test_geqrf("Y"); // Partial pivoting + Kokkos::Profiling::popRegion(); +} + +TEST_F(TestCategory, geqrf_mrhs_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_float"); + test_geqrf_mrhs("N"); // No pivoting + test_geqrf_mrhs("Y"); // Partial pivoting + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_double"); + test_geqrf("N"); // No pivoting + test_geqrf("Y"); // Partial pivoting + Kokkos::Profiling::popRegion(); +} + +TEST_F(TestCategory, geqrf_mrhs_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_double"); + test_geqrf_mrhs("N"); // No pivoting + test_geqrf_mrhs("Y"); // Partial pivoting + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_complex_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double"); + test_geqrf, TestDevice>("N"); // No pivoting + test_geqrf, TestDevice>("Y"); // Partial pivoting + Kokkos::Profiling::popRegion(); +} + +TEST_F(TestCategory, geqrf_mrhs_complex_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_complex_double"); + test_geqrf_mrhs, TestDevice>("N"); // No pivoting + test_geqrf_mrhs, TestDevice>("Y"); // Partial pivoting + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, geqrf_complex_float) { + 
Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float"); + test_geqrf, TestDevice>("N"); // No pivoting + test_geqrf, TestDevice>("Y"); // Partial pivoting + Kokkos::Profiling::popRegion(); +} + +TEST_F(TestCategory, geqrf_mrhs_complex_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_complex_float"); + test_geqrf_mrhs, TestDevice>("N"); // No pivoting + test_geqrf_mrhs, TestDevice>("Y"); // Partial pivoting + Kokkos::Profiling::popRegion(); +} +#endif + +#endif // CUDA+(MAGMA or CUSOLVER) or HIP+ROCSOLVER or LAPACK+HOST From 04fbf5fda48de0c56705799b59e0aca9607781c3 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Tue, 14 May 2024 05:46:53 -0600 Subject: [PATCH 02/27] Backup --- lapack/CMakeLists.txt | 7 + lapack/impl/KokkosLapack_geqrf_spec.hpp | 13 +- lapack/src/KokkosLapack_geqrf.hpp | 116 ++++-------- .../KokkosLapack_geqrf_tpl_spec_avail.hpp | 8 - .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 171 +++++------------- lapack/unit_test/Test_Lapack_geqrf.hpp | 2 + 6 files changed, 90 insertions(+), 227 deletions(-) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 804a2b7542..2bd27c3681 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -71,3 +71,10 @@ KOKKOSKERNELS_GENERATE_ETI(Lapack_svd svd SOURCE_LIST SOURCES TYPE_LISTS FLOATS LAYOUTS DEVICES ) + +KOKKOSKERNELS_GENERATE_ETI(Lapack_geqrf geqrf + COMPONENTS lapack + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES +) diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp index d0083cb151..b990e7550f 100644 --- a/lapack/impl/KokkosLapack_geqrf_spec.hpp +++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp @@ -50,10 +50,7 @@ struct geqrf_eti_spec_avail { Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ @@ -83,7 +80,7 @@ struct GEQRF { //! 
Full specialization of geqrf for multi vectors. // Unification layer template -struct GEQRF { static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */, const TWArray & /* Tau */, const TWArray & /* Work */) { @@ -115,9 +112,6 @@ struct GEQRF, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ false, true>; #define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ @@ -130,9 +124,6 @@ struct GEQRF, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ false, true>; #include diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index ba360ad830..e5c59c996e 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -66,106 +66,56 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, typename AMatrix::memory_space>::accessible); static_assert( Kokkos::SpaceAccessibility::accessible); -#if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) - if constexpr (!std::is_same_v) { - static_assert( - Kokkos::SpaceAccessibility::accessible); - } -#else - static_assert( - Kokkos::SpaceAccessibility::accessible); -#endif + typename TWArray::memory_space>::accessible); + static_assert(Kokkos::is_view::value, "KokkosLapack::geqrf: A must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosLapack::geqrf: B must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosLapack::geqrf: IPIV must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::geqrf: Tau and Work must be Kokkos::View."); static_assert(static_cast(AMatrix::rank) == 2, "KokkosLapack::geqrf: A must have rank 2."); - static_assert( - static_cast(BXMV::rank) == 1 || static_cast(BXMV::rank) == 2, - "KokkosLapack::geqrf: B must have either rank 1 or rank 2."); - static_assert(static_cast(IPIVV::rank) == 1, - "KokkosLapack::geqrf: IPIV must have rank 1."); - - int64_t IPIV0 = IPIV.extent(0); - int64_t A0 = A.extent(0); - 
int64_t A1 = A.extent(1); - int64_t B0 = B.extent(0); + static_assert(static_cast(TWArray::rank) == 1, + "KokkosLapack::geqrf: Tau and Work must have rank 1."); - // Check validity of pivot argument - bool valid_pivot = - (IPIV0 == A1) || ((IPIV0 == 0) && (IPIV.data() == nullptr)); - if (!(valid_pivot)) { - std::ostringstream os; - os << "KokkosLapack::geqrf: IPIV: " << IPIV0 << ". " - << "Valid options include zero-extent 1-D view (no pivoting), or 1-D " - "View with size of " - << A0 << " (partial pivoting)."; -g KokkosKernels::Impl::throw_runtime_exception(os.str()); - } + int64_t m = A.extent(0); + int64_t n = A.extent(1); - // Check for no pivoting case. Only MAGMA supports no pivoting interface -#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL - if ((!std::is_same::value) && - (IPIV0 == 0) && (IPIV.data() == nullptr)) { + // Check validity of dimensions + if (Tau.extent(0) != std::min(m,n)) { std::ostringstream os; - os << "KokkosLapack::geqrf: IPIV: " << IPIV0 << ". " - << "LAPACK TPL does not support no pivoting."; + os << "KokkosLapack::geqrf: length of Tau must be equal to min(m,n): " + << " A: " << m << " x " << n << ", Tau length = " << Tau.extent(0); KokkosKernels::Impl::throw_runtime_exception(os.str()); } -#endif -#else // not have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL - if ((IPIV0 == 0) && (IPIV.data() == nullptr)) { - std::ostringstream os; - os << "KokkosLapack::geqrf: IPIV: " << IPIV0 << ". 
" - << "LAPACK TPL does not support no pivoting."; - KokkosKernels::Impl::throw_runtime_exception(os.str()); + if ((m == 0) || (n == 0)) { + if (Work.extent(0) < 1) { + std::ostringstream os; + os << "KokkosLapack::geqrf: In case min(m,n) == 0, then Work must have length >= 1: " + << " A: " << m << " x " << n << ", Work length = " << Work.extent(0); + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } } -#endif -#endif - - // Check compatibility of dimensions at run time. - if ((A0 < A1) || (A0 != B0)) { - std::ostringstream os; - os << "KokkosLapack::geqrf: Dimensions of A, and B do not match: " - << " A: " << A.extent(0) << " x " << A.extent(1) << " B: " << B.extent(0) - << " x " << B.extent(1); - KokkosKernels::Impl::throw_runtime_exception(os.str()); + else { + if (Work.extent(0) < n) { + std::ostringstream os; + os << "KokkosLapack::geqrf: In case min(m,n) != 0, then Work must have length >= n: " + << " A: " << m << " x " << n << ", Work length = " << Work.extent(0); + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } } typedef Kokkos::View< typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, typename AMatrix::device_type, Kokkos::MemoryTraits > AMatrix_Internal; - typedef Kokkos::View > - BXMV_Internal; - typedef Kokkos::View< - typename IPIVV::non_const_value_type*, typename IPIVV::array_layout, - typename IPIVV::device_type, Kokkos::MemoryTraits > - IPIVV_Internal; - AMatrix_Internal A_i = A; - // BXMV_Internal B_i = B; - IPIVV_Internal IPIV_i = IPIV; - - if (BXMV::rank == 1) { - auto B_i = BXMV_Internal(B.data(), B.extent(0), 1); - KokkosLapack::Impl::GEQRF::geqrf(space, A_i, B_i, IPIV_i); - } else { // BXMV::rank == 2 - auto B_i = BXMV_Internal(B.data(), B.extent(0), B.extent(1)); - KokkosLapack::Impl::GEQRF::geqrf(space, A_i, B_i, IPIV_i); - } + TWArray_Internal; + AMatrix_Internal A_i = A; + TWArray_Internal Tau_i = Tau; + TWArray_Internal Work_i = Work; + KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, 
Work_i); } /// \brief Computes a QR factorization of a matrix A diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp index 733f0510e0..d4f1ff107e 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp @@ -34,8 +34,6 @@ struct geqrf_tpl_spec_avail { ExecSpace, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits > > { \ enum : bool { value = true }; \ @@ -65,8 +63,6 @@ namespace Impl { Kokkos::Cuda, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits > > { \ enum : bool { value = true }; \ @@ -95,8 +91,6 @@ namespace Impl { Kokkos::Cuda, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits > > { \ enum : bool { value = true }; \ @@ -140,8 +134,6 @@ namespace Impl { Kokkos::HIP, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits > > { \ enum : bool { value = true }; \ diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 8a5b37812d..9b6e1700a3 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -17,20 +17,20 @@ #ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ #define KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ +// AquiEEP + namespace KokkosLapack { namespace Impl { -template +template inline void geqrf_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA - printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n", - typeid(AViewType).name(), typeid(BViewType).name(), - typeid(PViewType).name()); + printf("KokkosLapack::geqrf<> TPL MAGMA specialization 
for < %s , %s >\n", + typeid(AViewType).name(), typeid(TWViewType).name()); #else #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK - printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n", - typeid(AViewType).name(), typeid(BViewType).name(), - typeid(PViewType).name()); + printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s >\n", + typeid(AViewType).name(), typeid(TWViewType).name()); #endif #endif #endif @@ -45,13 +45,11 @@ inline void geqrf_print_specialization() { namespace KokkosLapack { namespace Impl { -template -void lapackGeqrfWrapper(const AViewType& A, const BViewType& B, - const IPIVViewType& IPIV) { +template +void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, + const TWViewType& Work) { using Scalar = typename AViewType::non_const_value_type; - const bool with_pivot = !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); - const int N = static_cast(A.extent(1)); const int AST = static_cast(A.stride(1)); const int LDA = (AST == 0) ? 
1 : AST; @@ -61,19 +59,8 @@ void lapackGeqrfWrapper(const AViewType& A, const BViewType& B, int info = 0; - if (with_pivot) { - if constexpr (Kokkos::ArithTraits::is_complex) { - using MagType = typename Kokkos::ArithTraits::mag_type; - - HostLapack>::geqrf( - N, NRHS, reinterpret_cast*>(A.data()), LDA, - IPIV.data(), reinterpret_cast*>(B.data()), LDB, - info); - } else { HostLapack::geqrf(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), LDB, info); - } - } } #define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE) \ @@ -82,36 +69,28 @@ void lapackGeqrfWrapper(const AViewType& A, const BViewType& B, EXECSPACE, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ true, \ geqrf_eti_spec_avail< \ EXECSPACE, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ - using BViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using PViewType = \ - Kokkos::View, \ + using TWViewType = \ + Kokkos::View, \ Kokkos::MemoryTraits>; \ \ static void geqrf(const EXECSPACE& /* space */, const AViewType& A, \ - const BViewType& B, const PViewType& IPIV) { \ + const TWViewType& Tau, const TWViewType& Work) { \ Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR \ "]"); \ - geqrf_print_specialization(); \ - lapackGeqrfWrapper(A, B, IPIV); \ + geqrf_print_specialization(); \ + lapackGeqrfWrapper(A, Tau, Work); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -160,16 +139,14 @@ KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template +template void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, - const BViewType& B, const IPIVViewType& IPIV) { + const TWViewType& Tau, const TWViewType& Work) { using scalar_type = typename 
AViewType::non_const_value_type; Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," + Kokkos::ArithTraits::name() + "]"); - geqrf_print_specialization(); - - const bool with_pivot = !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); + geqrf_print_specialization(); magma_int_t N = static_cast(A.extent(1)); magma_int_t AST = static_cast(A.stride(1)); @@ -184,53 +161,27 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, space.fence(); if constexpr (std::is_same_v) { - if (with_pivot) { - magma_sgeqrf_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, - IPIV.data(), reinterpret_cast(B.data()), - LDB, &info); - } else { magma_sgeqrf_nopiv_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, reinterpret_cast(B.data()), LDB, &info); - } } if constexpr (std::is_same_v) { - if (with_pivot) { - magma_dgeqrf_gpu(N, NRHS, reinterpret_cast(A.data()), LDA, - IPIV.data(), reinterpret_cast(B.data()), - LDB, &info); - } else { magma_dgeqrf_nopiv_gpu( N, NRHS, reinterpret_cast(A.data()), LDA, reinterpret_cast(B.data()), LDB, &info); - } } if constexpr (std::is_same_v>) { - if (with_pivot) { - magma_cgeqrf_gpu( - N, NRHS, reinterpret_cast(A.data()), LDA, - IPIV.data(), reinterpret_cast(B.data()), LDB, - &info); - } else { magma_cgeqrf_nopiv_gpu( N, NRHS, reinterpret_cast(A.data()), LDA, reinterpret_cast(B.data()), LDB, &info); - } } if constexpr (std::is_same_v>) { - if (with_pivot) { - magma_zgeqrf_gpu( - N, NRHS, reinterpret_cast(A.data()), LDA, - IPIV.data(), reinterpret_cast(B.data()), LDB, - &info); - } else { magma_zgeqrf_nopiv_gpu( N, NRHS, reinterpret_cast(A.data()), LDA, reinterpret_cast(B.data()), LDB, &info); - } } ExecSpace().fence(); Kokkos::Profiling::popRegion(); @@ -242,11 +193,7 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, Kokkos::Cuda, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ true, \ geqrf_eti_spec_avail< \ @@ 
-254,27 +201,19 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = Kokkos::View, \ Kokkos::MemoryTraits>; \ - using BViewType = Kokkos::View, \ Kokkos::MemoryTraits>; \ - using PViewType = Kokkos::View< \ - magma_int_t*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>; \ \ static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ - const BViewType& B, const PViewType& IPIV) { \ - magmaGeqrfWrapper(space, A, B, IPIV); \ + const TWViewType& Tau, const TWViewType& Work) { \ + magmaGeqrfWrapper(space, A, Tau, Work); \ } \ }; @@ -296,14 +235,13 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template -void cusolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, - const AViewType& A, const BViewType& B) { +template +void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, + const AViewType& A, const TWViewType& Tau) { using memory_space = typename AViewType::memory_space; - using Scalar = typename BViewType::non_const_value_type; + using Scalar = typename TWViewType::non_const_value_type; using ALayout_t = typename AViewType::array_layout; - using BLayout_t = typename BViewType::array_layout; + using BLayout_t = typename TWViewType::array_layout; const int m = A.extent_int(0); const int n = A.extent_int(1); @@ -388,9 +326,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, Kokkos::Cuda, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ true, \ geqrf_eti_spec_avail< \ @@ -398,26 +334,21 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ 
- Kokkos::View, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = Kokkos::View, \ Kokkos::MemoryTraits>; \ - using BViewType = Kokkos::View, \ Kokkos::MemoryTraits>; \ - using PViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ \ static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ - const BViewType& B, const PViewType& IPIV) { \ + const TWViewType& Tau, const TWViewType& Work) { \ Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR \ "]"); \ - geqrf_print_specialization(); \ + geqrf_print_specialization(); \ \ cusolverGeqrfWrapper(space, IPIV, A, B); \ Kokkos::Profiling::popRegion(); \ @@ -452,13 +383,12 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template -void rocsolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, - const AViewType& A, const BViewType& B) { - using Scalar = typename BViewType::non_const_value_type; +template +void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, + const AViewType& A, const TWViewType& Tau) { + using Scalar = typename TWViewType::non_const_value_type; using ALayout_t = typename AViewType::array_layout; - using BLayout_t = typename BViewType::array_layout; + using BLayout_t = typename TWViewType::array_layout; const rocblas_int N = static_cast(A.extent(0)); const rocblas_int nrhs = static_cast(B.extent(1)); @@ -505,10 +435,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV Kokkos::HIP, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ true, \ geqrf_eti_spec_avail< \ @@ -516,27 +443,21 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = \ Kokkos::View, \ 
Kokkos::MemoryTraits>; \ - using BViewType = \ - Kokkos::View, \ + using TWViewType = \ + Kokkos::View, \ Kokkos::MemoryTraits>; \ - using PViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ \ static void geqrf(const Kokkos::HIP& space, const AViewType& A, \ - const BViewType& B, const PViewType& IPIV) { \ + const TWViewType& Tau, const TWViewType& Work) { \ Kokkos::Profiling::pushRegion( \ "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]"); \ - geqrf_print_specialization(); \ + geqrf_print_specialization(); \ \ rocsolverGeqrfWrapper(space, IPIV, A, B); \ Kokkos::Profiling::popRegion(); \ diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index f9e93180b1..c246b86a29 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -25,6 +25,8 @@ (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ defined(TEST_THREADS_LAPACK_CPP))) +// AquiEEP + #include #include #include From 1caf1476f2941f59a8ea57e284b9dfdb0b70338f Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Sun, 19 May 2024 01:10:37 -0600 Subject: [PATCH 03/27] Backup --- lapack/impl/KokkosLapack_geqrf_spec.hpp | 24 +- lapack/src/KokkosLapack_geqrf.hpp | 29 ++- lapack/tpls/KokkosLapack_Host_tpl.cpp | 47 ++++ lapack/tpls/KokkosLapack_Host_tpl.hpp | 2 + .../KokkosLapack_geqrf_tpl_spec_avail.hpp | 8 +- .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 179 +++++++------- lapack/unit_test/Test_Lapack_geqrf.hpp | 224 +----------------- 7 files changed, 197 insertions(+), 316 deletions(-) diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp index b990e7550f..98d532489b 100644 --- a/lapack/impl/KokkosLapack_geqrf_spec.hpp +++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp @@ -28,7 +28,7 @@ namespace KokkosLapack { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct geqrf_eti_spec_avail { enum : bool { value = false 
}; }; @@ -52,6 +52,8 @@ struct geqrf_eti_spec_avail { Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View>> { \ enum : bool { value = true }; \ }; @@ -64,26 +66,24 @@ namespace KokkosLapack { namespace Impl { // Unification layer -/// \brief Implementation of KokkosLapack::geqrf. - -template ::value, + geqrf_tpl_spec_avail::value, bool eti_spec_avail = - geqrf_eti_spec_avail::value> + geqrf_eti_spec_avail::value> struct GEQRF { static void geqrf(const ExecutionSpace &space, const AMatrix &A, const TWArray &Tau, - const TWArray &Work); + const TWArray &Work, const RType &R); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of geqrf for multi vectors. // Unification layer -template -struct GEQRF +struct GEQRF { static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */, - const TWArray & /* Tau */, const TWArray & /* Work */) { + const TWArray & /* Tau */, const TWArray & /* Work */, const RType & /* R */) { // NOTE: Might add the implementation of KokkosLapack::geqrf later throw std::runtime_error( "No fallback implementation of GEQRF (general QR factorization) " @@ -112,6 +112,8 @@ struct GEQRF, \ Kokkos::MemoryTraits>, \ + Kokkos::View>, \ false, true>; #define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ @@ -124,6 +126,8 @@ struct GEQRF, \ Kokkos::MemoryTraits>, \ + Kokkos::View>, \ false, true>; #include diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index e5c59c996e..78bea3a4e7 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -44,16 +44,19 @@ namespace KokkosLapack { /// upper triangular if M >= N); the elements below the /// diagonal, with the array Tau, represent the unitary /// matrix Q as a product of min(M,N) elementary reflectors. 
-/// \param Tau [out] One-dimensional array of size min(M,N) that contain +/// \param Tau [out] One-dimensional array of size min(M,N) that contains /// the scalar factors of the elementary reflectors. /// \param Work [out] One-dimensional array of size max(1,LWORK). /// If min(M,N) == 0, then LWORK must be >= 1. /// If min(M,N) != 0, then LWORK must be >= N. /// If the QR factorization is successful, then the first /// position of Work contains the optimal LWORK. +/// \return = 0: successfull exit +/// < 0: if equal to '-i', the i-th argument had an illegal +/// value /// template -void geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, +int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, const TWArray& Work) { // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and // rocSOLVER TPLs. @@ -115,7 +118,18 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, AMatrix_Internal A_i = A; TWArray_Internal Tau_i = Tau; TWArray_Internal Work_i = Work; - KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, Work_i); + + // This is the return value type and should always reside on host + using RViewInternalType = + Kokkos::View >; + + int result; + RViewInternalType R = RViewInternalType(&result); + + KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, Work_i, R); + + return result; } /// \brief Computes a QR factorization of a matrix A @@ -129,18 +143,21 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, /// upper triangular if M >= N); the elements below the /// diagonal, with the array Tau, represent the unitary /// matrix Q as a product of min(M,N) elementary reflectors. -/// \param Tau [out] One-dimensional array of size min(M,N) that contain +/// \param Tau [out] One-dimensional array of size min(M,N) that contains /// the scalar factors of the elementary reflectors. /// \param Work [out] One-dimensional array of size max(1,LWORK). 
/// If min(M,N) == 0, then LWORK must be >= 1. /// If min(M,N) != 0, then LWORK must be >= N. /// If the QR factorization is successful, then the first /// position of Work contains the optimal LWORK. +/// \return = 0: successfull exit +/// < 0: if equal to '-i', the i-th argument had an illegal +/// value /// template -void geqrf(const AMatrix& A, const TWArray& Tau, const TWArray& Work) { +int geqrf(const AMatrix& A, const TWArray& Tau, const TWArray& Work) { typename AMatrix::execution_space space{}; - geqrf(space, A, Tau, Work); + return geqrf(space, A, Tau, Work); } } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index add0a802bd..89085619e8 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -82,6 +82,20 @@ void F77_BLAS_MANGLE(ctrtri, CTRTRI)(const char*, const char*, int*, const std::complex*, int*, int*); void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, const std::complex*, int*, int*); + +/// +/// Geqrf +/// + +void F77_BLAS_MANGLE(sgeqrf, SGEQRF)(int*, int*, float*, int*, float*, float*, int*, + int*); +void F77_BLAS_MANGLE(dgeqrf, DGEQRF)(int*, int*, double*, int*, double*, double*, + int*, int*); +void F77_BLAS_MANGLE(cgeqrf, CGEQRF)(int*, int*, std::complex*, int*, std::complex*, + std::complex*, int*, int*); +void F77_BLAS_MANGLE(zgeqrf, ZGEQRF)(int*, int*, std::complex*, int*, + std::complex*, std::complex*, int*, int*); + } #define F77_FUNC_SGESV F77_BLAS_MANGLE(sgesv, SGESV) @@ -99,6 +113,11 @@ void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, #define F77_FUNC_CTRTRI F77_BLAS_MANGLE(ctrtri, CTRTRI) #define F77_FUNC_ZTRTRI F77_BLAS_MANGLE(ztrtri, ZTRTRI) +#define F77_FUNC_SGEQRF F77_BLAS_MANGLE(sgeqrf, SGEQRF) +#define F77_FUNC_DGEQRF F77_BLAS_MANGLE(dgeqrf, DGEQRF) +#define F77_FUNC_CGEQRF F77_BLAS_MANGLE(cgeqrf, CGEQRF) +#define F77_FUNC_ZGEQRF F77_BLAS_MANGLE(zgeqrf, ZGEQRF) + namespace 
KokkosLapack { namespace Impl { @@ -127,6 +146,13 @@ int HostLapack::trtri(const char uplo, const char diag, int n, F77_FUNC_STRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +int HostLapack::geqrf(int m, int n, float* a, int lda, float* tau, + float* work, int lwork) { + int info = 0; + F77_FUNC_SGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info); + return info; +} /// /// double @@ -153,6 +179,13 @@ int HostLapack::trtri(const char uplo, const char diag, int n, F77_FUNC_DTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +int HostLapack::geqrf(int m, int n, double* a, int lda, double* tau, + double* work, int lwork) { + int info = 0; + F77_FUNC_DGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info); + return info; +} /// /// std::complex @@ -182,6 +215,13 @@ int HostLapack >::trtri(const char uplo, const char diag, F77_FUNC_CTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +int HostLapack>::geqrf(int m, int n, std::complex* a, int lda, std::complex* tau, + std::complex* work, int lwork) { + int info = 0; + F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info); + return info; +} /// /// std::complex @@ -212,6 +252,13 @@ int HostLapack >::trtri(const char uplo, const char diag, F77_FUNC_ZTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } +template <> +int HostLapack>::geqrf(int m, int n, std::complex* a, int lda, std::complex* tau, + std::complex* work, int lwork) { + int info = 0; + F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info); + return info; +} } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp index 9eca83afea..d651c9ca52 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.hpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp @@ -41,6 +41,8 @@ struct HostLapack { static int trtri(const char uplo, const char diag, int n, const T *a, int lda); + + static int geqrf(int m, int n, T *a, int lda, T *tau, T *work, int 
lwork); }; } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp index d4f1ff107e..aaa465a814 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp @@ -20,7 +20,7 @@ namespace KokkosLapack { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct geqrf_tpl_spec_avail { enum : bool { value = false }; }; @@ -33,9 +33,11 @@ struct geqrf_tpl_spec_avail { struct geqrf_tpl_spec_avail< \ ExecSpace, \ Kokkos::View, \ - Kokkos::MemoryTraits >, \ + Kokkos::MemoryTraits>, \ Kokkos::View, \ - Kokkos::MemoryTraits > > { \ + Kokkos::MemoryTraits>, \ + Kokkos::View>> { \ enum : bool { value = true }; \ }; diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 9b6e1700a3..559e2854a6 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -21,16 +21,16 @@ namespace KokkosLapack { namespace Impl { -template +template inline void geqrf_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA - printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s >\n", - typeid(AViewType).name(), typeid(TWViewType).name()); + printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n", + typeid(AViewType).name(), typeid(TWViewType).name(), typeid(RType).name()); #else #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK - printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s >\n", - typeid(AViewType).name(), typeid(TWViewType).name()); + printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n", + typeid(AViewType).name(), typeid(TWViewType).name(), typeid(RType).name()); #endif #endif #endif @@ -45,52 +45,59 @@ inline void 
geqrf_print_specialization() { namespace KokkosLapack { namespace Impl { -template +template void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, - const TWViewType& Work) { + const TWViewType& Work, const RType& R) { using Scalar = typename AViewType::non_const_value_type; - const int N = static_cast(A.extent(1)); - const int AST = static_cast(A.stride(1)); - const int LDA = (AST == 0) ? 1 : AST; - const int BST = static_cast(B.stride(1)); - const int LDB = (BST == 0) ? 1 : BST; - const int NRHS = static_cast(B.extent(1)); - - int info = 0; - - HostLapack::geqrf(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), - LDB, info); + using ALayout_t = typename AViewType::array_layout; + static_assert(std::is_same_v, + "KokkosLapack - geqrf: A needs to have a Kokkos::LayoutLeft"); + const int M = A.extent_int(0); + const int N = A.extent_int(1); + const int LDA = A.stride(1); + const int LWORK = static_cast(Work.extent(0)); + + R() = HostLapack::geqrf(M, N, A.data(), LDA, Tau.data(), Work.data(), + LWORK); } -#define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE) \ +#define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE) \ template <> \ - struct GEQRF< \ + struct GEQRF< \ EXECSPACE, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View>, \ true, \ - geqrf_eti_spec_avail< \ + geqrf_eti_spec_avail< \ EXECSPACE, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View>>::value> { \ using AViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ - using TWViewType = \ - Kokkos::View, \ + using TWViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using RType = \ + Kokkos::View>; \ \ - static void geqrf(const EXECSPACE& /* space */, const AViewType& A, \ - const TWViewType& Tau, const TWViewType& Work) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR \ + static void 
geqrf(const EXECSPACE& /* space */, const AViewType& A, \ + const TWViewType& Tau, const TWViewType& Work, \ + const RType& R) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR \ "]"); \ - geqrf_print_specialization(); \ - lapackGeqrfWrapper(A, Tau, Work); \ + geqrf_print_specialization(); \ + lapackGeqrfWrapper(A, Tau, Work, R); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -132,6 +139,8 @@ KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK +#if 0 // AquiEEP + // MAGMA #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA #include @@ -146,7 +155,7 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," + Kokkos::ArithTraits::name() + "]"); - geqrf_print_specialization(); + geqrf_print_specialization(); magma_int_t N = static_cast(A.extent(1)); magma_int_t AST = static_cast(A.stride(1)); @@ -321,38 +330,38 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, } #define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ - template <> \ + template <> \ struct GEQRF< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ geqrf_eti_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using TWViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TWViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ static void geqrf(const Kokkos::Cuda& space, const 
AViewType& A, \ - const TWViewType& Tau, const TWViewType& Work) { \ + const TWViewType& Tau, const TWViewType& Work) { \ Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR \ - "]"); \ - geqrf_print_specialization(); \ - \ + "]"); \ + geqrf_print_specialization(); \ + \ cusolverGeqrfWrapper(space, IPIV, A, B); \ - Kokkos::Profiling::popRegion(); \ - } \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) @@ -430,38 +439,38 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, } #define KOKKOSLAPACK_GEQRF_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ - template <> \ + template <> \ struct GEQRF< \ - Kokkos::HIP, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ geqrf_eti_spec_avail< \ - Kokkos::HIP, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ using TWViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ static void geqrf(const Kokkos::HIP& space, const AViewType& A, \ - const TWViewType& Tau, const TWViewType& Work) { \ - Kokkos::Profiling::pushRegion( \ + const TWViewType& Tau, const TWViewType& Work) { \ + Kokkos::Profiling::pushRegion( \ "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]"); \ - geqrf_print_specialization(); \ - \ + geqrf_print_specialization(); \ + \ rocsolverGeqrfWrapper(space, IPIV, A, B); \ - Kokkos::Profiling::popRegion(); \ - } \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSLAPACK_GEQRF_ROCSOLVER(float, 
Kokkos::LayoutLeft, Kokkos::HIPSpace) @@ -475,4 +484,6 @@ KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#endif // AquiEEP + #endif diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index c246b86a29..bb8183f32c 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -38,8 +38,8 @@ namespace Test { -template -void impl_test_geqrf(const char* mode, const char* padding, int N) { +template +void impl_test_geqrf(int M, int N) { using execution_space = typename Device::execution_space; using ScalarA = typename ViewTypeA::value_type; using ats = Kokkos::ArithTraits; @@ -48,32 +48,24 @@ void impl_test_geqrf(const char* mode, const char* padding, int N) { Kokkos::Random_XorShift64_Pool rand_pool(13718); - int ldda, lddb; - - if (padding[0] == 'Y') { // rounded up to multiple of 32 - ldda = ((N + 32 - 1) / 32) * 32; - lddb = ldda; - } else { - ldda = N; - lddb = N; + int lwork(1); + if (std::min(M,N) != 0) { + lwork = N; } // Create device views - ViewTypeA A("A", ldda, N); - ViewTypeB X0("X0", N); - ViewTypeB B("B", lddb); + ViewTypeA A ("A", M, N); + ViewTypeTW Tau ("Tau", std::min(M,N)); + ViewTypeTW Work("Work", lddb); // Create host mirrors of device views. - typename ViewTypeB::HostMirror h_X0 = Kokkos::create_mirror_view(X0); - typename ViewTypeB::HostMirror h_B = Kokkos::create_mirror(B); + typename ViewTypeTW::HostMirror h_X0 = Kokkos::create_mirror_view(X0); + typename ViewTypeTW::HostMirror h_B = Kokkos::create_mirror(B); // Initialize data. Kokkos::fill_random( A, rand_pool, Kokkos::rand, ScalarA>::max()); - Kokkos::fill_random( - X0, rand_pool, - Kokkos::rand, ScalarA>::max()); // Generate RHS B = A*X0. ScalarA alpha = 1.0; @@ -98,7 +90,7 @@ void impl_test_geqrf(const char* mode, const char* padding, int N) { // Solve. 
try { - KokkosLapack::geqrf(space, A, B, ipiv); + KokkosLapack::geqrf(space, A, Tau, Work); } catch (const std::runtime_error& error) { // Check for expected runtime errors due to: // no-pivoting case (note: only MAGMA supports no-pivoting interface) @@ -148,118 +140,6 @@ void impl_test_geqrf(const char* mode, const char* padding, int N) { ASSERT_EQ(test_flag, true); } -template -void impl_test_geqrf_mrhs(const char* mode, const char* padding, int N, - int nrhs) { - using execution_space = typename Device::execution_space; - using ScalarA = typename ViewTypeA::value_type; - using ats = Kokkos::ArithTraits; - - execution_space space{}; - - Kokkos::Random_XorShift64_Pool rand_pool(13718); - - int ldda, lddb; - - if (padding[0] == 'Y') { // rounded up to multiple of 32 - ldda = ((N + 32 - 1) / 32) * 32; - lddb = ldda; - } else { - ldda = N; - lddb = N; - } - - // Create device views - ViewTypeA A("A", ldda, N); - ViewTypeB X0("X0", N, nrhs); - ViewTypeB B("B", lddb, nrhs); - - // Create host mirrors of device views. - typename ViewTypeB::HostMirror h_X0 = Kokkos::create_mirror_view(X0); - typename ViewTypeB::HostMirror h_B = Kokkos::create_mirror(B); - - // Initialize data. - Kokkos::fill_random( - A, rand_pool, - Kokkos::rand, ScalarA>::max()); - Kokkos::fill_random( - X0, rand_pool, - Kokkos::rand, ScalarA>::max()); - - // Generate RHS B = A*X0. - ScalarA alpha = 1.0; - ScalarA beta = 0.0; - - KokkosBlas::gemm("N", "N", alpha, A, X0, beta, B); - Kokkos::fence(); - - // Deep copy device view to host view. - Kokkos::deep_copy(h_X0, X0); - - // Allocate IPIV view on host - using ViewTypeP = typename std::conditional< - MAGMA, Kokkos::View, - Kokkos::View>::type; - ViewTypeP ipiv; - int Nt = 0; - if (mode[0] == 'Y') { - Nt = N; - ipiv = ViewTypeP("IPIV", Nt); - } - - // Solve. 
- try { - KokkosLapack::geqrf(space, A, B, ipiv); - } catch (const std::runtime_error& error) { - // Check for expected runtime errors due to: - // no-pivoting case (note: only MAGMA supports no-pivoting interface) - // and no-tpl case - bool nopivot_runtime_err = false; - bool notpl_runtime_err = false; -#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL - nopivot_runtime_err = (!std::is_same::value) && - (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); - notpl_runtime_err = false; -#else - notpl_runtime_err = true; -#endif -#else // not have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL - nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); - notpl_runtime_err = false; -#else - notpl_runtime_err = true; -#endif -#endif - if (!nopivot_runtime_err && !notpl_runtime_err) FAIL(); - return; - } - Kokkos::fence(); - - // Get the solution vector. - Kokkos::deep_copy(h_B, B); - - // Checking vs ref on CPU, this eps is about 10^-9 - typedef typename ats::mag_type mag_type; - const mag_type eps = 1.0e7 * ats::epsilon(); - bool test_flag = true; - for (int j = 0; j < nrhs; j++) { - for (int i = 0; i < N; i++) { - if (ats::abs(h_B(i, j) - h_X0(i, j)) > eps) { - test_flag = false; - // printf( " Error %d, pivot %c, padding %c: result( %.15lf ) != - // solution( %.15lf ) at (%ld) at rhs %d\n", N, mode[0], padding[0], - // ats::abs(h_B(i,j)), ats::abs(h_X0(i,j)), i, j ); - break; - } - } - if (test_flag == false) break; - } - ASSERT_EQ(test_flag, true); -} - } // namespace Test template @@ -317,60 +197,6 @@ int test_geqrf(const char* mode) { return 1; } -template -int test_geqrf_mrhs(const char* mode) { -#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - using view_type_a_ll = Kokkos::View; - using view_type_b_ll = Kokkos::View; - -#if (defined(TEST_CUDA_LAPACK_CPP) && \ - 
defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ - (defined(TEST_HIP_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ - (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ - (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ - defined(TEST_THREADS_LAPACK_CPP))) - Test::impl_test_geqrf_mrhs( - &mode[0], "N", 2, 5); // no padding - Test::impl_test_geqrf_mrhs( - &mode[0], "N", 13, 5); // no padding - Test::impl_test_geqrf_mrhs( - &mode[0], "N", 179, 5); // no padding - Test::impl_test_geqrf_mrhs( - &mode[0], "N", 64, 5); // no padding - Test::impl_test_geqrf_mrhs( - &mode[0], "N", 1024, 5); // no padding - -// When appropriate run MAGMA specific tests -#elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_CUDA) - if constexpr (std::is_same_v) { - Test::impl_test_geqrf_mrhs( - &mode[0], "N", 2, 5); // no padding - Test::impl_test_geqrf_mrhs( - &mode[0], "N", 13, 5); // no padding - Test::impl_test_geqrf_mrhs( - &mode[0], "N", 179, 5); // no padding - Test::impl_test_geqrf_mrhs( - &mode[0], "N", 64, 5); // no padding - Test::impl_test_geqrf_mrhs( - &mode[0], "N", 1024, 5); // no padding - - Test::impl_test_geqrf_mrhs( - &mode[0], "Y", 13, 5); // padding - Test::impl_test_geqrf_mrhs( - &mode[0], "Y", 179, 5); // padding - } -#endif -#endif - - // Supress unused parameters on CUDA10 - (void)mode; - return 1; -} - #if defined(KOKKOSKERNELS_INST_FLOAT) || \ (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) @@ -380,13 +206,6 @@ TEST_F(TestCategory, geqrf_float) { test_geqrf("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } - -TEST_F(TestCategory, geqrf_mrhs_float) { - Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_float"); - test_geqrf_mrhs("N"); // No pivoting - test_geqrf_mrhs("Y"); // Partial pivoting - Kokkos::Profiling::popRegion(); -} #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) || \ @@ -398,13 +217,6 @@ TEST_F(TestCategory, geqrf_double) { 
test_geqrf("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } - -TEST_F(TestCategory, geqrf_mrhs_double) { - Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_double"); - test_geqrf_mrhs("N"); // No pivoting - test_geqrf_mrhs("Y"); // Partial pivoting - Kokkos::Profiling::popRegion(); -} #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ @@ -416,13 +228,6 @@ TEST_F(TestCategory, geqrf_complex_double) { test_geqrf, TestDevice>("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } - -TEST_F(TestCategory, geqrf_mrhs_complex_double) { - Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_complex_double"); - test_geqrf_mrhs, TestDevice>("N"); // No pivoting - test_geqrf_mrhs, TestDevice>("Y"); // Partial pivoting - Kokkos::Profiling::popRegion(); -} #endif #if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ @@ -434,13 +239,6 @@ TEST_F(TestCategory, geqrf_complex_float) { test_geqrf, TestDevice>("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } - -TEST_F(TestCategory, geqrf_mrhs_complex_float) { - Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_mrhs_complex_float"); - test_geqrf_mrhs, TestDevice>("N"); // No pivoting - test_geqrf_mrhs, TestDevice>("Y"); // Partial pivoting - Kokkos::Profiling::popRegion(); -} #endif #endif // CUDA+(MAGMA or CUSOLVER) or HIP+ROCSOLVER or LAPACK+HOST From a8fc00d279992783bb07e6b92b072ec9be9070f7 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Sun, 19 May 2024 04:35:54 -0600 Subject: [PATCH 04/27] Backup --- lapack/src/KokkosLapack_geqrf.hpp | 18 +- .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 14 +- lapack/unit_test/Test_Lapack.hpp | 1 + lapack/unit_test/Test_Lapack_geqrf.hpp | 155 +++++------------- 4 files changed, 64 insertions(+), 124 deletions(-) diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index 78bea3a4e7..8e422aaa11 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -80,29 +80,31 @@ 
int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, static_assert(static_cast(TWArray::rank) == 1, "KokkosLapack::geqrf: Tau and Work must have rank 1."); - int64_t m = A.extent(0); - int64_t n = A.extent(1); + int64_t m = A.extent(0); + int64_t n = A.extent(1); + int64_t tau0 = Tau.extent(0); + int64_t work0 = Work.extent(0); // Check validity of dimensions - if (Tau.extent(0) != std::min(m,n)) { + if (tau0 != std::min(m,n)) { std::ostringstream os; os << "KokkosLapack::geqrf: length of Tau must be equal to min(m,n): " - << " A: " << m << " x " << n << ", Tau length = " << Tau.extent(0); + << " A: " << m << " x " << n << ", Tau length = " << tau0; KokkosKernels::Impl::throw_runtime_exception(os.str()); } if ((m == 0) || (n == 0)) { - if (Work.extent(0) < 1) { + if (work0 < 1) { std::ostringstream os; os << "KokkosLapack::geqrf: In case min(m,n) == 0, then Work must have length >= 1: " - << " A: " << m << " x " << n << ", Work length = " << Work.extent(0); + << " A: " << m << " x " << n << ", Work length = " << work0; KokkosKernels::Impl::throw_runtime_exception(os.str()); } } else { - if (Work.extent(0) < n) { + if (work0 < n) { std::ostringstream os; os << "KokkosLapack::geqrf: In case min(m,n) != 0, then Work must have length >= n: " - << " A: " << m << " x " << n << ", Work length = " << Work.extent(0); + << " A: " << m << " x " << n << ", Work length = " << work0; KokkosKernels::Impl::throw_runtime_exception(os.str()); } } diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 559e2854a6..69cd6fb4f3 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -58,8 +58,18 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, const int LDA = A.stride(1); const int LWORK = static_cast(Work.extent(0)); - R() = HostLapack::geqrf(M, N, A.data(), LDA, Tau.data(), Work.data(), - LWORK); + if constexpr 
(Kokkos::ArithTraits::is_complex) { + using MagType = typename Kokkos::ArithTraits::mag_type; + + R() = HostLapack>::geqrf(M, N, + reinterpret_cast*>(A.data()), LDA, + reinterpret_cast*>(Tau.data()), + reinterpret_cast*>(Work.data()), LWORK); + } + else { + R() = HostLapack::geqrf(M, N, A.data(), LDA, Tau.data(), Work.data(), + LWORK); + } } #define KOKKOSLAPACK_GEQRF_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE) \ diff --git a/lapack/unit_test/Test_Lapack.hpp b/lapack/unit_test/Test_Lapack.hpp index 1a717521f8..2bcecaceae 100644 --- a/lapack/unit_test/Test_Lapack.hpp +++ b/lapack/unit_test/Test_Lapack.hpp @@ -19,5 +19,6 @@ #include "Test_Lapack_gesv.hpp" #include "Test_Lapack_trtri.hpp" #include "Test_Lapack_svd.hpp" +#include "Test_Lapack_geqrf.hpp" #endif // TEST_LAPACK_HPP diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index bb8183f32c..b4e8dc9b9d 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -14,11 +14,10 @@ // //@HEADER -// only enable this test where KokkosLapack supports geqrf: -// CUDA+(MAGMA or CUSOLVER), HIP+ROCSOLVER and HOST+LAPACK +// Only enable this test where KokkosLapack supports geqrf: +// CUDA+CUSOLVER, HIP+ROCSOLVER and HOST+LAPACK #if (defined(TEST_CUDA_LAPACK_CPP) && \ - (defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) || \ - defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER))) || \ + defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ (defined(TEST_HIP_LAPACK_CPP) && \ defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ @@ -38,95 +37,69 @@ namespace Test { -template -void impl_test_geqrf(int M, int N) { +template +void impl_test_geqrf(int m, int n) { using execution_space = typename Device::execution_space; using ScalarA = typename ViewTypeA::value_type; - using ats = Kokkos::ArithTraits; + //using ats = Kokkos::ArithTraits; execution_space space{}; Kokkos::Random_XorShift64_Pool rand_pool(13718); int lwork(1); - if 
(std::min(M,N) != 0) { - lwork = N; + if (std::min(m,n) != 0) { + lwork = n; } // Create device views - ViewTypeA A ("A", M, N); - ViewTypeTW Tau ("Tau", std::min(M,N)); - ViewTypeTW Work("Work", lddb); + ViewTypeA A ("A", m, n); + ViewTypeTW Tau ("Tau", std::min(m,n)); + ViewTypeTW Work("Work", lwork); // Create host mirrors of device views. - typename ViewTypeTW::HostMirror h_X0 = Kokkos::create_mirror_view(X0); - typename ViewTypeTW::HostMirror h_B = Kokkos::create_mirror(B); + typename ViewTypeTW::HostMirror h_tau = Kokkos::create_mirror_view(Tau); + typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror(Work); // Initialize data. - Kokkos::fill_random( - A, rand_pool, - Kokkos::rand, ScalarA>::max()); - - // Generate RHS B = A*X0. - ScalarA alpha = 1.0; - ScalarA beta = 0.0; + if ((m == 3) && (n == 3)) { + } + else { + Kokkos::fill_random( A + , rand_pool + , Kokkos::rand, ScalarA>::max() + ); + } - KokkosBlas::gemv("N", alpha, A, X0, beta, B); Kokkos::fence(); // Deep copy device view to host view. - Kokkos::deep_copy(h_X0, X0); + //Kokkos::deep_copy(h_X0, X0); // Allocate IPIV view on host - using ViewTypeP = typename std::conditional< - MAGMA, Kokkos::View, - Kokkos::View>::type; + using ViewTypeP = Kokkos::View; ViewTypeP ipiv; - int Nt = 0; - if (mode[0] == 'Y') { - Nt = N; - ipiv = ViewTypeP("IPIV", Nt); - } + int Nt = n; + ipiv = ViewTypeP("IPIV", Nt); // Solve. 
try { KokkosLapack::geqrf(space, A, Tau, Work); - } catch (const std::runtime_error& error) { - // Check for expected runtime errors due to: - // no-pivoting case (note: only MAGMA supports no-pivoting interface) - // and no-tpl case - bool nopivot_runtime_err = false; - bool notpl_runtime_err = false; -#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL - nopivot_runtime_err = (!std::is_same::value) && - (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); - notpl_runtime_err = false; -#else - notpl_runtime_err = true; -#endif -#else // not have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL - nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); - notpl_runtime_err = false; -#else - notpl_runtime_err = true; -#endif -#endif - if (!nopivot_runtime_err && !notpl_runtime_err) FAIL(); + } + catch (const std::runtime_error& error) { return; } Kokkos::fence(); // Get the solution vector. 
- Kokkos::deep_copy(h_B, B); + //Kokkos::deep_copy(h_B, B); // Checking vs ref on CPU, this eps is about 10^-9 - typedef typename ats::mag_type mag_type; - const mag_type eps = 3.0e7 * ats::epsilon(); + //typedef typename ats::mag_type mag_type; + //const mag_type eps = 3.0e7 * ats::epsilon(); bool test_flag = true; - for (int i = 0; i < N; i++) { + for (int i = 0; i < n; i++) { +#if 0 if (ats::abs(h_B(i) - h_X0(i)) > eps) { test_flag = false; printf( @@ -136,6 +109,7 @@ void impl_test_geqrf(int M, int N) { ats::abs(h_B(i) - h_X0(i)), eps); break; } +#endif } ASSERT_EQ(test_flag, true); } @@ -143,58 +117,15 @@ void impl_test_geqrf(int M, int N) { } // namespace Test template -int test_geqrf(const char* mode) { +void test_geqrf() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) using view_type_a_ll = Kokkos::View; - using view_type_b_ll = Kokkos::View; + using view_type_tw_ll = Kokkos::View; -#if (defined(TEST_CUDA_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ - (defined(TEST_HIP_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ - (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ - (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ - defined(TEST_THREADS_LAPACK_CPP))) - Test::impl_test_geqrf( - &mode[0], "N", 2); // no padding - Test::impl_test_geqrf( - &mode[0], "N", 13); // no padding - Test::impl_test_geqrf( - &mode[0], "N", 179); // no padding - Test::impl_test_geqrf( - &mode[0], "N", 64); // no padding - Test::impl_test_geqrf( - &mode[0], "N", 1024); // no padding - -#elif defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) && defined(KOKKOS_ENABLE_CUDA) - if constexpr (std::is_same_v) { - Test::impl_test_geqrf( - &mode[0], "N", 2); // no padding - Test::impl_test_geqrf( - &mode[0], "N", 13); // no padding - Test::impl_test_geqrf( - &mode[0], "N", 179); // no padding - Test::impl_test_geqrf( - &mode[0], "N", 64); // no padding - 
Test::impl_test_geqrf( - &mode[0], "N", 1024); // no padding - - Test::impl_test_geqrf( - &mode[0], "Y", - 13); // padding - Test::impl_test_geqrf( - &mode[0], "Y", - 179); // padding - } + Test::impl_test_geqrf(3, 3); #endif -#endif - - // Supress unused parameters on CUDA10 - (void)mode; - return 1; } #if defined(KOKKOSKERNELS_INST_FLOAT) || \ @@ -202,8 +133,7 @@ int test_geqrf(const char* mode) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, geqrf_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_float"); - test_geqrf("N"); // No pivoting - test_geqrf("Y"); // Partial pivoting + test_geqrf(); Kokkos::Profiling::popRegion(); } #endif @@ -213,8 +143,7 @@ TEST_F(TestCategory, geqrf_float) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, geqrf_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_double"); - test_geqrf("N"); // No pivoting - test_geqrf("Y"); // Partial pivoting + test_geqrf(); Kokkos::Profiling::popRegion(); } #endif @@ -224,8 +153,7 @@ TEST_F(TestCategory, geqrf_double) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, geqrf_complex_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double"); - test_geqrf, TestDevice>("N"); // No pivoting - test_geqrf, TestDevice>("Y"); // Partial pivoting + test_geqrf, TestDevice>(); Kokkos::Profiling::popRegion(); } #endif @@ -235,10 +163,9 @@ TEST_F(TestCategory, geqrf_complex_double) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, geqrf_complex_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float"); - test_geqrf, TestDevice>("N"); // No pivoting - test_geqrf, TestDevice>("Y"); // Partial pivoting + test_geqrf, TestDevice>(); Kokkos::Profiling::popRegion(); } #endif -#endif // CUDA+(MAGMA or CUSOLVER) or HIP+ROCSOLVER or LAPACK+HOST +#endif // CUDA+CUSOLVER or HIP+ROCSOLVER or LAPACK+HOST From b75c07e82c7151fd69e88bf2b77dbc2bf14c5a48 Mon Sep 17 00:00:00 
2001 From: Ernesto Prudencio Date: Sun, 19 May 2024 21:21:56 -0600 Subject: [PATCH 05/27] Backup --- lapack/src/KokkosLapack_geqrf.hpp | 15 +++- lapack/unit_test/Test_Lapack_geqrf.hpp | 108 ++++++++++++++++++++----- 2 files changed, 101 insertions(+), 22 deletions(-) diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index 8e422aaa11..334dbef682 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -40,10 +40,17 @@ namespace KokkosLapack { /// the geqrf kernels. /// \param A [in,out] On entry, the M-by-N matrix to be factorized. /// On exit, the elements on and above the diagonal contain -/// the min(M,N)-by-N upper trapezoidal matrix R (R is -/// upper triangular if M >= N); the elements below the -/// diagonal, with the array Tau, represent the unitary -/// matrix Q as a product of min(M,N) elementary reflectors. +/// the min(M,N)-by-N upper trapezoidal matrix R (R is upper +/// triangular if M >= N); the elements below the diagonal, +/// with the array Tau, represent the unitary matrix Q as a +/// product of min(M,N) elementary reflectors. The matrix Q +/// is represented as a product of elementary reflectors +/// Q = H(1) H(2) . . . H(k), where k = min(M,N). +/// Each H(i) has the form +/// H(i) = I - Tau * v * v**H +/// where tau is a complex scalar, and v is a complex vector +/// with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on +/// exit in A(i+1:M,i), and tau in Tau(i). /// \param Tau [out] One-dimensional array of size min(M,N) that contains /// the scalar factors of the elementary reflectors. /// \param Work [out] One-dimensional array of size max(1,LWORK). 
diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index b4e8dc9b9d..0ec9388dd4 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -47,52 +47,124 @@ void impl_test_geqrf(int m, int n) { Kokkos::Random_XorShift64_Pool rand_pool(13718); - int lwork(1); - if (std::min(m,n) != 0) { + int minMN( std::min(m,n) ); + int lwork( 1 ); + if (minMN != 0) { lwork = n; } // Create device views ViewTypeA A ("A", m, n); - ViewTypeTW Tau ("Tau", std::min(m,n)); + ViewTypeTW Tau ("Tau", minMN); ViewTypeTW Work("Work", lwork); // Create host mirrors of device views. + typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewTypeTW::HostMirror h_tau = Kokkos::create_mirror_view(Tau); - typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror(Work); + typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work); // Initialize data. if ((m == 3) && (n == 3)) { + if constexpr (Kokkos::ArithTraits::is_complex) { + h_A(0, 0).real() = 12.; + h_A(0, 1).real() = -51.; + h_A(0, 2).real() = 4.; + + h_A(1, 0).real() = 6.; + h_A(1, 1).real() = 167.; + h_A(1, 2).real() = -68.; + + h_A(2, 0).real() = -4.; + h_A(2, 1).real() = 24.; + h_A(2, 2).real() = -41.; + + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + h_A(i, j).imag() = 0.; + } + } + } + else { + h_A(0, 0) = 12.; + h_A(0, 1) = -51.; + h_A(0, 2) = 4.; + + h_A(1, 0) = 6.; + h_A(1, 1) = 167.; + h_A(1, 2) = -68.; + + h_A(2, 0) = -4.; + h_A(2, 1) = 24.; + h_A(2, 2) = -41.; + } + + Kokkos::deep_copy(A, h_A); } else { Kokkos::fill_random( A , rand_pool , Kokkos::rand, ScalarA>::max() ); + Kokkos::deep_copy(h_A, A); } - Kokkos::fence(); - - // Deep copy device view to host view. 
- //Kokkos::deep_copy(h_X0, X0); + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + std::cout << "A(" << i << "," << j << ") = " << h_A(i,j) << std::endl; + } + } - // Allocate IPIV view on host - using ViewTypeP = Kokkos::View; - ViewTypeP ipiv; - int Nt = n; - ipiv = ViewTypeP("IPIV", Nt); + Kokkos::fence(); - // Solve. + // Perform the QR factorization + int rc(0); try { - KokkosLapack::geqrf(space, A, Tau, Work); + rc = KokkosLapack::geqrf(space, A, Tau, Work); } - catch (const std::runtime_error& error) { + catch (const std::runtime_error & e) { + std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'" << std::endl; + FAIL(); return; } Kokkos::fence(); - // Get the solution vector. - //Kokkos::deep_copy(h_B, B); + // Get the results + Kokkos::deep_copy(h_A, A); + Kokkos::deep_copy(h_tau, Tau); + Kokkos::deep_copy(h_work, Work); + + std::cout << "rc = " << rc << std::endl; + for (int i(0); i < minMN; ++i) { + for (int j(0); j < n; ++j) { + std::cout << "R(" << i << "," << j << ") = " << h_A(i,j) << std::endl; + } + } + for (int i(0); i < minMN; ++i) { + std::cout << "tau(" << i << ") = " << h_tau[i] << std::endl; + } + for (int i(0); i < lwork; ++i) { + std::cout << "work(" << i << ") = " << h_work[i] << std::endl; + } + + // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B). + // void gemm( const execution_space & space + // , const char transA[] + // , const char transB[] + // , typename AViewType::const_value_type & alpha + // , const AViewType & A + // , const BViewType & B + // , typename CViewType::const_value_type & beta + // , const CViewType & C + // ); + + // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}. 
+ // void ger( const ExecutionSpace & space + // , const char trans[] + // , const typename AViewType::const_value_type & alpha + // , const XViewType & x + // , const YViewType & y + // , const AViewType & A + // ); // Checking vs ref on CPU, this eps is about 10^-9 //typedef typename ats::mag_type mag_type; From f9c0c8ef1208e327850a06c73363f712c397ee14 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Sun, 19 May 2024 22:50:09 -0600 Subject: [PATCH 06/27] Backup --- lapack/src/KokkosLapack_geqrf.hpp | 23 ++-- lapack/unit_test/Test_Lapack_geqrf.hpp | 139 ++++++++++++++++++++----- 2 files changed, 131 insertions(+), 31 deletions(-) diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index 334dbef682..15a5522f37 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -32,9 +32,9 @@ namespace KokkosLapack { /// \brief Computes a QR factorization of a matrix A /// -/// \tparam ExecutionSpace the space where the kernel will run. -/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. -/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View. +/// \tparam ExecutionSpace The space where the kernel will run. +/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. +/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View. /// /// \param space [in] Execution space instance used to specified how to execute /// the geqrf kernels. @@ -58,6 +58,7 @@ namespace KokkosLapack { /// If min(M,N) != 0, then LWORK must be >= N. /// If the QR factorization is successful, then the first /// position of Work contains the optimal LWORK. +/// /// \return = 0: successfull exit /// < 0: if equal to '-i', the i-th argument had an illegal /// value @@ -148,10 +149,17 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, /// /// \param A [in,out] On entry, the M-by-N matrix to be factorized. 
/// On exit, the elements on and above the diagonal contain -/// the min(M,N)-by-N upper trapezoidal matrix R (R is -/// upper triangular if M >= N); the elements below the -/// diagonal, with the array Tau, represent the unitary -/// matrix Q as a product of min(M,N) elementary reflectors. +/// the min(M,N)-by-N upper trapezoidal matrix R (R is upper +/// triangular if M >= N); the elements below the diagonal, +/// with the array Tau, represent the unitary matrix Q as a +/// product of min(M,N) elementary reflectors. The matrix Q +/// is represented as a product of elementary reflectors +/// Q = H(1) H(2) . . . H(k), where k = min(M,N). +/// Each H(i) has the form +/// H(i) = I - Tau * v * v**H +/// where tau is a complex scalar, and v is a complex vector +/// with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on +/// exit in A(i+1:M,i), and tau in Tau(i). /// \param Tau [out] One-dimensional array of size min(M,N) that contains /// the scalar factors of the elementary reflectors. /// \param Work [out] One-dimensional array of size max(1,LWORK). @@ -159,6 +167,7 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, /// If min(M,N) != 0, then LWORK must be >= N. /// If the QR factorization is successful, then the first /// position of Work contains the optimal LWORK. 
+/// /// \return = 0: successfull exit /// < 0: if equal to '-i', the i-th argument had an illegal /// value diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index 0ec9388dd4..dfcc7566e9 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -31,12 +31,62 @@ #include #include -#include -#include +//#include +//#include #include namespace Test { +template +void getQR( int const m + , int const n + , typename ViewTypeA::HostMirror const & //h_A + , typename ViewTypeTW::HostMirror const & //h_tau + , typename ViewTypeTW::HostMirror const & //h_work + , typename ViewTypeA::HostMirror & //h_Q + , typename ViewTypeA::HostMirror & h_R + , typename ViewTypeA::HostMirror & //h_QR + ) +{ + using ScalarA = typename ViewTypeA::value_type; + + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + if constexpr (Kokkos::ArithTraits::is_complex) { + h_R(i,j).real() = 0.; + h_R(i,j).imag() = 0.; + } + else { + h_R(i,j) = 0.; + } + } + } + + ViewTypeA I("I", m, m); + typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I); + for (int i(0); i < m; ++i) { + for (int j(0); j < m; ++j) { + if constexpr (Kokkos::ArithTraits::is_complex) { + if (i == j) { + h_I(i,j).real() = 1.; + } + else { + h_I(i,j).real() = 0.; + } + h_I(i,j).imag() = 0.; + } + else { + if (i == j) { + h_I(i,j) = 1.; + } + else { + h_I(i,j) = 0.; + } + } + } + } +} + template void impl_test_geqrf(int m, int n) { using execution_space = typename Device::execution_space; @@ -59,43 +109,44 @@ void impl_test_geqrf(int m, int n) { ViewTypeTW Work("Work", lwork); // Create host mirrors of device views. 
- typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); - typename ViewTypeTW::HostMirror h_tau = Kokkos::create_mirror_view(Tau); - typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work); + typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); + typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A); + typename ViewTypeTW::HostMirror h_tau = Kokkos::create_mirror_view(Tau); + typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work); // Initialize data. if ((m == 3) && (n == 3)) { if constexpr (Kokkos::ArithTraits::is_complex) { - h_A(0, 0).real() = 12.; - h_A(0, 1).real() = -51.; - h_A(0, 2).real() = 4.; + h_A(0,0).real() = 12.; + h_A(0,1).real() = -51.; + h_A(0,2).real() = 4.; - h_A(1, 0).real() = 6.; - h_A(1, 1).real() = 167.; - h_A(1, 2).real() = -68.; + h_A(1,0).real() = 6.; + h_A(1,1).real() = 167.; + h_A(1,2).real() = -68.; - h_A(2, 0).real() = -4.; - h_A(2, 1).real() = 24.; - h_A(2, 2).real() = -41.; + h_A(2,0).real() = -4.; + h_A(2,1).real() = 24.; + h_A(2,2).real() = -41.; for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - h_A(i, j).imag() = 0.; + h_A(i,j).imag() = 0.; } } } else { - h_A(0, 0) = 12.; - h_A(0, 1) = -51.; - h_A(0, 2) = 4.; + h_A(0,0) = 12.; + h_A(0,1) = -51.; + h_A(0,2) = 4.; - h_A(1, 0) = 6.; - h_A(1, 1) = 167.; - h_A(1, 2) = -68.; + h_A(1,0) = 6.; + h_A(1,1) = 167.; + h_A(1,2) = -68.; - h_A(2, 0) = -4.; - h_A(2, 1) = 24.; - h_A(2, 2) = -41.; + h_A(2,0) = -4.; + h_A(2,1) = 24.; + h_A(2,2) = -41.; } Kokkos::deep_copy(A, h_A); @@ -108,11 +159,15 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_A, A); } + Kokkos::deep_copy(h_Aorig, h_A); + +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { std::cout << "A(" << i << "," << j << ") = " << h_A(i,j) << std::endl; } } +#endif Kokkos::fence(); @@ -126,13 +181,17 @@ void impl_test_geqrf(int m, int n) { FAIL(); return; } + Kokkos::fence(); + EXPECT_EQ(rc, 
0) << "Failed geqrf() test: rc = " << rc; + // Get the results Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_tau, Tau); Kokkos::deep_copy(h_work, Work); +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG std::cout << "rc = " << rc << std::endl; for (int i(0); i < minMN; ++i) { for (int j(0); j < n; ++j) { @@ -145,6 +204,38 @@ void impl_test_geqrf(int m, int n) { for (int i(0); i < lwork; ++i) { std::cout << "work(" << i << ") = " << h_work[i] << std::endl; } +#endif + + ViewTypeA Q ("Q", m, m); + ViewTypeA R ("R", m, n); + ViewTypeA QR("QR", m, n); + + typename ViewTypeA::HostMirror h_Q = Kokkos::create_mirror_view(Q); + typename ViewTypeA::HostMirror h_R = Kokkos::create_mirror_view(R); + typename ViewTypeA::HostMirror h_QR = Kokkos::create_mirror_view(QR); + + getQR(m, n, h_A, h_tau, h_work, h_Q, h_R, h_QR); + +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG + for (int i(0); i < m; ++i) { + for (int j(0); j < m; ++j) { + std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; + } + } + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + std::cout << "R(" << i << "," << j << ") = " << h_R(i,j) << std::endl; + } + } + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + std::cout << "QR(" << i << "," << j << ") = " << h_QR(i,j) << std::endl; + } + } +#endif + + if ((m == 3) && (n == 3)) { + } // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B). 
// void gemm( const execution_space & space From 4988a3572cd157935758ccacb837b40052bf7706 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Mon, 20 May 2024 10:26:55 -0600 Subject: [PATCH 07/27] Formatting --- .../KokkosLapack_geqrf_eti_spec_inst.cpp.in | 7 +- .../KokkosLapack_geqrf_eti_spec_avail.hpp.in | 6 +- lapack/impl/KokkosLapack_geqrf_spec.hpp | 96 +++---- lapack/src/KokkosLapack_geqrf.hpp | 21 +- .../KokkosLapack_geqrf_tpl_spec_avail.hpp | 60 ++--- .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 237 +++++++++--------- lapack/unit_test/Test_Lapack_geqrf.hpp | 169 ++++++------- 7 files changed, 300 insertions(+), 296 deletions(-) diff --git a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in index 9558d0f6cc..2015898d13 100644 --- a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in +++ b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in @@ -14,13 +14,12 @@ // //@HEADER - #define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true #include "KokkosKernels_config.h" #include "KokkosLapack_geqrf_spec.hpp" namespace KokkosLapack { namespace Impl { -@LAPACK_GEQRF_ETI_INST_BLOCK@ - } //IMPL -} //Kokkos +@LAPACK_GEQRF_ETI_INST_BLOCK @ +} // namespace Impl +} // namespace KokkosLapack diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in index c4619b9c07..2726dddd80 100644 --- a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in @@ -18,7 +18,7 @@ #define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_ namespace KokkosLapack { namespace Impl { -@LAPACK_GEQRF_ETI_AVAIL_BLOCK@ - } //IMPL -} //Kokkos +@LAPACK_GEQRF_ETI_AVAIL_BLOCK @ +} // namespace 
Impl +} // namespace KokkosLapack #endif diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp index 98d532489b..6970c6dd2c 100644 --- a/lapack/impl/KokkosLapack_geqrf_spec.hpp +++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp @@ -42,20 +42,20 @@ struct geqrf_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - template <> \ - struct geqrf_eti_spec_avail< \ - EXEC_SPACE_TYPE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View>> { \ - enum : bool { value = true }; \ +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct geqrf_eti_spec_avail< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View>> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -66,14 +66,15 @@ namespace KokkosLapack { namespace Impl { // Unification layer -template ::value, - bool eti_spec_avail = - geqrf_eti_spec_avail::value> +template < + class ExecutionSpace, class AMatrix, class TWArray, class RType, + bool tpl_spec_avail = + geqrf_tpl_spec_avail::value, + bool eti_spec_avail = + geqrf_eti_spec_avail::value> struct GEQRF { - static void geqrf(const ExecutionSpace &space, const AMatrix &A, const TWArray &Tau, - const TWArray &Work, const RType &R); + static void geqrf(const ExecutionSpace &space, const AMatrix &A, + const TWArray &Tau, const TWArray &Work, const RType &R); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY @@ -81,9 +82,10 @@ struct GEQRF { // Unification layer template struct GEQRF { + KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> { static void geqrf(const ExecutionSpace & /* space */, const AMatrix & /* A */, - 
const TWArray & /* Tau */, const TWArray & /* Work */, const RType & /* R */) { + const TWArray & /* Tau */, const TWArray & /* Work */, + const RType & /* R */) { // NOTE: Might add the implementation of KokkosLapack::geqrf later throw std::runtime_error( "No fallback implementation of GEQRF (general QR factorization) " @@ -102,32 +104,32 @@ struct GEQRF, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View>, \ +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct GEQRF< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View>, \ false, true>; -#define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - template struct GEQRF< \ - EXEC_SPACE_TYPE, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View>, \ +#define KOKKOSLAPACK_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct GEQRF< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View>, \ false, true>; #include diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index 15a5522f37..506d3c60b7 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -94,7 +94,7 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, int64_t work0 = Work.extent(0); // Check validity of dimensions - if (tau0 != std::min(m,n)) { + if (tau0 != std::min(m, n)) { std::ostringstream os; os << "KokkosLapack::geqrf: length of Tau must be equal to min(m,n): " << " A: " << m << " x " << n << ", Tau length = " << tau0; @@ -103,15 +103,16 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, if ((m == 0) || (n == 0)) { if (work0 
< 1) { std::ostringstream os; - os << "KokkosLapack::geqrf: In case min(m,n) == 0, then Work must have length >= 1: " + os << "KokkosLapack::geqrf: In case min(m,n) == 0, then Work must have " + "length >= 1: " << " A: " << m << " x " << n << ", Work length = " << work0; KokkosKernels::Impl::throw_runtime_exception(os.str()); } - } - else { + } else { if (work0 < n) { std::ostringstream os; - os << "KokkosLapack::geqrf: In case min(m,n) != 0, then Work must have length >= n: " + os << "KokkosLapack::geqrf: In case min(m,n) != 0, then Work must have " + "length >= n: " << " A: " << m << " x " << n << ", Work length = " << work0; KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -121,9 +122,9 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, typename AMatrix::device_type, Kokkos::MemoryTraits > AMatrix_Internal; - typedef Kokkos::View > + typedef Kokkos::View< + typename TWArray::non_const_value_type*, typename TWArray::array_layout, + typename TWArray::device_type, Kokkos::MemoryTraits > TWArray_Internal; AMatrix_Internal A_i = A; TWArray_Internal Tau_i = Tau; @@ -137,7 +138,9 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, int result; RViewInternalType R = RViewInternalType(&result); - KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, Work_i, R); + KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, Work_i, + R); return result; } diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp index aaa465a814..8a1fcf618d 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp @@ -42,13 +42,13 @@ struct geqrf_tpl_spec_avail { }; KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, - Kokkos::HostSpace) + Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, - 
Kokkos::HostSpace) + Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) + Kokkos::LayoutLeft, Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HostSpace) + Kokkos::LayoutLeft, Kokkos::HostSpace) #endif } // namespace Impl } // namespace KokkosLapack @@ -64,20 +64,20 @@ namespace Impl { struct geqrf_tpl_spec_avail< \ Kokkos::Cuda, \ Kokkos::View, \ - Kokkos::MemoryTraits >, \ + Kokkos::MemoryTraits>, \ Kokkos::View, \ - Kokkos::MemoryTraits > > { \ + Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ }; KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, - Kokkos::CudaSpace) + Kokkos::CudaSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, - Kokkos::CudaSpace) + Kokkos::CudaSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) + Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) + Kokkos::LayoutLeft, Kokkos::CudaSpace) } // namespace Impl } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA @@ -92,32 +92,34 @@ namespace Impl { struct geqrf_tpl_spec_avail< \ Kokkos::Cuda, \ Kokkos::View, \ - Kokkos::MemoryTraits >, \ + Kokkos::MemoryTraits>, \ Kokkos::View, \ - Kokkos::MemoryTraits > > { \ + Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ }; KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, - Kokkos::CudaSpace) + Kokkos::CudaSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, - Kokkos::CudaSpace) + Kokkos::CudaSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) + Kokkos::LayoutLeft, + Kokkos::CudaSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) + Kokkos::LayoutLeft, + 
Kokkos::CudaSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) + Kokkos::CudaUVMSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) + Kokkos::CudaUVMSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) #endif } // namespace Impl @@ -135,20 +137,22 @@ namespace Impl { struct geqrf_tpl_spec_avail< \ Kokkos::HIP, \ Kokkos::View, \ - Kokkos::MemoryTraits >, \ + Kokkos::MemoryTraits>, \ Kokkos::View, \ - Kokkos::MemoryTraits > > { \ + Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ }; KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, - Kokkos::HIPSpace) + Kokkos::HIPSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, - Kokkos::HIPSpace) + Kokkos::HIPSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) + Kokkos::LayoutLeft, + Kokkos::HIPSpace) KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace) + Kokkos::LayoutLeft, + Kokkos::HIPSpace) } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 69cd6fb4f3..7fbc5ff391 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -26,11 +26,14 @@ inline void geqrf_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n", - typeid(AViewType).name(), 
typeid(TWViewType).name(), typeid(RType).name()); + typeid(AViewType).name(), typeid(TWViewType).name(), + typeid(RType).name()); #else #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK - printf("KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n", - typeid(AViewType).name(), typeid(TWViewType).name(), typeid(RType).name()); + printf( + "KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n", + typeid(AViewType).name(), typeid(TWViewType).name(), + typeid(RType).name()); #endif #endif #endif @@ -61,14 +64,13 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, if constexpr (Kokkos::ArithTraits::is_complex) { using MagType = typename Kokkos::ArithTraits::mag_type; - R() = HostLapack>::geqrf(M, N, - reinterpret_cast*>(A.data()), LDA, - reinterpret_cast*>(Tau.data()), - reinterpret_cast*>(Work.data()), LWORK); - } - else { - R() = HostLapack::geqrf(M, N, A.data(), LDA, Tau.data(), Work.data(), - LWORK); + R() = HostLapack>::geqrf( + M, N, reinterpret_cast*>(A.data()), LDA, + reinterpret_cast*>(Tau.data()), + reinterpret_cast*>(Work.data()), LWORK); + } else { + R() = HostLapack::geqrf(M, N, A.data(), LDA, Tau.data(), + Work.data(), LWORK); } } @@ -97,13 +99,12 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, using TWViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ - using RType = \ - Kokkos::View>; \ + using RType = Kokkos::View>; \ \ static void geqrf(const EXECSPACE& /* space */, const AViewType& A, \ const TWViewType& Tau, const TWViewType& Work, \ - const RType& R) { \ + const RType& R) { \ Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR \ "]"); \ geqrf_print_specialization(); \ @@ -114,42 +115,42 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, #if defined(KOKKOS_ENABLE_SERIAL) KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial, - Kokkos::HostSpace) + Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, 
Kokkos::Serial, - Kokkos::HostSpace) + Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) + Kokkos::Serial, Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Serial, Kokkos::HostSpace) + Kokkos::Serial, Kokkos::HostSpace) #endif #if defined(KOKKOS_ENABLE_OPENMP) KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) + Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP, - Kokkos::HostSpace) + Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) + Kokkos::OpenMP, Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::OpenMP, Kokkos::HostSpace) + Kokkos::OpenMP, Kokkos::HostSpace) #endif #if defined(KOKKOS_ENABLE_THREADS) KOKKOSLAPACK_GEQRF_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads, - Kokkos::HostSpace) + Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads, - Kokkos::HostSpace) + Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Threads, Kokkos::HostSpace) + Kokkos::Threads, Kokkos::HostSpace) KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Threads, Kokkos::HostSpace) + Kokkos::Threads, Kokkos::HostSpace) #endif } // namespace Impl } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK -#if 0 // AquiEEP +#if 0 // AquiEEP // MAGMA #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA @@ -207,33 +208,33 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, } #define KOKKOSLAPACK_GEQRF_MAGMA(SCALAR, LAYOUT, MEM_SPACE) \ - template <> \ + template <> \ struct GEQRF< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + 
Kokkos::MemoryTraits>, \ + true, \ geqrf_eti_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using TWViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TWViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ - const TWViewType& Tau, const TWViewType& Work) { \ - magmaGeqrfWrapper(space, A, Tau, Work); \ - } \ + const TWViewType& Tau, const TWViewType& Work) { \ + magmaGeqrfWrapper(space, A, Tau, Work); \ + } \ }; KOKKOSLAPACK_GEQRF_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) @@ -339,39 +340,39 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); } -#define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ - template <> \ - struct GEQRF< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ - geqrf_eti_spec_avail< \ - Kokkos::Cuda, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using TWViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ - const TWViewType& Tau, const TWViewType& Work) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR \ - "]"); \ - geqrf_print_specialization(); \ - \ - cusolverGeqrfWrapper(space, IPIV, A, B); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct 
GEQRF< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TWViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ + const TWViewType& Tau, const TWViewType& Work) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR "]"); \ + geqrf_print_specialization(); \ + \ + cusolverGeqrfWrapper(space, IPIV, A, B); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) @@ -448,39 +449,39 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); } -#define KOKKOSLAPACK_GEQRF_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ - template <> \ - struct GEQRF< \ - Kokkos::HIP, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, \ - geqrf_eti_spec_avail< \ - Kokkos::HIP, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - Kokkos::View \ + struct GEQRF< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + geqrf_eti_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ - Kokkos::MemoryTraits>>::value> { \ - using AViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - using TWViewType = \ - Kokkos::View, \ - Kokkos::MemoryTraits>; \ - \ - static void geqrf(const Kokkos::HIP& space, const AViewType& A, \ - const TWViewType& Tau, const TWViewType& Work) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]"); \ - geqrf_print_specialization(); \ - \ - 
rocsolverGeqrfWrapper(space, IPIV, A, B); \ - Kokkos::Profiling::popRegion(); \ - } \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TWViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void geqrf(const Kokkos::HIP& space, const AViewType& A, \ + const TWViewType& Tau, const TWViewType& Work) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]"); \ + geqrf_print_specialization(); \ + \ + rocsolverGeqrfWrapper(space, IPIV, A, B); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSLAPACK_GEQRF_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) @@ -494,6 +495,6 @@ KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER -#endif // AquiEEP +#endif // AquiEEP #endif diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index dfcc7566e9..de662365ac 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -17,7 +17,7 @@ // Only enable this test where KokkosLapack supports geqrf: // CUDA+CUSOLVER, HIP+ROCSOLVER and HOST+LAPACK #if (defined(TEST_CUDA_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ + defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ (defined(TEST_HIP_LAPACK_CPP) && \ defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ @@ -38,26 +38,27 @@ namespace Test { template -void getQR( int const m - , int const n - , typename ViewTypeA::HostMirror const & //h_A - , typename ViewTypeTW::HostMirror const & //h_tau - , typename ViewTypeTW::HostMirror const & //h_work - , typename ViewTypeA::HostMirror & //h_Q - , typename ViewTypeA::HostMirror & h_R - , typename ViewTypeA::HostMirror & //h_QR - ) -{ +void getQR(int const m, int const n, + typename ViewTypeA::HostMirror const& // h_A + , + typename ViewTypeTW::HostMirror 
const& // h_tau + , + typename ViewTypeTW::HostMirror const& // h_work + , + typename ViewTypeA::HostMirror& // h_Q + , + typename ViewTypeA::HostMirror& h_R, + typename ViewTypeA::HostMirror& // h_QR +) { using ScalarA = typename ViewTypeA::value_type; for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { if constexpr (Kokkos::ArithTraits::is_complex) { - h_R(i,j).real() = 0.; - h_R(i,j).imag() = 0.; - } - else { - h_R(i,j) = 0.; + h_R(i, j).real() = 0.; + h_R(i, j).imag() = 0.; + } else { + h_R(i, j) = 0.; } } } @@ -67,21 +68,18 @@ void getQR( int const m for (int i(0); i < m; ++i) { for (int j(0); j < m; ++j) { if constexpr (Kokkos::ArithTraits::is_complex) { - if (i == j) { - h_I(i,j).real() = 1.; - } - else { - h_I(i,j).real() = 0.; - } - h_I(i,j).imag() = 0.; - } - else { - if (i == j) { - h_I(i,j) = 1.; - } - else { - h_I(i,j) = 0.; - } + if (i == j) { + h_I(i, j).real() = 1.; + } else { + h_I(i, j).real() = 0.; + } + h_I(i, j).imag() = 0.; + } else { + if (i == j) { + h_I(i, j) = 1.; + } else { + h_I(i, j) = 0.; + } } } } @@ -91,80 +89,77 @@ template void impl_test_geqrf(int m, int n) { using execution_space = typename Device::execution_space; using ScalarA = typename ViewTypeA::value_type; - //using ats = Kokkos::ArithTraits; + // using ats = Kokkos::ArithTraits; execution_space space{}; Kokkos::Random_XorShift64_Pool rand_pool(13718); - int minMN( std::min(m,n) ); - int lwork( 1 ); + int minMN(std::min(m, n)); + int lwork(1); if (minMN != 0) { lwork = n; } // Create device views - ViewTypeA A ("A", m, n); - ViewTypeTW Tau ("Tau", minMN); + ViewTypeA A("A", m, n); + ViewTypeTW Tau("Tau", minMN); ViewTypeTW Work("Work", lwork); // Create host mirrors of device views. 
- typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); - typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A); - typename ViewTypeTW::HostMirror h_tau = Kokkos::create_mirror_view(Tau); - typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work); + typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); + typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A); + typename ViewTypeTW::HostMirror h_tau = Kokkos::create_mirror_view(Tau); + typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work); // Initialize data. if ((m == 3) && (n == 3)) { if constexpr (Kokkos::ArithTraits::is_complex) { - h_A(0,0).real() = 12.; - h_A(0,1).real() = -51.; - h_A(0,2).real() = 4.; + h_A(0, 0).real() = 12.; + h_A(0, 1).real() = -51.; + h_A(0, 2).real() = 4.; - h_A(1,0).real() = 6.; - h_A(1,1).real() = 167.; - h_A(1,2).real() = -68.; + h_A(1, 0).real() = 6.; + h_A(1, 1).real() = 167.; + h_A(1, 2).real() = -68.; - h_A(2,0).real() = -4.; - h_A(2,1).real() = 24.; - h_A(2,2).real() = -41.; + h_A(2, 0).real() = -4.; + h_A(2, 1).real() = 24.; + h_A(2, 2).real() = -41.; for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - h_A(i,j).imag() = 0.; - } + h_A(i, j).imag() = 0.; + } } - } - else { - h_A(0,0) = 12.; - h_A(0,1) = -51.; - h_A(0,2) = 4.; - - h_A(1,0) = 6.; - h_A(1,1) = 167.; - h_A(1,2) = -68.; - - h_A(2,0) = -4.; - h_A(2,1) = 24.; - h_A(2,2) = -41.; + } else { + h_A(0, 0) = 12.; + h_A(0, 1) = -51.; + h_A(0, 2) = 4.; + + h_A(1, 0) = 6.; + h_A(1, 1) = 167.; + h_A(1, 2) = -68.; + + h_A(2, 0) = -4.; + h_A(2, 1) = 24.; + h_A(2, 2) = -41.; } Kokkos::deep_copy(A, h_A); - } - else { - Kokkos::fill_random( A - , rand_pool - , Kokkos::rand, ScalarA>::max() - ); + } else { + Kokkos::fill_random(A, rand_pool, + Kokkos::rand, + ScalarA>::max()); Kokkos::deep_copy(h_A, A); } Kokkos::deep_copy(h_Aorig, h_A); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < 
m; ++i) { for (int j(0); j < n; ++j) { - std::cout << "A(" << i << "," << j << ") = " << h_A(i,j) << std::endl; + std::cout << "A(" << i << "," << j << ") = " << h_A(i, j) << std::endl; } } #endif @@ -175,9 +170,9 @@ void impl_test_geqrf(int m, int n) { int rc(0); try { rc = KokkosLapack::geqrf(space, A, Tau, Work); - } - catch (const std::runtime_error & e) { - std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'" << std::endl; + } catch (const std::runtime_error& e) { + std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'" + << std::endl; FAIL(); return; } @@ -191,11 +186,11 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_tau, Tau); Kokkos::deep_copy(h_work, Work); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG std::cout << "rc = " << rc << std::endl; for (int i(0); i < minMN; ++i) { for (int j(0); j < n; ++j) { - std::cout << "R(" << i << "," << j << ") = " << h_A(i,j) << std::endl; + std::cout << "R(" << i << "," << j << ") = " << h_A(i, j) << std::endl; } } for (int i(0); i < minMN; ++i) { @@ -206,8 +201,8 @@ void impl_test_geqrf(int m, int n) { } #endif - ViewTypeA Q ("Q", m, m); - ViewTypeA R ("R", m, n); + ViewTypeA Q("Q", m, m); + ViewTypeA R("R", m, n); ViewTypeA QR("QR", m, n); typename ViewTypeA::HostMirror h_Q = Kokkos::create_mirror_view(Q); @@ -216,20 +211,20 @@ void impl_test_geqrf(int m, int n) { getQR(m, n, h_A, h_tau, h_work, h_Q, h_R, h_QR); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < m; ++j) { - std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; + std::cout << "Q(" << i << "," << j << ") = " << h_Q(i, j) << std::endl; } } for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - std::cout << "R(" << i << "," << j << ") = " << h_R(i,j) << std::endl; + std::cout << "R(" << i << "," << j << ") = " << h_R(i, j) << std::endl; } } for (int i(0); i < m; ++i) { for 
(int j(0); j < n; ++j) { - std::cout << "QR(" << i << "," << j << ") = " << h_QR(i,j) << std::endl; + std::cout << "QR(" << i << "," << j << ") = " << h_QR(i, j) << std::endl; } } #endif @@ -258,9 +253,9 @@ void impl_test_geqrf(int m, int n) { // ); // Checking vs ref on CPU, this eps is about 10^-9 - //typedef typename ats::mag_type mag_type; - //const mag_type eps = 3.0e7 * ats::epsilon(); - bool test_flag = true; + // typedef typename ats::mag_type mag_type; + // const mag_type eps = 3.0e7 * ats::epsilon(); + bool test_flag = true; for (int i = 0; i < n; i++) { #if 0 if (ats::abs(h_B(i) - h_X0(i)) > eps) { @@ -284,7 +279,7 @@ void test_geqrf() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - using view_type_a_ll = Kokkos::View; + using view_type_a_ll = Kokkos::View; using view_type_tw_ll = Kokkos::View; Test::impl_test_geqrf(3, 3); From 05c8b958e2cce1352136ea05dcac3540d2348334 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Mon, 20 May 2024 10:29:58 -0600 Subject: [PATCH 08/27] Backup --- lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 7fbc5ff391..739517ae15 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -464,7 +464,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = \ Kokkos::View, \ From b9fb93d3d47e4e1427634f271ef64c47f913316b Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Mon, 20 May 2024 10:39:27 -0600 Subject: [PATCH 09/27] Formatting --- lapack/tpls/KokkosLapack_Host_tpl.cpp | 64 +++++++++++++++------------ 1 file changed, 35 insertions(+), 29 
deletions(-) diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index 89085619e8..9d751f75b6 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -87,15 +87,16 @@ void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, /// Geqrf /// -void F77_BLAS_MANGLE(sgeqrf, SGEQRF)(int*, int*, float*, int*, float*, float*, int*, - int*); -void F77_BLAS_MANGLE(dgeqrf, DGEQRF)(int*, int*, double*, int*, double*, double*, - int*, int*); -void F77_BLAS_MANGLE(cgeqrf, CGEQRF)(int*, int*, std::complex*, int*, std::complex*, - std::complex*, int*, int*); +void F77_BLAS_MANGLE(sgeqrf, SGEQRF)(int*, int*, float*, int*, float*, float*, + int*, int*); +void F77_BLAS_MANGLE(dgeqrf, DGEQRF)(int*, int*, double*, int*, double*, + double*, int*, int*); +void F77_BLAS_MANGLE(cgeqrf, CGEQRF)(int*, int*, std::complex*, int*, + std::complex*, std::complex*, + int*, int*); void F77_BLAS_MANGLE(zgeqrf, ZGEQRF)(int*, int*, std::complex*, int*, - std::complex*, std::complex*, int*, int*); - + std::complex*, + std::complex*, int*, int*); } #define F77_FUNC_SGESV F77_BLAS_MANGLE(sgesv, SGESV) @@ -192,14 +193,14 @@ int HostLapack::geqrf(int m, int n, double* a, int lda, double* tau, /// template <> -void HostLapack >::gesv(int n, int rhs, - std::complex* a, int lda, - int* ipiv, std::complex* b, - int ldb, int info) { +void HostLapack>::gesv(int n, int rhs, + std::complex* a, int lda, + int* ipiv, std::complex* b, + int ldb, int info) { F77_FUNC_CGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> -void HostLapack >::gesvd( +void HostLapack>::gesvd( const char jobu, const char jobvt, const int m, const int n, std::complex* a, const int lda, float* s, std::complex* u, const int ldu, std::complex* vt, const int ldvt, @@ -208,16 +209,18 @@ void HostLapack >::gesvd( &lwork, rwork, &info); } template <> -int HostLapack >::trtri(const char uplo, const char diag, - int n, const std::complex* a, - int lda) 
{ +int HostLapack>::trtri(const char uplo, const char diag, + int n, const std::complex* a, + int lda) { int info = 0; F77_FUNC_CTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } template <> -int HostLapack>::geqrf(int m, int n, std::complex* a, int lda, std::complex* tau, - std::complex* work, int lwork) { +int HostLapack>::geqrf(int m, int n, std::complex* a, + int lda, std::complex* tau, + std::complex* work, + int lwork) { int info = 0; F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info); return info; @@ -228,14 +231,14 @@ int HostLapack>::geqrf(int m, int n, std::complex* a, /// template <> -void HostLapack >::gesv(int n, int rhs, - std::complex* a, int lda, - int* ipiv, std::complex* b, - int ldb, int info) { +void HostLapack>::gesv(int n, int rhs, + std::complex* a, int lda, + int* ipiv, std::complex* b, + int ldb, int info) { F77_FUNC_ZGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> -void HostLapack >::gesvd( +void HostLapack>::gesvd( const char jobu, const char jobvt, const int m, const int n, std::complex* a, const int lda, double* s, std::complex* u, const int ldu, std::complex* vt, const int ldvt, @@ -244,17 +247,20 @@ void HostLapack >::gesvd( &lwork, rwork, &info); } template <> -int HostLapack >::trtri(const char uplo, const char diag, - int n, - const std::complex* a, - int lda) { +int HostLapack>::trtri(const char uplo, const char diag, + int n, + const std::complex* a, + int lda) { int info = 0; F77_FUNC_ZTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; } template <> -int HostLapack>::geqrf(int m, int n, std::complex* a, int lda, std::complex* tau, - std::complex* work, int lwork) { +int HostLapack>::geqrf(int m, int n, + std::complex* a, int lda, + std::complex* tau, + std::complex* work, + int lwork) { int info = 0; F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info); return info; From 11221186a36df4a8051c97b55e2e460b0b7c0f95 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Mon, 20 May 2024 11:20:26 
-0600 Subject: [PATCH 10/27] Formatting --- .../geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in | 4 ++-- .../KokkosLapack_geqrf_eti_spec_avail.hpp.in | 4 ++-- lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in index 2015898d13..4f4ad91cb6 100644 --- a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in +++ b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in @@ -20,6 +20,6 @@ namespace KokkosLapack { namespace Impl { -@LAPACK_GEQRF_ETI_INST_BLOCK @ -} // namespace Impl +@LAPACK_GEQRF_ETI_INST_BLOCK@ + } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in index 2726dddd80..899a8b7604 100644 --- a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in @@ -18,7 +18,7 @@ #define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_ namespace KokkosLapack { namespace Impl { -@LAPACK_GEQRF_ETI_AVAIL_BLOCK @ -} // namespace Impl +@LAPACK_GEQRF_ETI_AVAIL_BLOCK@ + } // namespace Impl } // namespace KokkosLapack #endif diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 739517ae15..4e040cc358 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -464,7 +464,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = 
\ Kokkos::View, \ From f578072005288625bc5337c8a6a5cf2c8048c721 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 22 May 2024 22:12:00 -0600 Subject: [PATCH 11/27] Backup --- lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp index 8a1fcf618d..f291bbe2a8 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp @@ -94,6 +94,8 @@ namespace Impl { Kokkos::View, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View>> { \ enum : bool { value = true }; \ }; @@ -139,6 +141,8 @@ namespace Impl { Kokkos::View, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View>> { \ enum : bool { value = true }; \ }; From e953547eebeb0c990f291e57f5ad2aceb3a13e00 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 22 May 2024 22:13:52 -0600 Subject: [PATCH 12/27] Backup --- lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 4e040cc358..056eef24da 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -17,8 +17,6 @@ #ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ #define KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ -// AquiEEP - namespace KokkosLapack { namespace Impl { template @@ -248,6 +246,8 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA +#endif // AquiEEP + // CUSOLVER #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER #include "KokkosLapack_cusolver.hpp" @@ -395,6 +395,8 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // 
KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#if 0 // AquiEEP + // ROCSOLVER #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER #include From 1459f5e20417f8c3e4d2872c3ea1a4665908923f Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Thu, 23 May 2024 01:29:58 -0600 Subject: [PATCH 13/27] Backup --- lapack/impl/KokkosLapack_geqrf_spec.hpp | 10 +- lapack/src/KokkosLapack_geqrf.hpp | 42 +++--- .../KokkosLapack_geqrf_tpl_spec_avail.hpp | 6 +- .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 139 +++++++++--------- 4 files changed, 100 insertions(+), 97 deletions(-) diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp index 6970c6dd2c..5410520c1c 100644 --- a/lapack/impl/KokkosLapack_geqrf_spec.hpp +++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp @@ -53,7 +53,8 @@ struct geqrf_eti_spec_avail { Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ }; @@ -78,7 +79,6 @@ struct GEQRF { }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY -//! Full specialization of geqrf for multi vectors. // Unification layer template struct GEQRF, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>, \ false, true>; @@ -128,7 +129,8 @@ struct GEQRF, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>, \ false, true>; diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index 506d3c60b7..1d26747cd3 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -15,7 +15,7 @@ //@HEADER /// \file KokkosLapack_geqrf.hpp -/// \brief Local dense linear solve +/// \brief QR factorization /// /// This file provides KokkosLapack::geqrf. This function performs a /// local (no MPI) QR factorization of a M-by-N matrix A. 
@@ -118,31 +118,33 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, } } - typedef Kokkos::View< + using RetArray = Kokkos::View; + RetArray rc("rc", 1); + + using AMatrix_Internal = Kokkos::View< typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, - typename AMatrix::device_type, Kokkos::MemoryTraits > - AMatrix_Internal; - typedef Kokkos::View< + typename AMatrix::device_type, Kokkos::MemoryTraits>; + using TWArray_Internal = Kokkos::View< typename TWArray::non_const_value_type*, typename TWArray::array_layout, - typename TWArray::device_type, Kokkos::MemoryTraits > - TWArray_Internal; - AMatrix_Internal A_i = A; - TWArray_Internal Tau_i = Tau; - TWArray_Internal Work_i = Work; - - // This is the return value type and should always reside on host - using RViewInternalType = - Kokkos::View >; + typename TWArray::device_type, Kokkos::MemoryTraits>; + using RetArray_Internal = Kokkos::View< + int*, typename TWArray::array_layout, + typename TWArray::device_type, Kokkos::MemoryTraits>; - int result; - RViewInternalType R = RViewInternalType(&result); + AMatrix_Internal A_i = A; + TWArray_Internal Tau_i = Tau; + TWArray_Internal Work_i = Work; + RetArray_Internal rc_i = rc; KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, Work_i, - R); + RetArray_Internal>::geqrf(space, A_i, Tau_i, Work_i, + rc_i); + + typename RetArray_Internal::HostMirror h_rc = Kokkos::create_mirror_view(rc_i); + + Kokkos::deep_copy(h_rc, rc_i); - return result; + return h_rc[0]; } /// \brief Computes a QR factorization of a matrix A diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp index f291bbe2a8..cc6f1e78a4 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp @@ -36,7 +36,7 @@ struct geqrf_tpl_spec_avail { Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>> 
{ \ enum : bool { value = true }; \ }; @@ -95,7 +95,7 @@ namespace Impl { Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ }; @@ -142,7 +142,7 @@ namespace Impl { Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ }; diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 056eef24da..fe25ce19a0 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -54,21 +54,21 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, using ALayout_t = typename AViewType::array_layout; static_assert(std::is_same_v, "KokkosLapack - geqrf: A needs to have a Kokkos::LayoutLeft"); - const int M = A.extent_int(0); - const int N = A.extent_int(1); - const int LDA = A.stride(1); - const int LWORK = static_cast(Work.extent(0)); + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = A.stride(1); + const int lwork = static_cast(Work.extent(0)); if constexpr (Kokkos::ArithTraits::is_complex) { using MagType = typename Kokkos::ArithTraits::mag_type; - R() = HostLapack>::geqrf( - M, N, reinterpret_cast*>(A.data()), LDA, + R[0] = HostLapack>::geqrf( + m, n, reinterpret_cast*>(A.data()), lda, reinterpret_cast*>(Tau.data()), - reinterpret_cast*>(Work.data()), LWORK); + reinterpret_cast*>(Work.data()), lwork); } else { - R() = HostLapack::geqrf(M, N, A.data(), LDA, Tau.data(), - Work.data(), LWORK); + R[0] = HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), + Work.data(), lwork); } } @@ -80,7 +80,7 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>, \ true, \ geqrf_eti_spec_avail< \ @@ -89,7 +89,7 @@ void 
lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = \ Kokkos::View, \ @@ -97,7 +97,7 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, using TWViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ - using RType = Kokkos::View, \ Kokkos::MemoryTraits>; \ \ static void geqrf(const EXECSPACE& /* space */, const AViewType& A, \ @@ -255,87 +255,68 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template -void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, - const AViewType& A, const TWViewType& Tau) { +template +void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, + const TWViewType& /* Work */, const TWViewType& Tau, + class RType& R) { + using memory_space = typename AViewType::memory_space; - using Scalar = typename TWViewType::non_const_value_type; - using ALayout_t = typename AViewType::array_layout; - using BLayout_t = typename TWViewType::array_layout; + using Scalar = typename AViewType::non_const_value_type; + using ALayout_t = typename AViewType::array_layout; + static_assert(std::is_same_v, + "KokkosLapack - cusolver geqrf: A needs to have a Kokkos::LayoutLeft"); const int m = A.extent_int(0); const int n = A.extent_int(1); - const int lda = std::is_same_v ? A.stride(0) - : A.stride(1); - - (void)B; - - const int nrhs = B.extent_int(1); - const int ldb = std::is_same_v ? 
B.stride(0) - : B.stride(1); + const int lda = A.stride(1); int lwork = 0; - Kokkos::View info("getrf info"); + + //Kokkos::View info("cusolver geqrf info"); CudaLapackSingleton& s = CudaLapackSingleton::singleton(); KOKKOS_CUSOLVER_SAFE_CALL_IMPL( cusolverDnSetStream(s.handle, space.cuda_stream())); if constexpr (std::is_same_v) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnSgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); - Kokkos::View Workspace("getrf workspace", lwork); - - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgetrf(s.handle, m, n, A.data(), - lda, Workspace.data(), - IPIV.data(), info.data())); + cusolverDnSgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + Kokkos::View Workspace("cusolver sgeqrf workspace", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnSgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda, - IPIV.data(), B.data(), ldb, info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgeqrf(s.handle, m, n, A.data(), + lda, Tau.data(), + Workspace.data(), lwork, /*info*/R.data())); } if constexpr (std::is_same_v) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnDgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); - Kokkos::View Workspace("getrf workspace", lwork); - - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgetrf(s.handle, m, n, A.data(), - lda, Workspace.data(), - IPIV.data(), info.data())); + cusolverDnDgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + Kokkos::View Workspace("cusolver dgeqrf workspace", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnDgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda, - IPIV.data(), B.data(), ldb, info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgeqrf(s.handle, m, n, A.data(), + lda, Tau.data(), + Workspace.data(), lwork, /*info*/R.data())); } if constexpr (std::is_same_v>) { - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrf_bufferSize( + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf_bufferSize( s.handle, m, n, reinterpret_cast(A.data()), lda, 
&lwork)); - Kokkos::View Workspace("getrf workspace", lwork); + Kokkos::View Workspace("cusolver cgeqrf workspace", lwork); KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnCgetrf(s.handle, m, n, reinterpret_cast(A.data()), - lda, reinterpret_cast(Workspace.data()), - IPIV.data(), info.data())); - - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrs( - s.handle, CUBLAS_OP_N, m, nrhs, reinterpret_cast(A.data()), - lda, IPIV.data(), reinterpret_cast(B.data()), ldb, - info.data())); + cusolverDnCgeqrf(s.handle, m, n, reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()), + reinterpret_cast(Workspace.data()), + lwork, /*info*/R.data())); } if constexpr (std::is_same_v>) { - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf_bufferSize( + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf_bufferSize( s.handle, m, n, reinterpret_cast(A.data()), lda, &lwork)); - Kokkos::View Workspace("getrf workspace", + Kokkos::View Workspace("cusolver zgeqrf workspace", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf( + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf( s.handle, m, n, reinterpret_cast(A.data()), lda, - reinterpret_cast(Workspace.data()), IPIV.data(), - info.data())); - - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrs( - s.handle, CUBLAS_OP_N, m, nrhs, - reinterpret_cast(A.data()), lda, IPIV.data(), - reinterpret_cast(B.data()), ldb, info.data())); + reinterpret_cast(Tau.data()), + reinterpret_cast(Workspace.data()), + lwork, /*info*/R.data())); } KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); } @@ -348,6 +329,8 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ true, \ geqrf_eti_spec_avail< \ Kokkos::Cuda, \ @@ -355,6 +338,9 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 
Kokkos::View, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = Kokkos::View>; \ using TWViewType = \ Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using RType = \ + Kokkos::View, \ Kokkos::MemoryTraits>; \ \ static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ - const TWViewType& Tau, const TWViewType& Work) { \ + const TWViewType& Tau, const TWViewType& Work, \ + const RType& R) { \ Kokkos::Profiling::pushRegion( \ "KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR "]"); \ geqrf_print_specialization(); \ \ - cusolverGeqrfWrapper(space, IPIV, A, B); \ + cusolverGeqrfWrapper(space, A, Tau, Work, R); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -420,7 +410,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, const rocblas_int ldb = std::is_same_v ? B.stride(0) : B.stride(1); - Kokkos::View info("rocsolver info"); + Kokkos::View info("rocsolver geqrf info"); KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); @@ -459,6 +449,8 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ + Kokkos::View>, \ true, \ geqrf_eti_spec_avail< \ Kokkos::HIP, \ @@ -467,21 +459,28 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ using TWViewType = \ Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using RType = \ + Kokkos::View, \ Kokkos::MemoryTraits>; \ \ static void geqrf(const Kokkos::HIP& space, const AViewType& A, \ - const TWViewType& Tau, const TWViewType& Work) { \ + const TWViewType& Tau, const TWViewType& Work, \ + const RType& R) { \ Kokkos::Profiling::pushRegion( \ "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]"); \ geqrf_print_specialization(); \ \ - rocsolverGeqrfWrapper(space, IPIV, A, 
B); \ + rocsolverGeqrfWrapper(space, A, Tau, Work, R); \ Kokkos::Profiling::popRegion(); \ } \ }; From ad08d09d701ab3f02a5e970eabd872464dd929a1 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Thu, 23 May 2024 02:41:11 -0600 Subject: [PATCH 14/27] Backup --- lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index fe25ce19a0..8841440a04 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -257,8 +257,8 @@ namespace Impl { template void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, - const TWViewType& /* Work */, const TWViewType& Tau, - class RType& R) { + const TWViewType& Tau, const TWViewType& /* Work */, + const RType& R) { using memory_space = typename AViewType::memory_space; using Scalar = typename AViewType::non_const_value_type; @@ -271,7 +271,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const int lda = A.stride(1); int lwork = 0; - //Kokkos::View info("cusolver geqrf info"); + //Kokkos::View info("cusolver geqrf info"); // AquiEEP CudaLapackSingleton& s = CudaLapackSingleton::singleton(); KOKKOS_CUSOLVER_SAFE_CALL_IMPL( @@ -283,7 +283,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgeqrf(s.handle, m, n, A.data(), lda, Tau.data(), - Workspace.data(), lwork, /*info*/R.data())); + Workspace.data(), lwork, R.data())); } if constexpr (std::is_same_v) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL( @@ -292,7 +292,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgeqrf(s.handle, m, n, A.data(), lda, Tau.data(), - Workspace.data(), lwork, /*info*/R.data())); + Workspace.data(), lwork, R.data())); } if constexpr 
(std::is_same_v>) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf_bufferSize( @@ -303,7 +303,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cusolverDnCgeqrf(s.handle, m, n, reinterpret_cast(A.data()), lda, reinterpret_cast(Tau.data()), reinterpret_cast(Workspace.data()), - lwork, /*info*/R.data())); + lwork, R.data())); } if constexpr (std::is_same_v>) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf_bufferSize( @@ -316,9 +316,11 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, s.handle, m, n, reinterpret_cast(A.data()), lda, reinterpret_cast(Tau.data()), reinterpret_cast(Workspace.data()), - lwork, /*info*/R.data())); + lwork, R.data())); } KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); + + //Kokkos::deep_copy(R, info); // AquiEEP } #define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ From 399a18f0258c48e3785e7e946dfba879dd7302d0 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Fri, 24 May 2024 01:06:02 -0600 Subject: [PATCH 15/27] Backup --- lapack/src/KokkosLapack_geqrf.hpp | 135 ++++++++---------- .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 131 +++++++++-------- lapack/unit_test/Test_Lapack_geqrf.hpp | 47 +++--- 3 files changed, 149 insertions(+), 164 deletions(-) diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index 1d26747cd3..7a81818d79 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -34,7 +34,8 @@ namespace KokkosLapack { /// /// \tparam ExecutionSpace The space where the kernel will run. /// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. -/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View. +/// \tparam TArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. /// /// \param space [in] Execution space instance used to specified how to execute /// the geqrf kernels. 
@@ -51,21 +52,15 @@ namespace KokkosLapack { /// where tau is a complex scalar, and v is a complex vector /// with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on /// exit in A(i+1:M,i), and tau in Tau(i). -/// \param Tau [out] One-dimensional array of size min(M,N) that contains -/// the scalar factors of the elementary reflectors. -/// \param Work [out] One-dimensional array of size max(1,LWORK). -/// If min(M,N) == 0, then LWORK must be >= 1. -/// If min(M,N) != 0, then LWORK must be >= N. -/// If the QR factorization is successful, then the first -/// position of Work contains the optimal LWORK. +/// \param Tau [out] One-dimensional array of size min(M,N) that contains the +/// scalar factors of the elementary reflectors. +/// \param Info [out] One-dimensional array of integers and of size 1: +/// Info[0] = 0: successfull exit +/// Info[0] < 0: if equal to '-i', the i-th argument had an +/// illegal value /// -/// \return = 0: successfull exit -/// < 0: if equal to '-i', the i-th argument had an illegal -/// value -/// -template -int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, - const TWArray& Work) { +template +void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, const InfoArray& Info) { // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and // rocSOLVER TPLs. 
// MAGMA/rocSOLVER TPL should be enabled to call the MAGMA/rocSOLVER GPU @@ -77,21 +72,32 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, typename AMatrix::memory_space>::accessible); static_assert( Kokkos::SpaceAccessibility::accessible); + typename TArray::memory_space>::accessible); + static_assert( + Kokkos::SpaceAccessibility::accessible); static_assert(Kokkos::is_view::value, "KokkosLapack::geqrf: A must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, - "KokkosLapack::geqrf: Tau and Work must be Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::geqrf: Tau must be Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::geqrf: Info must be Kokkos::View."); + static_assert(static_cast(AMatrix::rank) == 2, "KokkosLapack::geqrf: A must have rank 2."); - static_assert(static_cast(TWArray::rank) == 1, - "KokkosLapack::geqrf: Tau and Work must have rank 1."); + static_assert(static_cast(TArray::rank) == 1, + "KokkosLapack::geqrf: Tau must have rank 1."); + static_assert(static_cast(InfoArray::rank) == 1, + "KokkosLapack::geqrf: Info must have rank 1."); + + static_assert(std::is_same_v, + "KokkosLapack::geqrf: Info must be an array of integers."); int64_t m = A.extent(0); int64_t n = A.extent(1); int64_t tau0 = Tau.extent(0); - int64_t work0 = Work.extent(0); + int64_t info0 = Info.extent(0); // Check validity of dimensions if (tau0 != std::min(m, n)) { @@ -100,57 +106,37 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, << " A: " << m << " x " << n << ", Tau length = " << tau0; KokkosKernels::Impl::throw_runtime_exception(os.str()); } - if ((m == 0) || (n == 0)) { - if (work0 < 1) { - std::ostringstream os; - os << "KokkosLapack::geqrf: In case min(m,n) == 0, then Work must have " - "length >= 1: " - << " A: " << m << " x " << n << ", Work length = " << work0; - KokkosKernels::Impl::throw_runtime_exception(os.str()); - } - } else { - if (work0 < n) { 
- std::ostringstream os; - os << "KokkosLapack::geqrf: In case min(m,n) != 0, then Work must have " - "length >= n: " - << " A: " << m << " x " << n << ", Work length = " << work0; - KokkosKernels::Impl::throw_runtime_exception(os.str()); - } - } - using RetArray = Kokkos::View; - RetArray rc("rc", 1); + if (info0 == 0) { + std::ostringstream os; + os << "KokkosLapack::geqrf: length of Info must be at least 1: " + << " A: " << m << " x " << n << ", Info length = " << info0; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } using AMatrix_Internal = Kokkos::View< typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, typename AMatrix::device_type, Kokkos::MemoryTraits>; - using TWArray_Internal = Kokkos::View< - typename TWArray::non_const_value_type*, typename TWArray::array_layout, - typename TWArray::device_type, Kokkos::MemoryTraits>; - using RetArray_Internal = Kokkos::View< - int*, typename TWArray::array_layout, - typename TWArray::device_type, Kokkos::MemoryTraits>; - - AMatrix_Internal A_i = A; - TWArray_Internal Tau_i = Tau; - TWArray_Internal Work_i = Work; - RetArray_Internal rc_i = rc; - - KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, Work_i, - rc_i); - - typename RetArray_Internal::HostMirror h_rc = Kokkos::create_mirror_view(rc_i); - - Kokkos::deep_copy(h_rc, rc_i); - - return h_rc[0]; + using TArray_Internal = Kokkos::View< + typename TArray::non_const_value_type*, typename TArray::array_layout, + typename TArray::device_type, Kokkos::MemoryTraits>; + using InfoArray_Internal = Kokkos::View< + typename InfoArray::non_const_value_type*, typename InfoArray::array_layout, + typename InfoArray::device_type, Kokkos::MemoryTraits>; + + AMatrix_Internal A_i = A; + TArray_Internal Tau_i = Tau; + InfoArray_Internal Info_i = Info; + + KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, Info_i); } /// \brief Computes a QR factorization of a matrix A /// -/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. 
-/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View. +/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. +/// \tparam TArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. /// /// \param A [in,out] On entry, the M-by-N matrix to be factorized. /// On exit, the elements on and above the diagonal contain @@ -165,22 +151,17 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, /// where tau is a complex scalar, and v is a complex vector /// with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on /// exit in A(i+1:M,i), and tau in Tau(i). -/// \param Tau [out] One-dimensional array of size min(M,N) that contains -/// the scalar factors of the elementary reflectors. -/// \param Work [out] One-dimensional array of size max(1,LWORK). -/// If min(M,N) == 0, then LWORK must be >= 1. -/// If min(M,N) != 0, then LWORK must be >= N. -/// If the QR factorization is successful, then the first -/// position of Work contains the optimal LWORK. -/// -/// \return = 0: successfull exit -/// < 0: if equal to '-i', the i-th argument had an illegal -/// value +/// \param Tau [out] One-dimensional array of size min(M,N) that contains the +/// scalar factors of the elementary reflectors. 
+/// \param Info [out] One-dimensional array of integers and of size 1: +/// Info[0] = 0: successfull exit +/// Info[0] < 0: if equal to '-i', the i-th argument had an +/// illegal value /// -template -int geqrf(const AMatrix& A, const TWArray& Tau, const TWArray& Work) { +template +void geqrf(const AMatrix& A, const TArray& Tau, const InfoArray& Info) { typename AMatrix::execution_space space{}; - return geqrf(space, A, Tau, Work); + geqrf(space, A, Tau, Info); } } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 8841440a04..b10edbdac6 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -19,19 +19,19 @@ namespace KokkosLapack { namespace Impl { -template +template inline void geqrf_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA printf("KokkosLapack::geqrf<> TPL MAGMA specialization for < %s , %s, %s >\n", - typeid(AViewType).name(), typeid(TWViewType).name(), - typeid(RType).name()); + typeid(AViewType).name(), typeid(TauViewType).name(), + typeid(InfoViewType).name()); #else #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK printf( "KokkosLapack::geqrf<> TPL Lapack specialization for < %s , %s, %s >\n", - typeid(AViewType).name(), typeid(TWViewType).name(), - typeid(RType).name()); + typeid(AViewType).name(), typeid(TauViewType).name(), + typeid(InfoViewType).name()); #endif #endif #endif @@ -46,29 +46,51 @@ inline void geqrf_print_specialization() { namespace KokkosLapack { namespace Impl { -template -void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, - const TWViewType& Work, const RType& R) { - using Scalar = typename AViewType::non_const_value_type; - - using ALayout_t = typename AViewType::array_layout; +template +void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau, + const InfoViewType& Info) { + using 
memory_space = typename AViewType::memory_space; + using Scalar = typename AViewType::non_const_value_type; + using ALayout_t = typename AViewType::array_layout; static_assert(std::is_same_v, "KokkosLapack - geqrf: A needs to have a Kokkos::LayoutLeft"); - const int m = A.extent_int(0); - const int n = A.extent_int(1); - const int lda = A.stride(1); - const int lwork = static_cast(Work.extent(0)); + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = A.stride(1); + + int lwork = -1; + Kokkos::View work("geqrf work buffer", 1); if constexpr (Kokkos::ArithTraits::is_complex) { using MagType = typename Kokkos::ArithTraits::mag_type; - R[0] = HostLapack>::geqrf( + Info[0] = HostLapack>::geqrf( m, n, reinterpret_cast*>(A.data()), lda, reinterpret_cast*>(Tau.data()), - reinterpret_cast*>(Work.data()), lwork); + reinterpret_cast*>(work.data()), lwork); + + if (Info[0] < 0) return; + + lwork = static_cast(work(0).real()); + + work = Kokkos::View("geqrf work buffer", lwork); + + Info[0] = HostLapack>::geqrf( + m, n, reinterpret_cast*>(A.data()), lda, + reinterpret_cast*>(Tau.data()), + reinterpret_cast*>(work.data()), lwork); } else { - R[0] = HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), - Work.data(), lwork); + Info[0] = HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), + work.data(), lwork); + + if (Info[0] < 0) return; + + lwork = static_cast(work(0)); + + work = Kokkos::View("geqrf work buffer", lwork); + + Info[0] = HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), + work.data(), lwork); } } @@ -94,19 +116,18 @@ void lapackGeqrfWrapper(const AViewType& A, const TWViewType& Tau, using AViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ - using TWViewType = \ + using TauViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ - using RType = Kokkos::View, \ + using InfoViewType = Kokkos::View, \ Kokkos::MemoryTraits>; \ \ static void geqrf(const EXECSPACE& /* space */, const AViewType& A, \ - const TWViewType& Tau, const TWViewType& 
Work, \ - const RType& R) { \ + const TauViewType& Tau, const InfoViewType& Info) { \ Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK," #SCALAR \ "]"); \ - geqrf_print_specialization(); \ - lapackGeqrfWrapper(A, Tau, Work, R); \ + geqrf_print_specialization(); \ + lapackGeqrfWrapper(A, Tau, Info); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -157,14 +178,14 @@ KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template +template void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, - const TWViewType& Tau, const TWViewType& Work) { + const TauViewType& Tau) { using scalar_type = typename AViewType::non_const_value_type; Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," + Kokkos::ArithTraits::name() + "]"); - geqrf_print_specialization(); + geqrf_print_specialization(); magma_int_t N = static_cast(A.extent(1)); magma_int_t AST = static_cast(A.stride(1)); @@ -225,13 +246,13 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, using AViewType = Kokkos::View, \ Kokkos::MemoryTraits>; \ - using TWViewType = \ + using TauViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ \ static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ - const TWViewType& Tau, const TWViewType& Work) { \ - magmaGeqrfWrapper(space, A, Tau, Work); \ + const TauViewType& Tau) { \ + magmaGeqrfWrapper(space, A, Tau); \ } \ }; @@ -255,10 +276,9 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template +template void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, - const TWViewType& Tau, const TWViewType& /* Work */, - const RType& R) { + const TauViewType& Tau, const InfoViewType& Info) { using memory_space = typename AViewType::memory_space; using Scalar = typename AViewType::non_const_value_type; @@ -271,8 +291,6 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, 
const int lda = A.stride(1); int lwork = 0; - //Kokkos::View info("cusolver geqrf info"); // AquiEEP - CudaLapackSingleton& s = CudaLapackSingleton::singleton(); KOKKOS_CUSOLVER_SAFE_CALL_IMPL( cusolverDnSetStream(s.handle, space.cuda_stream())); @@ -283,7 +301,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgeqrf(s.handle, m, n, A.data(), lda, Tau.data(), - Workspace.data(), lwork, R.data())); + Workspace.data(), lwork, Info.data())); } if constexpr (std::is_same_v) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL( @@ -292,7 +310,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgeqrf(s.handle, m, n, A.data(), lda, Tau.data(), - Workspace.data(), lwork, R.data())); + Workspace.data(), lwork, Info.data())); } if constexpr (std::is_same_v>) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf_bufferSize( @@ -303,7 +321,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cusolverDnCgeqrf(s.handle, m, n, reinterpret_cast(A.data()), lda, reinterpret_cast(Tau.data()), reinterpret_cast(Workspace.data()), - lwork, R.data())); + lwork, Info.data())); } if constexpr (std::is_same_v>) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf_bufferSize( @@ -316,11 +334,9 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, s.handle, m, n, reinterpret_cast(A.data()), lda, reinterpret_cast(Tau.data()), reinterpret_cast(Workspace.data()), - lwork, R.data())); + lwork, Info.data())); } KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); - - //Kokkos::deep_copy(R, info); // AquiEEP } #define KOKKOSLAPACK_GEQRF_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ @@ -348,21 +364,20 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, using AViewType = Kokkos::View, \ Kokkos::MemoryTraits>; \ - using TWViewType = \ + using TauViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ - using 
RType = \ + using InfoViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ \ static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ - const TWViewType& Tau, const TWViewType& Work, \ - const RType& R) { \ + const TauViewType& Tau, const InfoViewType& Info) { \ Kokkos::Profiling::pushRegion( \ "KokkosLapack::geqrf[TPL_CUSOLVER," #SCALAR "]"); \ - geqrf_print_specialization(); \ + geqrf_print_specialization(); \ \ - cusolverGeqrfWrapper(space, A, Tau, Work, R); \ + cusolverGeqrfWrapper(space, A, Tau, Info); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -397,12 +412,11 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template -void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, - const AViewType& A, const TWViewType& Tau) { - using Scalar = typename TWViewType::non_const_value_type; +template +void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau) { + using Scalar = typename TauViewType::non_const_value_type; using ALayout_t = typename AViewType::array_layout; - using BLayout_t = typename TWViewType::array_layout; + using BLayout_t = typename TauViewType::array_layout; const rocblas_int N = static_cast(A.extent(0)); const rocblas_int nrhs = static_cast(B.extent(1)); @@ -468,21 +482,20 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const TWViewType& Work, using AViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ - using TWViewType = \ + using TauViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ - using RType = \ + using InfoViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ \ static void geqrf(const Kokkos::HIP& space, const AViewType& A, \ - const TWViewType& Tau, const TWViewType& Work, \ - const RType& R) { \ + const TauViewType& Tau, const InfoViewType& Info) { \ Kokkos::Profiling::pushRegion( \ "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]"); \ - geqrf_print_specialization(); \ + 
geqrf_print_specialization(); \ \ - rocsolverGeqrfWrapper(space, A, Tau, Work, R); \ + rocsolverGeqrfWrapper(space, A, Tau, Info); \ Kokkos::Profiling::popRegion(); \ } \ }; diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index de662365ac..3aedb0b984 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -37,13 +37,11 @@ namespace Test { -template +template void getQR(int const m, int const n, typename ViewTypeA::HostMirror const& // h_A , - typename ViewTypeTW::HostMirror const& // h_tau - , - typename ViewTypeTW::HostMirror const& // h_work + typename ViewTypeTau::HostMirror const& // h_tau , typename ViewTypeA::HostMirror& // h_Q , @@ -85,8 +83,9 @@ void getQR(int const m, int const n, } } -template +template void impl_test_geqrf(int m, int n) { + using ViewTypeInfo = Kokkos::View; using execution_space = typename Device::execution_space; using ScalarA = typename ViewTypeA::value_type; // using ats = Kokkos::ArithTraits; @@ -96,21 +95,17 @@ void impl_test_geqrf(int m, int n) { Kokkos::Random_XorShift64_Pool rand_pool(13718); int minMN(std::min(m, n)); - int lwork(1); - if (minMN != 0) { - lwork = n; - } // Create device views - ViewTypeA A("A", m, n); - ViewTypeTW Tau("Tau", minMN); - ViewTypeTW Work("Work", lwork); + ViewTypeA A ("A", m, n); + ViewTypeTau Tau ("Tau", minMN); + ViewTypeInfo Info("Info", 1); // Create host mirrors of device views. 
- typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); - typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A); - typename ViewTypeTW::HostMirror h_tau = Kokkos::create_mirror_view(Tau); - typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work); + typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); + typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A); + typename ViewTypeTau::HostMirror h_tau = Kokkos::create_mirror_view(Tau); + typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info); // Initialize data. if ((m == 3) && (n == 3)) { @@ -167,9 +162,8 @@ void impl_test_geqrf(int m, int n) { Kokkos::fence(); // Perform the QR factorization - int rc(0); try { - rc = KokkosLapack::geqrf(space, A, Tau, Work); + KokkosLapack::geqrf(space, A, Tau, Info); } catch (const std::runtime_error& e) { std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'" << std::endl; @@ -179,15 +173,15 @@ void impl_test_geqrf(int m, int n) { Kokkos::fence(); - EXPECT_EQ(rc, 0) << "Failed geqrf() test: rc = " << rc; + Kokkos::deep_copy(h_info, Info); + EXPECT_EQ(h_info[0], 0) << "Failed geqrf() test: Info[0] = " << h_info[0]; // Get the results Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_tau, Tau); - Kokkos::deep_copy(h_work, Work); #if 1 // def HAVE_KOKKOSKERNELS_DEBUG - std::cout << "rc = " << rc << std::endl; + std::cout << "info[0] = " << h_info[0] << std::endl; for (int i(0); i < minMN; ++i) { for (int j(0); j < n; ++j) { std::cout << "R(" << i << "," << j << ") = " << h_A(i, j) << std::endl; @@ -196,9 +190,6 @@ void impl_test_geqrf(int m, int n) { for (int i(0); i < minMN; ++i) { std::cout << "tau(" << i << ") = " << h_tau[i] << std::endl; } - for (int i(0); i < lwork; ++i) { - std::cout << "work(" << i << ") = " << h_work[i] << std::endl; - } #endif ViewTypeA Q("Q", m, m); @@ -209,7 +200,7 @@ void impl_test_geqrf(int m, int n) { typename ViewTypeA::HostMirror h_R 
= Kokkos::create_mirror_view(R); typename ViewTypeA::HostMirror h_QR = Kokkos::create_mirror_view(QR); - getQR(m, n, h_A, h_tau, h_work, h_Q, h_R, h_QR); + getQR(m, n, h_A, h_tau, h_Q, h_R, h_QR); #if 1 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { @@ -279,10 +270,10 @@ void test_geqrf() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - using view_type_a_ll = Kokkos::View; - using view_type_tw_ll = Kokkos::View; + using view_type_a_ll = Kokkos::View; + using view_type_tau_ll = Kokkos::View; - Test::impl_test_geqrf(3, 3); + Test::impl_test_geqrf(3, 3); #endif } From ec1115902ec59c6d3cbb900c3a940fc6c44cd85b Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Fri, 24 May 2024 01:32:20 -0600 Subject: [PATCH 16/27] Backup --- lapack/tpls/KokkosLapack_Host_tpl.cpp | 32 +++++++------------ lapack/tpls/KokkosLapack_Host_tpl.hpp | 3 +- .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 18 ++++++----- 3 files changed, 24 insertions(+), 29 deletions(-) diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index 9d751f75b6..17bc2915a4 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -148,11 +148,9 @@ int HostLapack::trtri(const char uplo, const char diag, int n, return info; } template <> -int HostLapack::geqrf(int m, int n, float* a, int lda, float* tau, - float* work, int lwork) { - int info = 0; - F77_FUNC_SGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info); - return info; +void HostLapack::geqrf(int m, int n, float* a, int lda, float* tau, + float* work, int lwork, int *info) { + F77_FUNC_SGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); } /// @@ -181,11 +179,9 @@ int HostLapack::trtri(const char uplo, const char diag, int n, return info; } template <> -int HostLapack::geqrf(int m, int n, double* a, int lda, double* tau, - double* work, int lwork) { - int info = 0; - F77_FUNC_DGEQRF(&m, &n, 
a, &lda, tau, work, &lwork, &info); - return info; +void HostLapack::geqrf(int m, int n, double* a, int lda, double* tau, + double* work, int lwork, int *info) { + F77_FUNC_DGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); } /// @@ -217,13 +213,11 @@ int HostLapack>::trtri(const char uplo, const char diag, return info; } template <> -int HostLapack>::geqrf(int m, int n, std::complex* a, +void HostLapack>::geqrf(int m, int n, std::complex* a, int lda, std::complex* tau, std::complex* work, - int lwork) { - int info = 0; - F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info); - return info; + int lwork, int *info) { + F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); } /// @@ -256,14 +250,12 @@ int HostLapack>::trtri(const char uplo, const char diag, return info; } template <> -int HostLapack>::geqrf(int m, int n, +void HostLapack>::geqrf(int m, int n, std::complex* a, int lda, std::complex* tau, std::complex* work, - int lwork) { - int info = 0; - F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, &info); - return info; + int lwork, int *info) { + F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); } } // namespace Impl diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp index d651c9ca52..8797d2006c 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.hpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp @@ -42,7 +42,8 @@ struct HostLapack { static int trtri(const char uplo, const char diag, int n, const T *a, int lda); - static int geqrf(int m, int n, T *a, int lda, T *tau, T *work, int lwork); + static void geqrf(int m, int n, T *a, int lda, T *tau, T *work, int lwork, + int *info); }; } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index b10edbdac6..415dfca32c 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -64,10 +64,11 @@ void 
lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau, if constexpr (Kokkos::ArithTraits::is_complex) { using MagType = typename Kokkos::ArithTraits::mag_type; - Info[0] = HostLapack>::geqrf( + HostLapack>::geqrf( m, n, reinterpret_cast*>(A.data()), lda, reinterpret_cast*>(Tau.data()), - reinterpret_cast*>(work.data()), lwork); + reinterpret_cast*>(work.data()), lwork, + Info.data()); if (Info[0] < 0) return; @@ -75,13 +76,14 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau, work = Kokkos::View("geqrf work buffer", lwork); - Info[0] = HostLapack>::geqrf( + HostLapack>::geqrf( m, n, reinterpret_cast*>(A.data()), lda, reinterpret_cast*>(Tau.data()), - reinterpret_cast*>(work.data()), lwork); + reinterpret_cast*>(work.data()), lwork, + Info.data()); } else { - Info[0] = HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), - work.data(), lwork); + HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), + work.data(), lwork, Info.data()); if (Info[0] < 0) return; @@ -89,8 +91,8 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau, work = Kokkos::View("geqrf work buffer", lwork); - Info[0] = HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), - work.data(), lwork); + HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), work.data(), + lwork, Info.data()); } } From 2c03206cd04186502888c7027afe6cd2d6e0de9c Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Fri, 24 May 2024 03:08:11 -0600 Subject: [PATCH 17/27] Backup --- .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 63 ++++++++----------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 415dfca32c..db28ab541d 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -182,7 +182,7 @@ namespace Impl { template void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, - const TauViewType& Tau) { + 
const TauViewType& Tau, const InfoViewType& Info) { using scalar_type = typename AViewType::non_const_value_type; Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_MAGMA," + @@ -253,8 +253,8 @@ void magmaGeqrfWrapper(const ExecSpace& space, const AViewType& A, Kokkos::MemoryTraits>; \ \ static void geqrf(const Kokkos::Cuda& space, const AViewType& A, \ - const TauViewType& Tau) { \ - magmaGeqrfWrapper(space, A, Tau); \ + const TauViewType& Tau, const InfoViewType& Info) { \ + magmaGeqrfWrapper(space, A, Tau, Info); \ } \ }; @@ -281,7 +281,6 @@ namespace Impl { template void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau, const InfoViewType& Info) { - using memory_space = typename AViewType::memory_space; using Scalar = typename AViewType::non_const_value_type; @@ -404,8 +403,6 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER -#if 0 // AquiEEP - // ROCSOLVER #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER #include @@ -415,47 +412,41 @@ namespace KokkosLapack { namespace Impl { template -void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau) { - using Scalar = typename TauViewType::non_const_value_type; +void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau, + const InfoViewType& Info) { + using Scalar = typename AViewType::non_const_value_type; + using ALayout_t = typename AViewType::array_layout; - using BLayout_t = typename TauViewType::array_layout; - - const rocblas_int N = static_cast(A.extent(0)); - const rocblas_int nrhs = static_cast(B.extent(1)); - const rocblas_int lda = std::is_same_v - ? A.stride(0) - : A.stride(1); - const rocblas_int ldb = std::is_same_v - ? 
B.stride(0) - : B.stride(1); - Kokkos::View info("rocsolver geqrf info"); + static_assert(std::is_same_v, + "KokkosLapack - rocsolver geqrf: A needs to have a Kokkos::LayoutLeft"); + const rocblas_int m = static_cast(A.extent(0)); + const rocblas_int n = static_cast(A.extent(1)); + const rocblas_int lda = static_cast(A.stride(1)); + rocblas_status rc = rocblas_status_success; KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); KOKKOS_ROCBLAS_SAFE_CALL_IMPL( rocblas_set_stream(s.handle, space.hip_stream())); if constexpr (std::is_same_v) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgeqrf(s.handle, N, nrhs, A.data(), - lda, IPIV.data(), B.data(), - ldb, info.data())); + rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgeqrf(s.handle, m, n, A.data(), + lda, Tau.data())); } if constexpr (std::is_same_v) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgeqrf(s.handle, N, nrhs, A.data(), - lda, IPIV.data(), B.data(), - ldb, info.data())); + rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgeqrf(s.handle, m, n, A.data(), + lda, Tau.data())); } if constexpr (std::is_same_v>) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf( - s.handle, N, nrhs, reinterpret_cast(A.data()), - lda, IPIV.data(), reinterpret_cast(B.data()), - ldb, info.data())); + rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf( + s.handle, m, n, reinterpret_cast(A.data()), + lda, reinterpret_cast(Tau.data()))); } if constexpr (std::is_same_v>) { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf( - s.handle, N, nrhs, reinterpret_cast(A.data()), - lda, IPIV.data(), reinterpret_cast(B.data()), - ldb, info.data())); + rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf( + s.handle, m, n, reinterpret_cast(A.data()), + lda, reinterpret_cast(Tau.data()))); } + Info[0] = static_cast(rc); KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); } @@ -467,7 +458,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cons Kokkos::MemoryTraits>, \ 
Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>, \ true, \ geqrf_eti_spec_avail< \ @@ -479,7 +470,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cons Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = \ Kokkos::View, \ @@ -513,6 +504,4 @@ KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER -#endif // AquiEEP - #endif From 145fe1032aaa6680eb0f61c589a291ba120ac4cf Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Fri, 24 May 2024 03:14:14 -0600 Subject: [PATCH 18/27] Formatting --- lapack/impl/KokkosLapack_geqrf_spec.hpp | 6 +- lapack/src/KokkosLapack_geqrf.hpp | 27 +++-- .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 114 +++++++++--------- lapack/unit_test/Test_Lapack_geqrf.hpp | 14 +-- 4 files changed, 85 insertions(+), 76 deletions(-) diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp index 5410520c1c..89a253b796 100644 --- a/lapack/impl/KokkosLapack_geqrf_spec.hpp +++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp @@ -53,7 +53,7 @@ struct geqrf_eti_spec_avail { Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ @@ -114,7 +114,7 @@ struct GEQRF, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>, \ false, true>; @@ -129,7 +129,7 @@ struct GEQRF, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ Kokkos::MemoryTraits>, \ false, true>; diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index 7a81818d79..4c920e9a74 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -60,7 +60,8 @@ namespace KokkosLapack { /// illegal value /// template -void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, const InfoArray& Info) { +void 
geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, + const InfoArray& Info) { // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and // rocSOLVER TPLs. // MAGMA/rocSOLVER TPL should be enabled to call the MAGMA/rocSOLVER GPU @@ -117,19 +118,23 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, con using AMatrix_Internal = Kokkos::View< typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, typename AMatrix::device_type, Kokkos::MemoryTraits>; - using TArray_Internal = Kokkos::View< - typename TArray::non_const_value_type*, typename TArray::array_layout, - typename TArray::device_type, Kokkos::MemoryTraits>; - using InfoArray_Internal = Kokkos::View< - typename InfoArray::non_const_value_type*, typename InfoArray::array_layout, - typename InfoArray::device_type, Kokkos::MemoryTraits>; - - AMatrix_Internal A_i = A; - TArray_Internal Tau_i = Tau; + using TArray_Internal = + Kokkos::View>; + using InfoArray_Internal = + Kokkos::View>; + + AMatrix_Internal A_i = A; + TArray_Internal Tau_i = Tau; InfoArray_Internal Info_i = Info; KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, Info_i); + InfoArray_Internal>::geqrf(space, A_i, Tau_i, + Info_i); } /// \brief Computes a QR factorization of a matrix A diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index db28ab541d..f3d3be4506 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -68,10 +68,10 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau, m, n, reinterpret_cast*>(A.data()), lda, reinterpret_cast*>(Tau.data()), reinterpret_cast*>(work.data()), lwork, - Info.data()); + Info.data()); if (Info[0] < 0) return; - + lwork = static_cast(work(0).real()); work = Kokkos::View("geqrf work buffer", lwork); @@ -80,10 +80,10 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau, m, 
n, reinterpret_cast*>(A.data()), lda, reinterpret_cast*>(Tau.data()), reinterpret_cast*>(work.data()), lwork, - Info.data()); + Info.data()); } else { - HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), - work.data(), lwork, Info.data()); + HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), work.data(), + lwork, Info.data()); if (Info[0] < 0) return; @@ -92,7 +92,7 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau, work = Kokkos::View("geqrf work buffer", lwork); HostLapack::geqrf(m, n, A.data(), lda, Tau.data(), work.data(), - lwork, Info.data()); + lwork, Info.data()); } } @@ -121,8 +121,9 @@ void lapackGeqrfWrapper(const AViewType& A, const TauViewType& Tau, using TauViewType = \ Kokkos::View, \ Kokkos::MemoryTraits>; \ - using InfoViewType = Kokkos::View, \ - Kokkos::MemoryTraits>; \ + using InfoViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ \ static void geqrf(const EXECSPACE& /* space */, const AViewType& A, \ const TauViewType& Tau, const InfoViewType& Info) { \ @@ -269,7 +270,7 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA -#endif // AquiEEP +#endif // AquiEEP // CUSOLVER #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER @@ -278,19 +279,21 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template +template void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, - const TauViewType& Tau, const InfoViewType& Info) { + const TauViewType& Tau, const InfoViewType& Info) { using memory_space = typename AViewType::memory_space; - using Scalar = typename AViewType::non_const_value_type; + using Scalar = typename AViewType::non_const_value_type; using ALayout_t = typename AViewType::array_layout; - static_assert(std::is_same_v, - "KokkosLapack - cusolver geqrf: A needs to have a Kokkos::LayoutLeft"); + static_assert( + std::is_same_v, + "KokkosLapack - cusolver geqrf: A needs to 
have a Kokkos::LayoutLeft"); const int m = A.extent_int(0); const int n = A.extent_int(1); const int lda = A.stride(1); - int lwork = 0; + int lwork = 0; CudaLapackSingleton& s = CudaLapackSingleton::singleton(); KOKKOS_CUSOLVER_SAFE_CALL_IMPL( @@ -298,44 +301,46 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, if constexpr (std::is_same_v) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL( cusolverDnSgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); - Kokkos::View Workspace("cusolver sgeqrf workspace", lwork); + Kokkos::View Workspace("cusolver sgeqrf workspace", + lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgeqrf(s.handle, m, n, A.data(), - lda, Tau.data(), - Workspace.data(), lwork, Info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSgeqrf(s.handle, m, n, A.data(), lda, Tau.data(), + Workspace.data(), lwork, Info.data())); } if constexpr (std::is_same_v) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL( cusolverDnDgeqrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); - Kokkos::View Workspace("cusolver dgeqrf workspace", lwork); + Kokkos::View Workspace("cusolver dgeqrf workspace", + lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgeqrf(s.handle, m, n, A.data(), - lda, Tau.data(), - Workspace.data(), lwork, Info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnDgeqrf(s.handle, m, n, A.data(), lda, Tau.data(), + Workspace.data(), lwork, Info.data())); } if constexpr (std::is_same_v>) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf_bufferSize( s.handle, m, n, reinterpret_cast(A.data()), lda, &lwork)); - Kokkos::View Workspace("cusolver cgeqrf workspace", lwork); + Kokkos::View Workspace( + "cusolver cgeqrf workspace", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL( - cusolverDnCgeqrf(s.handle, m, n, reinterpret_cast(A.data()), lda, - reinterpret_cast(Tau.data()), - reinterpret_cast(Workspace.data()), - lwork, Info.data())); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf( + s.handle, m, n, reinterpret_cast(A.data()), lda, + 
reinterpret_cast(Tau.data()), + reinterpret_cast(Workspace.data()), lwork, Info.data())); } if constexpr (std::is_same_v>) { KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf_bufferSize( s.handle, m, n, reinterpret_cast(A.data()), lda, &lwork)); - Kokkos::View Workspace("cusolver zgeqrf workspace", - lwork); + Kokkos::View Workspace( + "cusolver zgeqrf workspace", lwork); KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf( s.handle, m, n, reinterpret_cast(A.data()), lda, reinterpret_cast(Tau.data()), - reinterpret_cast(Workspace.data()), - lwork, Info.data())); + reinterpret_cast(Workspace.data()), lwork, + Info.data())); } KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); } @@ -359,8 +364,7 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = Kokkos::View, \ @@ -386,17 +390,17 @@ void cusolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaSpace) + Kokkos::CudaSpace) KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaSpace) + Kokkos::CudaSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) KOKKOSLAPACK_GEQRF_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) KOKKOSLAPACK_GEQRF_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) + Kokkos::CudaUVMSpace) KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace) + Kokkos::CudaUVMSpace) #endif } // namespace Impl @@ -412,34 +416,35 @@ namespace KokkosLapack { namespace Impl { template -void rocsolverGeqrfWrapper(const ExecutionSpace& space, const 
AViewType& A, const TauViewType& Tau, - const InfoViewType& Info) { +void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, + const TauViewType& Tau, const InfoViewType& Info) { using Scalar = typename AViewType::non_const_value_type; using ALayout_t = typename AViewType::array_layout; - static_assert(std::is_same_v, - "KokkosLapack - rocsolver geqrf: A needs to have a Kokkos::LayoutLeft"); + static_assert( + std::is_same_v, + "KokkosLapack - rocsolver geqrf: A needs to have a Kokkos::LayoutLeft"); const rocblas_int m = static_cast(A.extent(0)); const rocblas_int n = static_cast(A.extent(1)); const rocblas_int lda = static_cast(A.stride(1)); - rocblas_status rc = rocblas_status_success; + rocblas_status rc = rocblas_status_success; KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); KOKKOS_ROCBLAS_SAFE_CALL_IMPL( rocblas_set_stream(s.handle, space.hip_stream())); if constexpr (std::is_same_v) { - rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgeqrf(s.handle, m, n, A.data(), - lda, Tau.data())); + rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocsolver_sgeqrf(s.handle, m, n, A.data(), lda, Tau.data())); } if constexpr (std::is_same_v) { - rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgeqrf(s.handle, m, n, A.data(), - lda, Tau.data())); + rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocsolver_dgeqrf(s.handle, m, n, A.data(), lda, Tau.data())); } if constexpr (std::is_same_v>) { rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf( - s.handle, m, n, reinterpret_cast(A.data()), - lda, reinterpret_cast(Tau.data()))); + s.handle, m, n, reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()))); } if constexpr (std::is_same_v>) { rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf( @@ -469,8 +474,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cons Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>>::value> { \ using AViewType = \ Kokkos::View, 
\ @@ -488,7 +492,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cons "KokkosLapack::geqrf[TPL_ROCSOLVER," #SCALAR "]"); \ geqrf_print_specialization(); \ \ - rocsolverGeqrfWrapper(space, A, Tau, Info); \ + rocsolverGeqrfWrapper(space, A, Tau, Info); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -496,9 +500,9 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, cons KOKKOSLAPACK_GEQRF_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) KOKKOSLAPACK_GEQRF_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPSpace) + Kokkos::HIPSpace) KOKKOSLAPACK_GEQRF_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPSpace) + Kokkos::HIPSpace) } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index 3aedb0b984..f619c8fba3 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -85,7 +85,7 @@ void getQR(int const m, int const n, template void impl_test_geqrf(int m, int n) { - using ViewTypeInfo = Kokkos::View; + using ViewTypeInfo = Kokkos::View; using execution_space = typename Device::execution_space; using ScalarA = typename ViewTypeA::value_type; // using ats = Kokkos::ArithTraits; @@ -97,15 +97,15 @@ void impl_test_geqrf(int m, int n) { int minMN(std::min(m, n)); // Create device views - ViewTypeA A ("A", m, n); - ViewTypeTau Tau ("Tau", minMN); + ViewTypeA A("A", m, n); + ViewTypeTau Tau("Tau", minMN); ViewTypeInfo Info("Info", 1); // Create host mirrors of device views. 
- typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); - typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A); - typename ViewTypeTau::HostMirror h_tau = Kokkos::create_mirror_view(Tau); - typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info); + typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); + typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A); + typename ViewTypeTau::HostMirror h_tau = Kokkos::create_mirror_view(Tau); + typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info); // Initialize data. if ((m == 3) && (n == 3)) { From 1608cf6c7723bf187b3a2a3d542e810e94237f2c Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Fri, 24 May 2024 04:16:27 -0600 Subject: [PATCH 19/27] Backup --- lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index f3d3be4506..fa42f81591 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -415,7 +415,7 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { -template + template void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau, const InfoViewType& Info) { using Scalar = typename AViewType::non_const_value_type; @@ -427,31 +427,30 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const rocblas_int m = static_cast(A.extent(0)); const rocblas_int n = static_cast(A.extent(1)); const rocblas_int lda = static_cast(A.stride(1)); - rocblas_status rc = rocblas_status_success; KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); KOKKOS_ROCBLAS_SAFE_CALL_IMPL( rocblas_set_stream(s.handle, space.hip_stream())); if constexpr 
(std::is_same_v) { - rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( rocsolver_sgeqrf(s.handle, m, n, A.data(), lda, Tau.data())); } if constexpr (std::is_same_v) { - rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( rocsolver_dgeqrf(s.handle, m, n, A.data(), lda, Tau.data())); } if constexpr (std::is_same_v>) { - rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf( + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgeqrf( s.handle, m, n, reinterpret_cast(A.data()), lda, reinterpret_cast(Tau.data()))); } if constexpr (std::is_same_v>) { - rc = KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf( + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgeqrf( s.handle, m, n, reinterpret_cast(A.data()), lda, reinterpret_cast(Tau.data()))); } - Info[0] = static_cast(rc); + Info[0] = 0; // success KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); } From df805105c7fa318f9a9db20cad8fbebcf0d57898 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Fri, 24 May 2024 04:18:06 -0600 Subject: [PATCH 20/27] Formatting --- lapack/tpls/KokkosLapack_Host_tpl.cpp | 21 +++++++++++---------- lapack/tpls/KokkosLapack_Host_tpl.hpp | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index 17bc2915a4..f72d781e5b 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -149,7 +149,7 @@ int HostLapack::trtri(const char uplo, const char diag, int n, } template <> void HostLapack::geqrf(int m, int n, float* a, int lda, float* tau, - float* work, int lwork, int *info) { + float* work, int lwork, int* info) { F77_FUNC_SGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); } @@ -180,7 +180,7 @@ int HostLapack::trtri(const char uplo, const char diag, int n, } template <> void HostLapack::geqrf(int m, int n, double* a, int lda, double* tau, - double* work, int lwork, int *info) { + double* work, int lwork, int* info) { 
F77_FUNC_DGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); } @@ -213,10 +213,11 @@ int HostLapack>::trtri(const char uplo, const char diag, return info; } template <> -void HostLapack>::geqrf(int m, int n, std::complex* a, - int lda, std::complex* tau, - std::complex* work, - int lwork, int *info) { +void HostLapack>::geqrf(int m, int n, + std::complex* a, int lda, + std::complex* tau, + std::complex* work, + int lwork, int* info) { F77_FUNC_CGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); } @@ -251,10 +252,10 @@ int HostLapack>::trtri(const char uplo, const char diag, } template <> void HostLapack>::geqrf(int m, int n, - std::complex* a, int lda, - std::complex* tau, - std::complex* work, - int lwork, int *info) { + std::complex* a, int lda, + std::complex* tau, + std::complex* work, + int lwork, int* info) { F77_FUNC_ZGEQRF(&m, &n, a, &lda, tau, work, &lwork, info); } diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp index 8797d2006c..23f6dbc3d6 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.hpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp @@ -43,7 +43,7 @@ struct HostLapack { int lda); static void geqrf(int m, int n, T *a, int lda, T *tau, T *work, int lwork, - int *info); + int *info); }; } // namespace Impl } // namespace KokkosLapack From 29472f54233fb941062393492fd0c17997fa0948 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Fri, 24 May 2024 04:19:07 -0600 Subject: [PATCH 21/27] Formatting --- lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index fa42f81591..7c54a358ff 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -415,7 +415,8 @@ KOKKOSLAPACK_GEQRF_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, namespace KokkosLapack { namespace Impl { - template +template void 
rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, const TauViewType& Tau, const InfoViewType& Info) { using Scalar = typename AViewType::non_const_value_type; @@ -450,7 +451,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, s.handle, m, n, reinterpret_cast(A.data()), lda, reinterpret_cast(Tau.data()))); } - Info[0] = 0; // success + Info[0] = 0; // success KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); } From 746fa3c1050dc997589a6acabe5ea73c5405419c Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Fri, 24 May 2024 12:40:10 -0600 Subject: [PATCH 22/27] Backup --- lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index 7c54a358ff..d9f88549aa 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -451,7 +451,7 @@ void rocsolverGeqrfWrapper(const ExecutionSpace& space, const AViewType& A, s.handle, m, n, reinterpret_cast(A.data()), lda, reinterpret_cast(Tau.data()))); } - Info[0] = 0; // success + Kokkos::deep_copy(Info, 0); // Success KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); } From 8f0a9079e4daf6100b69051a4a4db706e3b3c577 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Sat, 25 May 2024 17:00:11 -0600 Subject: [PATCH 23/27] Backup --- lapack/src/KokkosLapack_geqrf.hpp | 44 +-- .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 4 +- lapack/unit_test/Test_Lapack_geqrf.hpp | 370 ++++++++++++++---- 3 files changed, 311 insertions(+), 107 deletions(-) diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index 4c920e9a74..a81ae2a436 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -34,7 +34,7 @@ namespace KokkosLapack { /// /// \tparam ExecutionSpace The space where the 
kernel will run. /// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. -/// \tparam TArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam TauArray Type of array Tau, as a 1-D Kokkos::View. /// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. /// /// \param space [in] Execution space instance used to specified how to execute @@ -48,10 +48,9 @@ namespace KokkosLapack { /// is represented as a product of elementary reflectors /// Q = H(1) H(2) . . . H(k), where k = min(M,N). /// Each H(i) has the form -/// H(i) = I - Tau * v * v**H -/// where tau is a complex scalar, and v is a complex vector -/// with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on -/// exit in A(i+1:M,i), and tau in Tau(i). +/// H(i) = I - Tau(i) * v * v**H, +/// where v is a vector with v(1:i-1) = 0 and v(i) = 1; +/// v(i+1:M) is stored on exit in A(i+1:M,i). /// \param Tau [out] One-dimensional array of size min(M,N) that contains the /// scalar factors of the elementary reflectors. /// \param Info [out] One-dimensional array of integers and of size 1: @@ -59,8 +58,8 @@ namespace KokkosLapack { /// Info[0] < 0: if equal to '-i', the i-th argument had an /// illegal value /// -template -void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, +template +void geqrf(const ExecutionSpace& space, const AMatrix& A, const TauArray& Tau, const InfoArray& Info) { // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and // rocSOLVER TPLs. 
@@ -73,21 +72,21 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, typename AMatrix::memory_space>::accessible); static_assert( Kokkos::SpaceAccessibility::accessible); + typename TauArray::memory_space>::accessible); static_assert( Kokkos::SpaceAccessibility::accessible); static_assert(Kokkos::is_view::value, "KokkosLapack::geqrf: A must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, + static_assert(Kokkos::is_view::value, "KokkosLapack::geqrf: Tau must be Kokkos::View."); static_assert(Kokkos::is_view::value, "KokkosLapack::geqrf: Info must be Kokkos::View."); static_assert(static_cast(AMatrix::rank) == 2, "KokkosLapack::geqrf: A must have rank 2."); - static_assert(static_cast(TArray::rank) == 1, + static_assert(static_cast(TauArray::rank) == 1, "KokkosLapack::geqrf: Tau must have rank 1."); static_assert(static_cast(InfoArray::rank) == 1, "KokkosLapack::geqrf: Info must have rank 1."); @@ -118,9 +117,9 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, using AMatrix_Internal = Kokkos::View< typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, typename AMatrix::device_type, Kokkos::MemoryTraits>; - using TArray_Internal = - Kokkos::View>; using InfoArray_Internal = Kokkos::View>; - AMatrix_Internal A_i = A; - TArray_Internal Tau_i = Tau; + AMatrix_Internal A_i = A; + TauArray_Internal Tau_i = Tau; InfoArray_Internal Info_i = Info; - KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, Info_i); } @@ -140,7 +139,7 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, /// \brief Computes a QR factorization of a matrix A /// /// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. -/// \tparam TArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam TauArray Type of array Tau, as a 1-D Kokkos::View. /// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. /// /// \param A [in,out] On entry, the M-by-N matrix to be factorized. 
@@ -152,10 +151,9 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, /// is represented as a product of elementary reflectors /// Q = H(1) H(2) . . . H(k), where k = min(M,N). /// Each H(i) has the form -/// H(i) = I - Tau * v * v**H -/// where tau is a complex scalar, and v is a complex vector -/// with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on -/// exit in A(i+1:M,i), and tau in Tau(i). +/// H(i) = I - Tau(i) * v * v**H, +/// where v is a vector with v(1:i-1) = 0 and v(i) = 1; +/// v(i+1:M) is stored on exit in A(i+1:M,i). /// \param Tau [out] One-dimensional array of size min(M,N) that contains the /// scalar factors of the elementary reflectors. /// \param Info [out] One-dimensional array of integers and of size 1: @@ -163,8 +161,8 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, /// Info[0] < 0: if equal to '-i', the i-th argument had an /// illegal value /// -template -void geqrf(const AMatrix& A, const TArray& Tau, const InfoArray& Info) { +template +void geqrf(const AMatrix& A, const TauArray& Tau, const InfoArray& Info) { typename AMatrix::execution_space space{}; geqrf(space, A, Tau, Info); } diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index d9f88549aa..c7630cc783 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -172,7 +172,7 @@ KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK -#if 0 // AquiEEP +#if 0 // TO DO // MAGMA #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA @@ -270,7 +270,7 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA -#endif // AquiEEP +#endif // TO DO // CUSOLVER #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp 
b/lapack/unit_test/Test_Lapack_geqrf.hpp index f619c8fba3..2a4533b8bc 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -24,63 +24,127 @@ (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ defined(TEST_THREADS_LAPACK_CPP))) -// AquiEEP - #include #include #include +#include +#include #include -//#include -//#include #include namespace Test { template void getQR(int const m, int const n, - typename ViewTypeA::HostMirror const& // h_A - , - typename ViewTypeTau::HostMirror const& // h_tau - , - typename ViewTypeA::HostMirror& // h_Q - , + typename ViewTypeA::HostMirror const& h_A, + typename ViewTypeTau::HostMirror const& h_tau, + typename ViewTypeA::HostMirror& h_Q, typename ViewTypeA::HostMirror& h_R, - typename ViewTypeA::HostMirror& // h_QR + typename ViewTypeA::HostMirror& h_QR ) { using ScalarA = typename ViewTypeA::value_type; + // Populate h_R for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - if constexpr (Kokkos::ArithTraits::is_complex) { - h_R(i, j).real() = 0.; - h_R(i, j).imag() = 0.; - } else { - h_R(i, j) = 0.; + if ((i <= j) && (i < n)) { + h_R(i,j) = h_A(i,j); + } + else { + h_R(i,j) = Kokkos::ArithTraits::zero(); } } } + // Instantiate the identity matrix ViewTypeA I("I", m, m); typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I); + Kokkos::deep_copy(h_I,Kokkos::ArithTraits::zero()); for (int i(0); i < m; ++i) { - for (int j(0); j < m; ++j) { - if constexpr (Kokkos::ArithTraits::is_complex) { - if (i == j) { - h_I(i, j).real() = 1.; - } else { - h_I(i, j).real() = 0.; - } - h_I(i, j).imag() = 0.; - } else { - if (i == j) { - h_I(i, j) = 1.; - } else { - h_I(i, j) = 0.; - } - } + if constexpr (Kokkos::ArithTraits::is_complex) { + h_I(i,i).real() = 1.; + } else { + h_I(i,i) = 1.; } } + + // Populate h_Q + int minMN(std::min(m, n)); + ViewTypeTau v("v", m); + typename ViewTypeTau::HostMirror h_v = Kokkos::create_mirror_view(v); + + ViewTypeA Qk("Qk", m, m); 
+ typename ViewTypeA::HostMirror h_Qk = Kokkos::create_mirror_view(Qk); + + ViewTypeA auxM("auxM", m, m); + typename ViewTypeA::HostMirror h_auxM = Kokkos::create_mirror_view(auxM); + + // Q = H(0) H(1) . . . H(min(M,N)-1), where for k=0,1,...,min(m,n)-1: + // H(k) = I - Tau(k) * v * v**H, and + // v is a vector of size m with: + // v(0:k-1) = 0, + // v(k) = 1, + // v(k+1:m-1) = A(k+1:m-1,k). + for (int k(0); k < minMN; ++k) { + Kokkos::deep_copy(h_v,Kokkos::ArithTraits::zero()); + h_v[k] = 1.; + for (int index(k+1); index < minMN; ++index) { + h_v[index] = h_A(index,k); + } + + // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}. + // void ger( const char trans[] + // , const typename AViewType::const_value_type & alpha + // , const XViewType & x + // , const YViewType & y + // , const AViewType & A + // ); + Kokkos::deep_copy(h_Qk, h_I); + KokkosBlas::ger( "H" + , -h_tau[k] + , h_v + , h_v + , h_Qk + ); + + // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B). + // void gemm( const char transA[] + // , const char transB[] + // , typename AViewType::const_value_type & alpha + // , const AViewType & A + // , const BViewType & B + // , typename CViewType::const_value_type & beta + // , const CViewType & C + // ); + if (k == 0) { + Kokkos::deep_copy(h_Q, h_Qk); + } + else { + Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits::zero()); + KokkosBlas::gemm( "N" + , "N" + , 1. + , h_Q + , h_Qk + , 0. + , h_auxM + ); + Kokkos::deep_copy(h_Q, h_auxM); + } + } // for k + + Kokkos::deep_copy(h_QR, Kokkos::ArithTraits::zero()); + KokkosBlas::gemm( "N" + , "N" + , 1. + , h_Q + , h_R + , 0. 
+ , h_QR + ); + + // AquiEEP: test Q^H Q = I } template @@ -88,7 +152,7 @@ void impl_test_geqrf(int m, int n) { using ViewTypeInfo = Kokkos::View; using execution_space = typename Device::execution_space; using ScalarA = typename ViewTypeA::value_type; - // using ats = Kokkos::ArithTraits; + using ats = Kokkos::ArithTraits; execution_space space{}; @@ -97,13 +161,14 @@ void impl_test_geqrf(int m, int n) { int minMN(std::min(m, n)); // Create device views - ViewTypeA A("A", m, n); - ViewTypeTau Tau("Tau", minMN); - ViewTypeInfo Info("Info", 1); + ViewTypeA A ("A", m, n); + ViewTypeA Aorig("Aorig", m, n); + ViewTypeTau Tau ("Tau", minMN); + ViewTypeInfo Info ("Info", 1); // Create host mirrors of device views. typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); - typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A); + typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(Aorig); typename ViewTypeTau::HostMirror h_tau = Kokkos::create_mirror_view(Tau); typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info); @@ -124,7 +189,7 @@ void impl_test_geqrf(int m, int n) { for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - h_A(i, j).imag() = 0.; + h_A(i,j).imag() = 0.; } } } else { @@ -151,10 +216,10 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_Aorig, h_A); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#ifdef HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - std::cout << "A(" << i << "," << j << ") = " << h_A(i, j) << std::endl; + std::cout << "Aorig(" << i << "," << j << ") = " << h_A(i,j) << std::endl; } } #endif @@ -180,18 +245,86 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_tau, Tau); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "info[0] = " << h_info[0] << std::endl; for (int i(0); i < minMN; ++i) { for (int j(0); j < n; ++j) { - std::cout << "R(" << i << "," << j << ") = 
" << h_A(i, j) << std::endl; + std::cout << "Aoutput(" << i << "," << j << ") = " << std::setprecision(16) << h_A(i,j) << std::endl; } } for (int i(0); i < minMN; ++i) { - std::cout << "tau(" << i << ") = " << h_tau[i] << std::endl; + std::cout << "tau(" << i << ") = " << h_tau[i] << std::setprecision(16) << std::endl; } #endif + const typename Kokkos::ArithTraits::mag_type absTol(1.e-8); + + if ((m == 3) && (n == 3)) { + std::vector> refMatrix(m); + for (int i(0); i < m; ++i) { + refMatrix[i].resize(n,Kokkos::ArithTraits::zero()); + } + + std::vector refTau(m,Kokkos::ArithTraits::zero()); + + if constexpr (Kokkos::ArithTraits::is_complex) { + refMatrix[0][0].real() = -14.; + refMatrix[0][1].real() = -21.; + refMatrix[0][2].real() = 14.; + + refMatrix[1][0].real() = 0.2307692307692308; + refMatrix[1][1].real() = -175.; + refMatrix[1][2].real() = 70.; + + refMatrix[2][0].real() = -0.1538461538461539; + refMatrix[2][1].real() = 1./18.; + refMatrix[2][2].real() = -35.; + + refTau[0].real() = 1.857142857142857; + refTau[1].real() = 1.993846153846154; + refTau[2].real() = 0.; + } + else { + refMatrix[0][0] = -14.; + refMatrix[0][1] = -21.; + refMatrix[0][2] = 14.; + + refMatrix[1][0] = 0.2307692307692308; + refMatrix[1][1] = -175.; + refMatrix[1][2] = 70.; + + refMatrix[2][0] = -0.1538461538461539; + refMatrix[2][1] = 1./18.; + refMatrix[2][2] = -35.; + + refTau[0] = 1.857142857142857; + refTau[1] = 1.993846153846154; + refTau[2] = 0.; + } + + { + bool test_flag_A = true; + for (int i(0); (i < m) && test_flag_A; ++i) { + for (int j(0); (j < n) && test_flag_A; ++j) { + if (ats::abs(h_A(i,j) - refMatrix[i][j]) > absTol) { + test_flag_A = false; + } + } + } + ASSERT_EQ(test_flag_A, true); + } + + { + bool test_flag_tau = true; + for (int i(0); (i < m) && test_flag_tau; ++i) { + if (ats::abs(h_tau[i] - refTau[i]) > absTol) { + test_flag_tau = false; + } + } + ASSERT_EQ(test_flag_tau, true); + } + } + ViewTypeA Q("Q", m, m); ViewTypeA R("R", m, n); ViewTypeA QR("QR", m, n); 
@@ -202,65 +335,135 @@ void impl_test_geqrf(int m, int n) { getQR(m, n, h_A, h_tau, h_Q, h_R, h_QR); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#ifdef HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < m; ++j) { - std::cout << "Q(" << i << "," << j << ") = " << h_Q(i, j) << std::endl; + std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; } } for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - std::cout << "R(" << i << "," << j << ") = " << h_R(i, j) << std::endl; + std::cout << "R(" << i << "," << j << ") = " << h_R(i,j) << std::endl; } } for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - std::cout << "QR(" << i << "," << j << ") = " << h_QR(i, j) << std::endl; + std::cout << "QR(" << i << "," << j << ") = " << h_QR(i,j) << std::endl; } } #endif if ((m == 3) && (n == 3)) { - } + std::vector> refQ(m); + for (int i(0); i < m; ++i) { + refQ[i].resize(n,Kokkos::ArithTraits::zero()); + } - // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B). - // void gemm( const execution_space & space - // , const char transA[] - // , const char transB[] - // , typename AViewType::const_value_type & alpha - // , const AViewType & A - // , const BViewType & B - // , typename CViewType::const_value_type & beta - // , const CViewType & C - // ); - - // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}. 
- // void ger( const ExecutionSpace & space - // , const char trans[] - // , const typename AViewType::const_value_type & alpha - // , const XViewType & x - // , const YViewType & y - // , const AViewType & A - // ); - - // Checking vs ref on CPU, this eps is about 10^-9 - // typedef typename ats::mag_type mag_type; - // const mag_type eps = 3.0e7 * ats::epsilon(); - bool test_flag = true; - for (int i = 0; i < n; i++) { -#if 0 - if (ats::abs(h_B(i) - h_X0(i)) > eps) { - test_flag = false; - printf( - " Error %d, pivot %c, padding %c: result( %.15lf ) !=" - "solution( %.15lf ) at (%d), error=%.15e, eps=%.15e\n", - N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), int(i), - ats::abs(h_B(i) - h_X0(i)), eps); - break; + std::vector> refR(m); + for (int i(0); i < m; ++i) { + refR[i].resize(n,Kokkos::ArithTraits::zero()); } + +#if 0 + Q = [ -6/7 69/175 58/175 + -3/7 -158/175 -6/175 + 2/7 -6/35 33/35 ] + + R = [ -14 -21 14 + 0 -175 70 + 0 0 -35 ] #endif + + if constexpr (Kokkos::ArithTraits::is_complex) { + refQ[0][0].real() = -6./7.; + refQ[0][1].real() = 69./175.; + refQ[0][2].real() = 58./175.; + + refQ[1][0].real() = -3./7.; + refQ[1][1].real() = -158./175.; + refQ[1][2].real() = -6./175.; + + refQ[2][0].real() = 2./7.; + refQ[2][1].real() = -6./35.; + refQ[2][2].real() = 33./35.; + + refR[0][0].real() = -14.; + refR[0][1].real() = -21.; + refR[0][2].real() = 14.; + + refR[1][1].real() = -175.; + refR[1][2].real() = 70.; + + refR[2][2].real() = -35.; + } + else { + refQ[0][0] = -6./7.; + refQ[0][1] = 69./175.; + refQ[0][2] = 58./175.; + + refQ[1][0] = -3./7.; + refQ[1][1] = -158./175.; + refQ[1][2] = -6./175.; + + refQ[2][0] = 2./7.; + refQ[2][1] = -6./35.; + refQ[2][2] = 33./35.; + + refR[0][0] = -14.; + refR[0][1] = -21.; + refR[0][2] = 14.; + + refR[1][1] = -175.; + refR[1][2] = 70.; + + refR[2][2] = -35.; + } + + { + bool test_flag_Q = true; + for (int i(0); (i < m) && test_flag_Q; ++i) { + for (int j(0); (j < n) && test_flag_Q; ++j) { + if 
(ats::abs(h_Q(i,j) - refQ[i][j]) > absTol) { + test_flag_Q = false; + } + } + } + ASSERT_EQ(test_flag_Q, true); + } + + { + bool test_flag_R = true; + for (int i(0); (i < m) && test_flag_R; ++i) { + for (int j(0); (j < n) && test_flag_R; ++j) { + if (ats::abs(h_R(i,j) - refR[i][j]) > absTol) { + test_flag_R = false; + } + } + } + ASSERT_EQ(test_flag_R, true); + } + } + + { + bool test_flag_QR = true; + for (int i(0); (i < m) && test_flag_QR; ++i) { + for (int j(0); (j < n) && test_flag_QR; ++j) { + if (ats::abs(h_QR(i,j) - h_Aorig(i,j)) > absTol) { + std::cout << "m = " << m + << ", n = " << n + << ", i = " << i + << ", j = " << j + << ", h_Aorig(i,j) = " << std::setprecision(16) << h_Aorig(i,j) + << ", h_QR(i,j) = " << std::setprecision(16) << h_QR(i,j) + << ", |diff| = " << std::setprecision(16) << ats::abs(h_QR(i,j) - h_Aorig(i,j)) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_QR = false; + } + } + } + ASSERT_EQ(test_flag_QR, true); } - ASSERT_EQ(test_flag, true); } } // namespace Test @@ -274,6 +477,9 @@ void test_geqrf() { using view_type_tau_ll = Kokkos::View; Test::impl_test_geqrf(3, 3); + Test::impl_test_geqrf(100, 100); + //Test::impl_test_geqrf(100, 70); // AquiEEP + Test::impl_test_geqrf(70, 100); #endif } From fad256076753c6f53165359d83b68ddc5e49029e Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Sat, 25 May 2024 18:01:51 -0600 Subject: [PATCH 24/27] Backup --- lapack/unit_test/Test_Lapack_geqrf.hpp | 138 +++++++++++++++++++++---- 1 file changed, 119 insertions(+), 19 deletions(-) diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index 2a4533b8bc..240fde00bc 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -48,7 +48,7 @@ void getQR(int const m, int const n, // Populate h_R for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - if ((i <= j) && (i < n)) { + if (i <= j) { // && (i < n)) { // Aqui h_R(i,j) = 
h_A(i,j); } else { @@ -57,7 +57,7 @@ void getQR(int const m, int const n, } } - // Instantiate the identity matrix + // Instantiate the m x m identity matrix ViewTypeA I("I", m, m); typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I); Kokkos::deep_copy(h_I,Kokkos::ArithTraits::zero()); @@ -69,7 +69,7 @@ void getQR(int const m, int const n, } } - // Populate h_Q + // Compute h_Q int minMN(std::min(m, n)); ViewTypeTau v("v", m); typename ViewTypeTau::HostMirror h_v = Kokkos::create_mirror_view(v); @@ -92,6 +92,11 @@ void getQR(int const m, int const n, for (int index(k+1); index < minMN; ++index) { h_v[index] = h_A(index,k); } +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG + for (int i(0); i < m; ++i) { + std::cout << "k = " << k << ", h_v[" << i << "] = " << std::setprecision(16) << h_v[i] << std::endl; + } +#endif // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}. // void ger( const char trans[] @@ -108,6 +113,14 @@ void getQR(int const m, int const n, , h_Qk ); +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG + for (int i(0); i < m; ++i) { + for (int j(0); j < m; ++j) { + std::cout << "k = " << k << ", hQk(" << i << "," << j << ") = " << h_Qk(i,j) << std::endl; + } + } +#endif + // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B). // void gemm( const char transA[] // , const char transB[] @@ -132,8 +145,17 @@ void getQR(int const m, int const n, ); Kokkos::deep_copy(h_Q, h_auxM); } + +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG + for (int i(0); i < m; ++i) { + for (int j(0); j < m; ++j) { + std::cout << "k = " << k << ", hQ(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; + } + } +#endif } // for k + // Compute h_QR Kokkos::deep_copy(h_QR, Kokkos::ArithTraits::zero()); KokkosBlas::gemm( "N" , "N" @@ -144,7 +166,43 @@ void getQR(int const m, int const n, , h_QR ); - // AquiEEP: test Q^H Q = I + // Check that Q^H Q = I + { + Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits::zero()); + KokkosBlas::gemm( "C" + , "N" + , 1. + , h_Q + , h_Q + , 0. 
+ , h_auxM + ); + + typename Kokkos::ArithTraits::mag_type absTol(1.e-8); + if constexpr (std::is_same_v::mag_type,float>) { + absTol = 5.e-5; + } + + using ats = Kokkos::ArithTraits; + bool test_flag_QHQ = true; + for (int i(0); (i < m) && test_flag_QHQ; ++i) { + for (int j(0); (j < m) && test_flag_QHQ; ++j) { + if (ats::abs(h_auxM(i,j) - h_I(i,j)) > absTol) { + std::cout << "m = " << m + << ", n = " << n + << ", i = " << i + << ", j = " << j + << ", h_auxM(i,j) = " << std::setprecision(16) << h_auxM(i,j) + << ", h_I(i,j) = " << std::setprecision(16) << h_I(i,j) + << ", |diff| = " << std::setprecision(16) << ats::abs(h_auxM(i,j) - h_I(i,j)) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_QHQ = false; + } + } + } + ASSERT_EQ(test_flag_QHQ, true); + } } template @@ -216,7 +274,7 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_Aorig, h_A); -#ifdef HAVE_KOKKOSKERNELS_DEBUG +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { std::cout << "Aorig(" << i << "," << j << ") = " << h_A(i,j) << std::endl; @@ -245,7 +303,12 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_tau, Tau); -#ifdef HAVE_KOKKOSKERNELS_DEBUG + typename Kokkos::ArithTraits::mag_type absTol(1.e-8); + if constexpr (std::is_same_v::mag_type,float>) { + absTol = 5.e-5; + } + +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG std::cout << "info[0] = " << h_info[0] << std::endl; for (int i(0); i < minMN; ++i) { for (int j(0); j < n; ++j) { @@ -255,9 +318,9 @@ void impl_test_geqrf(int m, int n) { for (int i(0); i < minMN; ++i) { std::cout << "tau(" << i << ") = " << h_tau[i] << std::setprecision(16) << std::endl; } + std::cout << "absTol = " << absTol << std::endl; #endif - - const typename Kokkos::ArithTraits::mag_type absTol(1.e-8); + std::cout << "absTol = " << absTol << std::endl; // Aqui if ((m == 3) && (n == 3)) { std::vector> refMatrix(m); @@ -307,6 +370,15 @@ void impl_test_geqrf(int m, 
int n) { for (int i(0); (i < m) && test_flag_A; ++i) { for (int j(0); (j < n) && test_flag_A; ++j) { if (ats::abs(h_A(i,j) - refMatrix[i][j]) > absTol) { + std::cout << "m = " << m + << ", n = " << n + << ", i = " << i + << ", j = " << j + << ", h_Aoutput(i,j) = " << std::setprecision(16) << h_A(i,j) + << ", refMatrix(i,j) = " << std::setprecision(16) << refMatrix[i][j] + << ", |diff| = " << std::setprecision(16) << ats::abs(h_A(i,j) - refMatrix[i][j]) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; test_flag_A = false; } } @@ -318,6 +390,14 @@ void impl_test_geqrf(int m, int n) { bool test_flag_tau = true; for (int i(0); (i < m) && test_flag_tau; ++i) { if (ats::abs(h_tau[i] - refTau[i]) > absTol) { + std::cout << "m = " << m + << ", n = " << n + << ", i = " << i + << ", h_tau(i,j) = " << std::setprecision(16) << h_tau[i] + << ", refTau(i,j) = " << std::setprecision(16) << refTau[i] + << ", |diff| = " << std::setprecision(16) << ats::abs(h_tau[i] - refTau[i]) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; test_flag_tau = false; } } @@ -335,7 +415,7 @@ void impl_test_geqrf(int m, int n) { getQR(m, n, h_A, h_tau, h_Q, h_R, h_QR); -#ifdef HAVE_KOKKOSKERNELS_DEBUG +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < m; ++j) { std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; @@ -367,11 +447,11 @@ void impl_test_geqrf(int m, int n) { #if 0 Q = [ -6/7 69/175 58/175 -3/7 -158/175 -6/175 - 2/7 -6/35 33/35 ] + 2/7 -6/35 33/35 ] - R = [ -14 -21 14 - 0 -175 70 - 0 0 -35 ] + R = [ -14 -21 14 + 0 -175 70 + 0 0 -35 ] #endif if constexpr (Kokkos::ArithTraits::is_complex) { @@ -424,6 +504,15 @@ void impl_test_geqrf(int m, int n) { for (int i(0); (i < m) && test_flag_Q; ++i) { for (int j(0); (j < n) && test_flag_Q; ++j) { if (ats::abs(h_Q(i,j) - refQ[i][j]) > absTol) { + std::cout << "m = " << m + << ", n = " << n + << ", i = " << i + << ", j = " << j + << ", h_Q(i,j) = " << 
std::setprecision(16) << h_Q(i,j) + << ", refQ(i,j) = " << std::setprecision(16) << refQ[i][j] + << ", |diff| = " << std::setprecision(16) << ats::abs(h_Q(i,j) - refQ[i][j]) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; test_flag_Q = false; } } @@ -436,6 +525,15 @@ void impl_test_geqrf(int m, int n) { for (int i(0); (i < m) && test_flag_R; ++i) { for (int j(0); (j < n) && test_flag_R; ++j) { if (ats::abs(h_R(i,j) - refR[i][j]) > absTol) { + std::cout << "m = " << m + << ", n = " << n + << ", i = " << i + << ", j = " << j + << ", h_R(i,j) = " << std::setprecision(16) << h_R(i,j) + << ", refR(i,j) = " << std::setprecision(16) << refR[i][j] + << ", |diff| = " << std::setprecision(16) << ats::abs(h_R(i,j) - refR[i][j]) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; test_flag_R = false; } } @@ -444,6 +542,7 @@ void impl_test_geqrf(int m, int n) { } } + // Check that A = QR { bool test_flag_QR = true; for (int i(0); (i < m) && test_flag_QR; ++i) { @@ -476,10 +575,11 @@ void test_geqrf() { using view_type_a_ll = Kokkos::View; using view_type_tau_ll = Kokkos::View; - Test::impl_test_geqrf(3, 3); - Test::impl_test_geqrf(100, 100); + //Test::impl_test_geqrf(3, 3); + //Test::impl_test_geqrf(100, 100); + Test::impl_test_geqrf(4, 3); // AquiEEP //Test::impl_test_geqrf(100, 70); // AquiEEP - Test::impl_test_geqrf(70, 100); + //Test::impl_test_geqrf(70, 100); #endif } @@ -488,7 +588,7 @@ void test_geqrf() { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, geqrf_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_float"); - test_geqrf(); + //test_geqrf(); Kokkos::Profiling::popRegion(); } #endif @@ -508,7 +608,7 @@ TEST_F(TestCategory, geqrf_double) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, geqrf_complex_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double"); - test_geqrf, TestDevice>(); + //test_geqrf, TestDevice>(); 
Kokkos::Profiling::popRegion(); } #endif @@ -518,7 +618,7 @@ TEST_F(TestCategory, geqrf_complex_double) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, geqrf_complex_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float"); - test_geqrf, TestDevice>(); + //test_geqrf, TestDevice>(); Kokkos::Profiling::popRegion(); } #endif From 51a27b2240fd1051734c998029686fa838435857 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Sat, 25 May 2024 18:32:10 -0600 Subject: [PATCH 25/27] Backup --- lapack/unit_test/Test_Lapack_geqrf.hpp | 136 ++++++++++++++++--------- 1 file changed, 89 insertions(+), 47 deletions(-) diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index 240fde00bc..d3c80b122c 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -45,10 +45,12 @@ void getQR(int const m, int const n, ) { using ScalarA = typename ViewTypeA::value_type; + // ******************************************************************** // Populate h_R + // ******************************************************************** for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - if (i <= j) { // && (i < n)) { // Aqui + if (i <= j) { h_R(i,j) = h_A(i,j); } else { @@ -57,7 +59,9 @@ void getQR(int const m, int const n, } } - // Instantiate the m x m identity matrix + // ******************************************************************** + // Instantiate the m x m identity matrix h_I + // ******************************************************************** ViewTypeA I("I", m, m); typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I); Kokkos::deep_copy(h_I,Kokkos::ArithTraits::zero()); @@ -69,7 +73,9 @@ void getQR(int const m, int const n, } } + // ******************************************************************** // Compute h_Q + // ******************************************************************** int minMN(std::min(m, n)); ViewTypeTau 
v("v", m); typename ViewTypeTau::HostMirror h_v = Kokkos::create_mirror_view(v); @@ -89,10 +95,10 @@ void getQR(int const m, int const n, for (int k(0); k < minMN; ++k) { Kokkos::deep_copy(h_v,Kokkos::ArithTraits::zero()); h_v[k] = 1.; - for (int index(k+1); index < minMN; ++index) { + for (int index(k+1); index < m; ++index) { h_v[index] = h_A(index,k); } -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { std::cout << "k = " << k << ", h_v[" << i << "] = " << std::setprecision(16) << h_v[i] << std::endl; } @@ -113,7 +119,7 @@ void getQR(int const m, int const n, , h_Qk ); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < m; ++j) { std::cout << "k = " << k << ", hQk(" << i << "," << j << ") = " << h_Qk(i,j) << std::endl; @@ -146,7 +152,7 @@ void getQR(int const m, int const n, Kokkos::deep_copy(h_Q, h_auxM); } -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < m; ++j) { std::cout << "k = " << k << ", hQ(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; @@ -155,18 +161,9 @@ void getQR(int const m, int const n, #endif } // for k - // Compute h_QR - Kokkos::deep_copy(h_QR, Kokkos::ArithTraits::zero()); - KokkosBlas::gemm( "N" - , "N" - , 1. - , h_Q - , h_R - , 0. 
- , h_QR - ); - + // ******************************************************************** // Check that Q^H Q = I + // ******************************************************************** { Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits::zero()); KokkosBlas::gemm( "C" @@ -188,7 +185,8 @@ void getQR(int const m, int const n, for (int i(0); (i < m) && test_flag_QHQ; ++i) { for (int j(0); (j < m) && test_flag_QHQ; ++j) { if (ats::abs(h_auxM(i,j) - h_I(i,j)) > absTol) { - std::cout << "m = " << m + std::cout << "QHQ checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j @@ -203,6 +201,19 @@ void getQR(int const m, int const n, } ASSERT_EQ(test_flag_QHQ, true); } + + // ******************************************************************** + // Compute h_QR + // ******************************************************************** + Kokkos::deep_copy(h_QR, Kokkos::ArithTraits::zero()); + KokkosBlas::gemm( "N" + , "N" + , 1. + , h_Q + , h_R + , 0. + , h_QR + ); } template @@ -212,25 +223,29 @@ void impl_test_geqrf(int m, int n) { using ScalarA = typename ViewTypeA::value_type; using ats = Kokkos::ArithTraits; - execution_space space{}; - Kokkos::Random_XorShift64_Pool rand_pool(13718); int minMN(std::min(m, n)); + // ******************************************************************** // Create device views + // ******************************************************************** ViewTypeA A ("A", m, n); ViewTypeA Aorig("Aorig", m, n); ViewTypeTau Tau ("Tau", minMN); ViewTypeInfo Info ("Info", 1); - // Create host mirrors of device views. 
+ // ******************************************************************** + // Create host mirrors of device views + // ******************************************************************** typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(Aorig); typename ViewTypeTau::HostMirror h_tau = Kokkos::create_mirror_view(Tau); typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info); - // Initialize data. + // ******************************************************************** + // Initialize data + // ******************************************************************** if ((m == 3) && (n == 3)) { if constexpr (Kokkos::ArithTraits::is_complex) { h_A(0, 0).real() = 12.; @@ -274,7 +289,7 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_Aorig, h_A); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { std::cout << "Aorig(" << i << "," << j << ") = " << h_A(i,j) << std::endl; @@ -284,8 +299,11 @@ void impl_test_geqrf(int m, int n) { Kokkos::fence(); + // ******************************************************************** // Perform the QR factorization + // ******************************************************************** try { + execution_space space{}; KokkosLapack::geqrf(space, A, Tau, Info); } catch (const std::runtime_error& e) { std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'" @@ -299,7 +317,9 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_info, Info); EXPECT_EQ(h_info[0], 0) << "Failed geqrf() test: Info[0] = " << h_info[0]; + // ******************************************************************** // Get the results + // ******************************************************************** Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_tau, Tau); @@ -308,7 +328,7 @@ void impl_test_geqrf(int m, int n) { absTol = 5.e-5; } -#if 1 // 
def HAVE_KOKKOSKERNELS_DEBUG +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG std::cout << "info[0] = " << h_info[0] << std::endl; for (int i(0); i < minMN; ++i) { for (int j(0); j < n; ++j) { @@ -320,8 +340,10 @@ void impl_test_geqrf(int m, int n) { } std::cout << "absTol = " << absTol << std::endl; #endif - std::cout << "absTol = " << absTol << std::endl; // Aqui + // ******************************************************************** + // Check outputs h_A and h_tau + // ******************************************************************** if ((m == 3) && (n == 3)) { std::vector> refMatrix(m); for (int i(0); i < m; ++i) { @@ -370,7 +392,8 @@ void impl_test_geqrf(int m, int n) { for (int i(0); (i < m) && test_flag_A; ++i) { for (int j(0); (j < n) && test_flag_A; ++j) { if (ats::abs(h_A(i,j) - refMatrix[i][j]) > absTol) { - std::cout << "m = " << m + std::cout << "h_Aoutput checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j @@ -390,7 +413,8 @@ void impl_test_geqrf(int m, int n) { bool test_flag_tau = true; for (int i(0); (i < m) && test_flag_tau; ++i) { if (ats::abs(h_tau[i] - refTau[i]) > absTol) { - std::cout << "m = " << m + std::cout << "tau checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", h_tau(i,j) = " << std::setprecision(16) << h_tau[i] @@ -405,6 +429,9 @@ void impl_test_geqrf(int m, int n) { } } + // ******************************************************************** + // Compute Q, R, and QR + // ******************************************************************** ViewTypeA Q("Q", m, m); ViewTypeA R("R", m, n); ViewTypeA QR("QR", m, n); @@ -415,7 +442,7 @@ void impl_test_geqrf(int m, int n) { getQR(m, n, h_A, h_tau, h_Q, h_R, h_QR); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < m; ++j) { std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; @@ -433,6 +460,9 @@ void impl_test_geqrf(int m, int n) { } #endif + // 
******************************************************************** + // Check Q, R, and QR + // ******************************************************************** if ((m == 3) && (n == 3)) { std::vector> refQ(m); for (int i(0); i < m; ++i) { @@ -504,7 +534,8 @@ void impl_test_geqrf(int m, int n) { for (int i(0); (i < m) && test_flag_Q; ++i) { for (int j(0); (j < n) && test_flag_Q; ++j) { if (ats::abs(h_Q(i,j) - refQ[i][j]) > absTol) { - std::cout << "m = " << m + std::cout << "Q checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j @@ -525,7 +556,8 @@ void impl_test_geqrf(int m, int n) { for (int i(0); (i < m) && test_flag_R; ++i) { for (int j(0); (j < n) && test_flag_R; ++j) { if (ats::abs(h_R(i,j) - refR[i][j]) > absTol) { - std::cout << "m = " << m + std::cout << "R checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j @@ -542,13 +574,16 @@ void impl_test_geqrf(int m, int n) { } } + // ******************************************************************** // Check that A = QR + // ******************************************************************** { bool test_flag_QR = true; for (int i(0); (i < m) && test_flag_QR; ++i) { for (int j(0); (j < n) && test_flag_QR; ++j) { if (ats::abs(h_QR(i,j) - h_Aorig(i,j)) > absTol) { - std::cout << "m = " << m + std::cout << "QR checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j @@ -575,11 +610,18 @@ void test_geqrf() { using view_type_a_ll = Kokkos::View; using view_type_tau_ll = Kokkos::View; - //Test::impl_test_geqrf(3, 3); - //Test::impl_test_geqrf(100, 100); - Test::impl_test_geqrf(4, 3); // AquiEEP - //Test::impl_test_geqrf(100, 70); // AquiEEP - //Test::impl_test_geqrf(70, 100); + Test::impl_test_geqrf(1, 1); + Test::impl_test_geqrf(2, 1); + Test::impl_test_geqrf(2, 2); + Test::impl_test_geqrf(3, 1); + Test::impl_test_geqrf(3, 2); + Test::impl_test_geqrf(3, 3); + + Test::impl_test_geqrf(100, 100); + + Test::impl_test_geqrf(100, 70); + + 
Test::impl_test_geqrf(70, 100); #endif } @@ -588,7 +630,7 @@ void test_geqrf() { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, geqrf_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_float"); - //test_geqrf(); + test_geqrf(); Kokkos::Profiling::popRegion(); } #endif @@ -603,22 +645,22 @@ TEST_F(TestCategory, geqrf_double) { } #endif -#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, geqrf_complex_double) { - Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double"); - //test_geqrf, TestDevice>(); +TEST_F(TestCategory, geqrf_complex_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float"); + test_geqrf, TestDevice>(); Kokkos::Profiling::popRegion(); } #endif -#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, geqrf_complex_float) { - Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_float"); - //test_geqrf, TestDevice>(); +TEST_F(TestCategory, geqrf_complex_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::geqrf_complex_double"); + test_geqrf, TestDevice>(); Kokkos::Profiling::popRegion(); } #endif From bd22118cc964d4a711e9f188a8a5b9dd4c319ebb Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Sat, 25 May 2024 18:35:00 -0600 Subject: [PATCH 26/27] Formatting --- lapack/src/KokkosLapack_geqrf.hpp | 11 +- lapack/unit_test/Test_Lapack_geqrf.hpp | 247 +++++++++++-------------- 2 files changed, 115 insertions(+), 143 deletions(-) diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index a81ae2a436..c680120ae3 100644 --- 
a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -117,18 +117,17 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TauArray& Tau, using AMatrix_Internal = Kokkos::View< typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, typename AMatrix::device_type, Kokkos::MemoryTraits>; - using TauArray_Internal = - Kokkos::View>; + using TauArray_Internal = Kokkos::View< + typename TauArray::non_const_value_type*, typename TauArray::array_layout, + typename TauArray::device_type, Kokkos::MemoryTraits>; using InfoArray_Internal = Kokkos::View>; - AMatrix_Internal A_i = A; - TauArray_Internal Tau_i = Tau; + AMatrix_Internal A_i = A; + TauArray_Internal Tau_i = Tau; InfoArray_Internal Info_i = Info; KokkosLapack::Impl::GEQRF -void getQR(int const m, int const n, - typename ViewTypeA::HostMirror const& h_A, +void getQR(int const m, int const n, typename ViewTypeA::HostMirror const& h_A, typename ViewTypeTau::HostMirror const& h_tau, typename ViewTypeA::HostMirror& h_Q, typename ViewTypeA::HostMirror& h_R, - typename ViewTypeA::HostMirror& h_QR -) { + typename ViewTypeA::HostMirror& h_QR) { using ScalarA = typename ViewTypeA::value_type; // ******************************************************************** @@ -51,10 +49,9 @@ void getQR(int const m, int const n, for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { if (i <= j) { - h_R(i,j) = h_A(i,j); - } - else { - h_R(i,j) = Kokkos::ArithTraits::zero(); + h_R(i, j) = h_A(i, j); + } else { + h_R(i, j) = Kokkos::ArithTraits::zero(); } } } @@ -64,12 +61,12 @@ void getQR(int const m, int const n, // ******************************************************************** ViewTypeA I("I", m, m); typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I); - Kokkos::deep_copy(h_I,Kokkos::ArithTraits::zero()); + Kokkos::deep_copy(h_I, Kokkos::ArithTraits::zero()); for (int i(0); i < m; ++i) { if constexpr (Kokkos::ArithTraits::is_complex) { - h_I(i,i).real() = 
1.; + h_I(i, i).real() = 1.; } else { - h_I(i,i) = 1.; + h_I(i, i) = 1.; } } @@ -93,12 +90,12 @@ void getQR(int const m, int const n, // v(k) = 1, // v(k+1:m-1) = A(k+1:m-1,k). for (int k(0); k < minMN; ++k) { - Kokkos::deep_copy(h_v,Kokkos::ArithTraits::zero()); + Kokkos::deep_copy(h_v, Kokkos::ArithTraits::zero()); h_v[k] = 1.; - for (int index(k+1); index < m; ++index) { - h_v[index] = h_A(index,k); + for (int index(k + 1); index < m; ++index) { + h_v[index] = h_A(index, k); } -#if 0 // def HAVE_KOKKOSKERNELS_DEBUG +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { std::cout << "k = " << k << ", h_v[" << i << "] = " << std::setprecision(16) << h_v[i] << std::endl; } @@ -112,14 +109,9 @@ void getQR(int const m, int const n, // , const AViewType & A // ); Kokkos::deep_copy(h_Qk, h_I); - KokkosBlas::ger( "H" - , -h_tau[k] - , h_v - , h_v - , h_Qk - ); - -#if 0 // def HAVE_KOKKOSKERNELS_DEBUG + KokkosBlas::ger("H", -h_tau[k], h_v, h_v, h_Qk); + +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < m; ++j) { std::cout << "k = " << k << ", hQk(" << i << "," << j << ") = " << h_Qk(i,j) << std::endl; @@ -138,62 +130,50 @@ void getQR(int const m, int const n, // ); if (k == 0) { Kokkos::deep_copy(h_Q, h_Qk); - } - else { + } else { Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits::zero()); - KokkosBlas::gemm( "N" - , "N" - , 1. - , h_Q - , h_Qk - , 0. 
- , h_auxM - ); + KokkosBlas::gemm("N", "N", 1., h_Q, h_Qk, 0., h_auxM); Kokkos::deep_copy(h_Q, h_auxM); } -#if 0 // def HAVE_KOKKOSKERNELS_DEBUG +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < m; ++j) { std::cout << "k = " << k << ", hQ(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; } } #endif - } // for k + } // for k // ******************************************************************** // Check that Q^H Q = I // ******************************************************************** { Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits::zero()); - KokkosBlas::gemm( "C" - , "N" - , 1. - , h_Q - , h_Q - , 0. - , h_auxM - ); - - typename Kokkos::ArithTraits::mag_type absTol(1.e-8); - if constexpr (std::is_same_v::mag_type,float>) { + KokkosBlas::gemm("C", "N", 1., h_Q, h_Q, 0., h_auxM); + + typename Kokkos::ArithTraits< + typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8); + if constexpr (std::is_same_v< + typename Kokkos::ArithTraits< + typename ViewTypeA::non_const_value_type>::mag_type, + float>) { absTol = 5.e-5; } - using ats = Kokkos::ArithTraits; + using ats = Kokkos::ArithTraits; bool test_flag_QHQ = true; for (int i(0); (i < m) && test_flag_QHQ; ++i) { for (int j(0); (j < m) && test_flag_QHQ; ++j) { - if (ats::abs(h_auxM(i,j) - h_I(i,j)) > absTol) { + if (ats::abs(h_auxM(i, j) - h_I(i, j)) > absTol) { std::cout << "QHQ checking" - << ", m = " << m - << ", n = " << n - << ", i = " << i + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j - << ", h_auxM(i,j) = " << std::setprecision(16) << h_auxM(i,j) - << ", h_I(i,j) = " << std::setprecision(16) << h_I(i,j) - << ", |diff| = " << std::setprecision(16) << ats::abs(h_auxM(i,j) - h_I(i,j)) - << ", absTol = " << std::setprecision(16) << absTol + << ", h_auxM(i,j) = " << std::setprecision(16) + << h_auxM(i, j) << ", h_I(i,j) = " << std::setprecision(16) + << h_I(i, j) << ", |diff| = " << std::setprecision(16) + << ats::abs(h_auxM(i, j) - 
h_I(i, j)) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; test_flag_QHQ = false; } @@ -206,14 +186,7 @@ void getQR(int const m, int const n, // Compute h_QR // ******************************************************************** Kokkos::deep_copy(h_QR, Kokkos::ArithTraits::zero()); - KokkosBlas::gemm( "N" - , "N" - , 1. - , h_Q - , h_R - , 0. - , h_QR - ); + KokkosBlas::gemm("N", "N", 1., h_Q, h_R, 0., h_QR); } template @@ -230,10 +203,10 @@ void impl_test_geqrf(int m, int n) { // ******************************************************************** // Create device views // ******************************************************************** - ViewTypeA A ("A", m, n); - ViewTypeA Aorig("Aorig", m, n); - ViewTypeTau Tau ("Tau", minMN); - ViewTypeInfo Info ("Info", 1); + ViewTypeA A("A", m, n); + ViewTypeA Aorig("Aorig", m, n); + ViewTypeTau Tau("Tau", minMN); + ViewTypeInfo Info("Info", 1); // ******************************************************************** // Create host mirrors of device views @@ -262,7 +235,7 @@ void impl_test_geqrf(int m, int n) { for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - h_A(i,j).imag() = 0.; + h_A(i, j).imag() = 0.; } } } else { @@ -289,7 +262,7 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_Aorig, h_A); -#if 0 // def HAVE_KOKKOSKERNELS_DEBUG +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { std::cout << "Aorig(" << i << "," << j << ") = " << h_A(i,j) << std::endl; @@ -323,12 +296,16 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_tau, Tau); - typename Kokkos::ArithTraits::mag_type absTol(1.e-8); - if constexpr (std::is_same_v::mag_type,float>) { + typename Kokkos::ArithTraits< + typename ViewTypeA::non_const_value_type>::mag_type absTol(1.e-8); + if constexpr (std::is_same_v< + typename Kokkos::ArithTraits< + typename ViewTypeA::non_const_value_type>::mag_type, + float>) { absTol = 5.e-5; } -#if 0 // 
def HAVE_KOKKOSKERNELS_DEBUG +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG std::cout << "info[0] = " << h_info[0] << std::endl; for (int i(0); i < minMN; ++i) { for (int j(0); j < n; ++j) { @@ -347,10 +324,10 @@ void impl_test_geqrf(int m, int n) { if ((m == 3) && (n == 3)) { std::vector> refMatrix(m); for (int i(0); i < m; ++i) { - refMatrix[i].resize(n,Kokkos::ArithTraits::zero()); + refMatrix[i].resize(n, Kokkos::ArithTraits::zero()); } - std::vector refTau(m,Kokkos::ArithTraits::zero()); + std::vector refTau(m, Kokkos::ArithTraits::zero()); if constexpr (Kokkos::ArithTraits::is_complex) { refMatrix[0][0].real() = -14.; @@ -362,14 +339,13 @@ void impl_test_geqrf(int m, int n) { refMatrix[1][2].real() = 70.; refMatrix[2][0].real() = -0.1538461538461539; - refMatrix[2][1].real() = 1./18.; + refMatrix[2][1].real() = 1. / 18.; refMatrix[2][2].real() = -35.; refTau[0].real() = 1.857142857142857; refTau[1].real() = 1.993846153846154; refTau[2].real() = 0.; - } - else { + } else { refMatrix[0][0] = -14.; refMatrix[0][1] = -21.; refMatrix[0][2] = 14.; @@ -379,7 +355,7 @@ void impl_test_geqrf(int m, int n) { refMatrix[1][2] = 70.; refMatrix[2][0] = -0.1538461538461539; - refMatrix[2][1] = 1./18.; + refMatrix[2][1] = 1. 
/ 18.; refMatrix[2][2] = -35.; refTau[0] = 1.857142857142857; @@ -391,16 +367,17 @@ void impl_test_geqrf(int m, int n) { bool test_flag_A = true; for (int i(0); (i < m) && test_flag_A; ++i) { for (int j(0); (j < n) && test_flag_A; ++j) { - if (ats::abs(h_A(i,j) - refMatrix[i][j]) > absTol) { + if (ats::abs(h_A(i, j) - refMatrix[i][j]) > absTol) { std::cout << "h_Aoutput checking" - << ", m = " << m - << ", n = " << n - << ", i = " << i + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j - << ", h_Aoutput(i,j) = " << std::setprecision(16) << h_A(i,j) - << ", refMatrix(i,j) = " << std::setprecision(16) << refMatrix[i][j] - << ", |diff| = " << std::setprecision(16) << ats::abs(h_A(i,j) - refMatrix[i][j]) - << ", absTol = " << std::setprecision(16) << absTol + << ", h_Aoutput(i,j) = " << std::setprecision(16) + << h_A(i, j) + << ", refMatrix(i,j) = " << std::setprecision(16) + << refMatrix[i][j] + << ", |diff| = " << std::setprecision(16) + << ats::abs(h_A(i, j) - refMatrix[i][j]) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; test_flag_A = false; } @@ -414,13 +391,12 @@ void impl_test_geqrf(int m, int n) { for (int i(0); (i < m) && test_flag_tau; ++i) { if (ats::abs(h_tau[i] - refTau[i]) > absTol) { std::cout << "tau checking" - << ", m = " << m - << ", n = " << n - << ", i = " << i - << ", h_tau(i,j) = " << std::setprecision(16) << h_tau[i] + << ", m = " << m << ", n = " << n << ", i = " << i + << ", h_tau(i,j) = " << std::setprecision(16) << h_tau[i] << ", refTau(i,j) = " << std::setprecision(16) << refTau[i] - << ", |diff| = " << std::setprecision(16) << ats::abs(h_tau[i] - refTau[i]) - << ", absTol = " << std::setprecision(16) << absTol + << ", |diff| = " << std::setprecision(16) + << ats::abs(h_tau[i] - refTau[i]) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; test_flag_tau = false; } @@ -442,7 +418,7 @@ void impl_test_geqrf(int m, int n) { getQR(m, n, h_A, h_tau, h_Q, h_R, h_QR); -#if 0 // def 
HAVE_KOKKOSKERNELS_DEBUG +#if 0 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < m; ++j) { std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; @@ -466,12 +442,12 @@ void impl_test_geqrf(int m, int n) { if ((m == 3) && (n == 3)) { std::vector> refQ(m); for (int i(0); i < m; ++i) { - refQ[i].resize(n,Kokkos::ArithTraits::zero()); + refQ[i].resize(n, Kokkos::ArithTraits::zero()); } std::vector> refR(m); for (int i(0); i < m; ++i) { - refR[i].resize(n,Kokkos::ArithTraits::zero()); + refR[i].resize(n, Kokkos::ArithTraits::zero()); } #if 0 @@ -485,17 +461,17 @@ void impl_test_geqrf(int m, int n) { #endif if constexpr (Kokkos::ArithTraits::is_complex) { - refQ[0][0].real() = -6./7.; - refQ[0][1].real() = 69./175.; - refQ[0][2].real() = 58./175.; + refQ[0][0].real() = -6. / 7.; + refQ[0][1].real() = 69. / 175.; + refQ[0][2].real() = 58. / 175.; - refQ[1][0].real() = -3./7.; - refQ[1][1].real() = -158./175.; - refQ[1][2].real() = -6./175.; + refQ[1][0].real() = -3. / 7.; + refQ[1][1].real() = -158. / 175.; + refQ[1][2].real() = -6. / 175.; - refQ[2][0].real() = 2./7.; - refQ[2][1].real() = -6./35.; - refQ[2][2].real() = 33./35.; + refQ[2][0].real() = 2. / 7.; + refQ[2][1].real() = -6. / 35.; + refQ[2][2].real() = 33. / 35.; refR[0][0].real() = -14.; refR[0][1].real() = -21.; @@ -505,19 +481,18 @@ void impl_test_geqrf(int m, int n) { refR[1][2].real() = 70.; refR[2][2].real() = -35.; - } - else { - refQ[0][0] = -6./7.; - refQ[0][1] = 69./175.; - refQ[0][2] = 58./175.; + } else { + refQ[0][0] = -6. / 7.; + refQ[0][1] = 69. / 175.; + refQ[0][2] = 58. / 175.; - refQ[1][0] = -3./7.; - refQ[1][1] = -158./175.; - refQ[1][2] = -6./175.; + refQ[1][0] = -3. / 7.; + refQ[1][1] = -158. / 175.; + refQ[1][2] = -6. / 175.; - refQ[2][0] = 2./7.; - refQ[2][1] = -6./35.; - refQ[2][2] = 33./35.; + refQ[2][0] = 2. / 7.; + refQ[2][1] = -6. / 35.; + refQ[2][2] = 33. 
/ 35.; refR[0][0] = -14.; refR[0][1] = -21.; @@ -533,16 +508,15 @@ void impl_test_geqrf(int m, int n) { bool test_flag_Q = true; for (int i(0); (i < m) && test_flag_Q; ++i) { for (int j(0); (j < n) && test_flag_Q; ++j) { - if (ats::abs(h_Q(i,j) - refQ[i][j]) > absTol) { + if (ats::abs(h_Q(i, j) - refQ[i][j]) > absTol) { std::cout << "Q checking" - << ", m = " << m - << ", n = " << n - << ", i = " << i + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j - << ", h_Q(i,j) = " << std::setprecision(16) << h_Q(i,j) + << ", h_Q(i,j) = " << std::setprecision(16) << h_Q(i, j) << ", refQ(i,j) = " << std::setprecision(16) << refQ[i][j] - << ", |diff| = " << std::setprecision(16) << ats::abs(h_Q(i,j) - refQ[i][j]) - << ", absTol = " << std::setprecision(16) << absTol + << ", |diff| = " << std::setprecision(16) + << ats::abs(h_Q(i, j) - refQ[i][j]) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; test_flag_Q = false; } @@ -555,16 +529,15 @@ void impl_test_geqrf(int m, int n) { bool test_flag_R = true; for (int i(0); (i < m) && test_flag_R; ++i) { for (int j(0); (j < n) && test_flag_R; ++j) { - if (ats::abs(h_R(i,j) - refR[i][j]) > absTol) { + if (ats::abs(h_R(i, j) - refR[i][j]) > absTol) { std::cout << "R checking" - << ", m = " << m - << ", n = " << n - << ", i = " << i + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j - << ", h_R(i,j) = " << std::setprecision(16) << h_R(i,j) + << ", h_R(i,j) = " << std::setprecision(16) << h_R(i, j) << ", refR(i,j) = " << std::setprecision(16) << refR[i][j] - << ", |diff| = " << std::setprecision(16) << ats::abs(h_R(i,j) - refR[i][j]) - << ", absTol = " << std::setprecision(16) << absTol + << ", |diff| = " << std::setprecision(16) + << ats::abs(h_R(i, j) - refR[i][j]) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; test_flag_R = false; } @@ -581,16 +554,16 @@ void impl_test_geqrf(int m, int n) { bool test_flag_QR = true; for (int i(0); (i < m) && test_flag_QR; ++i) 
{ for (int j(0); (j < n) && test_flag_QR; ++j) { - if (ats::abs(h_QR(i,j) - h_Aorig(i,j)) > absTol) { + if (ats::abs(h_QR(i, j) - h_Aorig(i, j)) > absTol) { std::cout << "QR checking" - << ", m = " << m - << ", n = " << n - << ", i = " << i + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j - << ", h_Aorig(i,j) = " << std::setprecision(16) << h_Aorig(i,j) - << ", h_QR(i,j) = " << std::setprecision(16) << h_QR(i,j) - << ", |diff| = " << std::setprecision(16) << ats::abs(h_QR(i,j) - h_Aorig(i,j)) - << ", absTol = " << std::setprecision(16) << absTol + << ", h_Aorig(i,j) = " << std::setprecision(16) + << h_Aorig(i, j) + << ", h_QR(i,j) = " << std::setprecision(16) << h_QR(i, j) + << ", |diff| = " << std::setprecision(16) + << ats::abs(h_QR(i, j) - h_Aorig(i, j)) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; test_flag_QR = false; } From 89bb40c6ad797aa4391bfb411da1c2ea7d6232ac Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Sat, 25 May 2024 18:41:47 -0600 Subject: [PATCH 27/27] Backup --- lapack/unit_test/Test_Lapack_geqrf.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index 7453e2fb3e..0ec9d2679a 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -191,7 +191,8 @@ void getQR(int const m, int const n, typename ViewTypeA::HostMirror const& h_A, template void impl_test_geqrf(int m, int n) { - using ViewTypeInfo = Kokkos::View; + using ALayout_t = typename ViewTypeA::array_layout; + using ViewTypeInfo = Kokkos::View; using execution_space = typename Device::execution_space; using ScalarA = typename ViewTypeA::value_type; using ats = Kokkos::ArithTraits;