Skip to content

Commit

Permalink
Curl Curl solver: 4-color Gauss-Seidel smoother (AMReX-Codes#3778)
Browse files Browse the repository at this point in the history
This implements the 4-color Gauss-Seidel smoother of Li et. al. 2020.
"An Efficient Preconditioner for 3-D Finite Difference Modeling of the
Electromagnetic Diffusion Process in the Frequency Domain", IEEE
Transactions on Geoscience and Remote Sensing, 58, 500-509.
  • Loading branch information
WeiqunZhang authored Mar 1, 2024
1 parent 2ecafcf commit 9751217
Show file tree
Hide file tree
Showing 10 changed files with 959 additions and 333 deletions.
2 changes: 1 addition & 1 deletion Src/Base/AMReX_GpuMemory.H
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ private:

#else

DeviceScalar (T init_val) : d(init_val) {}
DeviceScalar (T const& init_val) : d(init_val) {}
DeviceScalar () = default;
~DeviceScalar () = default;

Expand Down
146 changes: 146 additions & 0 deletions Src/Base/AMReX_LUSolver.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#ifndef AMREX_LU_SOLVER_H_
#define AMREX_LU_SOLVER_H_
#include <AMReX_Config.H>

#include <AMReX_Arena.H>
#include <AMReX_Array.H>
#include <cmath>
#include <limits>

namespace amrex {

// https://en.wikipedia.org/wiki/LU_decomposition

template <int N, typename T>
class LUSolver
{
public:

LUSolver () = default;

LUSolver (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat);

void define (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat);

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void operator() (T* AMREX_RESTRICT x, T const* AMREX_RESTRICT b) const
{
for (int i = 0; i < N; ++i) {
x[i] = b[m_piv(i)];
for (int k = 0; k < i; ++k) {
x[i] -= m_mat(i,k) * x[k];
}
}

for (int i = N-1; i >= 0; --i) {
for (int k = i+1; k < N; ++k) {
x[i] -= m_mat(i,k) * x[k];
}
x[i] *= m_mat(i,i);
}
}

[[nodiscard]] AMREX_GPU_HOST_DEVICE
Array2D<T,0,N-1,0,N-1,Order::C> invert () const
{
Array2D<T,0,N-1,0,N-1,Order::C> IA;
for (int j = 0; j < N; ++j) {
for (int i = 0; i < N; ++i) {
IA(i,j) = (m_piv(i) == j) ? T(1.0) : T(0.0);
for (int k = 0; k < i; ++k) {
IA(i,j) -= m_mat(i,k) * IA(k,j);
}
}
for (int i = N-1; i >= 0; --i) {
for (int k = i+1; k < N; ++k) {
IA(i,j) -= m_mat(i,k) * IA(k,j);
}
IA(i,j) *= m_mat(i,i);
}
}
return IA;
}

[[nodiscard]] AMREX_GPU_HOST_DEVICE
T determinant () const
{
T det = m_mat(0,0);
for (int i = 1; i < N; ++i) {
det *= m_mat(i,i);
}
det = T(1.0) / det;
return (m_npivs % 2 == 0) ? det : -det;
}

private:

void define_innard ();

Array2D<T, 0, N-1, 0, N-1, Order::C> m_mat;
Array1D<int, 0, N-1> m_piv;
int m_npivs = 0;
};

template <int N, typename T>
LUSolver<N,T>::LUSolver (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat)
: m_mat(a_mat)
{
define_innard();
}

template <int N, typename T>
void LUSolver<N,T>::define (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat)
{
m_mat = a_mat;
define_innard();
}

template <int N, typename T>
void LUSolver<N,T>::define_innard ()
{
static_assert(N > 1);
static_assert(std::is_floating_point_v<T>);

for (int i = 0; i < N; ++i) { m_piv(i) = i; }
m_npivs = 0;

for (int i = 0; i < N; ++i) {
T maxA = 0;
int imax = i;

for (int k = i; k < N; ++k) {
auto const absA = std::abs(m_mat(k,i));
if (absA > maxA) {
maxA = absA;
imax = k;
}
}

if (maxA < std::numeric_limits<T>::min()) {
amrex::Abort("LUSolver: matrix is degenerate");
}

if (imax != i) {
std::swap(m_piv(i), m_piv(imax));
for (int j = 0; j < N; ++j) {
std::swap(m_mat(i,j), m_mat(imax,j));
}
++m_npivs;
}

for (int j = i+1; j < N; ++j) {
m_mat(j,i) /= m_mat(i,i);
for (int k = i+1; k < N; ++k) {
m_mat(j,k) -= m_mat(j,i) * m_mat(i,k);
}
}
}

for (int i = 0; i < N; ++i) {
m_mat(i,i) = T(1) / m_mat(i,i);
}
}

}

#endif
2 changes: 2 additions & 0 deletions Src/Base/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ foreach(D IN LISTS AMReX_SPACEDIM)
Parser/amrex_iparser.tab.cpp
Parser/amrex_iparser.tab.nolint.H
Parser/amrex_iparser.tab.h
# Dense linear algebra solver using LU decomposition
AMReX_LUSolver.H
# AMReX Hydro -----------------------------------------------------
AMReX_Slopes_K.H
# Forward declaration -----------------------------------------------------
Expand Down
3 changes: 3 additions & 0 deletions Src/Base/Make.package
Original file line number Diff line number Diff line change
Expand Up @@ -326,5 +326,8 @@ CEXE_sources += AMReX_IParser_Exe.cpp
CEXE_headers += AMReX_IParser.H
CEXE_sources += AMReX_IParser.cpp

# Dense linear algebra solver using LU decomposition
CEXE_headers += AMReX_LUSolver.H

VPATH_LOCATIONS += $(AMREX_HOME)/Src/Base/Parser
INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Base/Parser
25 changes: 17 additions & 8 deletions Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <AMReX_Config.H>

#include <AMReX_MLLinOp.H>
#include <AMReX_MLCurlCurl_K.H>

namespace amrex {

Expand All @@ -12,8 +13,16 @@ namespace amrex {
* Here E is an Array of 3 MultiFabs on staggered grid, alpha is a positive
* scalar, and beta is a non-negative scalar.
*
* It's the caller's responsibility to make sure rhs has consistent nodal
* data. If needed, one could use FabArray::OverrideSync to synchronize
* nodal data.
*
* The smoother is based on the 4-color Gauss-Seidel smoother of Li
* et. al. 2020. "An Efficient Preconditioner for 3-D Finite Difference
* Modeling of the Electromagnetic Diffusion Process in the Frequency
* Domain", IEEE Transactions on Geoscience and Remote Sensing, 58, 500-509.
*
* TODO: If beta is zero, the system could be singular.
* TODO: Try different restriction & interpolation strategies.
*/
class MLCurlCurl
: public MLLinOpT<Array<MultiFab,3> >
Expand Down Expand Up @@ -92,21 +101,20 @@ public:

// public for cuda

void smooth (int amrlev, int mglev, MF& sol, MultiFab const& rhs,
int redblack) const;
void smooth4 (int amrlev, int mglev, MF& sol, MF const& rhs, int color) const;

void compresid (int amrlev, int mglev, MF& resid, MF const& b) const;

void applyPhysBC (int amrlev, int mglev, MultiFab& mf) const;
void applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlStateType type) const;

private:

void applyBC (int amrlev, int mglev, MF& in) const;
void applyBC (int amrlev, int mglev, MultiFab& mf) const;
void applyBC (int amrlev, int mglev, MF& in, CurlCurlStateType type) const;

[[nodiscard]] iMultiFab const& getDotMask (int amrlev, int mglev, int idim) const;

[[nodiscard]] int getDirichlet (int amrlev, int mglev, int idim, int face) const;
[[nodiscard]] CurlCurlDirichletInfo getDirichletInfo (int amrlev, int mglev) const;
[[nodiscard]] CurlCurlSymmetryInfo getSymmetryInfo (int amrlev, int mglev) const;

RT m_alpha = std::numeric_limits<RT>::lowest();
RT m_beta = std::numeric_limits<RT>::lowest();
Expand All @@ -118,9 +126,10 @@ private:
{IntVect(0,1), IntVect(1,0), IntVect(1,1)};
#endif

Vector<Vector<Array<std::unique_ptr<iMultiFab>,3>>> m_dirichlet_mask;
mutable Vector<Vector<Array<std::unique_ptr<iMultiFab>,3>>> m_dotmask;
static constexpr int m_ncomp = 1;
Vector<Vector<std::unique_ptr<Gpu::DeviceScalar
<LUSolver<AMREX_SPACEDIM*2,RT>>>>> m_lusolver;
};

}
Expand Down
Loading

0 comments on commit 9751217

Please sign in to comment.