diff --git a/Src/Base/AMReX_GpuMemory.H b/Src/Base/AMReX_GpuMemory.H index 1ffee387015..7f2b937fdf3 100644 --- a/Src/Base/AMReX_GpuMemory.H +++ b/Src/Base/AMReX_GpuMemory.H @@ -104,7 +104,7 @@ private: #else - DeviceScalar (T init_val) : d(init_val) {} + DeviceScalar (T const& init_val) : d(init_val) {} DeviceScalar () = default; ~DeviceScalar () = default; diff --git a/Src/Base/AMReX_LUSolver.H b/Src/Base/AMReX_LUSolver.H new file mode 100644 index 00000000000..bd69822ea5a --- /dev/null +++ b/Src/Base/AMReX_LUSolver.H @@ -0,0 +1,146 @@ +#ifndef AMREX_LU_SOLVER_H_ +#define AMREX_LU_SOLVER_H_ +#include + +#include +#include +#include +#include + +namespace amrex { + +// https://en.wikipedia.org/wiki/LU_decomposition + +template +class LUSolver +{ +public: + + LUSolver () = default; + + LUSolver (Array2D const& a_mat); + + void define (Array2D const& a_mat); + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void operator() (T* AMREX_RESTRICT x, T const* AMREX_RESTRICT b) const + { + for (int i = 0; i < N; ++i) { + x[i] = b[m_piv(i)]; + for (int k = 0; k < i; ++k) { + x[i] -= m_mat(i,k) * x[k]; + } + } + + for (int i = N-1; i >= 0; --i) { + for (int k = i+1; k < N; ++k) { + x[i] -= m_mat(i,k) * x[k]; + } + x[i] *= m_mat(i,i); + } + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE + Array2D invert () const + { + Array2D IA; + for (int j = 0; j < N; ++j) { + for (int i = 0; i < N; ++i) { + IA(i,j) = (m_piv(i) == j) ? T(1.0) : T(0.0); + for (int k = 0; k < i; ++k) { + IA(i,j) -= m_mat(i,k) * IA(k,j); + } + } + for (int i = N-1; i >= 0; --i) { + for (int k = i+1; k < N; ++k) { + IA(i,j) -= m_mat(i,k) * IA(k,j); + } + IA(i,j) *= m_mat(i,i); + } + } + return IA; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE + T determinant () const + { + T det = m_mat(0,0); + for (int i = 1; i < N; ++i) { + det *= m_mat(i,i); + } + det = T(1.0) / det; + return (m_npivs % 2 == 0) ? det : -det; + } + +private: + + void define_innard (); + + Array2D m_mat; + Array1D m_piv; + int m_npivs = 0; +}; + +template +LUSolver::LUSolver (Array2D const& a_mat) + : m_mat(a_mat) +{ + define_innard(); +} + +template +void LUSolver::define (Array2D const& a_mat) +{ + m_mat = a_mat; + define_innard(); +} + +template +void LUSolver::define_innard () +{ + static_assert(N > 1); + static_assert(std::is_floating_point_v); + + for (int i = 0; i < N; ++i) { m_piv(i) = i; } + m_npivs = 0; + + for (int i = 0; i < N; ++i) { + T maxA = 0; + int imax = i; + + for (int k = i; k < N; ++k) { + auto const absA = std::abs(m_mat(k,i)); + if (absA > maxA) { + maxA = absA; + imax = k; + } + } + + if (maxA < std::numeric_limits::min()) { + amrex::Abort("LUSolver: matrix is degenerate"); + } + + if (imax != i) { + std::swap(m_piv(i), m_piv(imax)); + for (int j = 0; j < N; ++j) { + std::swap(m_mat(i,j), m_mat(imax,j)); + } + ++m_npivs; + } + + for (int j = i+1; j < N; ++j) { + m_mat(j,i) /= m_mat(i,i); + for (int k = i+1; k < N; ++k) { + m_mat(j,k) -= m_mat(j,i) * m_mat(i,k); + } + } + } + + for (int i = 0; i < N; ++i) { + m_mat(i,i) = T(1) / m_mat(i,i); + } +} + +} + +#endif diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index 740ff03c770..cebd1f9bce1 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -270,6 +270,8 @@ foreach(D IN LISTS AMReX_SPACEDIM) Parser/amrex_iparser.tab.cpp Parser/amrex_iparser.tab.nolint.H Parser/amrex_iparser.tab.h + # Dense linear algebra solver using LU decomposition + AMReX_LUSolver.H # AMReX Hydro ----------------------------------------------------- AMReX_Slopes_K.H # Forward declaration ----------------------------------------------------- diff --git a/Src/Base/Make.package b/Src/Base/Make.package index f45256b3472..dfbfb4f03a1 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -326,5 +326,8 @@ CEXE_sources += AMReX_IParser_Exe.cpp CEXE_headers += AMReX_IParser.H CEXE_sources += AMReX_IParser.cpp +# Dense linear algebra solver using LU decomposition +CEXE_headers += AMReX_LUSolver.H + VPATH_LOCATIONS += $(AMREX_HOME)/Src/Base/Parser INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Base/Parser diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H index 38a0ff5c5c0..3e8f06c58e6 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H @@ -3,6 +3,7 @@ #include #include +#include namespace amrex { @@ -12,8 +13,16 @@ namespace amrex { * Here E is an Array of 3 MultiFabs on staggered grid, alpha is a positive * scalar, and beta is a non-negative scalar. * + * It's the caller's responsibility to make sure rhs has consistent nodal + * data. If needed, one could use FabArray::OverrideSync to synchronize + * nodal data. + * + * The smoother is based on the 4-color Gauss-Seidel smoother of Li + * et. al. 2020. "An Efficient Preconditioner for 3-D Finite Difference + * Modeling of the Electromagnetic Diffusion Process in the Frequency + * Domain", IEEE Transactions on Geoscience and Remote Sensing, 58, 500-509. + * * TODO: If beta is zero, the system could be singular. - * TODO: Try different restriction & interpolation strategies. */ class MLCurlCurl : public MLLinOpT > @@ -92,21 +101,20 @@ public: // public for cuda - void smooth (int amrlev, int mglev, MF& sol, MultiFab const& rhs, - int redblack) const; + void smooth4 (int amrlev, int mglev, MF& sol, MF const& rhs, int color) const; void compresid (int amrlev, int mglev, MF& resid, MF const& b) const; - void applyPhysBC (int amrlev, int mglev, MultiFab& mf) const; + void applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlStateType type) const; private: - void applyBC (int amrlev, int mglev, MF& in) const; - void applyBC (int amrlev, int mglev, MultiFab& mf) const; + void applyBC (int amrlev, int mglev, MF& in, CurlCurlStateType type) const; [[nodiscard]] iMultiFab const& getDotMask (int amrlev, int mglev, int idim) const; - [[nodiscard]] int getDirichlet (int amrlev, int mglev, int idim, int face) const; + [[nodiscard]] CurlCurlDirichletInfo getDirichletInfo (int amrlev, int mglev) const; + [[nodiscard]] CurlCurlSymmetryInfo getSymmetryInfo (int amrlev, int mglev) const; RT m_alpha = std::numeric_limits::lowest(); RT m_beta = std::numeric_limits::lowest(); @@ -118,9 +126,10 @@ private: {IntVect(0,1), IntVect(1,0), IntVect(1,1)}; #endif - Vector,3>>> m_dirichlet_mask; mutable Vector,3>>> m_dotmask; static constexpr int m_ncomp = 1; + Vector>>>> m_lusolver; }; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp index 11096bbc594..6918dfca966 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp @@ -1,6 +1,4 @@ - #include -#include namespace amrex { @@ -24,9 +22,9 @@ void MLCurlCurl::define (const Vector& a_geom, m_dotmask[amrlev].resize(this->m_num_mg_levels[amrlev]); } - m_dirichlet_mask.resize(this->m_num_amr_levels); + m_lusolver.resize(this->m_num_amr_levels); for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { - m_dirichlet_mask[amrlev].resize(this->m_num_mg_levels[amrlev]); + m_lusolver[amrlev].resize(this->m_num_mg_levels[amrlev]); } } @@ -48,7 +46,9 @@ void MLCurlCurl::restriction (int amrlev, int cmglev, MF& crse, MF& fine) const IntVect ratio = (amrlev > 0) ? IntVect(2) : this->mg_coarsen_ratio_vec[cmglev-1]; AMREX_ALWAYS_ASSERT(ratio == 2); - applyBC(amrlev, cmglev-1, fine); + applyBC(amrlev, cmglev-1, fine, CurlCurlStateType::r); + + auto dinfo = getDirichletInfo(amrlev,cmglev-1); for (int idim = 0; idim < 3; ++idim) { bool need_parallel_copy = !amrex::isMFIterSafe(crse[idim], fine[idim]); @@ -62,10 +62,9 @@ void MLCurlCurl::restriction (int amrlev, int cmglev, MF& crse, MF& fine) const auto const& crsema = pcrse->arrays(); auto const& finema = fine[idim].const_arrays(); - auto const& dmsk = m_dirichlet_mask[amrlev][cmglev-1][idim]->const_arrays(); ParallelFor(*pcrse, [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) { - mlcurlcurl_restriction(idim,i,j,k,crsema[bno],finema[bno],dmsk[bno]); + mlcurlcurl_restriction(idim,i,j,k,crsema[bno],finema[bno],dinfo); }); Gpu::streamSynchronize(); @@ -81,6 +80,8 @@ void MLCurlCurl::interpolation (int amrlev, int fmglev, MF& fine, IntVect ratio = (amrlev > 0) ? IntVect(2) : this->mg_coarsen_ratio_vec[fmglev]; AMREX_ALWAYS_ASSERT(ratio == 2); + auto dinfo = getDirichletInfo(amrlev,fmglev); + for (int idim = 0; idim < 3; ++idim) { bool need_parallel_copy = !amrex::isMFIterSafe(crse[idim], fine[idim]); MultiFab cfine; @@ -93,10 +94,9 @@ void MLCurlCurl::interpolation (int amrlev, int fmglev, MF& fine, } auto const& finema = fine[idim].arrays(); auto const& crsema = cmf->const_arrays(); - auto const& dmsk = m_dirichlet_mask[amrlev][fmglev][idim]->const_arrays(); ParallelFor(fine[idim], [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) { - if (!dmsk[bno](i,j,k)) { + if (!dinfo.is_dirichlet_edge(idim,i,j,k)) { mlcurlcurl_interpadd(idim,i,j,k,finema[bno],crsema[bno]); } }); @@ -108,12 +108,16 @@ void MLCurlCurl::apply (int amrlev, int mglev, MF& out, MF& in, BCMode /*bc_mode*/, StateMode /*s_mode*/, const MLMGBndryT* /*bndry*/) const { - applyBC(amrlev, mglev, in); + applyBC(amrlev, mglev, in, CurlCurlStateType::x); - auto const& dxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); - auto const a = m_alpha; + auto adxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + adxinv[idim] *= std::sqrt(m_alpha); + } auto const b = m_beta; + auto dinfo = getDirichletInfo(amrlev,mglev); + #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif @@ -128,32 +132,29 @@ MLCurlCurl::apply (int amrlev, int mglev, MF& out, MF& in, BCMode /*bc_mode*/, auto const& xin = in[0].array(mfi); auto const& yin = in[1].array(mfi); auto const& zin = in[2].array(mfi); - auto const& mx = m_dirichlet_mask[amrlev][mglev][0]->const_array(mfi); - auto const& my = m_dirichlet_mask[amrlev][mglev][1]->const_array(mfi); - auto const& mz = m_dirichlet_mask[amrlev][mglev][2]->const_array(mfi); amrex::ParallelFor(xbx, ybx, zbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (mx(i,j,k)) { + if (dinfo.is_dirichlet_x_edge(i,j,k)) { xout(i,j,k) = Real(0.0); } else { - mlcurlcurl_adotx_x(i,j,k,xout,xin,yin,zin,a,b,dxinv); + mlcurlcurl_adotx_x(i,j,k,xout,xin,yin,zin,b,adxinv); } }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (my(i,j,k)) { + if (dinfo.is_dirichlet_y_edge(i,j,k)) { yout(i,j,k) = Real(0.0); } else { - mlcurlcurl_adotx_y(i,j,k,yout,xin,yin,zin,a,b,dxinv); + mlcurlcurl_adotx_y(i,j,k,yout,xin,yin,zin,b,adxinv); } }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (mz(i,j,k)) { + if (dinfo.is_dirichlet_z_edge(i,j,k)) { zout(i,j,k) = Real(0.0); } else { - mlcurlcurl_adotx_z(i,j,k,zout,xin,yin,zin,a,b,dxinv); + mlcurlcurl_adotx_z(i,j,k,zout,xin,yin,zin,b,adxinv); } }); } @@ -162,84 +163,54 @@ MLCurlCurl::apply (int amrlev, int mglev, MF& out, MF& in, BCMode /*bc_mode*/, void MLCurlCurl::smooth (int amrlev, int mglev, MF& sol, const MF& rhs, bool skip_fillboundary) const { - if (!skip_fillboundary) { - applyBC(amrlev, mglev, sol); - } - - smooth(amrlev, mglev, sol, rhs[0], 0); // Ex red - applyBC(amrlev, mglev, sol[0]); + AMREX_ASSERT(rhs[0].nGrowVect().allGE(IntVect(1))); - smooth(amrlev, mglev, sol, rhs[1], 0); // Ey red - applyBC(amrlev, mglev, sol[1]); + applyBC(amrlev, mglev, const_cast(rhs), CurlCurlStateType::b); - smooth(amrlev, mglev, sol, rhs[2], 0); // Ez red - applyBC(amrlev, mglev, sol[2]); + for (int color = 0; color < 4; ++color) { + if (!skip_fillboundary) { + applyBC(amrlev, mglev, sol, CurlCurlStateType::x); + } + skip_fillboundary = false; + smooth4(amrlev, mglev, sol, rhs, color); + } +} - smooth(amrlev, mglev, sol, rhs[0], 1); // Ex black - applyBC(amrlev, mglev, sol[0]); +void MLCurlCurl::smooth4 (int amrlev, int mglev, MF& sol, MF const& rhs, + int color) const +{ + auto const& ex = sol[0].arrays(); + auto const& ey = sol[1].arrays(); + auto const& ez = sol[2].arrays(); + auto const& rhsx = rhs[0].const_arrays(); + auto const& rhsy = rhs[1].const_arrays(); + auto const& rhsz = rhs[2].const_arrays(); - smooth(amrlev, mglev, sol, rhs[1], 1); // Ey black -#if (AMREX_SPACEDIM == 3) - applyBC(amrlev, mglev, sol[1]); +#if (AMREX_SPACEDIM == 2) + auto b = m_beta; #endif - smooth(amrlev, mglev, sol, rhs[2], 1); // Ez black - - for (int idim = 0; idim < 3; ++idim) { - amrex::OverrideSync(sol[idim], getDotMask(amrlev,mglev,idim), - this->m_geom[amrlev][mglev].periodicity()); + auto adxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + adxinv[idim] *= std::sqrt(m_alpha); } -} -void MLCurlCurl::smooth (int amrlev, int mglev, MF& sol, MultiFab const& rhs, - int redblack) const -{ - auto const& dxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); - auto const a = m_alpha; - auto const b = m_beta; + auto* plusolver = m_lusolver[amrlev][mglev]->dataPtr(); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(rhs,TilingIfNotGPU()); mfi.isValid(); ++mfi) + auto dinfo = getDirichletInfo(amrlev,mglev); + auto sinfo = getSymmetryInfo(amrlev,mglev); + + MultiFab nmf(amrex::convert(rhs[0].boxArray(),IntVect(1)), + rhs[0].DistributionMap(), 1, 0, MFInfo().SetAlloc(false)); + ParallelFor(nmf, [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) { - Box const& bx = mfi.tilebox(); - auto const& rh = rhs.const_array(mfi); - if (rhs.ixType() == sol[0].ixType()) { - auto const& ex = sol[0].array(mfi); - auto const& ey = sol[1].const_array(mfi); - auto const& ez = sol[2].const_array(mfi); - auto const& mx = m_dirichlet_mask[amrlev][mglev][0]->const_array(mfi); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - if (!mx(i,j,k)) { - mlcurlcurl_gsrb_x(i,j,k,ex,ey,ez,rh,a,b,dxinv,redblack); - } - }); - } else if (rhs.ixType() == sol[1].ixType()) { - auto const& ex = sol[0].const_array(mfi); - auto const& ey = sol[1].array(mfi); - auto const& ez = sol[2].const_array(mfi); - auto const& my = m_dirichlet_mask[amrlev][mglev][1]->const_array(mfi); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - if (!my(i,j,k)) { - mlcurlcurl_gsrb_y(i,j,k,ex,ey,ez,rh,a,b,dxinv,redblack); - } - }); - } else { - auto const& ex = sol[0].const_array(mfi); - auto const& ey = sol[1].const_array(mfi); - auto const& ez = sol[2].array(mfi); - auto const& mz = m_dirichlet_mask[amrlev][mglev][2]->const_array(mfi); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - if (!mz(i,j,k)) { - mlcurlcurl_gsrb_z(i,j,k,ex,ey,ez,rh,a,b,dxinv,redblack); - } - }); - } - } + mlcurlcurl_gs4(i,j,k,ex[bno],ey[bno],ez[bno],rhsx[bno],rhsy[bno],rhsz[bno], +#if (AMREX_SPACEDIM == 2) + b, +#endif + adxinv,color,*plusolver,dinfo,sinfo); + }); + Gpu::streamSynchronize(); } void MLCurlCurl::solutionResidual (int amrlev, MF& resid, MF& x, const MF& b, @@ -262,6 +233,8 @@ void MLCurlCurl::correctionResidual (int amrlev, int mglev, MF& resid, MF& x, void MLCurlCurl::compresid (int amrlev, int mglev, MF& resid, MF const& b) const { + auto dinfo = getDirichletInfo(amrlev,mglev); + #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif @@ -276,13 +249,10 @@ void MLCurlCurl::compresid (int amrlev, int mglev, MF& resid, MF const& b) const auto const& bx = b[0].array(mfi); auto const& by = b[1].array(mfi); auto const& bz = b[2].array(mfi); - auto const& mx = m_dirichlet_mask[amrlev][mglev][0]->const_array(mfi); - auto const& my = m_dirichlet_mask[amrlev][mglev][1]->const_array(mfi); - auto const& mz = m_dirichlet_mask[amrlev][mglev][2]->const_array(mfi); amrex::ParallelFor(xbx, ybx, zbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (mx(i,j,k)) { + if (dinfo.is_dirichlet_x_edge(i,j,k)) { resx(i,j,k) = Real(0.0); } else { resx(i,j,k) = bx(i,j,k) - resx(i,j,k); @@ -290,7 +260,7 @@ void MLCurlCurl::compresid (int amrlev, int mglev, MF& resid, MF const& b) const }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (my(i,j,k)) { + if (dinfo.is_dirichlet_y_edge(i,j,k)) { resy(i,j,k) = Real(0.0); } else { resy(i,j,k) = by(i,j,k) - resy(i,j,k); @@ -298,7 +268,7 @@ void MLCurlCurl::compresid (int amrlev, int mglev, MF& resid, MF const& b) const }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - if (mz(i,j,k)) { + if (dinfo.is_dirichlet_z_edge(i,j,k)) { resz(i,j,k) = Real(0.0); } else { resz(i,j,k) = bz(i,j,k) - resz(i,j,k); @@ -309,50 +279,85 @@ void MLCurlCurl::compresid (int amrlev, int mglev, MF& resid, MF const& b) const void MLCurlCurl::prepareForSolve () { - for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { - for (int idim = 0; idim < 3; ++idim) { - m_dirichlet_mask[amrlev][mglev][idim] = std::make_unique - (amrex::convert(this->m_grids[amrlev][mglev], m_etype[idim]), - this->m_dmap[amrlev][mglev], 1, 0); - } - - int const dirichlet_xlo = getDirichlet(amrlev, mglev, 0, 0); - int const dirichlet_xhi = getDirichlet(amrlev, mglev, 0, 1); - int const dirichlet_ylo = getDirichlet(amrlev, mglev, 1, 0); - int const dirichlet_yhi = getDirichlet(amrlev, mglev, 1, 1); - int const dirichlet_zlo = getDirichlet(amrlev, mglev, 2, 0); - int const dirichlet_zhi = getDirichlet(amrlev, mglev, 2, 1); - -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) + auto const& dxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); + Real dxx = dxinv[0]*dxinv[0]; + Real dyy = dxinv[1]*dxinv[1]; + Real dxy = dxinv[0]*dxinv[1]; +#if (AMREX_SPACEDIM == 2) + Array2D A + {m_alpha*dyy*Real(2.0) + m_beta, + Real(0.0), + -m_alpha*dxy, + m_alpha*dxy, + // + Real(0.0), + m_alpha*dyy*Real(2.0) + m_beta, + m_alpha*dxy, + -m_alpha*dxy, + // + -m_alpha*dxy, + m_alpha*dxy, + m_alpha*dxx*Real(2.0) + m_beta, + Real(0.0), + // + m_alpha*dxy, + -m_alpha*dxy, + Real(0.0), + m_alpha*dxx*Real(2.0) + m_beta}; +#else + Real dzz = dxinv[2]*dxinv[2]; + Real dxz = dxinv[0]*dxinv[2]; + Real dyz = dxinv[1]*dxinv[2]; + + Array2D A + {m_alpha*(dyy+dzz)*Real(2.0) + m_beta, + Real(0.0), + -m_alpha*dxy, + m_alpha*dxy, + -m_alpha*dxz, + m_alpha*dxz, + // + Real(0.0), + m_alpha*(dyy+dzz)*Real(2.0) + m_beta, + m_alpha*dxy, + -m_alpha*dxy, + m_alpha*dxz, + -m_alpha*dxz, + // + -m_alpha*dxy, + m_alpha*dxy, + m_alpha*(dxx+dzz)*Real(2.0) + m_beta, + Real(0.0), + -m_alpha*dyz, + m_alpha*dyz, + // + m_alpha*dxy, + -m_alpha*dxy, + Real(0.0), + m_alpha*(dxx+dzz)*Real(2.0) + m_beta, + m_alpha*dyz, + -m_alpha*dyz, + // + -m_alpha*dxz, + m_alpha*dxz, + -m_alpha*dyz, + m_alpha*dyz, + m_alpha*(dxx+dyy)*Real(2.0) + m_beta, + Real(0.0), + // + m_alpha*dxz, + -m_alpha*dxz, + m_alpha*dyz, + -m_alpha*dyz, + Real(0.0), + m_alpha*(dxx+dyy)*Real(2.0) + m_beta}; #endif - for (MFIter mfi(*m_dirichlet_mask[amrlev][mglev][0], - TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - Box const& xbx = mfi.tilebox(m_etype[0]); - Box const& ybx = mfi.tilebox(m_etype[1]); - Box const& zbx = mfi.tilebox(m_etype[2]); - auto const& mx = m_dirichlet_mask[amrlev][mglev][0]->array(mfi); - auto const& my = m_dirichlet_mask[amrlev][mglev][1]->array(mfi); - auto const& mz = m_dirichlet_mask[amrlev][mglev][2]->array(mfi); - amrex::ParallelFor(xbx, ybx, zbx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - mx(i,j,k) = int(j == dirichlet_ylo || j == dirichlet_yhi || - k == dirichlet_zlo || k == dirichlet_zhi); - }, - [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - my(i,j,k) = int(i == dirichlet_xlo || i == dirichlet_xhi || - k == dirichlet_zlo || k == dirichlet_zhi); - }, - [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - mz(i,j,k) = (i == dirichlet_xlo || i == dirichlet_xhi || - j == dirichlet_ylo || j == dirichlet_yhi); - }); - } + + m_lusolver[amrlev][mglev] + = std::make_unique>>(A); } } } @@ -446,21 +451,24 @@ MLCurlCurl::makeCoarseAmr (int famrlev, IntVect const& ng) const return r; } -void MLCurlCurl::applyBC (int amrlev, int mglev, MF& in) const +void MLCurlCurl::applyBC (int amrlev, int mglev, MF& in, CurlCurlStateType type) const { - Vector mfs{in.data(),&(in[1]),&(in[2])}; + int nmfs = 3; +#if (AMREX_SPACEDIM == 2) + if (CurlCurlStateType::b == type) { + nmfs = 2; // no need to applyBC on Ez + } +#endif + Vector mfs(nmfs); + for (int imf = 0; imf < nmfs; ++imf) { + mfs[imf] = in.data() + imf; + } FillBoundary(mfs, this->m_geom[amrlev][mglev].periodicity()); - for (auto& mf : in) { - applyPhysBC(amrlev, mglev, mf); + for (auto* mf : mfs) { + applyPhysBC(amrlev, mglev, *mf, type); } } -void MLCurlCurl::applyBC (int amrlev, int mglev, MultiFab& mf) const -{ - mf.FillBoundary(this->m_geom[amrlev][mglev].periodicity()); - applyPhysBC(amrlev, mglev, mf); -} - #ifdef AMREX_USE_GPU struct MLCurlCurlBCTag { Array4 fab; @@ -470,12 +478,25 @@ struct MLCurlCurlBCTag { [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const& box() const noexcept { return bx; } }; + +struct MLCurlCurlEdgeBCTag { + Array4 fab; + Box bx; + Dim3 offset; + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + Box const& box() const noexcept { return bx; } +}; #endif -void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf) const +void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlStateType type) const { + if (CurlCurlStateType::b == type) { return; } + auto const idxtype = mf.ixType(); Box const domain = amrex::convert(this->m_geom[amrlev][mglev].Domain(), idxtype); + Box const gdomain = amrex::convert + (this->m_geom[amrlev][mglev].growPeriodicDomain(1), idxtype); MFItInfo mfi_info{}; @@ -496,10 +517,24 @@ void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf) const bool is_symmetric = face.isLow() ? m_lobc[0][idim] == LinOpBCType::symmetry : m_hibc[0][idim] == LinOpBCType::symmetry; - if (domain[face] == vbx[face] && is_symmetric) { + if (domain[face] == vbx[face] && is_symmetric && + ((type == CurlCurlStateType::x) || + (type == CurlCurlStateType::r && idxtype.nodeCentered(idim)))) // transverse direction only + { Box b = vbx; - int shift = face.isLow() ? -1 : 1; - b.setRange(idim, domain[face] + shift, 1); + for (int jdim = 0; jdim < AMREX_SPACEDIM; ++jdim) { + if (jdim == idim) { + int shift = face.isLow() ? -1 : 1; + b.setRange(jdim, domain[face] + shift, 1); + } else { + if (b.smallEnd(jdim) > gdomain.smallEnd(jdim)) { + b.growLo(jdim); + } + if (b.bigEnd(jdim) < gdomain.bigEnd(jdim)) { + b.growHi(jdim); + } + } + } #ifdef AMREX_USE_GPU tags.emplace_back(MLCurlCurlBCTag{a,b,face}); #else @@ -519,6 +554,72 @@ void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf) const mlcurlcurl_bc_symmetry(i, j, k, tag.face, idxtype, tag.fab); }); #endif + + if (CurlCurlStateType::r == type) { // fix domain edges + auto sinfo = getSymmetryInfo(amrlev,mglev); + +#ifdef AMREX_USE_GPU + Vector tags2; +#endif + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(mf,mfi_info); mfi.isValid(); ++mfi) { + auto const& vbx = mfi.validbox(); + auto const& a = mf.array(mfi); + for (int idim = 0; idim < AMREX_SPACEDIM-1; ++idim) { + for (int jdim = idim+1; jdim < AMREX_SPACEDIM; ++jdim) { + if (idxtype.nodeCentered(idim) && + idxtype.nodeCentered(jdim)) + { + for (int iside = 0; iside < 2; ++iside) { + int ii = (iside == 0) ? vbx.smallEnd(idim) : vbx.bigEnd(idim); + for (int jside = 0; jside < 2; ++jside) { + int jj = (jside == 0) ? vbx.smallEnd(jdim) : vbx.bigEnd(jdim); + if (sinfo.is_symmetric(idim,iside,ii) && + sinfo.is_symmetric(jdim,jside,jj)) + { + IntVect oiv(0); + oiv[idim] = (iside == 0) ? 2 : -2; + oiv[jdim] = (jside == 0) ? 2 : -2; + Dim3 offset = oiv.dim3(); + + Box b = vbx; + if (iside == 0) { + b.setRange(idim,vbx.smallEnd(idim)-1); + } else { + b.setRange(idim,vbx.bigEnd(idim)+1); + } + if (jside == 0) { + b.setRange(jdim,vbx.smallEnd(jdim)-1); + } else { + b.setRange(jdim,vbx.bigEnd(jdim)+1); + } +#ifdef AMREX_USE_GPU + tags2.emplace_back(MLCurlCurlEdgeBCTag{a,b,offset}); +#else + amrex::LoopOnCpu(b, [&] (int i, int j, int k) + { + a(i,j,k) = a(i+offset.x,j+offset.y,k+offset.z); + }); +#endif + } + } + } + } + } + } + } + +#ifdef AMREX_USE_GPU + ParallelFor(tags2, + [=] AMREX_GPU_DEVICE (int i, int j, int k, MLCurlCurlEdgeBCTag const& tag) + { + tag.fab(i,j,k) = tag.fab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z); + }); +#endif + } } iMultiFab const& MLCurlCurl::getDotMask (int amrlev, int mglev, int idim) const @@ -532,27 +633,72 @@ iMultiFab const& MLCurlCurl::getDotMask (int amrlev, int mglev, int idim) const return *m_dotmask[amrlev][mglev][idim]; } -int MLCurlCurl::getDirichlet (int amrlev, int mglev, int idim, int face) const +CurlCurlDirichletInfo MLCurlCurl::getDirichletInfo (int amrlev, int mglev) const { + + auto helper = [&] (int idim, int face) -> int + { #if (AMREX_SPACEDIM == 2) - if (idim == 2) { - return std::numeric_limits::lowest(); - } + if (idim == 2) { + return std::numeric_limits::lowest(); + } #endif - if (face == 0) { - if (m_lobc[0][idim] == LinOpBCType::Dirichlet) { - return m_geom[amrlev][mglev].Domain().smallEnd(idim); + if (face == 0) { + if (m_lobc[0][idim] == LinOpBCType::Dirichlet) { + return m_geom[amrlev][mglev].Domain().smallEnd(idim); + } else { + return std::numeric_limits::lowest(); + } } else { + if (m_hibc[0][idim] == LinOpBCType::Dirichlet) { + return m_geom[amrlev][mglev].Domain().bigEnd(idim) + 1; + } else { + return std::numeric_limits::max(); + } + } + }; + + return CurlCurlDirichletInfo{IntVect(AMREX_D_DECL(helper(0,0), + helper(1,0), + helper(2,0))), + IntVect(AMREX_D_DECL(helper(0,1), + helper(1,1), + helper(2,1)))}; +} + +CurlCurlSymmetryInfo MLCurlCurl::getSymmetryInfo (int amrlev, int mglev) const +{ + + auto helper = [&] (int idim, int face) -> int + { +#if (AMREX_SPACEDIM == 2) + if (idim == 2) { return std::numeric_limits::lowest(); } - } else { - if (m_hibc[0][idim] == LinOpBCType::Dirichlet) { - return m_geom[amrlev][mglev].Domain().bigEnd(idim) + 1; +#endif + + if (face == 0) { + if (m_lobc[0][idim] == LinOpBCType::symmetry) { + return m_geom[amrlev][mglev].Domain().smallEnd(idim); + } else { + return std::numeric_limits::lowest(); + } } else { - return std::numeric_limits::max(); + if (m_hibc[0][idim] == LinOpBCType::symmetry) { + return m_geom[amrlev][mglev].Domain().bigEnd(idim) + 1; + } else { + return std::numeric_limits::max(); + } } - } + }; + + return CurlCurlSymmetryInfo{IntVect(AMREX_D_DECL(helper(0,0), + helper(1,0), + helper(2,0))), + IntVect(AMREX_D_DECL(helper(0,1), + helper(1,1), + helper(2,1)))}; } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H index 9c69336f81a..0816d141183 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H @@ -3,6 +3,7 @@ #include #include +#include namespace amrex { @@ -15,18 +16,309 @@ namespace amrex { * (curl E)_z : (0,0,1) */ +/* + Notes for gs4: + + Interior nodes: + + For v = [Ex(i-1,j,k), Ex(i,j,k), Ey(i,j-1,k), Ey(i,j,k), Ez(i,j,k-1), Ez(i,j,k)]^T, + we have A*v = b, where + + A00 = alpha*(dyy+dzz)*2 + beta + A01 = 0 + A02 = -alpha*dxy + A03 = alpha*dxy + A04 = -alpha*dxz + A05 = alpha*dxz + + A10 = 0 + A11 = alpha*(dyy+dzz)*2 + beta + A12 = alpha*dxy + A13 = -alpha*dxy + A14 = alpha*dxz + A15 = -alpha*dxz + + A20 = -alpha*dxy + A21 = alpha*dxy + A22 = alpha*(dxx+dzz)*2 + beta + A23 = 0 + A24 = -alpha*dyz + A25 = alpha*dyz + + A30 = alpha*dxy + A31 = -alpha*dxy + A32 = 0 + A33 = alpha*(dxx+dzz)*2 + beta + A34 = alpha*dyz + A35 = -alpha*dyz + + A40 = -alpha*dxz + A41 = alpha*dxz + A42 = -alpha*dyz + A43 = alpha*dyz + A44 = alpha*(dxx+dyy)*2 + beta + A45 = 0 + + A50 = alpha*dxz + A51 = -alpha*dxz + A52 = alpha*dyz + A53 = -alpha*dyz + A54 = 0 + A55 = alpha*(dxx+dyy)*2 + beta + + b0 = rhsx(i-1,j,k) - (alpha*ccex), where + ccex = - dyy * (ex(i-1,j-1,k ) + + ex(i-1,j+1,k )) + - dzz * (ex(i-1,j ,k+1) + + ex(i-1,j ,k-1)) + + dxy * (ey(i-1,j-1,k ) + - ey(i-1,j ,k )) + + dxz * (ez(i-1,j ,k-1) + - ez(i-1,j ,k )) + b1 = rhsx(i,j,k) - (alpha*ccex), where + ccex = - dyy * ( ex(i ,j-1,k ) + + ex(i ,j+1,k )) + - dzz * ( ex(i ,j ,k+1) + + ex(i ,j ,k-1)) + + dxy * (-ey(i+1,j-1,k ) + + ey(i+1,j ,k )) + + dxz * (-ez(i+1,j ,k-1) + + ez(i+1,j ,k )); + b2 = rhsy(i,j-1,k) - alpha*ccey, where + ccey = - dxx * (ey(i-1,j-1,k ) + + ey(i+1,j-1,k )) + - dzz * (ey(i ,j-1,k-1) + + ey(i ,j-1,k+1)) + + dxy * (ex(i-1,j-1,k ) + - ex(i ,j-1,k )) + + dyz * (ez(i ,j-1,k-1) + - ez(i ,j-1,k )) + b3 = rhsy(i,j,k) - alpha*ccey, where + ccey = - dxx * ( ey(i-1,j ,k ) + + ey(i+1,j ,k )) + - dzz * ( ey(i ,j ,k-1) + + ey(i ,j ,k+1)) + + dxy * (-ex(i-1,j+1,k ) + + ex(i ,j+1,k )) + + dyz * (-ez(i ,j+1,k-1) + + ez(i ,j+1,k )); + b4 = rhsz(i,j,k-1) - alpha*ccez, where + ccez = - dxx * (ez(i-1,j ,k-1) + + ez(i+1,j ,k-1)) + - dyy * (ez(i ,j-1,k-1) + + ez(i ,j+1,k-1)) + + dxz * (ex(i-1,j ,k-1) + - ex(i ,j ,k-1)) + + dyz * (ey(i ,j-1,k-1) + - ey(i ,j ,k-1)) + b5 = rhsz(i,j,k) - alpha*ccez, where + ccez = - dxx * ( ez(i-1,j ,k ) + + ez(i+1,j ,k )) + - dyy * ( ez(i ,j-1,k ) + + ez(i ,j+1,k )) + + dxz * (-ex(i-1,j ,k+1) + + ex(i ,j ,k+1)) + + dyz * (-ey(i ,j-1,k+1) + + ey(i ,j ,k+1)); + + dxx = 1/(dx*dx) + dyy = 1/(dy*dy) + dzz = 1/(dz*dz) + dxy = 1/(dx*dy) + dxz = 1/(dx*dz) + dyz = 1/(dy*dz) + + For Dirichlet boundary nodes, we don't do anything. + + For symmetric boundary nodes, we treat it as interior nodes because the + rhs outside the domain has been filled properly. + + In 2D, + + For v = [Ex(i-1,j,k), Ex(i,j,k), Ey(i,j-1,k), Ey(i,j,k)]^T, + we have A*v = b, where + + A00 = alpha*dyy*2 + beta + A01 = 0 + A02 = -alpha*dxy + A03 = alpha*dxy + + A10 = 0 + A11 = alpha*dyy*2 + beta + A12 = alpha*dxy + A13 = -alpha*dxy + + A20 = -alpha*dxy + A21 = alpha*dxy + A22 = alpha*dxx*2 + beta + A23 = 0 + + A30 = alpha*dxy + A31 = -alpha*dxy + A32 = 0 + A33 = alpha*dxx*2 + beta + + b0 = rhsx(i-1,j,k) - (alpha*ccex), where + ccex = - dyy * (ex(i-1,j-1,k ) + + ex(i-1,j+1,k )) + + dxy * (ey(i-1,j-1,k ) + - ey(i-1,j ,k )) + b1 = rhsx(i,j,k) - (alpha*ccex), where + ccex = - dyy * ( ex(i ,j-1,k ) + + ex(i ,j+1,k )) + + dxy * (-ey(i+1,j-1,k ) + + ey(i+1,j ,k )) + b2 = rhsy(i,j-1,k) - alpha*ccey, where + ccey = - dxx * (ey(i-1,j-1,k ) + + ey(i+1,j-1,k )) + + dxy * (ex(i-1,j-1,k ) + - ex(i ,j-1,k )) + b3 = rhsy(i,j,k) - alpha*ccey, where + ccey = - dxx * ( ey(i-1,j ,k ) + + ey(i+1,j ,k )) + + dxy * (-ex(i-1,j+1,k ) + + ex(i ,j+1,k )) +*/ + +struct CurlCurlDirichletInfo +{ + IntVect dirichlet_lo; + IntVect dirichlet_hi; + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool is_dirichlet_node (int i, int j, int k) const + { +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(k); + return (i == dirichlet_lo[0]) || (i == dirichlet_hi[0]) + || (j == dirichlet_lo[1]) || (j == dirichlet_hi[1]); +#else + return (i == dirichlet_lo[0]) || (i == dirichlet_hi[0]) + || (j == dirichlet_lo[1]) || (j == dirichlet_hi[1]) + || (k == dirichlet_lo[2]) || (k == dirichlet_hi[2]); +#endif + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool is_dirichlet_x_edge (int, int j, int k) const + { +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(k); + return (j == dirichlet_lo[1]) || (j == dirichlet_hi[1]); +#else + return (j == dirichlet_lo[1]) || (j == dirichlet_hi[1]) + || (k == dirichlet_lo[2]) || (k == dirichlet_hi[2]); +#endif + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool is_dirichlet_y_edge (int i, int, int k) const + { +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(k); + return (i == dirichlet_lo[0]) || (i == dirichlet_hi[0]); +#else + return (i == dirichlet_lo[0]) || (i == dirichlet_hi[0]) + || (k == dirichlet_lo[2]) || (k == dirichlet_hi[2]); +#endif + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool is_dirichlet_z_edge (int i, int j, int) const + { + return (i == dirichlet_lo[0]) || (i == dirichlet_hi[0]) + || (j == dirichlet_lo[1]) || (j == dirichlet_hi[1]); + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool is_dirichlet_edge (int dim, int i, int j, int k) const + { + if (dim == 0) { + return is_dirichlet_x_edge(i,j,k); + } else if (dim == 1) { + return is_dirichlet_y_edge(i,j,k); + } else { + return is_dirichlet_z_edge(i,j,k); + } + } +}; + +struct CurlCurlSymmetryInfo +{ + IntVect symmetry_lo; + IntVect symmetry_hi; + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool xlo_is_symmetric (int i) const + { + return i == symmetry_lo[0]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool xhi_is_symmetric (int i) const + { + return i == symmetry_hi[0]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool ylo_is_symmetric (int j) const + { + return j == symmetry_lo[1]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool yhi_is_symmetric (int j) const + { + return j == symmetry_hi[1]; + } + +#if (AMREX_SPACEDIM == 3) + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool zlo_is_symmetric (int k) const + { + return k == symmetry_lo[2]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool zhi_is_symmetric (int k) const + { + return k == symmetry_hi[2]; + } +#endif + + [[nodiscard]] bool is_symmetric (int dir, int side, int idx) const + { +#if (AMREX_SPACEDIM == 2) + if (dir == 0) { + return (side == 0) ? xlo_is_symmetric(idx) : xhi_is_symmetric(idx); + } else { + return (side == 0) ? ylo_is_symmetric(idx) : yhi_is_symmetric(idx); + } +#else + if (dir == 0) { + return (side == 0) ? xlo_is_symmetric(idx) : xhi_is_symmetric(idx); + } else if (dir == 1) { + return (side == 0) ? ylo_is_symmetric(idx) : yhi_is_symmetric(idx); + } else { + return (side == 0) ? zlo_is_symmetric(idx) : zhi_is_symmetric(idx); + } +#endif + } +}; + +enum struct CurlCurlStateType { x, b, r }; // x, b & r=b-Ax + AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlcurlcurl_adotx_x (int i, int j, int k, Array4 const& Ax, Array4 const& ex, Array4 const& ey, Array4 const& ez, - Real alpha, Real beta, - GpuArray const& dxinv) + Real beta, GpuArray const& adxinv) { #if (AMREX_SPACEDIM == 2) amrex::ignore_unused(ez); - Real dyy = dxinv[1] * dxinv[1]; - Real dxy = dxinv[0] * dxinv[1]; + Real dyy = adxinv[1] * adxinv[1]; + Real dxy = adxinv[0] * adxinv[1]; Real ccex = ex(i ,j ,k ) * dyy * Real(2.0) - dyy * (ex(i ,j-1,k ) + ex(i ,j+1,k )) @@ -35,10 +327,10 @@ void mlcurlcurl_adotx_x (int i, int j, int k, Array4 const& Ax, - ey(i+1,j-1,k ) + ey(i+1,j ,k )); #else - Real dyy = dxinv[1] * dxinv[1]; - Real dzz = dxinv[2] * dxinv[2]; - Real dxy = dxinv[0] * dxinv[1]; - Real dxz = dxinv[0] * dxinv[2]; + Real dyy = adxinv[1] * adxinv[1]; + Real dzz = adxinv[2] * adxinv[2]; + Real dxy = adxinv[0] * adxinv[1]; + Real dxz = adxinv[0] * adxinv[2]; Real ccex = ex(i ,j ,k ) * (dyy+dzz)*Real(2.0) - dyy * (ex(i ,j-1,k ) + ex(i ,j+1,k )) @@ -53,7 +345,7 @@ void mlcurlcurl_adotx_x (int i, int j, int k, Array4 const& Ax, - ez(i+1,j ,k-1) + ez(i+1,j ,k )); #endif - Ax(i,j,k) = alpha * ccex + beta * ex(i,j,k); + Ax(i,j,k) = ccex + beta * ex(i,j,k); } AMREX_GPU_DEVICE AMREX_FORCE_INLINE @@ -61,13 +353,12 @@ void mlcurlcurl_adotx_y (int i, int j, int k, Array4 const& Ay, Array4 const& ex, Array4 const& ey, Array4 const& ez, - Real alpha, Real beta, - GpuArray const& dxinv) + Real beta, GpuArray const& adxinv) { #if (AMREX_SPACEDIM == 2) amrex::ignore_unused(ez); - Real dxx = dxinv[0] * dxinv[0]; - Real dxy = dxinv[0] * dxinv[1]; + Real dxx = adxinv[0] * adxinv[0]; + Real dxy = adxinv[0] * adxinv[1]; Real ccey = ey(i ,j ,k ) * dxx * Real(2.0) - dxx * (ey(i-1,j ,k ) + ey(i+1,j ,k )) @@ -76,10 +367,10 @@ void mlcurlcurl_adotx_y (int i, int j, int k, Array4 const& Ay, - ex(i-1,j+1,k ) + ex(i ,j+1,k )); #else - Real dxx = dxinv[0] * dxinv[0]; - Real dzz = dxinv[2] * dxinv[2]; - Real dxy = dxinv[0] * dxinv[1]; - Real dyz = dxinv[1] * dxinv[2]; + Real dxx = adxinv[0] * adxinv[0]; + Real dzz = adxinv[2] * adxinv[2]; + Real dxy = adxinv[0] * adxinv[1]; + Real dyz = adxinv[1] * adxinv[2]; Real ccey = ey(i ,j ,k ) * (dxx+dzz)*Real(2.0) - dxx * (ey(i-1,j ,k ) + ey(i+1,j ,k )) @@ -94,7 +385,7 @@ void mlcurlcurl_adotx_y (int i, int j, int k, Array4 const& Ay, - ez(i ,j+1,k-1) + ez(i ,j+1,k )); #endif - Ay(i,j,k) = alpha * ccey + beta * ey(i,j,k); + Ay(i,j,k) = ccey + beta * ey(i,j,k); } AMREX_GPU_DEVICE AMREX_FORCE_INLINE @@ -102,23 +393,22 @@ void mlcurlcurl_adotx_z (int i, int j, int k, Array4 const& Az, Array4 const& ex, Array4 const& ey, Array4 const& ez, - Real alpha, Real beta, - GpuArray const& dxinv) + Real beta, GpuArray const& adxinv) { #if (AMREX_SPACEDIM == 2) amrex::ignore_unused(ex,ey); - Real dxx = dxinv[0] * dxinv[0]; - Real dyy = dxinv[1] * dxinv[1]; + Real dxx = adxinv[0] * adxinv[0]; + Real dyy = adxinv[1] * adxinv[1]; Real ccez = ez(i ,j ,k ) * (dxx+dyy)*Real(2.0) - dxx * (ez(i-1,j ,k ) + ez(i+1,j ,k )) - dyy * (ez(i ,j-1,k ) + ez(i ,j+1,k )); #else - Real dxx = dxinv[0] * dxinv[0]; - Real dyy = dxinv[1] * dxinv[1]; - Real dxz = dxinv[0] * dxinv[2]; - Real dyz = dxinv[1] * dxinv[2]; + Real dxx = adxinv[0] * adxinv[0]; + Real dyy = adxinv[1] * adxinv[1]; + Real dxz = adxinv[0] * adxinv[2]; + Real dyz = adxinv[1] * adxinv[2]; Real ccez = ez(i ,j ,k ) * (dxx+dyy)*Real(2.0) - dxx * (ez(i-1,j ,k ) + ez(i+1,j ,k )) @@ -133,152 +423,179 @@ void mlcurlcurl_adotx_z (int i, int j, int k, Array4 const& Az, - ey(i ,j-1,k+1) + ey(i ,j ,k+1)); #endif - Az(i,j,k) = alpha * ccez + beta * ez(i,j,k); + Az(i,j,k) = ccez + beta * ez(i,j,k); } AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void mlcurlcurl_gsrb_x (int i, int j, int k, - Array4 const& ex, - Array4 const& ey, - Array4 const& ez, - Array4 const& rhs, - Real alpha, Real beta, - GpuArray const& dxinv, int redblack) +void mlcurlcurl_gs4 (int i, int j, int k, + Array4 const& ex, + Array4 const& ey, + Array4 const& ez, + Array4 const& rhsx, + Array4 const& rhsy, + Array4 const& rhsz, +#if (AMREX_SPACEDIM == 2) + Real beta, +#endif + GpuArray const& adxinv, + int color, LUSolver const& lusolver, + CurlCurlDirichletInfo const& dinfo, + CurlCurlSymmetryInfo const& sinfo) { - constexpr Real omega = Real(1.15); + if (dinfo.is_dirichlet_node(i,j,k)) { return; } - if ((i+j+k+redblack) % 2 != 0) { return; } + int my_color = i%2 + 2*(j%2); + if (k%2 != 0) { + my_color = 3 - my_color; + } #if (AMREX_SPACEDIM == 2) - amrex::ignore_unused(ez); - Real dyy = dxinv[1] * dxinv[1]; - Real dxy = dxinv[0] * dxinv[1]; - Real gamma = alpha * (dyy)*Real(2.0) + beta; - Real ccex = - - dyy * (ex(i ,j-1,k ) + - ex(i ,j+1,k )) - + dxy * (ey(i ,j-1,k ) - - ey(i ,j ,k ) - - ey(i+1,j-1,k ) - + ey(i+1,j ,k )); -#else - Real dyy = dxinv[1] * dxinv[1]; - Real dzz = dxinv[2] * dxinv[2]; - Real dxy = dxinv[0] * dxinv[1]; - Real dxz = dxinv[0] * dxinv[2]; - Real gamma = alpha * (dyy+dzz)*Real(2.0) + beta; - Real ccex = - - dyy * (ex(i ,j-1,k ) + - ex(i ,j+1,k )) - - dzz * (ex(i ,j ,k+1) + - ex(i ,j ,k-1)) - + dxy * (ey(i ,j-1,k ) - - ey(i ,j ,k ) - - ey(i+1,j-1,k ) - + ey(i+1,j ,k )) - + dxz * (ez(i ,j ,k-1) - - ez(i ,j ,k ) - - ez(i+1,j ,k-1) - + ez(i+1,j ,k )); -#endif - Real res = rhs(i,j,k) - (gamma*ex(i,j,k) + alpha*ccex); - ex(i,j,k) += omega/gamma * res; -} -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void mlcurlcurl_gsrb_y (int i, int j, int k, - Array4 const& ex, - Array4 const& ey, - Array4 const& ez, - Array4 const& rhs, - Real alpha, Real beta, - GpuArray const& dxinv, int redblack) -{ - constexpr Real omega = Real(1.15); + Real dxx = adxinv[0] * adxinv[0]; + Real dyy = adxinv[1] * adxinv[1]; - if ((i+j+k+redblack) % 2 != 0) { return; } + if (((my_color == 0 || my_color == 3) && (color == 0 || color == 3)) || + ((my_color == 1 || my_color == 2) && (color == 1 || color == 2))) + { + Real gamma = (dxx+dyy)*Real(2.0) + beta; + Real ccez = - dxx * (ez(i-1,j ,k ) + + ez(i+1,j ,k )) + - dyy * (ez(i ,j-1,k ) + + ez(i ,j+1,k )); + Real res = rhsz(i,j,k) - (gamma*ez(i,j,k) + ccez); + constexpr Real omega = Real(1.15); + ez(i,j,k) += omega/gamma * res; + } -#if (AMREX_SPACEDIM == 2) - amrex::ignore_unused(ez); - Real dxx = dxinv[0] * dxinv[0]; - Real dxy = dxinv[0] * dxinv[1]; - Real gamma = alpha * (dxx)*Real(2.0) + beta; - Real ccey = - - dxx * (ey(i-1,j ,k ) + - ey(i+1,j ,k )) - + dxy * (ex(i-1,j ,k ) - - ex(i ,j ,k ) - - ex(i-1,j+1,k ) - + ex(i ,j+1,k )); -#else - Real dxx = dxinv[0] * dxinv[0]; - Real dzz = dxinv[2] * dxinv[2]; - Real dxy = dxinv[0] * dxinv[1]; - Real dyz = dxinv[1] * dxinv[2]; - Real gamma = alpha * (dxx+dzz)*Real(2.0) + beta; - Real ccey = - - dxx * (ey(i-1,j ,k ) + - ey(i+1,j ,k )) - - dzz * (ey(i ,j ,k-1) + - ey(i ,j ,k+1)) - + dxy * (ex(i-1,j ,k ) - - ex(i ,j ,k ) - - ex(i-1,j+1,k ) - + ex(i ,j+1,k )) - + dyz * (ez(i ,j ,k-1) - - ez(i ,j ,k ) - - ez(i ,j+1,k-1) - + ez(i ,j+1,k )); -#endif - Real res = rhs(i,j,k) - (gamma*ey(i,j,k) + alpha*ccey); - ey(i,j,k) += omega/gamma * res; -} + if (my_color != color) { return; } -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void mlcurlcurl_gsrb_z (int i, int j, int k, - Array4 const& ex, - Array4 const& ey, - Array4 const& ez, - Array4 const& rhs, - Real alpha, Real beta, - GpuArray const& dxinv, int redblack) -{ - constexpr Real omega = Real(1.15); + Real dxy = adxinv[0]*adxinv[1]; - if ((i+j+k+redblack) % 2 != 0) { return; } + GpuArray b + {rhsx(i-1,j,k) - (-dyy * ( ex(i-1,j-1,k ) + + ex(i-1,j+1,k )) + + dxy * ( ey(i-1,j-1,k ) + -ey(i-1,j ,k ))), + rhsx(i ,j,k) - (-dyy * ( ex(i ,j-1,k ) + + ex(i ,j+1,k )) + + dxy * (-ey(i+1,j-1,k ) + +ey(i+1,j ,k ))), + rhsy(i,j-1,k) - (-dxx * ( ey(i-1,j-1,k ) + + ey(i+1,j-1,k )) + + dxy * ( ex(i-1,j-1,k ) + -ex(i ,j-1,k ))), + rhsy(i,j ,k) - (-dxx * ( ey(i-1,j ,k ) + + ey(i+1,j ,k )) + + dxy * (-ex(i-1,j+1,k ) + +ex(i ,j+1,k )))}; + + if (sinfo.xlo_is_symmetric(i)) { + b[0] = -b[1]; + } else if (sinfo.xhi_is_symmetric(i)) { + b[1] = -b[0]; + } + + if (sinfo.ylo_is_symmetric(j)) { + b[2] = -b[3]; + } else if (sinfo.yhi_is_symmetric(j)) { + b[3] = -b[2]; + } + + GpuArray x; + lusolver(x.data(), b.data()); + ex(i-1,j ,k ) = x[0]; + ex(i ,j ,k ) = x[1]; + ey(i ,j-1,k ) = x[2]; + ey(i ,j ,k ) = x[3]; -#if (AMREX_SPACEDIM == 2) - amrex::ignore_unused(ex,ey); - Real dxx = dxinv[0] * dxinv[0]; - Real dyy = dxinv[1] * dxinv[1]; - Real gamma = alpha * (dxx+dyy)*Real(2.0) + beta; - Real ccez = - - dxx * (ez(i-1,j ,k ) + - ez(i+1,j ,k )) - - dyy * (ez(i ,j-1,k ) + - ez(i ,j+1,k )); #else - Real dxx = dxinv[0] * dxinv[0]; - Real dyy = dxinv[1] * dxinv[1]; - Real dxz = dxinv[0] * dxinv[2]; - Real dyz = dxinv[1] * dxinv[2]; - Real gamma = alpha * (dxx+dyy)*Real(2.0) + beta; - Real ccez = - - dxx * (ez(i-1,j ,k ) + - ez(i+1,j ,k )) - - dyy * (ez(i ,j-1,k ) + - ez(i ,j+1,k )) - + dxz * (ex(i-1,j ,k ) - - ex(i ,j ,k ) - - ex(i-1,j ,k+1) - + ex(i ,j ,k+1)) - + dyz * (ey(i ,j-1,k ) - - ey(i ,j ,k ) - - ey(i ,j-1,k+1) - + ey(i ,j ,k+1)); + + if (my_color != color) { return; } + + Real dxx = adxinv[0]*adxinv[0]; + Real dyy = adxinv[1]*adxinv[1]; + Real dzz = adxinv[2]*adxinv[2]; + Real dxy = adxinv[0]*adxinv[1]; + Real dxz = adxinv[0]*adxinv[2]; + Real dyz = adxinv[1]*adxinv[2]; + + GpuArray b + {rhsx(i-1,j,k) - (-dyy * ( ex(i-1,j-1,k ) + + ex(i-1,j+1,k )) + - dzz * ( ex(i-1,j ,k+1) + + ex(i-1,j ,k-1)) + + dxy * ( ey(i-1,j-1,k ) + -ey(i-1,j ,k )) + + dxz * ( ez(i-1,j ,k-1) + -ez(i-1,j ,k ))), + rhsx(i ,j,k) - (-dyy * ( ex(i ,j-1,k ) + + ex(i ,j+1,k )) + - dzz * ( ex(i ,j ,k+1) + + ex(i ,j ,k-1)) + + dxy * (-ey(i+1,j-1,k ) + +ey(i+1,j ,k )) + + dxz * (-ez(i+1,j ,k-1) + +ez(i+1,j ,k ))), + rhsy(i,j-1,k) - (-dxx * ( ey(i-1,j-1,k ) + + ey(i+1,j-1,k )) + - dzz * ( ey(i ,j-1,k-1) + + ey(i ,j-1,k+1)) + + dxy * ( ex(i-1,j-1,k ) + -ex(i ,j-1,k )) + + dyz * ( ez(i ,j-1,k-1) + -ez(i ,j-1,k ))), + rhsy(i,j ,k) - (-dxx * ( ey(i-1,j ,k ) + + ey(i+1,j ,k )) + - dzz * ( ey(i ,j ,k-1) + + ey(i ,j ,k+1)) + + dxy * (-ex(i-1,j+1,k ) + +ex(i ,j+1,k )) + + dyz * (-ez(i ,j+1,k-1) + +ez(i ,j+1,k ))), + rhsz(i,j,k-1) - (-dxx * ( ez(i-1,j ,k-1) + + ez(i+1,j ,k-1)) + - dyy * ( ez(i ,j-1,k-1) + + ez(i ,j+1,k-1)) + + dxz * ( ex(i-1,j ,k-1) + -ex(i ,j ,k-1)) + + dyz * ( ey(i ,j-1,k-1) + -ey(i ,j ,k-1))), + rhsz(i,j,k ) - (-dxx * ( ez(i-1,j ,k ) + + ez(i+1,j ,k )) + - dyy * ( ez(i ,j-1,k ) + + ez(i ,j+1,k )) + + dxz * (-ex(i-1,j ,k+1) + +ex(i ,j ,k+1)) + + dyz * (-ey(i ,j-1,k+1) + +ey(i ,j ,k+1)))}; + + if (sinfo.xlo_is_symmetric(i)) { + b[0] = -b[1]; + } else if (sinfo.xhi_is_symmetric(i)) { + b[1] = -b[0]; + } + + if (sinfo.ylo_is_symmetric(j)) { + b[2] = -b[3]; + } else if (sinfo.yhi_is_symmetric(j)) { + b[3] = -b[2]; + } + + if (sinfo.zlo_is_symmetric(k)) { + b[4] = -b[5]; + } else if (sinfo.zhi_is_symmetric(k)) { + b[5] = -b[4]; + } + + GpuArray x; + lusolver(x.data(), b.data()); + ex(i-1,j ,k ) = x[0]; + ex(i ,j ,k ) = x[1]; + ey(i ,j-1,k ) = x[2]; + ey(i ,j ,k ) = x[3]; + ez(i ,j ,k-1) = x[4]; + ez(i ,j ,k ) = x[5]; #endif - Real res = rhs(i,j,k) - (gamma*ez(i,j,k) + alpha*ccez); - ez(i,j,k) += omega/gamma * res; } AMREX_GPU_DEVICE AMREX_FORCE_INLINE @@ -338,12 +655,12 @@ AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlcurlcurl_restriction (int dir, int i, int j, int k, Array4 const& crse, Array4 const& fine, - Array4 const& dmsk) + CurlCurlDirichletInfo const& dinfo) { int ii = i*2; int jj = j*2; int kk = k*2; - if (dmsk(ii,jj,kk)) { + if (dinfo.is_dirichlet_edge(dir,ii,jj,kk)) { crse(i,j,k) = Real(0.0); } else diff --git a/Tests/LinearSolvers/CurlCurl/MyTest.H b/Tests/LinearSolvers/CurlCurl/MyTest.H index 0a4a8d723eb..aac7f506c62 100644 --- a/Tests/LinearSolvers/CurlCurl/MyTest.H +++ b/Tests/LinearSolvers/CurlCurl/MyTest.H @@ -38,7 +38,7 @@ private: amrex::Array exact; amrex::Array rhs; - amrex::Real alpha_over_dx2 = 1.0; + amrex::Real alpha_over_dx2 = 100.0; amrex::Real alpha; amrex::Real beta = 1.0; }; diff --git a/Tests/LinearSolvers/CurlCurl/MyTest.cpp b/Tests/LinearSolvers/CurlCurl/MyTest.cpp index fa72fcbca25..6a5bc5a1250 100644 --- a/Tests/LinearSolvers/CurlCurl/MyTest.cpp +++ b/Tests/LinearSolvers/CurlCurl/MyTest.cpp @@ -39,6 +39,9 @@ MyTest::solve () for (auto& mf : solution) { mf.setVal(Real(0)); } + for (auto& mf : rhs) { + mf.OverrideSync(geom.periodicity()); + } mlmg.solve({&solution}, {&rhs}, Real(1.0e-10), Real(0)); amrex::Print() << " Number of cells: " << n_cell << std::endl; diff --git a/Tests/LinearSolvers/CurlCurl/inputs b/Tests/LinearSolvers/CurlCurl/inputs index 375562867ac..2ba1eb4ff25 100644 --- a/Tests/LinearSolvers/CurlCurl/inputs +++ b/Tests/LinearSolvers/CurlCurl/inputs @@ -5,7 +5,7 @@ max_grid_size = 64 verbose = 2 bottom_verbose = 2 -alpha_over_dx2 = 1.0 +alpha_over_dx2 = 100.0 beta = 1.0 amrex.fpe_trap_invalid=1