From 77b5b6d09b2fde78ca01e6552fab80733904bd7f Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 21 Aug 2024 17:17:12 -0700 Subject: [PATCH] WarpX Poisson solver: Option to use staircases approximation Add an option to MLEBNodeFDLaplacian (i.e., WarpX Poisson solver) to allow for using staircases approximation for EB. --- .../MLMG/AMReX_MLEBNodeFDLap_2D_K.H | 266 ++++++++++++++++++ .../MLMG/AMReX_MLEBNodeFDLap_3D_K.H | 139 +++++++++ .../MLMG/AMReX_MLEBNodeFDLap_K.H | 129 +++++++++ .../MLMG/AMReX_MLEBNodeFDLaplacian.H | 7 +- .../MLMG/AMReX_MLEBNodeFDLaplacian.cpp | 209 ++++++++++---- 5 files changed, 689 insertions(+), 61 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H index 57bf89bba29..a61e18c7d99 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H @@ -95,6 +95,73 @@ void mlebndfdlap_adotx_eb (int i, int j, int k, Array4 const& y, bx, by); } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_adotx_eb_sc_doit (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, + F const& xeb, Real bx, Real by) noexcept +{ + if (dmsk(i,j,k)) { + y(i,j,k) = Real(0.0); + } else { + Real tmp, out; + + if (levset(i+1,j,k) < Real(0.0)) { + tmp = x(i+1,j,k) - x(i,j,k); + } else { + tmp = (xeb(i+1,j,k) - x(i,j,k)); + } + + if (levset(i-1,j,k) < Real(0.0)) { + tmp += x(i-1,j,k) - x(i,j,k); + } else { + tmp += (xeb(i-1,j,k) - x(i,j,k)); + } + + out = tmp * bx; + + if (levset(i,j+1,k) < Real(0.0)) { + tmp = x(i,j+1,k) - x(i,j,k); + } else { + tmp = (xeb(i,j+1,k) - x(i,j,k)); + } + + if (levset(i,j-1,k) < Real(0.0)) { + tmp += x(i,j-1,k) - x(i,j,k); + } else { + tmp += (xeb(i,j-1,k) - x(i,j,k)); + } + + out += tmp * by; + + y(i,j,k) = out; + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_adotx_eb_sc (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, + Real xeb, Real bx, Real by) noexcept +{ + mlebndfdlap_adotx_eb_sc_doit(i, j, k, y, x, levset, dmsk, + [=] (int, int, int) -> Real { return xeb; }, + bx, by); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_adotx_eb_sc (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, + Array4 const& xeb, Real bx, Real by) noexcept +{ + mlebndfdlap_adotx_eb_sc_doit(i, j, k, y, x, levset, dmsk, + [=] (int i1, int i2, int i3) -> Real { + return xeb(i1,i2,i3); }, + bx, by); +} + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, @@ -172,6 +239,51 @@ void mlebndfdlap_gsrb_eb (int i, int j, int k, Array4 const& x, } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_gsrb_eb_sc (int i, int j, int k, Array4 const& x, + Array4 const& rhs, Array4 const& levset, + Array4 const& dmsk, + Real bx, Real by, int redblack) noexcept +{ + if ((i+j+k+redblack)%2 == 0) { + if (dmsk(i,j,k)) { + x(i,j,k) = Real(0.); + } else { + Real tmp1; + + if (levset(i+1,j,k) < Real(0.0)) { // regular + tmp1 = x(i+1,j,k); + } else { + tmp1 = Real(0.0); + } + + if (levset(i-1,j,k) < Real(0.0)) { + tmp1 += x(i-1,j,k); + } + + Real gamma = Real(-2.0) * bx; + Real rho = tmp1 * bx; + + if (levset(i,j+1,k) < Real(0.0)) { + tmp1 = x(i,j+1,k); + } else { + tmp1 = Real(0.0); + } + + if (levset(i,j-1,k) < Real(0.0)) { + tmp1 += x(i,j-1,k); + } + + gamma += Real(-2.0) * by; + rho += tmp1 * by; + + Real Ax = rho + gamma*x(i,j,k); + constexpr Real omega = Real(1.25); + x(i,j,k) += (rhs(i,j,k) - Ax) * (omega / (gamma)); + } + } +} + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_gsrb (int i, int j, int k, Array4 const& x, Array4 const& rhs, Array4 const& dmsk, @@ -288,6 +400,88 @@ void mlebndfdlap_adotx_rz_eb (int i, int j, int k, Array4 const& y, sigr, dr, dz, rlo, alpha); } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_adotx_rz_eb_sc_doit (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, + F const& xeb, Real sigr, Real dr, Real dz, Real rlo, Real alpha) noexcept +{ + Real const r = rlo + Real(i) * dr; + if (dmsk(i,j,k) || (r == Real(0.0) && alpha != Real(0.0))) { + y(i,j,k) = Real(0.0); + } else { + Real tmp, out; + + if (r == Real(0.0)) { + if (levset(i+1,j,k) < Real(0.0)) { // regular + out = Real(4.0) * sigr * (x(i+1,j,k)-x(i,j,k)) / (dr*dr); + } else { + out = Real(4.0) * sigr * (xeb(i+1,j,k)-x(i,j,k)) / (dr*dr); + } + } else { + if (levset(i+1,j,k) < Real(0.0)) { // regular + tmp = (x(i+1,j,k) - x(i,j,k)) * (r + Real(0.5) * dr); + } else { + tmp = (xeb(i+1,j,k) - x(i,j,k)) * (r + Real(0.5) * dr); + } + + if (levset(i-1,j,k) < Real(0.0)) { + tmp += (x(i-1,j,k) - x(i,j,k)) * (r - Real(0.5) * dr); + } else { + tmp += (xeb(i-1,j,k) - x(i,j,k)) * (r - Real(0.5) * dr); + } + + out = tmp * sigr / (r * dr * dr); + } + + if (levset(i,j+1,k) < Real(0.0)) { + tmp = x(i,j+1,k) - x(i,j,k); + } else { + tmp = (xeb(i,j+1,k) - x(i,j,k)); + } + + + if (levset(i,j-1,k) < Real(0.0)) { + tmp += x(i,j-1,k) - x(i,j,k); + } else { + tmp += (xeb(i,j-1,k) - x(i,j,k)); + } + + out += tmp / (dz * dz); + + if (r != Real(0.0)) { + out -= alpha/(r*r) * x(i,j,k); + } + + y(i,j,k) = out; + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_adotx_rz_eb_sc (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, + Real xeb, Real sigr, Real dr, Real dz, Real rlo, Real alpha) noexcept +{ + mlebndfdlap_adotx_rz_eb_sc_doit(i, j, k, y, x, levset, dmsk, + [=] (int, int, int) -> Real { return xeb; }, + sigr, dr, dz, rlo, alpha); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_adotx_rz_eb_sc (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, + Array4 const& xeb, Real sigr, Real dr, Real dz, Real rlo, + Real alpha) noexcept +{ + mlebndfdlap_adotx_rz_eb_sc_doit(i, j, k, y, x, levset, dmsk, + [=] (int i1, int i2, int i3) -> Real { + return xeb(i1,i2,i3); }, + sigr, dr, dz, rlo, alpha); +} + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, @@ -393,6 +587,78 @@ void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_gsrb_rz_eb_sc (int i, int j, int k, Array4 const& x, + Array4 const& rhs, Array4 const& levset, + Array4 const& dmsk, + Real sigr, Real dr, Real dz, Real rlo, int redblack, Real alpha) noexcept +{ + if ((i+j+k+redblack)%2 == 0) { + Real const r = rlo + Real(i) * dr; + if (dmsk(i,j,k) || (r == Real(0.0) && alpha != Real(0.0))) { + x(i,j,k) = Real(0.); + } else { + Real tmp, tmp0, Ax, gamma; + + if (r == Real(0.0)) { + if (levset(i+1,j,k) < Real(0.0)) { // regular + Ax = (Real(4.0) * sigr / (dr*dr)) * (x(i+1,j,k)-x(i,j,k)); + gamma = -(Real(4.0) * sigr / (dr*dr)); + } else { + gamma = -(Real(4.0) * sigr / (dr*dr)); + Ax = gamma * x(i,j,k); + } + } else { + if (levset(i+1,j,k) < Real(0.0)) { // regular + tmp = (x(i+1,j,k) - x(i,j,k)) * (r + Real(0.5) * dr); + tmp0 = -(r + Real(0.5) * dr); + } else { + tmp0 = Real(-1.0) * (r + Real(0.5) * dr); + tmp = tmp0 * x(i,j,k); + } + + if (levset(i-1,j,k) < Real(0.0)) { + tmp += (x(i-1,j,k) - x(i,j,k)) * (r - Real(0.5) * dr); + tmp0 += -(r - Real(0.5) * dr); + } else { + tmp += -x(i,j,k) * (r - Real(0.5) * dr); + tmp0 += Real(-1.0) * (r - Real(0.5) * dr); + } + + Ax = tmp * sigr / (r * dr * dr); + gamma = tmp0 * sigr / (r * dr * dr); + } + + if (levset(i,j+1,k) < Real(0.0)) { + tmp = x(i,j+1,k) - x(i,j,k); + tmp0 = Real(-1.0); + } else { + tmp0 = Real(-1.0); + tmp = tmp0 * x(i,j,k); + } + + if (levset(i,j-1,k) < Real(0.0)) { + tmp += x(i,j-1,k) - x(i,j,k); + tmp0 += Real(-1.0); + } else { + tmp += -x(i,j,k); + tmp0 += Real(-1.0); + } + + Ax += tmp / (dz * dz); + gamma += tmp0 / (dz * dz); + + if (r != Real(0.0)) { + Ax -= alpha/(r*r) * x(i,j,k); + gamma -= alpha/(r*r); + } + + constexpr Real omega = Real(1.25); + x(i,j,k) += (rhs(i,j,k) - Ax) * (omega / (gamma)); + } + } +} + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_gsrb_rz (int i, int j, int k, Array4 const& x, Array4 const& rhs, Array4 const& dmsk, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H index 9b7fc0fc2bd..80bd3989b1a 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H @@ -117,6 +117,87 @@ void mlebndfdlap_adotx_eb (int i, int j, int k, Array4 const& y, bx, by, bz); } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_adotx_eb_sc_doit (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, F const& xeb, + Real bx, Real by, Real bz) noexcept +{ + if (dmsk(i,j,k)) { + y(i,j,k) = Real(0.0); + } else { + Real tmp, out; + + if (levset(i+1,j,k) < Real(0.0)) { // regular + tmp = x(i+1,j,k) - x(i,j,k); + } else { + tmp = (xeb(i+1,j,k) - x(i,j,k)); + } + + if (levset(i-1,j,k) < Real(0.0)) { + tmp += x(i-1,j,k) - x(i,j,k); + } else { + tmp += (xeb(i-1,j,k) - x(i,j,k)); + } + + out = tmp * bx; + + if (levset(i,j+1,k) < Real(0.0)) { + tmp = x(i,j+1,k) - x(i,j,k); + } else { + tmp = (xeb(i,j+1,k) - x(i,j,k)); + } + + if (levset(i,j-1,k) < Real(0.0)) { + tmp += x(i,j-1,k) - x(i,j,k); + } else { + tmp += (xeb(i,j-1,k) - x(i,j,k)); + } + + out += tmp * by; + + if (levset(i,j,k+1) < Real(0.0)) { + tmp = x(i,j,k+1) - x(i,j,k); + } else { + tmp = (xeb(i,j,k+1) - x(i,j,k)); + } + + if (levset(i,j,k-1) < Real(0.0)) { + tmp += x(i,j,k-1) - x(i,j,k); + } else { + tmp += (xeb(i,j,k-1) - x(i,j,k)); + } + + out += tmp * bz; + + y(i,j,k) = out; + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_adotx_eb_sc (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, Real xeb, + Real bx, Real by, Real bz) noexcept +{ + mlebndfdlap_adotx_eb_sc_doit(i, j, k, y, x, levset, dmsk, + [=] (int, int, int) -> Real { return xeb; }, + bx, by, bz); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_adotx_eb_sc (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, Array4 const& xeb, + Real bx, Real by, Real bz) noexcept +{ + mlebndfdlap_adotx_eb_sc_doit(i, j, k, y, x, levset, dmsk, + [=] (int i1, int i2, int i3) -> Real { + return xeb(i1,i2,i3); }, + bx, by, bz); +} + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, @@ -216,6 +297,64 @@ void mlebndfdlap_gsrb_eb (int i, int j, int k, Array4 const& x, } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_gsrb_eb_sc (int i, int j, int k, Array4 const& x, + Array4 const& rhs, Array4 const& levset, + Array4 const& dmsk, Real bx, Real by, Real bz, + int redblack) noexcept +{ + if ((i+j+k+redblack)%2 == 0) { + if (dmsk(i,j,k)) { + x(i,j,k) = Real(0.); + } else { + Real tmp1; + + if (levset(i+1,j,k) < Real(0.0)) { // regular + tmp1 = x(i+1,j,k); + } else { + tmp1 = Real(0.0); + } + + if (levset(i-1,j,k) < Real(0.0)) { + tmp1 += x(i-1,j,k); + } + + Real gamma = Real(-2.0) * bx; + Real rho = tmp1 * bx; + + if (levset(i,j+1,k) < Real(0.0)) { + tmp1 = x(i,j+1,k); + } else { + tmp1 = Real(0.0); + } + + if (levset(i,j-1,k) < Real(0.0)) { + tmp1 += x(i,j-1,k); + } + + gamma += Real(-2.0) * by; + rho += tmp1 * by; + + if (levset(i,j,k+1) < Real(0.0)) { + tmp1 = x(i,j,k+1); + } else { + tmp1 = Real(0.0); + } + + if (levset(i,j,k-1) < Real(0.0)) { + tmp1 += x(i,j,k-1); + } + + gamma += Real(-2.0) * bz; + rho += tmp1 * bz; + + Real Ax = rho + gamma*x(i,j,k); + constexpr Real omega = Real(1.25); + x(i,j,k) += (rhs(i,j,k) - Ax) * (omega / (gamma)); + } + } +} + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_gsrb (int i, int j, int k, Array4 const& x, Array4 const& rhs, Array4 const& dmsk, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_K.H index d389af59d18..b0dc23987b8 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_K.H @@ -150,6 +150,135 @@ void mlebndfdlap_grad_z (Box const& b, Array4 const& pz, Array4 +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_grad_x_doit (int i, int j, int k, Array4 const& px, + Array4 const& p, Array4 const& dmsk, + F const& phieb, Real dxi) +{ + if (dmsk(i,j,k) >= 0 && dmsk(i+1,j,k) >= 0) { + px(i,j,k) = dxi * (p(i+1,j,k) - p(i,j,k)); + } else if (dmsk(i,j,k) < 0 && dmsk(i+1,j,k) < 0) { + px(i,j,k) = Real(0.0); + } else if (dmsk(i,j,k) < 0) { + px(i,j,k) = dxi * (p(i+1,j,k) - phieb(i,j,k)); + } else { // + px(i,j,k) = dxi * (phieb(i+1,j,k) - p(i,j,k)); + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_grad_y_doit (int i, int j, int k, Array4 const& py, + Array4 const& p, Array4 const& dmsk, + F const& phieb, Real dyi) +{ + if (dmsk(i,j,k) >= 0 && dmsk(i,j+1,k) >= 0) { + py(i,j,k) = dyi * (p(i,j+1,k) - p(i,j,k)); + } else if (dmsk(i,j,k) < 0 && dmsk(i,j+1,k) < 0) { + py(i,j,k) = Real(0.0); + } else if (dmsk(i,j,k) < 0) { + py(i,j,k) = dyi * (p(i,j+1,k) - phieb(i,j,k)); + } else { // + py(i,j,k) = dyi * (phieb(i,j+1,k) - p(i,j,k)); + } +} + +#if (AMREX_SPACEDIM > 2) +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_grad_z_doit (int i, int j, int k, Array4 const& pz, + Array4 const& p, Array4 const& dmsk, + F const& phieb, Real dzi) +{ + if (dmsk(i,j,k) >= 0 && dmsk(i,j,k+1) >= 0) { + pz(i,j,k) = dzi * (p(i,j,k+1) - p(i,j,k)); + } else if (dmsk(i,j,k) < 0 && dmsk(i,j,k+1) < 0) { + pz(i,j,k) = Real(0.0); + } else if (dmsk(i,j,k) < 0) { + pz(i,j,k) = dzi * (p(i,j,k+1) - phieb(i,j,k)); + } else { // + pz(i,j,k) = dzi * (phieb(i,j,k+1) - p(i,j,k)); + } +} +#endif + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_grad_x (Box const& b, Array4 const& px, Array4 const& p, + Array4 const& dmsk, Real phieb, Real dxi) +{ + AMREX_LOOP_3D(b, i, j, k, + { + mlebndfdlap_grad_x_doit(i,j,k, px, p, dmsk, + [=] (int, int, int) -> Real { return phieb; }, + dxi); + }); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_grad_x (Box const& b, Array4 const& px, Array4 const& p, + Array4 const& dmsk, Array4 const& phieb, Real dxi) +{ + AMREX_LOOP_3D(b, i, j, k, + { + mlebndfdlap_grad_x_doit(i,j,k, px, p, dmsk, + [=] (int i1, int i2, int i3) -> Real { return phieb(i1,i2,i3); }, + dxi); + }); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_grad_y (Box const& b, Array4 const& py, Array4 const& p, + Array4 const& dmsk, Real phieb, Real dyi) +{ + AMREX_LOOP_3D(b, i, j, k, + { + mlebndfdlap_grad_y_doit(i,j,k, py, p, dmsk, + [=] (int, int, int) -> Real { return phieb; }, + dyi); + }); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_grad_y (Box const& b, Array4 const& py, Array4 const& p, + Array4 const& dmsk, Array4 const& phieb, Real dyi) +{ + AMREX_LOOP_3D(b, i, j, k, + { + mlebndfdlap_grad_y_doit(i,j,k, py, p, dmsk, + [=] (int i1, int i2, int i3) -> Real { return phieb(i1,i2,i3); }, + dyi); + }); +} + +#if (AMREX_SPACEDIM > 2) + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_grad_z (Box const& b, Array4 const& pz, Array4 const& p, + Array4 const& dmsk, Real phieb, Real dzi) +{ + AMREX_LOOP_3D(b, i, j, k, + { + mlebndfdlap_grad_z_doit(i,j,k, pz, p, dmsk, + [=] (int, int, int) -> Real { return phieb; }, + dzi); + }); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_grad_z (Box const& b, Array4 const& pz, Array4 const& p, + Array4 const& dmsk, Array4 const& phieb, Real dzi) +{ + AMREX_LOOP_3D(b, i, j, k, + { + mlebndfdlap_grad_z_doit(i,j,k, pz, p, dmsk, + [=] (int i1, int i2, int i3) -> Real { return phieb(i1,i2,i3); }, + dzi); + }); +} + +#endif + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_grad_x (Box const& b, Array4 const& px, Array4 const& p, Real dxi) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H index 6ebbd2c65e7..732c79ef841 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H @@ -36,7 +36,8 @@ public: const Vector& a_grids, const Vector& a_dmap, const LPInfo& a_info, - const Vector& a_factory); + const Vector& a_factory, + bool a_use_staircases = false); #endif MLEBNodeFDLaplacian (const Vector& a_geom, @@ -72,7 +73,8 @@ public: const Vector& a_grids, const Vector& a_dmap, const LPInfo& a_info, - const Vector& a_factory); + const Vector& a_factory, + bool a_use_staircases = false); [[nodiscard]] std::unique_ptr > makeFactory (int amrlev, int mglev) const final; @@ -127,6 +129,7 @@ private: Vector m_phi_eb; int m_rz = false; Real m_rz_alpha = 0._rt; + bool m_use_staircases = false; }; #ifdef AMREX_USE_EB diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp index af4a6a6d742..f2b46108be1 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp @@ -17,9 +17,10 @@ MLEBNodeFDLaplacian::MLEBNodeFDLaplacian ( const Vector& a_grids, const Vector& a_dmap, const LPInfo& a_info, - const Vector& a_factory) + const Vector& a_factory, + bool a_use_staircases) { - define(a_geom, a_grids, a_dmap, a_info, a_factory); + define(a_geom, a_grids, a_dmap, a_info, a_factory, a_use_staircases); } #endif @@ -86,12 +87,15 @@ MLEBNodeFDLaplacian::define (const Vector& a_geom, const Vector& a_grids, const Vector& a_dmap, const LPInfo& a_info, - const Vector& a_factory) + const Vector& a_factory, + bool a_use_staircases) { static_assert(AMREX_SPACEDIM > 1, "MLEBNodeFDLaplacian: 1D not supported"); BL_PROFILE("MLEBNodeFDLaplacian::define()"); + m_use_staircases = a_use_staircases; + // This makes sure grids are cell-centered; Vector cc_grids = a_grids; for (auto& ba : cc_grids) { @@ -366,6 +370,8 @@ MLEBNodeFDLaplacian::prepareForSolve () void MLEBNodeFDLaplacian::scaleRHS (int amrlev, MultiFab& rhs) const { + if (m_use_staircases) { return; } + const auto *factory = dynamic_cast(m_factory[amrlev][0].get()); if (!factory) { return; } @@ -399,6 +405,10 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa { BL_PROFILE("MLEBNodeFDLaplacian::Fapply()"); + if (m_has_sigma_mf) { + AMREX_ALWAYS_ASSERT(m_use_staircases == false); + } + const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); #if (AMREX_SPACEDIM == 2) const auto sig0 = m_sigma[0]; @@ -442,11 +452,19 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa auto const& phiebarr = m_phi_eb[amrlev].const_array(mfi); #if (AMREX_SPACEDIM == 2) if (m_rz) { - AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, - { - mlebndfdlap_adotx_rz_eb(i,j,k,yarr,xarr,levset,dmarr,ecx,ecy, - phiebarr, sig0, dx0, dx1, xlo, alpha); - }); + if (m_use_staircases) { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_adotx_rz_eb_sc(i,j,k,yarr,xarr,levset,dmarr, + phiebarr, sig0, dx0, dx1, xlo, alpha); + }); + } else { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_adotx_rz_eb(i,j,k,yarr,xarr,levset,dmarr,ecx,ecy, + phiebarr, sig0, dx0, dx1, xlo, alpha); + }); + } } else #endif if (m_has_sigma_mf) { @@ -458,23 +476,40 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa sigarr, vfrc, phiebarr, AMREX_D_DECL(bx,by,bz)); }); } else { - AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, - { - mlebndfdlap_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz), - phiebarr, AMREX_D_DECL(bx,by,bz)); - }); + if (m_use_staircases) { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_adotx_eb_sc(i,j,k,yarr,xarr,levset,dmarr, + phiebarr, AMREX_D_DECL(bx,by,bz)); + }); + } else { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz), + phiebarr, AMREX_D_DECL(bx,by,bz)); + }); + } } } else { #if (AMREX_SPACEDIM == 2) if (m_rz) { - AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, - { - mlebndfdlap_adotx_rz_eb(i,j,k,yarr,xarr,levset,dmarr,ecx,ecy, - phieb, sig0, dx0, dx1, xlo, alpha); - }); + if (m_use_staircases) { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_adotx_rz_eb_sc(i,j,k,yarr,xarr,levset,dmarr, + phieb, sig0, dx0, dx1, xlo, alpha); + }); + } else { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_adotx_rz_eb(i,j,k,yarr,xarr,levset,dmarr,ecx,ecy, + phieb, sig0, dx0, dx1, xlo, alpha); + }); + } } else #endif if (m_has_sigma_mf) { + AMREX_ALWAYS_ASSERT(m_use_staircases == false); auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi); auto const& vfrc = factory->getVolFrac().const_array(mfi); AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, @@ -483,11 +518,19 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa sigarr, vfrc, phieb, AMREX_D_DECL(bx,by,bz)); }); } else { - AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, - { - mlebndfdlap_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz), - phieb, AMREX_D_DECL(bx,by,bz)); - }); + if (m_use_staircases) { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_adotx_eb_sc(i,j,k,yarr,xarr,levset,dmarr, + phieb, AMREX_D_DECL(bx,by,bz)); + }); + } else { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz), + phieb, AMREX_D_DECL(bx,by,bz)); + }); + } } } } else @@ -567,11 +610,19 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF auto const& levset = factory->getLevelSet().const_array(mfi); #if (AMREX_SPACEDIM == 2) if (m_rz) { - AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, - { - mlebndfdlap_gsrb_rz_eb(i,j,k,solarr,rhsarr,levset,dmskarr,ecx,ecy, - sig0, dx0, dx1, xlo, redblack, alpha); - }); + if (m_use_staircases) { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_gsrb_rz_eb_sc(i,j,k,solarr,rhsarr,levset,dmskarr, + sig0, dx0, dx1, xlo, redblack, alpha); + }); + } else { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_gsrb_rz_eb(i,j,k,solarr,rhsarr,levset,dmskarr,ecx,ecy, + sig0, dx0, dx1, xlo, redblack, alpha); + }); + } } else #endif if (m_has_sigma_mf) { @@ -583,11 +634,19 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF sigarr, vfrc, AMREX_D_DECL(bx,by,bz), redblack); }); } else { - AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, - { - mlebndfdlap_gsrb_eb(i,j,k,solarr,rhsarr,levset,dmskarr,AMREX_D_DECL(ecx,ecy,ecz), - AMREX_D_DECL(bx,by,bz), redblack); - }); + if (m_use_staircases) { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_gsrb_eb_sc(i,j,k,solarr,rhsarr,levset,dmskarr, + AMREX_D_DECL(bx,by,bz), redblack); + }); + } else { + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_gsrb_eb(i,j,k,solarr,rhsarr,levset,dmskarr,AMREX_D_DECL(ecx,ecy,ecz), + AMREX_D_DECL(bx,by,bz), redblack); + }); + } } } else #endif // AMREX_USE_EB @@ -681,33 +740,65 @@ MLEBNodeFDLaplacian::compGrad (int amrlev, const Array = cutfab ? edgecent[2]->const_array(mfi) : Array4{};) if (phieb == std::numeric_limits::lowest()) { auto const& phiebarr = m_phi_eb[amrlev].const_array(mfi); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM( - xbox, txbox, - { - mlebndfdlap_grad_x(txbox, gpx, p, dmarr, ecx, phiebarr, dxi); - } - , ybox, tybox, - { - mlebndfdlap_grad_y(tybox, gpy, p, dmarr, ecy, phiebarr, dyi); - } - , zbox, tzbox, - { - mlebndfdlap_grad_z(tzbox, gpz, p, dmarr, ecz, phiebarr, dzi); - }); + if (m_use_staircases) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM( + xbox, txbox, + { + mlebndfdlap_grad_x(txbox, gpx, p, dmarr, phiebarr, dxi); + } + , ybox, tybox, + { + mlebndfdlap_grad_y(tybox, gpy, p, dmarr, phiebarr, dyi); + } + , zbox, tzbox, + { + mlebndfdlap_grad_z(tzbox, gpz, p, dmarr, phiebarr, dzi); + }); + } else { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM( + xbox, txbox, + { + mlebndfdlap_grad_x(txbox, gpx, p, dmarr, ecx, phiebarr, dxi); + } + , ybox, tybox, + { + mlebndfdlap_grad_y(tybox, gpy, p, dmarr, ecy, phiebarr, dyi); + } + , zbox, tzbox, + { + mlebndfdlap_grad_z(tzbox, gpz, p, dmarr, ecz, phiebarr, dzi); + }); + } } else { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM( - xbox, txbox, - { - mlebndfdlap_grad_x(txbox, gpx, p, dmarr, ecx, phieb, dxi); - } - , ybox, tybox, - { - mlebndfdlap_grad_y(tybox, gpy, p, dmarr, ecy, phieb, dyi); - } - , zbox, tzbox, - { - mlebndfdlap_grad_z(tzbox, gpz, p, dmarr, ecz, phieb, dzi); - }); + if (m_use_staircases) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM( + xbox, txbox, + { + mlebndfdlap_grad_x(txbox, gpx, p, dmarr, phieb, dxi); + } + , ybox, tybox, + { + mlebndfdlap_grad_y(tybox, gpy, p, dmarr, phieb, dyi); + } + , zbox, tzbox, + { + mlebndfdlap_grad_z(tzbox, gpz, p, dmarr, phieb, dzi); + }); + } else { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM( + xbox, txbox, + { + mlebndfdlap_grad_x(txbox, gpx, p, dmarr, ecx, phieb, dxi); + } + , ybox, tybox, + { + mlebndfdlap_grad_y(tybox, gpy, p, dmarr, ecy, phieb, dyi); + } + , zbox, tzbox, + { + mlebndfdlap_grad_z(tzbox, gpz, p, dmarr, ecz, phieb, dzi); + }); + } } #else AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(