From fe145ad7a3d4e17d0de21505e8e81750e0a303a4 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 14 Nov 2023 11:13:47 -0800 Subject: [PATCH] Plotfile Tools: GPU support (#3626) ## Summary Add GPU support for fcompare, fextrema, fsnapshot and fvolumesum. ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_PlotFileDataImpl.cpp | 2 +- Tools/Plotfile/fextrema.cpp | 35 ++++---- Tools/Plotfile/fsnapshot.cpp | 4 +- Tools/Plotfile/fvolumesum.cpp | 130 ++++++++++------------------ 4 files changed, 66 insertions(+), 105 deletions(-) diff --git a/Src/Base/AMReX_PlotFileDataImpl.cpp b/Src/Base/AMReX_PlotFileDataImpl.cpp index 1fbf5044a50..b85c17ad93c 100644 --- a/Src/Base/AMReX_PlotFileDataImpl.cpp +++ b/Src/Base/AMReX_PlotFileDataImpl.cpp @@ -141,7 +141,7 @@ PlotFileDataImpl::get (int level, std::string const& varname) noexcept int gid = mfi.index(); FArrayBox& dstfab = mf[mfi]; std::unique_ptr srcfab(m_vismf[level]->readFAB(gid, icomp)); - dstfab.copy(*srcfab); + dstfab.copy(*srcfab); } } return mf; diff --git a/Tools/Plotfile/fextrema.cpp b/Tools/Plotfile/fextrema.cpp index 44cfaf161d4..55596bf952d 100644 --- a/Tools/Plotfile/fextrema.cpp +++ b/Tools/Plotfile/fextrema.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -80,23 +81,23 @@ void main_main() pf.boxArray(ilev+1), ratio); for (int ivar = 0; ivar < var_names.size(); ++ivar) { const MultiFab& mf = pf.get(ilev, var_names[ivar]); - for (MFIter mfi(mf); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - const auto& ifab = mask.array(mfi); - const auto& fab = mf.array(mfi); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - if (ifab(i,j,k) == 0) { - vvmin[ivar] = std::min(fab(i,j,k),vvmin[ivar]); - vvmax[ivar] = std::max(fab(i,j,k),vvmax[ivar]); - } - } - } - } - } + auto const& ma = mf.const_arrays(); + auto const& ima = mask.const_arrays(); + auto rr = ParReduce(TypeList{}, + TypeList{}, mf, + [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + -> GpuTuple + { + if (ima[bno](i,j,k) == 0) { + auto x = ma[bno](i,j,k); + return {x,x}; + } else { + return {std::numeric_limits::max(), + std::numeric_limits::lowest()}; + } + }); + vvmin[ivar] = std::min(amrex::get<0>(rr), vvmin[ivar]); + vvmax[ivar] = std::max(amrex::get<1>(rr), vvmax[ivar]); } } } diff --git a/Tools/Plotfile/fsnapshot.cpp b/Tools/Plotfile/fsnapshot.cpp index e68f8a33b6d..c4b9fd3f361 100644 --- a/Tools/Plotfile/fsnapshot.cpp +++ b/Tools/Plotfile/fsnapshot.cpp @@ -278,7 +278,7 @@ void main_main() gmx = std::log10(gmx); } - BaseFab intdat; + BaseFab intdat(The_Pinned_Arena()); for (int idir = ndir_begin; idir < ndir_end; ++idir) { intdat.resize(finebox[idir],1); const int width = (idir == 0) ? finebox[idir].length(1) : finebox[idir].length(0); @@ -286,7 +286,7 @@ void main_main() const auto& intarr = intdat.array(); const auto& realarr = datamf[idir].array(0); Real fac = Real(253.999) / (gmx-gmn); - amrex::LoopOnCpu(finebox[idir], [=] (int i, int j, int k) + amrex::ParallelFor(finebox[idir], [=] AMREX_GPU_DEVICE (int i, int j, int k) { int jj = (idir == 2) ? height - 1 - j : j; // flip the data in second image direction int kk = (idir == 2) ? k : height - 1 - k; diff --git a/Tools/Plotfile/fvolumesum.cpp b/Tools/Plotfile/fvolumesum.cpp index 2f9f03cc522..ec6e461dcc7 100644 --- a/Tools/Plotfile/fvolumesum.cpp +++ b/Tools/Plotfile/fvolumesum.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -80,7 +81,6 @@ void main_main() const int dim = pf.spaceDim(); - int fine_level = pf.finestLevel(); Vector pos; @@ -95,6 +95,35 @@ void main_main() Array dx = pf.cellSize(ilev); + Real volfac = AMREX_D_TERM(dx[0], *dx[1], *dx[2]); + + if (coord == 1) { + AMREX_ALWAYS_ASSERT(AMREX_SPACEDIM == 2); + // axisymmetric V = pi (r_r**2 - r_l**2) * dz + // = pi dr * dz * (r_r + r_l) + // = 2 pi r dr dz + volfac *= 2 * pi; // 2 * pi * dr * dz part here + } else if (coord == 2) { + AMREX_ALWAYS_ASSERT(AMREX_SPACEDIM == 1); + // 1-d spherical V = 4/3 pi (r_r**3 - r_l**3) + volfac *= (4.0_rt/3.0_rt) * pi; // 4/3 * pi * dr part here + } + + auto xlo = problo[0]; + auto dx0 = dx[0]; + AMREX_ASSERT(coord == 0 || coord == 1 || coord == 2); + auto f_vol = [=] AMREX_GPU_DEVICE (int i) { + if (coord == 0) { + return volfac; + } else if (coord == 1) { + return volfac * (xlo + (Real(i)+0.5_rt)*dx0); + } else { + Real r_r = xlo + Real(i+1)*dx0; + Real r_l = xlo + Real(i )*dx0; + return volfac * (r_r*r_r + r_l*r_r + r_l*r_l); + } + }; + if (ilev < fine_level) { IntVect ratio{pf.refRatio(ilev)}; for (int idim = dim; idim < AMREX_SPACEDIM; ++idim) { @@ -103,97 +132,28 @@ void main_main() const iMultiFab mask = makeFineMask(pf.boxArray(ilev), pf.DistributionMap(ilev), pf.boxArray(ilev+1), ratio); const MultiFab& mf = pf.get(ilev, var_name); - for (MFIter mfi(mf); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); - if (bx.ok()) { - const auto& m = mask.array(mfi); - const auto& fab = mf.array(mfi); - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - if (m(i,j,k) == 0) { // not covered by fine - Array p - = {AMREX_D_DECL(problo[0]+static_cast(i+0.5)*dx[0], - problo[1]+static_cast(j+0.5)*dx[1], - problo[2]+static_cast(k+0.5)*dx[2])}; - - // compute the volume - Real vol = std::numeric_limits::quiet_NaN(); - if (coord == 0) { - // Cartesian - vol = 1.0_rt; - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - vol *= dx[idim]; - } - } else if (coord == 1) { - // axisymmetric V = pi (r_r**2 - r_l**2) * dz - // = pi dr * dz * (r_r + r_l) - // = 2 pi r dr dz - vol = 2 * pi * p[0] * dx[0] * dx[1]; - } else if (coord == 2) { - // 1-d spherical V = 4/3 pi (r_r**3 - r_l**3) - Real r_r = problo[0]+static_cast(i+1)*dx[0]; - Real r_l = problo[0]+static_cast(i)*dx[0]; - vol = (4.0_rt/3.0_rt) * pi * dx[0] * (r_r*r_r + r_l*r_r + r_l*r_l); - } - - lsum += fab(i,j,k) * vol; - } - } - } - } - } - } + auto const& ima = mask.const_arrays(); + auto const& ma = mf.const_arrays(); + lsum += ParReduce(TypeList{}, TypeList{}, mf, + [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + -> GpuTuple + { + return { (ima[bno](i,j,k) == 0) ? ma[bno](i,j,k)*f_vol(i) : 0._rt }; + }); } else { const MultiFab& mf = pf.get(ilev, var_name); - for (MFIter mfi(mf); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); - if (bx.ok()) { - const auto& fab = mf.array(mfi); - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - Array p - = {AMREX_D_DECL(problo[0]+static_cast(i+0.5)*dx[0], - problo[1]+static_cast(j+0.5)*dx[1], - problo[2]+static_cast(k+0.5)*dx[2])}; - - // compute the volume - Real vol = std::numeric_limits::quiet_NaN(); - if (coord == 0) { - // Cartesian - vol = 1.0_rt; - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - vol *= dx[idim]; - } - } else if (coord == 1) { - // axisymmetric V = pi (r_r**2 - r_l**2) * dz - // = pi dr * dz * (r_r + r_l) - // = 2 pi r dr dz - vol = 2 * pi * p[0] * dx[0] * dx[1]; - } else if (coord == 2) { - // 1-d spherical V = 4/3 pi (r_r**3 - r_l**3) - Real r_r = problo[0]+static_cast(i+1)*dx[0]; - Real r_l = problo[0]+static_cast(i)*dx[0]; - vol = (4.0_rt/3.0_rt) * pi * dx[0] * (r_r*r_r + r_l*r_r + r_l*r_l); - } - - lsum += fab(i,j,k) * vol; - } - } - } - } - } + auto const& ma = mf.const_arrays(); + lsum += ParReduce(TypeList{}, TypeList{}, mf, + [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + -> GpuTuple + { + return { ma[bno](i,j,k)*f_vol(i) }; + }); } } ParallelDescriptor::ReduceRealSum(lsum); - if (ParallelDescriptor::IOProcessor()) { std::cout << "integral of " << var_name << " = " << lsum << std::endl;