From 6e2b831245f6fdac9a714c64417bd5ab21fb613d Mon Sep 17 00:00:00 2001 From: Edward Basso Date: Mon, 13 Nov 2023 15:00:36 -0800 Subject: [PATCH 1/4] solve_bicgstab: use linop.make instead of MF constructor (#3619) ## Summary This PR replaces the explicit use of MF constructors in ```MLCGSolverT::solve_bicgstab``` with calls to the `make` method of the linear operator associated with the MLCGSolverT object. ## Additional background The use of `MLLinOpT::make` allows for inheritance of MLCGSolverT without an override of `solve_bicgstab` even if the MF class lacks a constructor with the same arguments as those MultiFab. For the MLMG template classes, `make` should generally be used instead of explicit MF constructors. Another PR to change this in `solve_cg` will follow once this is fully vetted and approved. --- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index 3764fa38f8a..fce7b1d5005 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -90,22 +90,18 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) const int ncomp = sol.nComp(); - const BoxArray& ba = sol.boxArray(); - const DistributionMapping& dm = sol.DistributionMap(); - const auto& factory = sol.Factory(); - - MF ph(ba, dm, ncomp, sol.nGrowVect(), MFInfo(), factory); - MF sh(ba, dm, ncomp, sol.nGrowVect(), MFInfo(), factory); + MF ph = Lp.make(amrlev, mglev, sol.nGrowVect()); + MF sh = Lp.make(amrlev, mglev, sol.nGrowVect()); ph.setVal(RT(0.0)); sh.setVal(RT(0.0)); - MF sorig(ba, dm, ncomp, nghost, MFInfo(), factory); - MF p (ba, dm, ncomp, nghost, MFInfo(), factory); - MF r (ba, dm, ncomp, nghost, MFInfo(), factory); - MF s (ba, dm, ncomp, nghost, MFInfo(), factory); - MF rh (ba, dm, ncomp, nghost, MFInfo(), factory); - MF v (ba, dm, ncomp, nghost, MFInfo(), factory); - MF t (ba, dm, ncomp, nghost, MFInfo(), factory); + MF sorig = Lp.make(amrlev, mglev, nghost); + MF p = Lp.make(amrlev, mglev, nghost); + MF r = Lp.make(amrlev, mglev, nghost); + MF s = Lp.make(amrlev, mglev, nghost); + MF rh = Lp.make(amrlev, mglev, nghost); + MF v = Lp.make(amrlev, mglev, nghost); + MF t = Lp.make(amrlev, mglev, nghost); Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT::BCMode::Homogeneous); From fa3743fd1fdd5e3f1b12d431b0f6bb4b15bb7b95 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 13 Nov 2023 15:00:56 -0800 Subject: [PATCH 2/4] CArena: shrink_in_place and operator<< (#3621) ## Summary Implement CArena::shrink_in_place, which is used by PODVector::shrink_to_fit. It avoids a new memory allocation and data movement. Add operator<< to CArena. This helps debugging. ## Additional background Follow-up on #3426. ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_CArena.H | 19 ++++---- Src/Base/AMReX_CArena.cpp | 96 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 105 insertions(+), 10 deletions(-) diff --git a/Src/Base/AMReX_CArena.H b/Src/Base/AMReX_CArena.H index d68285bc878..163039df2ef 100644 --- a/Src/Base/AMReX_CArena.H +++ b/Src/Base/AMReX_CArena.H @@ -5,13 +5,14 @@ #include #include -#include -#include +#include +#include #include #include -#include -#include +#include #include +#include +#include namespace amrex { @@ -57,7 +58,7 @@ public: * Try to shrink in-place */ [[nodiscard]] void* - shrink_in_place (void* pt, std::size_t sz) final; + shrink_in_place (void* pt, std::size_t new_size) final; /** * \brief Free up allocated memory. Merge neighboring free memory chunks @@ -164,15 +165,15 @@ protected: MemStat* m_stat; }; + //! The list of blocks allocated via ::operator new(). + std::vector > m_alloc; + /** * \brief The type of our freelist and blocklist. * We use a set sorted from lo to hi memory addresses. */ using NL = std::set; - //! The list of blocks allocated via ::operator new(). - std::vector > m_alloc; - /** * \brief The free list of allocated but not currently used blocks. * Maintained in lo to hi memory sorted order. @@ -198,6 +199,8 @@ protected: std::mutex carena_mutex; + + friend std::ostream& operator<< (std::ostream& os, const CArena& arena); }; } diff --git a/Src/Base/AMReX_CArena.cpp b/Src/Base/AMReX_CArena.cpp index 6f7979d4750..c47f8f5ed26 100644 --- a/Src/Base/AMReX_CArena.cpp +++ b/Src/Base/AMReX_CArena.cpp @@ -14,6 +14,7 @@ namespace amrex { #include #include +#include namespace amrex { @@ -203,9 +204,61 @@ CArena::alloc_in_place (void* pt, std::size_t szmin, std::size_t szmax) } void* -CArena::shrink_in_place (void* /*pt*/, std::size_t sz) +CArena::shrink_in_place (void* pt, std::size_t new_size) { - return alloc(sz); // xxxxx TODO + if ((pt == nullptr) || (new_size == 0)) { return nullptr; } + + new_size = Arena::align(new_size); + + std::lock_guard lock(carena_mutex); + + auto busy_it = m_busylist.find(Node(pt,nullptr,0)); + if (busy_it == m_busylist.end()) { + amrex::Abort("CArena::shrink_in_place: unknown pointer"); + return nullptr; + } + AMREX_ASSERT(m_freelist.find(*busy_it) == m_freelist.end()); + + auto const old_size = busy_it->size(); + + if (new_size > old_size) { + amrex::Abort("CArena::shrink_in_place: wrong size. Cannot shrink to a larger size."); + return nullptr; + } else if (new_size == old_size) { + return pt; + } else { + auto const leftover_size = old_size - new_size; + + void* pt2 = static_cast(pt) + new_size; + Node new_free_node(pt2, busy_it->owner(), leftover_size); + + void* pt_end = static_cast(pt) + old_size; + auto free_it = m_freelist.find(Node(pt_end,nullptr,0)); + if ((free_it == m_freelist.end()) || ! new_free_node.coalescable(*free_it)) { + m_freelist.insert(free_it, new_free_node); + } else { + auto& node = const_cast(*free_it); + // This is safe because the free list is std::set and the + // modification of `block` does not change the order of elements + // in the container, even though Node's operator< uses block. + node.block(pt2); + node.size(leftover_size + node.size()); + } + + const_cast(*busy_it).size(new_size); + + m_actually_used -= leftover_size; + +#ifdef AMREX_TINY_PROFILING + if (m_do_profiling) { + TinyProfiler::memory_free(old_size, busy_it->mem_stat()); + auto* stat = TinyProfiler::memory_alloc(new_size, m_profiling_stats); + const_cast(*busy_it).mem_stat(stat); + } +#endif + + return pt; + } } void @@ -439,4 +492,43 @@ CArena::PrintUsage (std::ostream& os, std::string const& name, std::string const << m_busylist.size() << " busy blocks, " << m_freelist.size() << " free blocks\n"; } +std::ostream& operator<< (std::ostream& os, const CArena& arena) +{ + os << "CArea:\n" + << " Hunk size: " << arena.m_hunk << "\n" + << " Memory allocated: " << arena.m_used << "\n" + << " Memory actually used: " << arena.m_actually_used << "\n"; + + if (arena.m_alloc.empty()) { + os << " No memory allocations\n"; + } else { + os << " List of memory alloations: (address, size)\n"; + for (auto const& a : arena.m_alloc) { + os << " " << a.first << ", " << a.second << "\n"; + } + } + + if (arena.m_freelist.empty()) { + os << " No free nodes\n"; + } else { + os << " List of free nodes: (address, owner, size)\n"; + for (auto const& a : arena.m_freelist) { + os << " " << a.block() << ", " << a.owner() << ", " + << a.size() << "\n"; + } + } + + if (arena.m_busylist.empty()) { + os << " No busy nodes\n"; + } else { + os << " List of busy nodes: (address, owner, size)\n"; + for (auto const& a : arena.m_busylist) { + os << " " << a.block() << ", " << a.owner() << ", " + << a.size() << "\n"; + } + } + + return os; +} + } From b7408ea6e8feca7eab2f7cf30606d066b6699814 Mon Sep 17 00:00:00 2001 From: Edward Basso Date: Mon, 13 Nov 2023 18:56:02 -0800 Subject: [PATCH 3/4] solve_cg: use linop.make instead of MF constructor (#3627) ## Summary This PR replaces the explicit use of `MF` constructors in `MLCGSolverT::solve_cg` with calls to the make method of the linear operator associated with the `MLCGSolverT` object. ## Additional background This is a similar to the PR on `solve_bicgstab`. --- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index fce7b1d5005..c99d7b319bd 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -261,17 +261,13 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) const int ncomp = sol.nComp(); - const BoxArray& ba = sol.boxArray(); - const DistributionMapping& dm = sol.DistributionMap(); - const auto& factory = sol.Factory(); - - MF p(ba, dm, ncomp, sol.nGrowVect(), MFInfo(), factory); + MF p = Lp.make(amrlev, mglev, sol.nGrowVect()); p.setVal(RT(0.0)); - MF sorig(ba, dm, ncomp, nghost, MFInfo(), factory); - MF r (ba, dm, ncomp, nghost, MFInfo(), factory); - MF z (ba, dm, ncomp, nghost, MFInfo(), factory); - MF q (ba, dm, ncomp, nghost, MFInfo(), factory); + MF sorig = Lp.make(amrlev, mglev, nghost); + MF r = Lp.make(amrlev, mglev, nghost); + MF z = Lp.make(amrlev, mglev, nghost); + MF q = Lp.make(amrlev, mglev, nghost); sorig.LocalCopy(sol,0,0,ncomp,nghost); From af1e1be8d7d41de9b999b4973b2024281a28f23e Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 14 Nov 2023 11:13:47 -0800 Subject: [PATCH 4/4] Plotfile Tools: GPU support (#3626) ## Summary Add GPU support for fcompare, fextrema, fsnapshot and fvolumesum. ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_PlotFileDataImpl.cpp | 2 +- Tools/Plotfile/fextrema.cpp | 35 ++++---- Tools/Plotfile/fsnapshot.cpp | 4 +- Tools/Plotfile/fvolumesum.cpp | 130 ++++++++++------------------ 4 files changed, 66 insertions(+), 105 deletions(-) diff --git a/Src/Base/AMReX_PlotFileDataImpl.cpp b/Src/Base/AMReX_PlotFileDataImpl.cpp index 1fbf5044a50..b85c17ad93c 100644 --- a/Src/Base/AMReX_PlotFileDataImpl.cpp +++ b/Src/Base/AMReX_PlotFileDataImpl.cpp @@ -141,7 +141,7 @@ PlotFileDataImpl::get (int level, std::string const& varname) noexcept int gid = mfi.index(); FArrayBox& dstfab = mf[mfi]; std::unique_ptr srcfab(m_vismf[level]->readFAB(gid, icomp)); - dstfab.copy(*srcfab); + dstfab.copy(*srcfab); } } return mf; diff --git a/Tools/Plotfile/fextrema.cpp b/Tools/Plotfile/fextrema.cpp index 44cfaf161d4..55596bf952d 100644 --- a/Tools/Plotfile/fextrema.cpp +++ b/Tools/Plotfile/fextrema.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -80,23 +81,23 @@ void main_main() pf.boxArray(ilev+1), ratio); for (int ivar = 0; ivar < var_names.size(); ++ivar) { const MultiFab& mf = pf.get(ilev, var_names[ivar]); - for (MFIter mfi(mf); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - const auto& ifab = mask.array(mfi); - const auto& fab = mf.array(mfi); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - if (ifab(i,j,k) == 0) { - vvmin[ivar] = std::min(fab(i,j,k),vvmin[ivar]); - vvmax[ivar] = std::max(fab(i,j,k),vvmax[ivar]); - } - } - } - } - } + auto const& ma = mf.const_arrays(); + auto const& ima = mask.const_arrays(); + auto rr = ParReduce(TypeList{}, + TypeList{}, mf, + [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + -> GpuTuple + { + if (ima[bno](i,j,k) == 0) { + auto x = ma[bno](i,j,k); + return {x,x}; + } else { + return {std::numeric_limits::max(), + std::numeric_limits::lowest()}; + } + }); + vvmin[ivar] = std::min(amrex::get<0>(rr), vvmin[ivar]); + vvmax[ivar] = std::max(amrex::get<1>(rr), vvmax[ivar]); } } } diff --git a/Tools/Plotfile/fsnapshot.cpp b/Tools/Plotfile/fsnapshot.cpp index e68f8a33b6d..c4b9fd3f361 100644 --- a/Tools/Plotfile/fsnapshot.cpp +++ b/Tools/Plotfile/fsnapshot.cpp @@ -278,7 +278,7 @@ void main_main() gmx = std::log10(gmx); } - BaseFab intdat; + BaseFab intdat(The_Pinned_Arena()); for (int idir = ndir_begin; idir < ndir_end; ++idir) { intdat.resize(finebox[idir],1); const int width = (idir == 0) ? finebox[idir].length(1) : finebox[idir].length(0); @@ -286,7 +286,7 @@ void main_main() const auto& intarr = intdat.array(); const auto& realarr = datamf[idir].array(0); Real fac = Real(253.999) / (gmx-gmn); - amrex::LoopOnCpu(finebox[idir], [=] (int i, int j, int k) + amrex::ParallelFor(finebox[idir], [=] AMREX_GPU_DEVICE (int i, int j, int k) { int jj = (idir == 2) ? height - 1 - j : j; // flip the data in second image direction int kk = (idir == 2) ? k : height - 1 - k; diff --git a/Tools/Plotfile/fvolumesum.cpp b/Tools/Plotfile/fvolumesum.cpp index 2f9f03cc522..ec6e461dcc7 100644 --- a/Tools/Plotfile/fvolumesum.cpp +++ b/Tools/Plotfile/fvolumesum.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -80,7 +81,6 @@ void main_main() const int dim = pf.spaceDim(); - int fine_level = pf.finestLevel(); Vector pos; @@ -95,6 +95,35 @@ void main_main() Array dx = pf.cellSize(ilev); + Real volfac = AMREX_D_TERM(dx[0], *dx[1], *dx[2]); + + if (coord == 1) { + AMREX_ALWAYS_ASSERT(AMREX_SPACEDIM == 2); + // axisymmetric V = pi (r_r**2 - r_l**2) * dz + // = pi dr * dz * (r_r + r_l) + // = 2 pi r dr dz + volfac *= 2 * pi; // 2 * pi * dr * dz part here + } else if (coord == 2) { + AMREX_ALWAYS_ASSERT(AMREX_SPACEDIM == 1); + // 1-d spherical V = 4/3 pi (r_r**3 - r_l**3) + volfac *= (4.0_rt/3.0_rt) * pi; // 4/3 * pi * dr part here + } + + auto xlo = problo[0]; + auto dx0 = dx[0]; + AMREX_ASSERT(coord == 0 || coord == 1 || coord == 2); + auto f_vol = [=] AMREX_GPU_DEVICE (int i) { + if (coord == 0) { + return volfac; + } else if (coord == 1) { + return volfac * (xlo + (Real(i)+0.5_rt)*dx0); + } else { + Real r_r = xlo + Real(i+1)*dx0; + Real r_l = xlo + Real(i )*dx0; + return volfac * (r_r*r_r + r_l*r_r + r_l*r_l); + } + }; + if (ilev < fine_level) { IntVect ratio{pf.refRatio(ilev)}; for (int idim = dim; idim < AMREX_SPACEDIM; ++idim) { @@ -103,97 +132,28 @@ void main_main() const iMultiFab mask = makeFineMask(pf.boxArray(ilev), pf.DistributionMap(ilev), pf.boxArray(ilev+1), ratio); const MultiFab& mf = pf.get(ilev, var_name); - for (MFIter mfi(mf); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); - if (bx.ok()) { - const auto& m = mask.array(mfi); - const auto& fab = mf.array(mfi); - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - if (m(i,j,k) == 0) { // not covered by fine - Array p - = {AMREX_D_DECL(problo[0]+static_cast(i+0.5)*dx[0], - problo[1]+static_cast(j+0.5)*dx[1], - problo[2]+static_cast(k+0.5)*dx[2])}; - - // compute the volume - Real vol = std::numeric_limits::quiet_NaN(); - if (coord == 0) { - // Cartesian - vol = 1.0_rt; - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - vol *= dx[idim]; - } - } else if (coord == 1) { - // axisymmetric V = pi (r_r**2 - r_l**2) * dz - // = pi dr * dz * (r_r + r_l) - // = 2 pi r dr dz - vol = 2 * pi * p[0] * dx[0] * dx[1]; - } else if (coord == 2) { - // 1-d spherical V = 4/3 pi (r_r**3 - r_l**3) - Real r_r = problo[0]+static_cast(i+1)*dx[0]; - Real r_l = problo[0]+static_cast(i)*dx[0]; - vol = (4.0_rt/3.0_rt) * pi * dx[0] * (r_r*r_r + r_l*r_r + r_l*r_l); - } - - lsum += fab(i,j,k) * vol; - } - } - } - } - } - } + auto const& ima = mask.const_arrays(); + auto const& ma = mf.const_arrays(); + lsum += ParReduce(TypeList{}, TypeList{}, mf, + [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + -> GpuTuple + { + return { (ima[bno](i,j,k) == 0) ? ma[bno](i,j,k)*f_vol(i) : 0._rt }; + }); } else { const MultiFab& mf = pf.get(ilev, var_name); - for (MFIter mfi(mf); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); - if (bx.ok()) { - const auto& fab = mf.array(mfi); - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - Array p - = {AMREX_D_DECL(problo[0]+static_cast(i+0.5)*dx[0], - problo[1]+static_cast(j+0.5)*dx[1], - problo[2]+static_cast(k+0.5)*dx[2])}; - - // compute the volume - Real vol = std::numeric_limits::quiet_NaN(); - if (coord == 0) { - // Cartesian - vol = 1.0_rt; - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - vol *= dx[idim]; - } - } else if (coord == 1) { - // axisymmetric V = pi (r_r**2 - r_l**2) * dz - // = pi dr * dz * (r_r + r_l) - // = 2 pi r dr dz - vol = 2 * pi * p[0] * dx[0] * dx[1]; - } else if (coord == 2) { - // 1-d spherical V = 4/3 pi (r_r**3 - r_l**3) - Real r_r = problo[0]+static_cast(i+1)*dx[0]; - Real r_l = problo[0]+static_cast(i)*dx[0]; - vol = (4.0_rt/3.0_rt) * pi * dx[0] * (r_r*r_r + r_l*r_r + r_l*r_l); - } - - lsum += fab(i,j,k) * vol; - } - } - } - } - } + auto const& ma = mf.const_arrays(); + lsum += ParReduce(TypeList{}, TypeList{}, mf, + [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + -> GpuTuple + { + return { ma[bno](i,j,k)*f_vol(i) }; + }); } } ParallelDescriptor::ReduceRealSum(lsum); - if (ParallelDescriptor::IOProcessor()) { std::cout << "integral of " << var_name << " = " << lsum << std::endl;