diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2cfc0df66b46..08ffd596dbdf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,7 @@
 - [[PR 1004]](https://github.com/parthenon-hpc-lab/parthenon/pull/1004) Allow parameter modification from an input file for restarts
 
 ### Fixed (not changing behavior/API/variables/...)
+- [[PR 1150]](https://github.com/parthenon-hpc-lab/parthenon/pull/1150) Reduce memory consumption for buffer pool
 - [[PR 1146]](https://github.com/parthenon-hpc-lab/parthenon/pull/1146) Fix an issue outputting >4GB single variables per rank
 - [[PR 1152]](https://github.com/parthenon-hpc-lab/parthenon/pull/1152) Fix memory leak in task graph outputs related to `abi::__cxa_demangle`
 - [[PR 1144]](https://github.com/parthenon-hpc-lab/parthenon/pull/1144) Fix some restarts w/non-CC fields
diff --git a/src/bvals/comms/bnd_info.cpp b/src/bvals/comms/bnd_info.cpp
index d30522fd6998..1505b56f956a 100644
--- a/src/bvals/comms/bnd_info.cpp
+++ b/src/bvals/comms/bnd_info.cpp
@@ -332,7 +332,7 @@ BndInfo BndInfo::GetSetBndInfo(MeshBlock *pmb, const NeighborBlock &nb,
     out.buf_allocated = false;
   } else {
     printf("%i [rank: %i] -> %i [rank: %i] (Set %s) is in state %i.\n", nb.gid, nb.rank,
-           pmb->gid, Globals::my_rank, v->label().c_str(), buf_state);
+           pmb->gid, Globals::my_rank, v->label().c_str(), static_cast<int>(buf_state));
     PARTHENON_FAIL("Buffer should be in a received state.");
   }
   return out;
diff --git a/src/bvals/comms/build_boundary_buffers.cpp b/src/bvals/comms/build_boundary_buffers.cpp
index 918f4d5017c2..aac532d037e6 100644
--- a/src/bvals/comms/build_boundary_buffers.cpp
+++ b/src/bvals/comms/build_boundary_buffers.cpp
@@ -16,10 +16,12 @@
 //========================================================================================
 
 #include <algorithm>
+#include <cstdint>
 #include <iostream> // debug
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 #include "bvals_in_one.hpp"
 #include "bvals_utils.hpp"
@@ -44,25 +46,58 @@ template <BoundaryType bound_type>
 void BuildBoundaryBufferSubset(std::shared_ptr<MeshData<Real>> &md,
                                Mesh::comm_buf_map_t &buf_map) {
   Mesh *pmesh = md->GetMeshPointer();
+  std::unordered_map<int, int>
+      nbufs; // total (existing and new) number of buffers for given size
+
+  ForEachBoundary<bound_type>(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) {
+    // Calculate the required size of the buffer for this boundary
+    int buf_size = GetBufferSize(pmb, nb, v);
+    // LR: Multigrid logic requires blocks sending messages to themselves (since the same
+    // block can show up on two multigrid levels). This doesn't require any data
+    // transfer, so the message size can be zero. It is essentially just a flag to show
+    // that the block is done being used on one level and can be used on the next level.
+    if (pmb->gid == nb.gid && nb.offsets.IsCell()) buf_size = 0;
+
+    nbufs[buf_size] += 1; // relying on value init of int to 0 for initial entry
+  });
+
   ForEachBoundary<bound_type>(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) {
     // Calculate the required size of the buffer for this boundary
     int buf_size = GetBufferSize(pmb, nb, v);
+    // See comment above on the same logic.
     if (pmb->gid == nb.gid && nb.offsets.IsCell()) buf_size = 0;
 
     // Add a buffer pool if one does not exist for this size
+    using buf_t = buf_pool_t<Real>::base_t;
     if (pmesh->pool_map.count(buf_size) == 0) {
-      pmesh->pool_map.emplace(std::make_pair(
-          buf_size, buf_pool_t<Real>([buf_size](buf_pool_t<Real> *pool) {
-            using buf_t = buf_pool_t<Real>::base_t;
-            // TODO(LFR): Make nbuf a user settable parameter
-            const int nbuf = 200;
-            buf_t chunk("pool buffer", buf_size * nbuf);
+      // Might be worth discussing what a good default is.
+      // Using the number of packs, assumes that all blocks in a pack have fairly similar
+      // buffer configurations, which may or may not be a good approximation.
+      // An alternative would be "1", which would reduce the memory footprint, but
+      // increase the number of individual memory allocations.
+      const int64_t nbuf = pmesh->DefaultNumPartitions();
+      pmesh->pool_map.emplace(
+          buf_size, buf_pool_t<Real>([buf_size, nbuf](buf_pool_t<Real> *pool) {
+            const auto pool_size = nbuf * buf_size;
+            buf_t chunk("pool buffer", pool_size);
             for (int i = 1; i < nbuf; ++i) {
               pool->AddFreeObjectToPool(
                   buf_t(chunk, std::make_pair(i * buf_size, (i + 1) * buf_size)));
             }
             return buf_t(chunk, std::make_pair(0, buf_size));
-          })));
+          }));
+    }
+    // Now that the pool is guaranteed to exist we can add free objects of the required
+    // amount.
+    auto &pool = pmesh->pool_map.at(buf_size);
+    const std::int64_t new_buffers_req = nbufs.at(buf_size) - pool.NumBuffersInPool();
+    if (new_buffers_req > 0) {
+      const auto pool_size = new_buffers_req * buf_size;
+      buf_t chunk("pool buffer", pool_size);
+      for (int i = 0; i < new_buffers_req; ++i) {
+        pool.AddFreeObjectToPool(
+            buf_t(chunk, std::make_pair(i * buf_size, (i + 1) * buf_size)));
+      }
     }
 
     const int receiver_rank = nb.rank;
diff --git a/src/utils/object_pool.hpp b/src/utils/object_pool.hpp
index 89167334277f..c7452499f126 100644
--- a/src/utils/object_pool.hpp
+++ b/src/utils/object_pool.hpp
@@ -62,6 +62,8 @@ class ObjectPool {
     std::cout << inuse_.size() << " used objects." << std::endl;
   }
 
+  auto NumBuffersInPool() const { return inuse_.size() + available_.size(); }
+
   std::uint64_t SizeInBytes() const {
     constexpr std::uint64_t datum_size = sizeof(typename base_t::value_type);
     std::uint64_t object_size = 0;
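For illustration, the heart of this change is a two-pass pattern: first count how many buffers of each size the boundaries will need, then top each size's pool up to exactly that demand instead of unconditionally carving out a fixed chunk of 200 buffers. The sketch below is a minimal, self-contained stand-in, not the patch itself: `ToyPool` and the plain `std::vector` storage are inventions for the example, and only the `NumBuffersInPool()` name is taken from the diff.

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

// Toy pool: tracks buffers of one fixed size, split into in-use and free,
// so NumBuffersInPool() mirrors inuse_.size() + available_.size() above.
struct ToyPool {
  std::size_t in_use = 0;
  std::vector<std::vector<double>> available;
  std::size_t NumBuffersInPool() const { return in_use + available.size(); }
};

int main() {
  // Pass 1: count how many buffers of each size the boundaries will need.
  // operator[] value-initializes missing entries to 0, as the diff's comment notes.
  const std::vector<int> boundary_sizes = {128, 128, 128, 256};
  std::unordered_map<int, int> nbufs;
  for (const int s : boundary_sizes) nbufs[s] += 1;

  // Pass 2: top each pool up to the counted demand in one batch, rather than
  // over-allocating a fixed number of buffers per size.
  std::unordered_map<int, ToyPool> pool_map;
  for (const auto &[buf_size, needed] : nbufs) {
    auto &pool = pool_map[buf_size];
    const std::int64_t new_buffers_req =
        needed - static_cast<std::int64_t>(pool.NumBuffersInPool());
    for (std::int64_t i = 0; i < new_buffers_req; ++i) {
      pool.available.emplace_back(buf_size); // one buffer of buf_size elements
    }
  }

  for (const auto &[buf_size, pool] : pool_map) {
    std::cout << "size " << buf_size << ": " << pool.NumBuffersInPool()
              << " buffers in pool\n";
  }
}
```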
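A second sketch, for the backing-store layout: the pool's fill function makes one allocation of `nbuf * buf_size` elements and hands out equal, non-overlapping windows of it, which is what the `buf_t(chunk, std::make_pair(i * buf_size, (i + 1) * buf_size))` subviews in the diff express. Here C++20 `std::span` stands in for the Kokkos-style subview, and the sizes are made up for the example.

```cpp
#include <cstddef>
#include <iostream>
#include <span>
#include <vector>

int main() {
  // One backing allocation serving nbuf buffers of buf_size elements each,
  // analogous to buf_t chunk("pool buffer", nbuf * buf_size).
  const std::size_t buf_size = 64;
  const std::size_t nbuf = 8;
  std::vector<double> chunk(nbuf * buf_size);

  // Buffer i views the half-open element range [i * buf_size, (i + 1) * buf_size),
  // so the windows tile the chunk without overlapping.
  std::vector<std::span<double>> buffers;
  for (std::size_t i = 0; i < nbuf; ++i) {
    buffers.emplace_back(chunk.data() + i * buf_size, buf_size);
  }

  std::cout << buffers.size() << " buffers backed by a single allocation of "
            << chunk.size() * sizeof(double) << " bytes\n";
}
```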