From 57b7304ebe1d3687f6ac66b6172e4c69bb7d7aec Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Thu, 8 Aug 2024 21:28:37 +0200 Subject: [PATCH 1/5] Reduce memory consumption for buffer pool --- src/bvals/comms/build_boundary_buffers.cpp | 39 +++++++++++++++++----- src/utils/object_pool.hpp | 4 +++ 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/bvals/comms/build_boundary_buffers.cpp b/src/bvals/comms/build_boundary_buffers.cpp index 918f4d5017c2..a4c268d39d35 100644 --- a/src/bvals/comms/build_boundary_buffers.cpp +++ b/src/bvals/comms/build_boundary_buffers.cpp @@ -16,10 +16,12 @@ //======================================================================================== #include +#include #include // debug #include #include #include +#include #include #include "bvals_in_one.hpp" @@ -44,25 +46,46 @@ template void BuildBoundaryBufferSubset(std::shared_ptr> &md, Mesh::comm_buf_map_t &buf_map) { Mesh *pmesh = md->GetMeshPointer(); + std::unordered_map nbufs; + + ForEachBoundary(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) { + // Calculate the required size of the buffer for this boundary + int buf_size = GetBufferSize(pmb, nb, v); + if (pmb->gid == nb.gid && nb.offsets.IsCell()) buf_size = 0; + + nbufs[buf_size] += 1; // relying on value init of int to 0 for initial entry + }); + ForEachBoundary(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) { // Calculate the required size of the buffer for this boundary int buf_size = GetBufferSize(pmb, nb, v); if (pmb->gid == nb.gid && nb.offsets.IsCell()) buf_size = 0; // Add a buffer pool if one does not exist for this size + using buf_t = buf_pool_t::base_t; if (pmesh->pool_map.count(buf_size) == 0) { - pmesh->pool_map.emplace(std::make_pair( - buf_size, buf_pool_t([buf_size](buf_pool_t *pool) { - using buf_t = buf_pool_t::base_t; - // TODO(LFR): Make nbuf a user settable parameter - const int nbuf = 200; - buf_t chunk("pool buffer", buf_size * nbuf); - for (int i = 1; i < nbuf; ++i) { + pmesh->pool_map.emplace( + buf_size, buf_pool_t([buf_size, &nbufs](buf_pool_t *pool) { + const auto pool_size = static_cast(nbufs[buf_size]) * buf_size; + buf_t chunk("pool buffer", pool_size); + for (int i = 1; i < nbufs[buf_size]; ++i) { pool->AddFreeObjectToPool( buf_t(chunk, std::make_pair(i * buf_size, (i + 1) * buf_size))); } return buf_t(chunk, std::make_pair(0, buf_size)); - }))); + })); + // or add to existing pool (if required) + } else { + auto pool = pmesh->pool_map[buf_size]; + const auto new_buffers_req = nbufs[buf_size] - pool.NumAvailable(); + if (new_buffers_req > 1) { + const auto pool_size = static_cast(new_buffers_req) * buf_size; + buf_t chunk("pool buffer", pool_size); + for (int i = 1; i < new_buffers_req; ++i) { + pool.AddFreeObjectToPool( + buf_t(chunk, std::make_pair(i * buf_size, (i + 1) * buf_size))); + } + } } const int receiver_rank = nb.rank; diff --git a/src/utils/object_pool.hpp b/src/utils/object_pool.hpp index 89167334277f..f69efad3810f 100644 --- a/src/utils/object_pool.hpp +++ b/src/utils/object_pool.hpp @@ -62,6 +62,10 @@ class ObjectPool { std::cout << inuse_.size() << " used objects." 
<< std::endl; } + auto NumAvailable() const { + return available_.size(); + } + std::uint64_t SizeInBytes() const { constexpr std::uint64_t datum_size = sizeof(typename base_t::value_type); std::uint64_t object_size = 0; From 5522e48181f751ba2741471aa972bd41e8b632b0 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Fri, 9 Aug 2024 09:59:42 +0200 Subject: [PATCH 2/5] Only count vars for allocated variables --- src/bvals/comms/bnd_info.cpp | 2 +- src/bvals/comms/build_boundary_buffers.cpp | 10 ++++++---- src/utils/object_pool.hpp | 4 +--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/bvals/comms/bnd_info.cpp b/src/bvals/comms/bnd_info.cpp index d30522fd6998..1505b56f956a 100644 --- a/src/bvals/comms/bnd_info.cpp +++ b/src/bvals/comms/bnd_info.cpp @@ -332,7 +332,7 @@ BndInfo BndInfo::GetSetBndInfo(MeshBlock *pmb, const NeighborBlock &nb, out.buf_allocated = false; } else { printf("%i [rank: %i] -> %i [rank: %i] (Set %s) is in state %i.\n", nb.gid, nb.rank, - pmb->gid, Globals::my_rank, v->label().c_str(), buf_state); + pmb->gid, Globals::my_rank, v->label().c_str(), static_cast(buf_state)); PARTHENON_FAIL("Buffer should be in a received state."); } return out; diff --git a/src/bvals/comms/build_boundary_buffers.cpp b/src/bvals/comms/build_boundary_buffers.cpp index a4c268d39d35..800443381a51 100644 --- a/src/bvals/comms/build_boundary_buffers.cpp +++ b/src/bvals/comms/build_boundary_buffers.cpp @@ -53,7 +53,9 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md, int buf_size = GetBufferSize(pmb, nb, v); if (pmb->gid == nb.gid && nb.offsets.IsCell()) buf_size = 0; - nbufs[buf_size] += 1; // relying on value init of int to 0 for initial entry + if (v->IsAllocated()) { + nbufs[buf_size] += 1; // relying on value init of int to 0 for initial entry + } }); ForEachBoundary(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) { @@ -64,7 +66,7 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md, // Add a buffer pool if one does not exist for this size using buf_t = buf_pool_t::base_t; if (pmesh->pool_map.count(buf_size) == 0) { - pmesh->pool_map.emplace( + pmesh->pool_map.emplace(std::make_pair( buf_size, buf_pool_t([buf_size, &nbufs](buf_pool_t *pool) { const auto pool_size = static_cast(nbufs[buf_size]) * buf_size; buf_t chunk("pool buffer", pool_size); @@ -73,10 +75,10 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md, buf_t(chunk, std::make_pair(i * buf_size, (i + 1) * buf_size))); } return buf_t(chunk, std::make_pair(0, buf_size)); - })); + }))); // or add to existing pool (if required) } else { - auto pool = pmesh->pool_map[buf_size]; + auto &pool = pmesh->pool_map.at(buf_size); const auto new_buffers_req = nbufs[buf_size] - pool.NumAvailable(); if (new_buffers_req > 1) { const auto pool_size = static_cast(new_buffers_req) * buf_size; diff --git a/src/utils/object_pool.hpp b/src/utils/object_pool.hpp index f69efad3810f..e758a7867fe2 100644 --- a/src/utils/object_pool.hpp +++ b/src/utils/object_pool.hpp @@ -62,9 +62,7 @@ class ObjectPool { std::cout << inuse_.size() << " used objects." << std::endl; } - auto NumAvailable() const { - return available_.size(); - } + auto NumAvailable() const { return available_.size(); } std::uint64_t SizeInBytes() const { constexpr std::uint64_t datum_size = sizeof(typename base_t::value_type); From 4bef0509faa15b201f4e248aedc517f0765adbd7 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Fri, 9 Aug 2024 11:15:43 +0200 Subject: [PATCH 3/5] printf debugging... 
--- src/bvals/comms/build_boundary_buffers.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/bvals/comms/build_boundary_buffers.cpp b/src/bvals/comms/build_boundary_buffers.cpp index 800443381a51..44d2b1af9012 100644 --- a/src/bvals/comms/build_boundary_buffers.cpp +++ b/src/bvals/comms/build_boundary_buffers.cpp @@ -53,11 +53,12 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md, int buf_size = GetBufferSize(pmb, nb, v); if (pmb->gid == nb.gid && nb.offsets.IsCell()) buf_size = 0; - if (v->IsAllocated()) { - nbufs[buf_size] += 1; // relying on value init of int to 0 for initial entry - } + nbufs[buf_size] += 1; // relying on value init of int to 0 for initial entry + std::cerr << "\nnbufs[" << buf_size << "] += 1, which is now " << nbufs[buf_size]; }); + std::cerr << "\nbefore nbufs.size = " << nbufs.size(); + ForEachBoundary(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) { // Calculate the required size of the buffer for this boundary int buf_size = GetBufferSize(pmb, nb, v); @@ -66,6 +67,9 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md, // Add a buffer pool if one does not exist for this size using buf_t = buf_pool_t::base_t; if (pmesh->pool_map.count(buf_size) == 0) { + std::cerr << "\ninside nbufs.size = " << nbufs.size(); + std::cerr << "\nGoing to request " << nbufs[buf_size] << " buffers of size " + << buf_size; pmesh->pool_map.emplace(std::make_pair( buf_size, buf_pool_t([buf_size, &nbufs](buf_pool_t *pool) { const auto pool_size = static_cast(nbufs[buf_size]) * buf_size; From 3027cd0a34dbc8293d535b9172ff131e268e77a5 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Mon, 12 Aug 2024 15:07:17 +0200 Subject: [PATCH 4/5] Fix allocation patterns --- src/bvals/comms/build_boundary_buffers.cpp | 55 +++++++++++++--------- src/utils/object_pool.hpp | 2 +- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/bvals/comms/build_boundary_buffers.cpp b/src/bvals/comms/build_boundary_buffers.cpp index 44d2b1af9012..ca49d3210d0a 100644 --- a/src/bvals/comms/build_boundary_buffers.cpp +++ b/src/bvals/comms/build_boundary_buffers.cpp @@ -46,7 +46,8 @@ template void BuildBoundaryBufferSubset(std::shared_ptr> &md, Mesh::comm_buf_map_t &buf_map) { Mesh *pmesh = md->GetMeshPointer(); - std::unordered_map nbufs; + std::unordered_map + nbufs; // total (existing and new) number of buffers for given size ForEachBoundary(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) { // Calculate the required size of the buffer for this boundary @@ -54,11 +55,8 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md, if (pmb->gid == nb.gid && nb.offsets.IsCell()) buf_size = 0; nbufs[buf_size] += 1; // relying on value init of int to 0 for initial entry - std::cerr << "\nnbufs[" << buf_size << "] += 1, which is now " << nbufs[buf_size]; }); - std::cerr << "\nbefore nbufs.size = " << nbufs.size(); - ForEachBoundary(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) { // Calculate the required size of the buffer for this boundary int buf_size = GetBufferSize(pmb, nb, v); @@ -67,30 +65,41 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md, // Add a buffer pool if one does not exist for this size using buf_t = buf_pool_t::base_t; if (pmesh->pool_map.count(buf_size) == 0) { - std::cerr << "\ninside nbufs.size = " << nbufs.size(); - std::cerr << "\nGoing to request " << nbufs[buf_size] << " buffers of size " - << buf_size; - pmesh->pool_map.emplace(std::make_pair( - buf_size, buf_pool_t([buf_size, &nbufs](buf_pool_t *pool) 
{
-            const auto pool_size = static_cast(nbufs[buf_size]) * buf_size;
+      std::cerr << "Setting up a new pool for buffers of size " << buf_size << "\n";
+      // Might be worth discussing what a good default is.
+      // Using the number of packs assumes that all blocks in a pack have fairly similar
+      // buffer configurations, which may or may not be a good approximation.
+      // An alternative would be "1", which would reduce the memory footprint, but
+      // increase the number of individual memory allocations.
+      const int64_t nbuf = pmesh->DefaultNumPartitions();
+      pmesh->pool_map.emplace(
+          buf_size, buf_pool_t([buf_size, nbuf](buf_pool_t *pool) {
+            std::cerr << "Dynamically adding " << nbuf << " buffers of size " << buf_size
+                      << " to a pool with current size " << pool->NumBuffersInPool()
+                      << " and future size " << pool->NumBuffersInPool() + nbuf << "\n";
+
+            const auto pool_size = nbuf * buf_size;
             buf_t chunk("pool buffer", pool_size);
-            for (int i = 1; i < nbufs[buf_size]; ++i) {
+            for (int i = 1; i < nbuf; ++i) {
               pool->AddFreeObjectToPool(
                   buf_t(chunk, std::make_pair(i * buf_size, (i + 1) * buf_size)));
             }
             return buf_t(chunk, std::make_pair(0, buf_size));
-          })));
-      // or add to existing pool (if required)
-    } else {
-      auto &pool = pmesh->pool_map.at(buf_size);
-      const auto new_buffers_req = nbufs[buf_size] - pool.NumAvailable();
-      if (new_buffers_req > 1) {
-        const auto pool_size = static_cast(new_buffers_req) * buf_size;
-        buf_t chunk("pool buffer", pool_size);
-        for (int i = 1; i < new_buffers_req; ++i) {
-          pool.AddFreeObjectToPool(
-              buf_t(chunk, std::make_pair(i * buf_size, (i + 1) * buf_size)));
-        }
+          }));
+    }
+    // Now that the pool is guaranteed to exist we can add free objects of the required
+    // amount.
+    auto &pool = pmesh->pool_map.at(buf_size);
+    const std::int64_t new_buffers_req = nbufs.at(buf_size) - pool.NumBuffersInPool();
+    if (new_buffers_req > 0) {
+      std::cerr << "Reserving " << new_buffers_req << " new buffers of size " << buf_size
+                << " to pool with " << pool.NumBuffersInPool() << " buffers because "
+                << nbufs.at(buf_size) << " are required in total.\n";
+      const auto pool_size = new_buffers_req * buf_size;
+      buf_t chunk("pool buffer", pool_size);
+      for (int i = 0; i < new_buffers_req; ++i) {
+        pool.AddFreeObjectToPool(
+            buf_t(chunk, std::make_pair(i * buf_size, (i + 1) * buf_size)));
       }
     }
 
diff --git a/src/utils/object_pool.hpp b/src/utils/object_pool.hpp
index e758a7867fe2..c7452499f126 100644
--- a/src/utils/object_pool.hpp
+++ b/src/utils/object_pool.hpp
@@ -62,7 +62,7 @@ class ObjectPool {
     std::cout << inuse_.size() << " used objects." << std::endl;
   }
 
-  auto NumAvailable() const { return available_.size(); }
+  auto NumBuffersInPool() const { return inuse_.size() + available_.size(); }
 
   std::uint64_t SizeInBytes() const {
     constexpr std::uint64_t datum_size = sizeof(typename base_t::value_type);

From ae73a4a51d193a7d29f723ed77ce7359cad289cc Mon Sep 17 00:00:00 2001
From: Philipp Grete
Date: Thu, 22 Aug 2024 16:27:17 +0200
Subject: [PATCH 5/5] Cleanup PR and include changelog

---
 CHANGELOG.md                               |  1 +
 src/bvals/comms/build_boundary_buffers.cpp | 13 +++++--------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2cfc0df66b46..08ffd596dbdf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,7 @@
 - [[PR 1004]](https://github.com/parthenon-hpc-lab/parthenon/pull/1004) Allow parameter modification from an input file for restarts
 
 ### Fixed (not changing behavior/API/variables/...)
+- [[PR 1150]](https://github.com/parthenon-hpc-lab/parthenon/pull/1150) Reduce memory consumption for buffer pool
 - [[PR 1152]](https://github.com/parthenon-hpc-lab/parthenon/pull/1152) Fix memory leak in task graph outputs related to `abi::__cxa_demangle`
 - [[PR 1146]](https://github.com/parthenon-hpc-lab/parthenon/pull/1146) Fix an issue outputting >4GB single variables per rank
 - [[PR 1144]](https://github.com/parthenon-hpc-lab/parthenon/pull/1144) Fix some restarts w/non-CC fields
diff --git a/src/bvals/comms/build_boundary_buffers.cpp b/src/bvals/comms/build_boundary_buffers.cpp
index ca49d3210d0a..aac532d037e6 100644
--- a/src/bvals/comms/build_boundary_buffers.cpp
+++ b/src/bvals/comms/build_boundary_buffers.cpp
@@ -52,6 +52,10 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md,
   ForEachBoundary(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) {
     // Calculate the required size of the buffer for this boundary
     int buf_size = GetBufferSize(pmb, nb, v);
+    // LR: Multigrid logic requires blocks sending messages to themselves (since the same
+    // block can show up on two multigrid levels). This doesn't require any data
+    // transfer, so the message size can be zero. It is essentially just a flag to show
+    // that the block is done being used on one level and can be used on the next level.
     if (pmb->gid == nb.gid && nb.offsets.IsCell()) buf_size = 0;
 
     nbufs[buf_size] += 1; // relying on value init of int to 0 for initial entry
@@ -64,12 +68,12 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md,
   ForEachBoundary(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) {
     // Calculate the required size of the buffer for this boundary
     int buf_size = GetBufferSize(pmb, nb, v);
+    // See comment above on the same logic.
     if (pmb->gid == nb.gid && nb.offsets.IsCell()) buf_size = 0;
 
     // Add a buffer pool if one does not exist for this size
     using buf_t = buf_pool_t::base_t;
     if (pmesh->pool_map.count(buf_size) == 0) {
-      std::cerr << "Setting up a new pool for buffers of size " << buf_size << "\n";
       // Might be worth discussing what a good default is.
       // Using the number of packs assumes that all blocks in a pack have fairly similar
       // buffer configurations, which may or may not be a good approximation.
@@ -74,10 +78,6 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md,
       const int64_t nbuf = pmesh->DefaultNumPartitions();
       pmesh->pool_map.emplace(
           buf_size, buf_pool_t([buf_size, nbuf](buf_pool_t *pool) {
-            std::cerr << "Dynamically adding " << nbuf << " buffers of size " << buf_size
-                      << " to a pool with current size " << pool->NumBuffersInPool()
-                      << " and future size " << pool->NumBuffersInPool() + nbuf << "\n";
-
             const auto pool_size = nbuf * buf_size;
             buf_t chunk("pool buffer", pool_size);
             for (int i = 1; i < nbuf; ++i) {
@@ -92,9 +92,6 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md,
     auto &pool = pmesh->pool_map.at(buf_size);
     const std::int64_t new_buffers_req = nbufs.at(buf_size) - pool.NumBuffersInPool();
     if (new_buffers_req > 0) {
-      std::cerr << "Reserving " << new_buffers_req << " new buffers of size " << buf_size
-                << " to pool with " << pool.NumBuffersInPool() << " buffers because "
-                << nbufs.at(buf_size) << " are required in total.\n";
      const auto pool_size = new_buffers_req * buf_size;
      buf_t chunk("pool buffer", pool_size);
      for (int i = 0; i < new_buffers_req; ++i) {
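
For readers who want the gist of the series without walking the diffs: the net effect of patches 1 and 4 is a two-pass build. First, count how many communication buffers of each size a MeshData object needs; then create or top up one pool per size, backing all newly required buffers of that size with a single allocation that is sliced into fixed-size views. The sketch below mirrors that flow with plain standard-library types only; `BufferView`, `ToyPool`, and `BuildPools` are invented stand-ins for illustration and are not Parthenon's actual `buf_pool_t`/Kokkos API.

```cpp
#include <cstdint>
#include <map>
#include <memory>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for the pooled buffer type: a view into a shared
// backing allocation ("chunk"), loosely analogous to the subviews the patches
// build via buf_t(chunk, std::make_pair(i * buf_size, (i + 1) * buf_size)).
struct BufferView {
  std::shared_ptr<std::vector<double>> chunk; // shared backing storage
  std::size_t offset;                         // first element of this slice
  std::size_t size;                           // number of elements in this slice
  double *data() { return chunk->data() + offset; }
};

// Toy stand-in for parthenon's ObjectPool; only the members the patches touch
// are modeled here.
struct ToyPool {
  std::vector<BufferView> available_; // free buffers
  std::size_t inuse_ = 0;             // buffers currently handed out
  std::size_t NumBuffersInPool() const { return inuse_ + available_.size(); }
  void AddFreeObjectToPool(BufferView b) { available_.push_back(std::move(b)); }
};

// Two-pass construction mirroring the final state of the series:
//   pass 1: count how many buffers of each size are needed,
//   pass 2: top up each pool with one chunk allocation per size.
void BuildPools(const std::vector<int> &required_sizes,
                std::map<int, ToyPool> &pool_map) {
  std::unordered_map<int, std::int64_t> nbufs;
  for (int buf_size : required_sizes) nbufs[buf_size] += 1; // pass 1

  for (int buf_size : required_sizes) { // pass 2, run per boundary like the real code
    auto &pool = pool_map[buf_size]; // creates an empty pool on first use
    const auto have = static_cast<std::int64_t>(pool.NumBuffersInPool());
    const std::int64_t new_buffers_req = nbufs.at(buf_size) - have;
    if (new_buffers_req <= 0) continue; // later visits find nothing missing

    // One backing allocation for every missing buffer of this size ...
    auto chunk = std::make_shared<std::vector<double>>(
        static_cast<std::size_t>(new_buffers_req) * buf_size);
    // ... handed out as non-overlapping views of buf_size elements each.
    for (std::int64_t i = 0; i < new_buffers_req; ++i) {
      pool.AddFreeObjectToPool(BufferView{chunk,
                                          static_cast<std::size_t>(i) * buf_size,
                                          static_cast<std::size_t>(buf_size)});
    }
  }
}
```

The two ideas this mirrors are (a) sizing against what is actually needed instead of the fixed `nbuf = 200` that patch 1 removes, and (b) paying one allocation per buffer size per top-up rather than one per buffer, since every slice shares the same chunk.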
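
Patch 4 also changes the sizing baseline from `NumAvailable()` (free objects only) to `NumBuffersInPool()` (free plus in-use) and relaxes the `> 1` guard to `> 0`, so a deficit of a single buffer still gets filled. A tiny arithmetic check with made-up numbers shows why the baseline matters once some buffers are already checked out, assuming, as the comment in patch 4 states, that `nbufs` counts the total (existing and new) buffers for a given size:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Made-up state for one buffer size: 12 buffers are needed in total
  // ("existing and new"), and the pool already owns 10 of them:
  // 6 checked out, 4 sitting free.
  const std::int64_t required = 12, inuse = 6, available = 4;

  // Sizing against NumAvailable() ignores the checked-out buffers, so 8 more
  // would be allocated and the pool would grow to 18.
  const std::int64_t req_vs_available = required - available;

  // Sizing against NumBuffersInPool() counts everything the pool owns, so
  // only the 2 genuinely missing buffers are allocated.
  const std::int64_t req_vs_total = required - (inuse + available);

  assert(req_vs_available == 8);
  assert(req_vs_total == 2);
  return 0;
}
```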