Skip to content

Commit

Permalink
Merge pull request #1119 from parthenon-hpc-lab/lroberts36/refactor-partitioning
Browse files Browse the repository at this point in the history

Refactor `MeshBlock` Partitioning
  • Loading branch information
lroberts36 authored Jun 26, 2024
2 parents a44da26 + c17b01f commit f7a1908
Show file tree
Hide file tree
Showing 27 changed files with 247 additions and 236 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Current develop

### Added (new features/APIs/variables/...)
- [[PR 1119]](https://github.com/parthenon-hpc-lab/parthenon/pull/1119) Formalize MeshData partitioning.
- [[PR 1128]](https://github.com/parthenon-hpc-lab/parthenon/pull/1128) Add cycle and nbtotal to hst
- [[PR 1099]](https://github.com/parthenon-hpc-lab/parthenon/pull/1099) Functionality for outputting task graphs in GraphViz format.
- [[PR 1091]](https://github.com/parthenon-hpc-lab/parthenon/pull/1091) Add vector wave equation example.
Expand Down
34 changes: 14 additions & 20 deletions example/advection/advection_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,27 +62,21 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in
const Real dt = integrator->dt;
const auto &stage_name = integrator->stage_name;

// first make other useful containers
if (stage == 1) {
for (int i = 0; i < blocks.size(); i++) {
auto &pmb = blocks[i];
// first make other useful containers
auto &base = pmb->meshblock_data.Get();
pmb->meshblock_data.Add("dUdt", base);
for (int s = 1; s < integrator->nstages; s++)
pmb->meshblock_data.Add(stage_name[s], base);
}
}

const int num_partitions = pmesh->DefaultNumPartitions();

auto partitions = pmesh->GetDefaultBlockPartitions();
int num_partitions = partitions.size();
// note that task within this region that contains one tasklist per pack
// could still be executed in parallel
TaskRegion &single_tasklist_per_pack_region2 = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &tl = single_tasklist_per_pack_region2[i];
auto &mc0 = pmesh->mesh_data.GetOrAdd(stage_name[stage - 1], i);
auto &mc1 = pmesh->mesh_data.GetOrAdd(stage_name[stage], i);
// Initialize the base MeshData for this partition
// (this automatically initializes the MeshBlockData objects
// required by this MeshData object)
auto &mbase = pmesh->mesh_data.Add("base", partitions[i]);
// Initialize other MeshData objects based on the base container
auto &mc0 = pmesh->mesh_data.Add(stage_name[stage - 1], mbase);
auto &mc1 = pmesh->mesh_data.Add(stage_name[stage], mbase);
auto &mdudt = pmesh->mesh_data.Add("dUdt", mbase);

const auto any = parthenon::BoundaryType::any;

Expand Down Expand Up @@ -119,10 +113,10 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in
TaskRegion &single_tasklist_per_pack_region = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &tl = single_tasklist_per_pack_region[i];
auto &mbase = pmesh->mesh_data.GetOrAdd("base", i);
auto &mc0 = pmesh->mesh_data.GetOrAdd(stage_name[stage - 1], i);
auto &mc1 = pmesh->mesh_data.GetOrAdd(stage_name[stage], i);
auto &mdudt = pmesh->mesh_data.GetOrAdd("dUdt", i);
auto &mbase = pmesh->mesh_data.Add("base", partitions[i]);
auto &mc0 = pmesh->mesh_data.Add(stage_name[stage - 1], mbase);
auto &mc1 = pmesh->mesh_data.Add(stage_name[stage], mbase);
auto &mdudt = pmesh->mesh_data.Add("dUdt", mbase);

auto set_flx = parthenon::AddFluxCorrectionTasks(none, tl, mc0, pmesh->multilevel);

Expand Down
5 changes: 3 additions & 2 deletions example/calculate_pi/pi_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,15 @@ TaskCollection PiDriver::MakeTaskCollection(T &blocks) {
using calculate_pi::ComputeArea;
TaskCollection tc;

const int num_partitions = pmesh->DefaultNumPartitions();
auto partitions = pmesh->GetDefaultBlockPartitions();
const int num_partitions = partitions.size();
ParArrayHost<Real> areas("areas", num_partitions);
TaskRegion &async_region = tc.AddRegion(num_partitions);
{
// asynchronous region where area is computed per partition
for (int i = 0; i < num_partitions; i++) {
TaskID none(0);
auto &md = pmesh->mesh_data.GetOrAdd("base", i);
auto &md = pmesh->mesh_data.Add("base", partitions[i]);
auto get_area = async_region[i].AddTask(none, ComputeArea, md, areas, i);
}
}
Expand Down
14 changes: 7 additions & 7 deletions example/fine_advection/advection_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,15 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in
const Real beta = integrator->beta[stage - 1];
const Real dt = integrator->dt;

const int num_partitions = pmesh->DefaultNumPartitions();
TaskRegion &single_tasklist_per_pack_region = tc.AddRegion(num_partitions);
auto partitions = pmesh->GetDefaultBlockPartitions();
TaskRegion &single_tasklist_per_pack_region = tc.AddRegion(partitions.size());

for (int i = 0; i < num_partitions; i++) {
for (int i = 0; i < partitions.size(); i++) {
auto &tl = single_tasklist_per_pack_region[i];
auto &mbase = pmesh->mesh_data.GetOrAdd("base", i);
auto &mc0 = pmesh->mesh_data.GetOrAdd(stage_name[stage - 1], i);
auto &mc1 = pmesh->mesh_data.GetOrAdd(stage_name[stage], i);
auto &mdudt = pmesh->mesh_data.GetOrAdd("dUdt", i);
auto &mbase = pmesh->mesh_data.Add("base", partitions[i]);
auto &mc0 = pmesh->mesh_data.Add(stage_name[stage - 1], mbase);
auto &mc1 = pmesh->mesh_data.Add(stage_name[stage], mbase);
auto &mdudt = pmesh->mesh_data.Add("dUdt", mbase);

auto start_send = tl.AddTask(none, parthenon::StartReceiveBoundaryBuffers, mc1);
auto start_flxcor = tl.AddTask(none, parthenon::StartReceiveFluxCorrections, mc0);
Expand Down
5 changes: 3 additions & 2 deletions example/particle_leapfrog/particle_leapfrog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,8 @@ TaskCollection ParticleDriver::MakeParticlesUpdateTaskCollection() const {
TaskID none(0);
const BlockList_t &blocks = pmesh->block_list;

const int num_partitions = pmesh->DefaultNumPartitions();
auto partitions = pmesh->GetDefaultBlockPartitions();
const int num_partitions = partitions.size();
const int num_task_lists_executed_independently = blocks.size();

TaskRegion &sync_region0 = tc.AddRegion(1);
Expand All @@ -304,7 +305,7 @@ TaskCollection ParticleDriver::MakeParticlesUpdateTaskCollection() const {
TaskRegion &tr = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &tl = tr[i];
auto &base = pmesh->mesh_data.GetOrAdd("base", i);
auto &base = pmesh->mesh_data.Add("base", partitions[i]);
auto transport = tl.AddTask(none, TransportParticles, base.get(), &integrator);
}

Expand Down
11 changes: 6 additions & 5 deletions example/particle_tracers/particle_tracers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,16 +429,17 @@ TaskCollection ParticleDriver::MakeTaskCollection(BlockList_t &blocks, int stage
auto advect_flux = tl.AddTask(none, tracers_example::CalculateFluxes, sc0.get());
}

const int num_partitions = pmesh->DefaultNumPartitions();
auto partitions = pmesh->GetDefaultBlockPartitions();
const int num_partitions = partitions.size();
// note that task within this region that contains one tasklist per pack
// could still be executed in parallel
TaskRegion &single_tasklist_per_pack_region = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &tl = single_tasklist_per_pack_region[i];
auto &mbase = pmesh->mesh_data.GetOrAdd("base", i);
auto &mc0 = pmesh->mesh_data.GetOrAdd(stage_name[stage - 1], i);
auto &mc1 = pmesh->mesh_data.GetOrAdd(stage_name[stage], i);
auto &mdudt = pmesh->mesh_data.GetOrAdd("dUdt", i);
auto &mbase = pmesh->mesh_data.Add("base", partitions[i]);
auto &mc0 = pmesh->mesh_data.Add(stage_name[stage - 1], mbase);
auto &mc1 = pmesh->mesh_data.Add(stage_name[stage], mbase);
auto &mdudt = pmesh->mesh_data.Add("dUdt", mbase);

const auto any = parthenon::BoundaryType::any;

Expand Down
7 changes: 4 additions & 3 deletions example/poisson/poisson_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) {
auto fail_flag = pkg->Param<bool>("fail_without_convergence");
auto warn_flag = pkg->Param<bool>("warn_without_convergence");

const int num_partitions = pmesh->DefaultNumPartitions();
auto partitions = pmesh->GetDefaultBlockPartitions();
const int num_partitions = partitions.size();
TaskRegion &solver_region = tc.AddRegion(num_partitions);

// setup some reductions
Expand All @@ -72,8 +73,8 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) {
pkg->MutableParam<AllReduce<HostArray1D<Real>>>("view_reduce");
for (int i = 0; i < num_partitions; i++) {
// make/get a mesh_data container for the state
auto &md = pmesh->mesh_data.GetOrAdd("base", i);
auto &mdelta = pmesh->mesh_data.GetOrAdd("delta", i);
auto &md = pmesh->mesh_data.Add("base", partitions[i]);
auto &mdelta = pmesh->mesh_data.Add("delta", md);

TaskList &tl = solver_region[i];

Expand Down
5 changes: 3 additions & 2 deletions example/poisson_gmg/poisson_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,12 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) {
pkg->MutableParam<parthenon::solvers::BiCGSTABSolver<u, rhs, PoissonEquation>>(
"MGBiCGSTABsolver");

const int num_partitions = pmesh->DefaultNumPartitions();
auto partitions = pmesh->GetDefaultBlockPartitions();
const int num_partitions = partitions.size();
TaskRegion &region = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; ++i) {
TaskList &tl = region[i];
auto &md = pmesh->mesh_data.GetOrAdd("base", i);
auto &md = pmesh->mesh_data.Add("base", partitions[i]);

// Possibly set rhs <- A.u_exact for a given u_exact so that the exact solution is
// known when we solve A.u = rhs
Expand Down
24 changes: 8 additions & 16 deletions example/sparse_advection/sparse_advection_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,34 +74,26 @@ TaskCollection SparseAdvectionDriver::MakeTaskCollection(BlockList_t &blocks,
for (int i = 0; i < blocks.size(); i++) {
auto &pmb = blocks[i];
auto &tl = async_region1[i];
// first make other useful containers
if (stage == 1) {
auto &base = pmb->meshblock_data.Get();
pmb->meshblock_data.Add("dUdt", base);
for (int i = 1; i < integrator->nstages; i++)
pmb->meshblock_data.Add(stage_name[i], base);
}

// pull out the container we'll use to get fluxes and/or compute RHSs
auto &sc0 = pmb->meshblock_data.Get(stage_name[stage - 1]);
// pull out the container that will hold the updated state
// effectively, sc1 = sc0 + dudt*dt
auto &sc1 = pmb->meshblock_data.Get(stage_name[stage]);
auto &base = pmb->meshblock_data.Add("base", pmb);
auto &sc0 = pmb->meshblock_data.Add(stage_name[stage - 1], base);

auto advect_flux =
tl.AddTask(none, TF(sparse_advection_package::CalculateFluxes), sc0);
}

const int num_partitions = pmesh->DefaultNumPartitions();
auto partitions = pmesh->GetDefaultBlockPartitions();
const int num_partitions = partitions.size();
// note that task within this region that contains one tasklist per pack
// could still be executed in parallel
TaskRegion &single_tasklist_per_pack_region = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &tl = single_tasklist_per_pack_region[i];
auto &mbase = pmesh->mesh_data.GetOrAdd("base", i);
auto &mc0 = pmesh->mesh_data.GetOrAdd(stage_name[stage - 1], i);
auto &mc1 = pmesh->mesh_data.GetOrAdd(stage_name[stage], i);
auto &mdudt = pmesh->mesh_data.GetOrAdd("dUdt", i);
auto &mbase = pmesh->mesh_data.Add("base", partitions[i]);
auto &mc0 = pmesh->mesh_data.Add(stage_name[stage - 1], mbase);
auto &mc1 = pmesh->mesh_data.Add(stage_name[stage], mbase);
auto &mdudt = pmesh->mesh_data.Add("dUdt", mbase);

const auto any = parthenon::BoundaryType::any;
auto start_flxcor = tl.AddTask(none, TF(parthenon::StartReceiveFluxCorrections), mc0);
Expand Down
16 changes: 9 additions & 7 deletions example/stochastic_subgrid/stochastic_subgrid_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,11 @@ TaskCollection StochasticSubgridDriver::MakeTaskCollection(BlockList_t &blocks,

// sample number of iterations task
{
const int num_partitions = pmesh->DefaultNumPartitions();
auto partitions = pmesh->GetDefaultBlockPartitions();
const int num_partitions = partitions.size();
TaskRegion &async_region = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &md = pmesh->mesh_data.GetOrAdd("base", i);
auto &md = pmesh->mesh_data.Add("base", partitions[i]);
async_region[i].AddTask(none, ComputeNumIter, md, pmesh->packages);
}
}
Expand Down Expand Up @@ -114,17 +115,18 @@ TaskCollection StochasticSubgridDriver::MakeTaskCollection(BlockList_t &blocks,
{
const Real beta = integrator->beta[stage - 1];
const Real dt = integrator->dt;
const int num_partitions = pmesh->DefaultNumPartitions();
auto partitions = pmesh->GetDefaultBlockPartitions();
const int num_partitions = partitions.size();

// note that task within this region that contains one tasklist per pack
// could still be executed in parallel
TaskRegion &single_tasklist_per_pack_region = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &tl = single_tasklist_per_pack_region[i];
auto &mbase = pmesh->mesh_data.GetOrAdd("base", i);
auto &mc0 = pmesh->mesh_data.GetOrAdd(stage_name[stage - 1], i);
auto &mc1 = pmesh->mesh_data.GetOrAdd(stage_name[stage], i);
auto &mdudt = pmesh->mesh_data.GetOrAdd("dUdt", i);
auto &mbase = pmesh->mesh_data.Add("base", partitions[i]);
auto &mc0 = pmesh->mesh_data.Add(stage_name[stage - 1], mbase);
auto &mc1 = pmesh->mesh_data.Add(stage_name[stage], mbase);
auto &mdudt = pmesh->mesh_data.Add("dUdt", mbase);

const auto any = parthenon::BoundaryType::any;

Expand Down
7 changes: 6 additions & 1 deletion src/basic_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ enum class BoundaryType : int {
gmg_prolongate_recv
};

enum class GridType { none, leaf, two_level_composite, single_level_with_internal };
enum class GridType : int { none, leaf, two_level_composite, single_level_with_internal };
struct GridIdentifier {
GridType type = GridType::none;
int logical_level = 0;
Expand All @@ -87,6 +87,11 @@ struct GridIdentifier {
return GridIdentifier{GridType::two_level_composite, level};
}
};
// Add a comparator so we can store in std::map
// Strict weak ordering for GridIdentifier so it can be used as a std::map key:
// order primarily by grid type, breaking ties with the logical level.
inline bool operator<(const GridIdentifier &lhs, const GridIdentifier &rhs) {
  if (lhs.type == rhs.type) return lhs.logical_level < rhs.logical_level;
  return lhs.type < rhs.type;
}

constexpr bool IsSender(BoundaryType btype) {
if (btype == BoundaryType::flxcor_recv) return false;
Expand Down
2 changes: 0 additions & 2 deletions src/bvals/comms/build_boundary_buffers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,6 @@ void BuildBoundaryBufferSubset(std::shared_ptr<MeshData<Real>> &md,
}
} // namespace

// pmesh->boundary_comm_map.clear() after every remesh
// in InitializeBlockTimeStepsAndBoundaries()
TaskStatus BuildBoundaryBuffers(std::shared_ptr<MeshData<Real>> &md) {
PARTHENON_INSTRUMENT
Mesh *pmesh = md->GetMeshPointer();
Expand Down
6 changes: 5 additions & 1 deletion src/bvals/comms/bvals_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,11 @@ void InitializeBufferCache(std::shared_ptr<MeshData<Real>> &md, COMM_MAP *comm_m
pcache->idx_vec = std::vector<std::size_t>(key_order.size());
std::for_each(std::begin(key_order), std::end(key_order), [&](auto &t) {
if (comm_map->count(std::get<2>(t)) == 0) {
PARTHENON_FAIL("Asking for buffer that doesn't exist");
auto key = std::get<2>(t);
PARTHENON_FAIL(std::string("Asking for buffer that doesn't exist") +
" (sender: " + std::to_string(std::get<0>(key)) + ", receiver: " +
std::to_string(std::get<1>(key)) + ", var: " + std::get<2>(key) +
", location: " + std::to_string(std::get<3>(key)) + ")");
}
pcache->buf_vec.push_back(&((*comm_map)[std::get<2>(t)]));
(pcache->idx_vec)[std::get<1>(t)] = buff_idx++;
Expand Down
18 changes: 5 additions & 13 deletions src/driver/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ void Driver::PostExecute(DriverStatus status) {

DriverStatus EvolutionDriver::Execute() {
PreExecute();
InitializeBlockTimeStepsAndBoundaries();
InitializeBlockTimeSteps();
SetGlobalTimeStep();

// Before loop do work
Expand Down Expand Up @@ -123,7 +123,7 @@ DriverStatus EvolutionDriver::Execute() {

timer_LBandAMR.reset();
pmesh->LoadBalancingAndAdaptiveMeshRefinement(pinput, app_input);
if (pmesh->modified) InitializeBlockTimeStepsAndBoundaries();
if (pmesh->modified) InitializeBlockTimeSteps();
time_LBandAMR += timer_LBandAMR.seconds();
SetGlobalTimeStep();

Expand Down Expand Up @@ -188,23 +188,15 @@ void EvolutionDriver::PostExecute(DriverStatus status) {
Driver::PostExecute(status);
}

void EvolutionDriver::InitializeBlockTimeStepsAndBoundaries() {
void EvolutionDriver::InitializeBlockTimeSteps() {
// calculate the first time step using Block function
for (auto &pmb : pmesh->block_list) {
Update::EstimateTimestep(pmb->meshblock_data.Get().get());
}
// calculate the first time step using Mesh function
pmesh->boundary_comm_map.clear();
const int num_partitions = pmesh->DefaultNumPartitions();
for (int i = 0; i < num_partitions; i++) {
auto &mbase = pmesh->mesh_data.GetOrAdd("base", i);
for (auto &partition : pmesh->GetDefaultBlockPartitions()) {
auto &mbase = pmesh->mesh_data.Add("base", partition);
Update::EstimateTimestep(mbase.get());
BuildBoundaryBuffers(mbase);
for (auto &[gmg_level, mdc] : pmesh->gmg_mesh_data) {
auto &mdg = mdc.GetOrAdd(gmg_level, "base", i);
BuildBoundaryBuffers(mdg);
BuildGMGBoundaryBuffers(mdg);
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/driver/driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class EvolutionDriver : public Driver {
void PostExecute(DriverStatus status) override;

private:
void InitializeBlockTimeStepsAndBoundaries();
void InitializeBlockTimeSteps();
};

namespace DriverUtils {
Expand Down
Loading

0 comments on commit f7a1908

Please sign in to comment.