Perform swarm boundary logic on device #1154

Merged: 25 commits (merged Aug 20, 2024)
Commits
3bc0824
First kernel down
brryan Jul 31, 2024
e21af3b
Further cleanup
brryan Jul 31, 2024
73abc93
Notes
brryan Jul 31, 2024
5e084a7
Send seems to work
brryan Aug 13, 2024
5abcd5d
Need to promote particles example to latest pattern
brryan Aug 13, 2024
27b1c8e
May have fixed particles example
brryan Aug 13, 2024
8911325
Merge branch 'develop' of github.com:lanl/parthenon into brryan/swarm…
brryan Aug 13, 2024
3df0f26
cycles...
brryan Aug 13, 2024
1e0ec29
iterative tasking is only iterating on one meshblock in particles exa…
brryan Aug 13, 2024
a5c276f
Still not working...
brryan Aug 13, 2024
bede6c5
New loop seems to work
brryan Aug 14, 2024
981cdb8
Cleaned up some printfs/marked unused code for deletion
brryan Aug 14, 2024
6ca217c
New algorithm is cycling, time to clean up
brryan Aug 14, 2024
0fa6d30
Fixed indexing bug...
brryan Aug 14, 2024
a37cad2
Clean up
brryan Aug 14, 2024
5fe308c
finished_transport shouldnt be provided by swarm
brryan Aug 14, 2024
8e3ad9a
Reverting to old manual iterative tasking
brryan Aug 19, 2024
78c79b6
Still working...
brryan Aug 19, 2024
867b465
Starting to make progress...
brryan Aug 20, 2024
b0ed973
Cleaned up
brryan Aug 20, 2024
1e0e5d1
Merge branch 'brryan/more_swarm_prefix_sums' into brryan/swarm_comms_…
brryan Aug 20, 2024
89e034a
Merge branch 'develop' of github.com:lanl/parthenon into brryan/swarm…
brryan Aug 20, 2024
c2ef49d
format
brryan Aug 20, 2024
9f22dca
Merge branch 'brryan/more_swarm_prefix_sums' of github.com:lanl/parth…
brryan Aug 20, 2024
012ee54
A few leftover print statements
brryan Aug 20, 2024
6 changes: 2 additions & 4 deletions example/particles/parthinput.particles
@@ -24,10 +24,8 @@ refinement = none
nx1 = 16
x1min = -0.5
x1max = 0.5
ix1_bc = user
ox1_bc = user
# ix1_bc = periodic # Optionally use periodic boundary conditions everywhere
# ox1_bc = periodic
ix1_bc = periodic
ox1_bc = periodic

nx2 = 16
x2min = -0.5
250 changes: 150 additions & 100 deletions example/particles/particles.cpp
@@ -229,7 +229,7 @@ TaskStatus DepositParticles(MeshBlock *pmb) {
return TaskStatus::complete;
}

TaskStatus CreateSomeParticles(MeshBlock *pmb, const double t0) {
TaskStatus CreateSomeParticles(MeshBlock *pmb, const Real t0) {
PARTHENON_INSTRUMENT

auto pkg = pmb->packages.Get("particles_package");
@@ -340,8 +340,7 @@ TaskStatus CreateSomeParticles(MeshBlock *pmb, const double t0) {
return TaskStatus::complete;
}

TaskStatus TransportParticles(MeshBlock *pmb, const StagedIntegrator *integrator,
const double t0) {
TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) {
PARTHENON_INSTRUMENT

auto swarm = pmb->meshblock_data.Get()->GetSwarmData()->Get("my_particles");
@@ -350,8 +349,6 @@ TaskStatus TransportParticles(MeshBlock *pmb, const StagedIntegrator *integrator

int max_active_index = swarm->GetMaxActiveIndex();

Real dt = integrator->dt;

auto &t = swarm->Get<Real>("t").Get();
auto &x = swarm->Get<Real>(swarm_position::x::name()).Get();
auto &y = swarm->Get<Real>(swarm_position::y::name()).Get();
@@ -469,101 +466,33 @@ TaskStatus TransportParticles(MeshBlock *pmb, const StagedIntegrator *integrator
// Custom step function to allow for looping over MPI-related tasks until complete
TaskListStatus ParticleDriver::Step() {
TaskListStatus status;
integrator.dt = tm.dt;

PARTHENON_REQUIRE(integrator.nstages == 1,
"Only first order time integration supported!");

BlockList_t &blocks = pmesh->block_list;
auto num_task_lists_executed_independently = blocks.size();

// Create all the particles that will be created during the step
status = MakeParticlesCreationTaskCollection().Execute();
PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!");

// Loop over repeated MPI calls until every particle is finished. This logic is
// required because long-distance particle pushes can lead to a large, unpredictable
// number of MPI sends and receives.
bool particles_update_done = false;
while (!particles_update_done) {
status = MakeParticlesUpdateTaskCollection().Execute();

particles_update_done = true;
for (auto &block : blocks) {
// TODO(BRR) Despite this "my_particles"-specific call, this function feels like it
// should be generalized
auto swarm = block->meshblock_data.Get()->GetSwarmData()->Get("my_particles");
if (!swarm->finished_transport) {
particles_update_done = false;
}
}
}
// Transport particles iteratively until all particles reach final time
status = IterativeTransport();
// status = MakeParticlesTransportTaskCollection().Execute();
PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!");

// Use a more traditional task list for predictable post-MPI evaluations.
status = MakeFinalizationTaskCollection().Execute();
PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!");

return status;
}

// TODO(BRR) This should really be in parthenon/src... but it can't just live in Swarm
// because of the loop over blocks
TaskStatus StopCommunicationMesh(const BlockList_t &blocks) {
PARTHENON_INSTRUMENT

int num_sent_local = 0;
for (auto &block : blocks) {
auto sc = block->meshblock_data.Get()->GetSwarmData();
auto swarm = sc->Get("my_particles");
swarm->finished_transport = false;
num_sent_local += swarm->num_particles_sent_;
}

int num_sent_global = num_sent_local; // potentially overwritten by following Allreduce
#ifdef MPI_PARALLEL
for (auto &block : blocks) {
auto swarm = block->meshblock_data.Get()->GetSwarmData()->Get("my_particles");
for (int n = 0; n < block->neighbors.size(); n++) {
NeighborBlock &nb = block->neighbors[n];
// TODO(BRR) May want logic like this if we have non-blocking TaskRegions
// if (nb.snb.rank != Globals::my_rank) {
// if (swarm->vbswarm->bd_var_.flag[nb.bufid] != BoundaryStatus::completed) {
// return TaskStatus::incomplete;
// }
//}

// TODO(BRR) May want to move this logic into a per-cycle initialization call
if (swarm->vbswarm->bd_var_.flag[nb.bufid] == BoundaryStatus::completed) {
swarm->vbswarm->bd_var_.req_send[nb.bufid] = MPI_REQUEST_NULL;
}
}
}

MPI_Allreduce(&num_sent_local, &num_sent_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#endif // MPI_PARALLEL

if (num_sent_global == 0) {
for (auto &block : blocks) {
auto &pmb = block;
auto sc = pmb->meshblock_data.Get()->GetSwarmData();
auto swarm = sc->Get("my_particles");
swarm->finished_transport = true;
}
}

// Reset boundary statuses
for (auto &block : blocks) {
auto &pmb = block;
auto sc = pmb->meshblock_data.Get()->GetSwarmData();
auto swarm = sc->Get("my_particles");
for (int n = 0; n < pmb->neighbors.size(); n++) {
auto &nb = block->neighbors[n];
swarm->vbswarm->bd_var_.flag[nb.bufid] = BoundaryStatus::waiting;
}
}

return TaskStatus::complete;
}

TaskCollection ParticleDriver::MakeParticlesCreationTaskCollection() const {
TaskCollection tc;
TaskID none(0);
const double t0 = tm.time;
const Real t0 = tm.time;
const BlockList_t &blocks = pmesh->block_list;

auto num_task_lists_executed_independently = blocks.size();
@@ -577,40 +506,161 @@ TaskCollection ParticleDriver::MakeParticlesCreationTaskCollection() const {
return tc;
}

TaskCollection ParticleDriver::MakeParticlesUpdateTaskCollection() const {
// TODO(BRR) To be used in the future, currently there appears to be a bug in iterative
// tasking
// TaskStatus CheckCompletion(MeshBlock *pmb, const Real tf) {
// auto swarm = pmb->meshblock_data.Get()->GetSwarmData()->Get("my_particles");
//
// int max_active_index = swarm->GetMaxActiveIndex();
//
// auto &t = swarm->Get<Real>("t").Get();
//
// auto swarm_d = swarm->GetDeviceContext();
//
// int num_unfinished = 0;
// parthenon::par_reduce(
// PARTHENON_AUTO_LABEL, 0, max_active_index,
// KOKKOS_LAMBDA(const int n, int &num_unfinished) {
// if (swarm_d.IsActive(n)) {
// if (t(n) < tf) {
// num_unfinished++;
// }
// }
// },
// Kokkos::Sum<int>(num_unfinished));
//
// if (num_unfinished > 0) {
// return TaskStatus::iterate;
// } else {
// return TaskStatus::complete;
// }
//}
// TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const {
// using TQ = TaskQualifier;
//
// TaskCollection tc;
//
// TaskID none(0);
// BlockList_t &blocks = pmesh->block_list;
//
// const int max_transport_iterations = 1000;
//
// const Real t0 = tm.time;
// const Real dt = tm.dt;
//
// auto &reg = tc.AddRegion(blocks.size());
//
// for (int i = 0; i < blocks.size(); i++) {
// auto &pmb = blocks[i];
// auto &sc = pmb->meshblock_data.Get()->GetSwarmData();
// auto &tl = reg[i];
//
// // Add task sublist
// auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations});
// auto transport = itl.AddTask(none, TransportParticles, pmb.get(), t0, dt);
// auto reset_comms =
// itl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get());
// auto send = itl.AddTask(reset_comms, &SwarmContainer::Send, sc.get(),
// BoundaryCommSubset::all);
// auto receive =
// itl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all);
//
// auto complete = itl.AddTask(TQ::global_sync | TQ::completion, receive,
// CheckCompletion, pmb.get(), t0 + dt);
// }
//
// return tc;
//}

TaskStatus CountNumSent(const BlockList_t &blocks, const Real tf, bool *done) {
int num_unfinished_local = 0;
for (auto &block : blocks) {
auto sc = block->meshblock_data.Get()->GetSwarmData();
auto swarm = sc->Get("my_particles");
int max_active_index = swarm->GetMaxActiveIndex();

auto &t = swarm->Get<Real>("t").Get();

auto swarm_d = swarm->GetDeviceContext();

int num_unfinished_block = 0;
parthenon::par_reduce(
PARTHENON_AUTO_LABEL, 0, max_active_index,
KOKKOS_LAMBDA(const int n, int &num_unfinished) {
if (swarm_d.IsActive(n)) {
if (t(n) < tf) {
num_unfinished++;
}
}
},
Kokkos::Sum<int>(num_unfinished_block));
num_unfinished_local += num_unfinished_block;
}

int num_unfinished_global = num_unfinished_local;
#ifdef MPI_PARALLEL
MPI_Allreduce(&num_unfinished_local, &num_unfinished_global, 1, MPI_INT, MPI_SUM,
MPI_COMM_WORLD);
#endif // MPI_PARALLEL

if (num_unfinished_global > 0) {
*done = false;
} else {
*done = true;
}

return TaskStatus::complete;
}

TaskCollection ParticleDriver::IterativeTransportTaskCollection(bool *done) const {
TaskCollection tc;
TaskID none(0);
const double t0 = tm.time;
const BlockList_t &blocks = pmesh->block_list;
const int nblocks = blocks.size();
const Real t0 = tm.time;
const Real dt = tm.dt;

auto num_task_lists_executed_independently = blocks.size();

TaskRegion &async_region0 = tc.AddRegion(num_task_lists_executed_independently);
for (int i = 0; i < blocks.size(); i++) {
TaskRegion &async_region = tc.AddRegion(nblocks);
for (int i = 0; i < nblocks; i++) {
auto &pmb = blocks[i];

auto &sc = pmb->meshblock_data.Get()->GetSwarmData();
auto &tl = async_region[i];

auto &tl = async_region0[i];

auto transport_particles =
tl.AddTask(none, TransportParticles, pmb.get(), &integrator, t0);

auto send = tl.AddTask(transport_particles, &SwarmContainer::Send, sc.get(),
BoundaryCommSubset::all);
auto transport = tl.AddTask(none, TransportParticles, pmb.get(), t0, dt);
auto reset_comms =
tl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get());
auto send =
tl.AddTask(reset_comms, &SwarmContainer::Send, sc.get(), BoundaryCommSubset::all);
auto receive =
tl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all);
}

TaskRegion &sync_region0 = tc.AddRegion(1);
TaskRegion &sync_region = tc.AddRegion(1);
{
auto &tl = sync_region0[0];
auto stop_comm = tl.AddTask(none, StopCommunicationMesh, blocks);
auto &tl = sync_region[0];
auto check_completion = tl.AddTask(none, CountNumSent, blocks, t0 + dt, done);
}

return tc;
}

// TODO(BRR) to be replaced by iterative tasklist machinery
TaskListStatus ParticleDriver::IterativeTransport() const {
TaskListStatus status;
bool transport_done = false;
int n_transport_iter = 0;
int n_transport_iter_max = 1000;
while (!transport_done) {
status = IterativeTransportTaskCollection(&transport_done).Execute();

n_transport_iter++;
PARTHENON_REQUIRE(n_transport_iter < n_transport_iter_max,
"Too many transport iterations!");
}

return status;
}

TaskCollection ParticleDriver::MakeFinalizationTaskCollection() const {
TaskCollection tc;
TaskID none(0);
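The core of the new CountNumSent task above is a device-side reduction over per-particle completion times followed by a global MPI sum. The standalone sketch below illustrates that pattern with raw Kokkos and MPI rather than Parthenon's par_reduce and swarm device context; the view names and the AllParticlesFinished wrapper are illustrative only and are not part of this PR.

#include <Kokkos_Core.hpp>
#ifdef MPI_PARALLEL
#include <mpi.h>
#endif

// Return true once no particle on any rank still has t(n) < tf. Here "t" holds
// per-particle times and "active" per-particle active flags; both are stand-ins
// for the swarm's device-side data, not Parthenon API.
bool AllParticlesFinished(const Kokkos::View<double *> &t,
                          const Kokkos::View<bool *> &active, const double tf) {
  const int nmax = static_cast<int>(t.extent(0));
  int num_unfinished_local = 0;
  Kokkos::parallel_reduce(
      "CountUnfinished", nmax,
      KOKKOS_LAMBDA(const int n, int &num_unfinished) {
        if (active(n) && t(n) < tf) num_unfinished++;
      },
      num_unfinished_local);

  int num_unfinished_global = num_unfinished_local;
#ifdef MPI_PARALLEL
  // Sum the per-rank counts so every rank agrees on completion.
  MPI_Allreduce(&num_unfinished_local, &num_unfinished_global, 1, MPI_INT, MPI_SUM,
                MPI_COMM_WORLD);
#endif // MPI_PARALLEL
  return num_unfinished_global == 0;
}

Only the reduced integer leaves the device; the per-particle check itself stays in the kernel, in keeping with the PR's goal of performing swarm boundary logic on device.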
4 changes: 3 additions & 1 deletion example/particles/particles.hpp
@@ -33,7 +33,9 @@ class ParticleDriver : public EvolutionDriver {
ParticleDriver(ParameterInput *pin, ApplicationInput *app_in, Mesh *pm)
: EvolutionDriver(pin, app_in, pm), integrator(pin) {}
TaskCollection MakeParticlesCreationTaskCollection() const;
TaskCollection MakeParticlesUpdateTaskCollection() const;
TaskCollection MakeParticlesTransportTaskCollection() const;
TaskListStatus IterativeTransport() const;
TaskCollection IterativeTransportTaskCollection(bool *done) const;
TaskCollection MakeFinalizationTaskCollection() const;
TaskListStatus Step();

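The new declarations (IterativeTransport and IterativeTransportTaskCollection) replace the while loop that previously lived inside Step(). Below is a condensed, compilable sketch of the control flow they support; the callable parameter is a stand-in for building and executing one IterativeTransportTaskCollection pass, so the sketch stays independent of the Parthenon tasking API.

#include <functional>
#include <stdexcept>

// Run transport/communication passes until a globally agreed "done" flag is set,
// with an iteration cap as a safety net (mirroring n_transport_iter_max in the
// diff). run_one_pass executes one pass, sets *done, and returns false on failure.
void IterateTransport(const std::function<bool(bool *)> &run_one_pass) {
  bool done = false;
  const int max_iterations = 1000;
  for (int iter = 0; !done; ++iter) {
    if (iter >= max_iterations)
      throw std::runtime_error("Too many transport iterations!");
    if (!run_one_pass(&done))
      throw std::runtime_error("Transport task list failed!");
  }
}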
15 changes: 7 additions & 8 deletions src/interface/swarm.cpp
@@ -69,12 +69,11 @@ Swarm::Swarm(const std::string &label, const Metadata &metadata, const int nmax_
empty_indices_("empty_indices_", nmax_pool_),
block_index_("block_index_", nmax_pool_),
neighbor_indices_("neighbor_indices_", 4, 4, 4),
new_indices_("new_indices_", nmax_pool_),
from_to_indices_("from_to_indices_", nmax_pool_ + 1),
recv_neighbor_index_("recv_neighbor_index_", nmax_pool_),
recv_buffer_index_("recv_buffer_index_", nmax_pool_),
scratch_a_("scratch_a_", nmax_pool_), scratch_b_("scratch_b_", nmax_pool_),
new_indices_("new_indices_", nmax_pool_), scratch_a_("scratch_a_", nmax_pool_),
scratch_b_("scratch_b_", nmax_pool_),
num_particles_to_send_("num_particles_to_send_", NMAX_NEIGHBORS),
buffer_counters_("buffer_counters_", NMAX_NEIGHBORS),
neighbor_received_particles_("neighbor_received_particles_", NMAX_NEIGHBORS),
cell_sorted_("cell_sorted_", nmax_pool_), mpiStatus(true) {
PARTHENON_REQUIRE_THROWS(typeid(Coordinates_t) == typeid(UniformCartesian),
"SwarmDeviceContext only supports a uniform Cartesian mesh!");
@@ -86,6 +85,9 @@
Add(swarm_position::y::name(), Metadata({Metadata::Real}));
Add(swarm_position::z::name(), Metadata({Metadata::Real}));

// Create associated host arrays
neighbor_received_particles_h = neighbor_received_particles_.GetHostMirror();

// Initialize index metadata
num_active_ = 0;
max_active_index_ = inactive_max_active_index;
@@ -201,9 +203,6 @@ void Swarm::SetPoolMax(const std::int64_t nmax_pool) {
Kokkos::resize(marked_for_removal_, nmax_pool);
Kokkos::resize(empty_indices_, nmax_pool);
Kokkos::resize(new_indices_, nmax_pool);
Kokkos::resize(from_to_indices_, nmax_pool + 1);
Kokkos::resize(recv_neighbor_index_, nmax_pool);
Kokkos::resize(recv_buffer_index_, nmax_pool);
Kokkos::resize(scratch_a_, nmax_pool);
Kokkos::resize(scratch_b_, nmax_pool);

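The swarm.cpp change drops three per-particle index arrays (from_to_indices_, recv_neighbor_index_, recv_buffer_index_) from the constructor and SetPoolMax, keeping the generic scratch arrays, and creates a host mirror of neighbor_received_particles_ up front. The mirror suggests per-neighbor counts computed on device are copied to the host only when communication bookkeeping needs them. A small sketch of that host-mirror pattern in raw Kokkos follows; the names and array size are illustrative, and the real code uses Parthenon's ParArray wrappers (GetHostMirror) rather than raw Kokkos.

#include <Kokkos_Core.hpp>

void HostMirrorSketch() {
  constexpr int num_neighbors = 56;  // illustrative size only
  // Device array filled by boundary kernels.
  Kokkos::View<int *> neighbor_received_particles("neighbor_received_particles",
                                                  num_neighbors);
  // Matching host copy, created once alongside the device allocation.
  auto neighbor_received_particles_h =
      Kokkos::create_mirror_view(neighbor_received_particles);

  // ... device kernels fill the per-neighbor counts ...

  // Copy device -> host when the counts are needed for receive-side bookkeeping.
  Kokkos::deep_copy(neighbor_received_particles_h, neighbor_received_particles);
}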