From 517bf9211217b2a96834cac3d061ea103aef8594 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 28 Aug 2024 10:43:17 -0600 Subject: [PATCH] Fix remaining swarm D->H->D copies (#1145) * working * Clean up implicit captures * OK something isnt working with updating empty_indices * Creating indices works but defragmentation is broken * Defrag works * Switch to persistent scratch memory * Clean up * Fix GPU issues * Fix compile error by cleaning up code * Remove unnecessary check against non-null user swarm BCs * Remove unused function * Formatting * Fiddle with send logic * Perform swarm boundary logic on device (#1154) * First kernel down * Further cleanup * Notes * Send seems to work * Need to promote particles example to latest pattern * May have fixed particles example * cycles... * iterative tasking is only iterating on one meshblock in particles example... * Still not working... * New loop seems to work * Cleaned up some printfs/marked unused code for deletion * New algorithm is cycling, time to clean up * Fixed indexing bug... * Clean up * finished_transport shouldnt be provided by swarm * Reverting to old manual iterative tasking * Still working... * Starting to make progress... * Cleaned up * format * A few leftover print statements * Oops ParArray1D isn't a host array when compiled for device * implicit this-> * bug in nrecvd particles with 1 particle received... * Found the bug * Fixed bug, cleaned up * kokkos parallel_scan -> par_scan * Fix types, clean up code * Add warning if using swarms but Real != double * Cleanup from debugging --------- Co-authored-by: Luke Roberts --- CHANGELOG.md | 1 + example/particles/parthinput.particles | 6 +- example/particles/particles.cpp | 180 ++++++++--------- example/particles/particles.hpp | 4 +- src/interface/state_descriptor.cpp | 7 + src/interface/swarm.cpp | 259 +++++++++++++----------- src/interface/swarm.hpp | 38 ++-- src/interface/swarm_comms.cpp | 268 ++++++++----------------- src/interface/swarm_container.cpp | 30 --- src/interface/swarm_container.hpp | 2 - src/mesh/forest/tree.cpp | 2 - src/mesh/meshblock.cpp | 3 - src/tasks/tasks.hpp | 2 + tst/unit/test_swarm.cpp | 4 +- 14 files changed, 338 insertions(+), 468 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f474633ce8e..af70ab73376b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ - [[PR 1004]](https://github.com/parthenon-hpc-lab/parthenon/pull/1004) Allow parameter modification from an input file for restarts ### Fixed (not changing behavior/API/variables/...) +- [[PR 1145]](https://github.com/parthenon-hpc-lab/parthenon/pull/1145) Fix remaining swarm D->H->D copies - [[PR 1150]](https://github.com/parthenon-hpc-lab/parthenon/pull/1150) Reduce memory consumption for buffer pool - [[PR 1146]](https://github.com/parthenon-hpc-lab/parthenon/pull/1146) Fix an issue outputting >4GB single variables per rank - [[PR 1152]](https://github.com/parthenon-hpc-lab/parthenon/pull/1152) Fix memory leak in task graph outputs related to `abi::__cxa_demangle` diff --git a/example/particles/parthinput.particles b/example/particles/parthinput.particles index bca9ec93292e..a79f774c6850 100644 --- a/example/particles/parthinput.particles +++ b/example/particles/parthinput.particles @@ -24,10 +24,8 @@ refinement = none nx1 = 16 x1min = -0.5 x1max = 0.5 -ix1_bc = user -ox1_bc = user -# ix1_bc = periodic # Optionally use periodic boundary conditions everywhere -# ox1_bc = periodic +ix1_bc = periodic +ox1_bc = periodic nx2 = 16 x2min = -0.5 diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index 8a8d47ef111a..3f99407ded54 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -340,8 +340,7 @@ TaskStatus CreateSomeParticles(MeshBlock *pmb, const double t0) { return TaskStatus::complete; } -TaskStatus TransportParticles(MeshBlock *pmb, const StagedIntegrator *integrator, - const double t0) { +TaskStatus TransportParticles(MeshBlock *pmb, const double t0, const double dt) { PARTHENON_INSTRUMENT auto swarm = pmb->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); @@ -350,8 +349,6 @@ TaskStatus TransportParticles(MeshBlock *pmb, const StagedIntegrator *integrator int max_active_index = swarm->GetMaxActiveIndex(); - Real dt = integrator->dt; - auto &t = swarm->Get("t").Get(); auto &x = swarm->Get(swarm_position::x::name()).Get(); auto &y = swarm->Get(swarm_position::y::name()).Get(); @@ -469,97 +466,31 @@ TaskStatus TransportParticles(MeshBlock *pmb, const StagedIntegrator *integrator // Custom step function to allow for looping over MPI-related tasks until complete TaskListStatus ParticleDriver::Step() { TaskListStatus status; - integrator.dt = tm.dt; + + PARTHENON_REQUIRE(integrator.nstages == 1, + "Only first order time integration supported!"); BlockList_t &blocks = pmesh->block_list; auto num_task_lists_executed_independently = blocks.size(); // Create all the particles that will be created during the step status = MakeParticlesCreationTaskCollection().Execute(); + PARTHENON_REQUIRE(status == TaskListStatus::complete, + "ParticlesCreation task list failed!"); - // Loop over repeated MPI calls until every particle is finished. This logic is - // required because long-distance particle pushes can lead to a large, unpredictable - // number of MPI sends and receives. - bool particles_update_done = false; - while (!particles_update_done) { - status = MakeParticlesUpdateTaskCollection().Execute(); - - particles_update_done = true; - for (auto &block : blocks) { - // TODO(BRR) Despite this "my_particles"-specific call, this function feels like it - // should be generalized - auto swarm = block->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); - if (!swarm->finished_transport) { - particles_update_done = false; - } - } - } + // Transport particles iteratively until all particles reach final time + status = IterativeTransport(); + // status = MakeParticlesTransportTaskCollection().Execute(); + PARTHENON_REQUIRE(status == TaskListStatus::complete, + "IterativeTransport task list failed!"); // Use a more traditional task list for predictable post-MPI evaluations. status = MakeFinalizationTaskCollection().Execute(); + PARTHENON_REQUIRE(status == TaskListStatus::complete, "Finalization task list failed!"); return status; } -// TODO(BRR) This should really be in parthenon/src... but it can't just live in Swarm -// because of the loop over blocks -TaskStatus StopCommunicationMesh(const BlockList_t &blocks) { - PARTHENON_INSTRUMENT - - int num_sent_local = 0; - for (auto &block : blocks) { - auto sc = block->meshblock_data.Get()->GetSwarmData(); - auto swarm = sc->Get("my_particles"); - swarm->finished_transport = false; - num_sent_local += swarm->num_particles_sent_; - } - - int num_sent_global = num_sent_local; // potentially overwritten by following Allreduce -#ifdef MPI_PARALLEL - for (auto &block : blocks) { - auto swarm = block->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); - for (int n = 0; n < block->neighbors.size(); n++) { - NeighborBlock &nb = block->neighbors[n]; - // TODO(BRR) May want logic like this if we have non-blocking TaskRegions - // if (nb.snb.rank != Globals::my_rank) { - // if (swarm->vbswarm->bd_var_.flag[nb.bufid] != BoundaryStatus::completed) { - // return TaskStatus::incomplete; - // } - //} - - // TODO(BRR) May want to move this logic into a per-cycle initialization call - if (swarm->vbswarm->bd_var_.flag[nb.bufid] == BoundaryStatus::completed) { - swarm->vbswarm->bd_var_.req_send[nb.bufid] = MPI_REQUEST_NULL; - } - } - } - - MPI_Allreduce(&num_sent_local, &num_sent_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); -#endif // MPI_PARALLEL - - if (num_sent_global == 0) { - for (auto &block : blocks) { - auto &pmb = block; - auto sc = pmb->meshblock_data.Get()->GetSwarmData(); - auto swarm = sc->Get("my_particles"); - swarm->finished_transport = true; - } - } - - // Reset boundary statuses - for (auto &block : blocks) { - auto &pmb = block; - auto sc = pmb->meshblock_data.Get()->GetSwarmData(); - auto swarm = sc->Get("my_particles"); - for (int n = 0; n < pmb->neighbors.size(); n++) { - auto &nb = block->neighbors[n]; - swarm->vbswarm->bd_var_.flag[nb.bufid] = BoundaryStatus::waiting; - } - } - - return TaskStatus::complete; -} - TaskCollection ParticleDriver::MakeParticlesCreationTaskCollection() const { TaskCollection tc; TaskID none(0); @@ -577,40 +508,93 @@ TaskCollection ParticleDriver::MakeParticlesCreationTaskCollection() const { return tc; } -TaskCollection ParticleDriver::MakeParticlesUpdateTaskCollection() const { +TaskStatus CountNumSent(const BlockList_t &blocks, const double tf_, bool *done) { + int num_unfinished = 0; + for (auto &block : blocks) { + auto sc = block->meshblock_data.Get()->GetSwarmData(); + auto swarm = sc->Get("my_particles"); + int max_active_index = swarm->GetMaxActiveIndex(); + + auto &t = swarm->Get("t").Get(); + + auto swarm_d = swarm->GetDeviceContext(); + + const auto &tf = tf_; + + parthenon::par_reduce( + PARTHENON_AUTO_LABEL, 0, max_active_index, + KOKKOS_LAMBDA(const int n, int &num_unfinished) { + if (swarm_d.IsActive(n)) { + if (t(n) < tf) { + num_unfinished++; + } + } + }, + Kokkos::Sum(num_unfinished)); + } + +#ifdef MPI_PARALLEL + MPI_Allreduce(MPI_IN_PLACE, &num_unfinished, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); +#endif // MPI_PARALLEL + + if (num_unfinished > 0) { + *done = false; + } else { + *done = true; + } + + return TaskStatus::complete; +} + +TaskCollection ParticleDriver::IterativeTransportTaskCollection(bool *done) const { TaskCollection tc; TaskID none(0); - const double t0 = tm.time; const BlockList_t &blocks = pmesh->block_list; + const int nblocks = blocks.size(); + const double t0 = tm.time; + const double dt = tm.dt; - auto num_task_lists_executed_independently = blocks.size(); - - TaskRegion &async_region0 = tc.AddRegion(num_task_lists_executed_independently); - for (int i = 0; i < blocks.size(); i++) { + TaskRegion &async_region = tc.AddRegion(nblocks); + for (int i = 0; i < nblocks; i++) { auto &pmb = blocks[i]; - auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); + auto &tl = async_region[i]; - auto &tl = async_region0[i]; - - auto transport_particles = - tl.AddTask(none, TransportParticles, pmb.get(), &integrator, t0); - - auto send = tl.AddTask(transport_particles, &SwarmContainer::Send, sc.get(), - BoundaryCommSubset::all); + auto transport = tl.AddTask(none, TransportParticles, pmb.get(), t0, dt); + auto reset_comms = + tl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get()); + auto send = + tl.AddTask(reset_comms, &SwarmContainer::Send, sc.get(), BoundaryCommSubset::all); auto receive = tl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); } - TaskRegion &sync_region0 = tc.AddRegion(1); + TaskRegion &sync_region = tc.AddRegion(1); { - auto &tl = sync_region0[0]; - auto stop_comm = tl.AddTask(none, StopCommunicationMesh, blocks); + auto &tl = sync_region[0]; + auto check_completion = tl.AddTask(none, CountNumSent, blocks, t0 + dt, done); } return tc; } +// TODO(BRR) to be replaced by iterative tasklist machinery +TaskListStatus ParticleDriver::IterativeTransport() const { + TaskListStatus status; + bool transport_done = false; + int n_transport_iter = 0; + int n_transport_iter_max = 1000; + while (!transport_done) { + status = IterativeTransportTaskCollection(&transport_done).Execute(); + + n_transport_iter++; + PARTHENON_REQUIRE(n_transport_iter < n_transport_iter_max, + "Too many transport iterations!"); + } + + return status; +} + TaskCollection ParticleDriver::MakeFinalizationTaskCollection() const { TaskCollection tc; TaskID none(0); diff --git a/example/particles/particles.hpp b/example/particles/particles.hpp index 6d74d59dbbb8..705572675247 100644 --- a/example/particles/particles.hpp +++ b/example/particles/particles.hpp @@ -33,7 +33,9 @@ class ParticleDriver : public EvolutionDriver { ParticleDriver(ParameterInput *pin, ApplicationInput *app_in, Mesh *pm) : EvolutionDriver(pin, app_in, pm), integrator(pin) {} TaskCollection MakeParticlesCreationTaskCollection() const; - TaskCollection MakeParticlesUpdateTaskCollection() const; + TaskCollection MakeParticlesTransportTaskCollection() const; + TaskListStatus IterativeTransport() const; + TaskCollection IterativeTransportTaskCollection(bool *done) const; TaskCollection MakeFinalizationTaskCollection() const; TaskListStatus Step(); diff --git a/src/interface/state_descriptor.cpp b/src/interface/state_descriptor.cpp index aa3fa5e6af27..af73259ff6aa 100644 --- a/src/interface/state_descriptor.cpp +++ b/src/interface/state_descriptor.cpp @@ -452,6 +452,13 @@ StateDescriptor::CreateResolvedStateDescriptor(Packages_t &packages) { field_tracker.CategorizeCollection(name, field_dict, &field_provider); swarm_tracker.CategorizeCollection(name, package->AllSwarms(), &swarm_provider); + if (!package->AllSwarms().empty() && !std::is_same::value) { + PARTHENON_WARN( + "Swarms always use Real precision, even for ParticleVariables containing " + "time data, while Parthenon time variables are fixed to double precision. This " + "may cause inaccurate comparisons with cycle beginning and end times.") + } + // Add package registered boundary conditions for (int i = 0; i < 6; ++i) state->UserBoundaryFunctions[i].insert(state->UserBoundaryFunctions[i].end(), diff --git a/src/interface/swarm.cpp b/src/interface/swarm.cpp index b6f1fb6fd991..8e715f1cb8db 100644 --- a/src/interface/swarm.cpp +++ b/src/interface/swarm.cpp @@ -65,38 +65,30 @@ SwarmDeviceContext Swarm::GetDeviceContext() const { Swarm::Swarm(const std::string &label, const Metadata &metadata, const int nmax_pool_in) : label_(label), m_(metadata), nmax_pool_(nmax_pool_in), mask_("mask", nmax_pool_), - marked_for_removal_("mfr", nmax_pool_), block_index_("block_index_", nmax_pool_), + marked_for_removal_("mfr", nmax_pool_), + empty_indices_("empty_indices_", nmax_pool_), + block_index_("block_index_", nmax_pool_), neighbor_indices_("neighbor_indices_", 4, 4, 4), - new_indices_("new_indices_", nmax_pool_), - from_to_indices_("from_to_indices_", nmax_pool_ + 1), - recv_neighbor_index_("recv_neighbor_index_", nmax_pool_), - recv_buffer_index_("recv_buffer_index_", nmax_pool_), + new_indices_("new_indices_", nmax_pool_), scratch_a_("scratch_a_", nmax_pool_), + scratch_b_("scratch_b_", nmax_pool_), num_particles_to_send_("num_particles_to_send_", NMAX_NEIGHBORS), + buffer_counters_("buffer_counters_", NMAX_NEIGHBORS), + neighbor_received_particles_("neighbor_received_particles_", NMAX_NEIGHBORS), cell_sorted_("cell_sorted_", nmax_pool_), mpiStatus(true) { PARTHENON_REQUIRE_THROWS(typeid(Coordinates_t) == typeid(UniformCartesian), "SwarmDeviceContext only supports a uniform Cartesian mesh!"); uid_ = get_uid_(label_); + // Add default swarm fields Add(swarm_position::x::name(), Metadata({Metadata::Real})); Add(swarm_position::y::name(), Metadata({Metadata::Real})); Add(swarm_position::z::name(), Metadata({Metadata::Real})); + + // Initialize index metadata num_active_ = 0; max_active_index_ = inactive_max_active_index; - - // TODO(BRR) Do this in a device kernel? - auto mask_h = Kokkos::create_mirror_view(HostMemSpace(), mask_); - auto marked_for_removal_h = - Kokkos::create_mirror_view(HostMemSpace(), marked_for_removal_); - - for (int n = 0; n < nmax_pool_; n++) { - mask_h(n) = false; - marked_for_removal_h(n) = false; - free_indices_.push_back(n); - } - - Kokkos::deep_copy(mask_, mask_h); - Kokkos::deep_copy(marked_for_removal_, marked_for_removal_h); + UpdateEmptyIndices(); } void Swarm::Add(const std::vector &label_array, const Metadata &metadata) { @@ -196,25 +188,21 @@ void Swarm::Remove(const std::string &label) { } } -void Swarm::setPoolMax(const std::int64_t nmax_pool) { +void Swarm::SetPoolMax(const std::int64_t nmax_pool) { PARTHENON_REQUIRE(nmax_pool > nmax_pool_, "Must request larger pool size!"); - std::int64_t n_new_begin = nmax_pool_; std::int64_t n_new = nmax_pool - nmax_pool_; auto pmb = GetBlockPointer(); auto pm = pmb->pmy_mesh; - for (std::int64_t n = 0; n < n_new; n++) { - free_indices_.push_back(n + n_new_begin); - } - // Rely on Kokkos setting the newly added values to false for these arrays Kokkos::resize(mask_, nmax_pool); Kokkos::resize(marked_for_removal_, nmax_pool); + Kokkos::resize(empty_indices_, nmax_pool); Kokkos::resize(new_indices_, nmax_pool); - Kokkos::resize(from_to_indices_, nmax_pool + 1); - Kokkos::resize(recv_neighbor_index_, nmax_pool); - Kokkos::resize(recv_buffer_index_, nmax_pool); + Kokkos::resize(scratch_a_, nmax_pool); + Kokkos::resize(scratch_b_, nmax_pool); + pmb->LogMemUsage(2 * n_new * sizeof(bool)); Kokkos::resize(cell_sorted_, nmax_pool); @@ -240,7 +228,10 @@ void Swarm::setPoolMax(const std::int64_t nmax_pool) { nmax_pool_ = nmax_pool; - // Eliminate any cached SwarmPacks, as they will need to be rebuilt following setPoolMax + // Populate new empty indices + UpdateEmptyIndices(); + + // Eliminate any cached SwarmPacks, as they will need to be rebuilt following SetPoolMax pmb->meshblock_data.Get()->ClearSwarmCaches(); pm->mesh_data.Get("base")->ClearSwarmCaches(); for (auto &partition : pm->GetDefaultBlockPartitions()) { @@ -251,129 +242,153 @@ void Swarm::setPoolMax(const std::int64_t nmax_pool) { NewParticlesContext Swarm::AddEmptyParticles(const int num_to_add) { PARTHENON_DEBUG_REQUIRE(num_to_add >= 0, "Cannot add negative numbers of particles!"); + auto pmb = GetBlockPointer(); + if (num_to_add > 0) { - while (free_indices_.size() < num_to_add) { - increasePoolMax(); + while (nmax_pool_ - num_active_ < num_to_add) { + IncreasePoolMax(); } - // TODO(BRR) Use par_scan on device rather than do this on host - auto mask_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), mask_); - - auto block_index_h = block_index_.GetHostMirrorAndCopy(); + auto &new_indices = new_indices_; + auto &empty_indices = empty_indices_; + auto &mask = mask_; - auto free_index = free_indices_.begin(); + int max_new_active_index = 0; + parthenon::par_reduce( + PARTHENON_AUTO_LABEL, 0, num_to_add - 1, + KOKKOS_LAMBDA(const int n, int &max_ind) { + new_indices(n) = empty_indices(n); + mask(new_indices(n)) = true; - auto new_indices_h = new_indices_.GetHostMirror(); - - // Don't bother sanitizing the memory - for (int n = 0; n < num_to_add; n++) { - mask_h(*free_index) = true; - block_index_h(*free_index) = this_block_; - max_active_index_ = std::max(max_active_index_, *free_index); - new_indices_h(n) = *free_index; - - free_index = free_indices_.erase(free_index); - } + // Record vote for max active index + max_ind = new_indices(n); + }, + Kokkos::Max(max_new_active_index)); - new_indices_.DeepCopy(new_indices_h); + // Update max active index if necessary + max_active_index_ = std::max(max_active_index_, max_new_active_index); + new_indices_max_idx_ = num_to_add - 1; num_active_ += num_to_add; - Kokkos::deep_copy(mask_, mask_h); - block_index_.DeepCopy(block_index_h); - new_indices_max_idx_ = num_to_add - 1; + UpdateEmptyIndices(); } else { new_indices_max_idx_ = -1; } + // Create and return NewParticlesContext return NewParticlesContext(new_indices_max_idx_, new_indices_); } +// Updates the empty_indices_ array so the first N elements contain an ascending list of +// indices into empty elements of the swarm pool, where N is the number of empty indices +void Swarm::UpdateEmptyIndices() { + auto &mask = mask_; + auto &empty_indices = empty_indices_; + + // Associate scratch memory + auto &empty_indices_scan = scratch_a_; + + // Calculate prefix sum of empty indices + parthenon::par_scan( + "Set empty indices prefix sum", 0, nmax_pool_ - 1, + KOKKOS_LAMBDA(const int n, int &update, const bool &final) { + const int val = !mask(n); + if (val) { + update += 1; + } + + if (final) { + empty_indices_scan(n) = update; + } + }); + + // Update list of empty indices such that it is contiguous and in ascending order + parthenon::par_for( + PARTHENON_AUTO_LABEL, 0, nmax_pool_ - 1, KOKKOS_LAMBDA(const int n) { + if (!mask(n)) { + empty_indices(empty_indices_scan(n) - 1) = n; + } + }); +} + // No active particles: nmax_active_index = inactive_max_active_index (= -1) // No particles removed: nmax_active_index unchanged // Particles removed: nmax_active_index is new max active index void Swarm::RemoveMarkedParticles() { - // TODO(BRR) Use par_scan to do this on device rather than host - auto mask_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), mask_); - auto marked_for_removal_h = - Kokkos::create_mirror_view_and_copy(HostMemSpace(), marked_for_removal_); - - // loop backwards to keep free_indices_ updated correctly - for (int n = max_active_index_; n >= 0; n--) { - if (mask_h(n)) { - if (marked_for_removal_h(n)) { - mask_h(n) = false; - free_indices_.push_front(n); - num_active_ -= 1; - if (n == max_active_index_) { - max_active_index_ -= 1; + int &max_active_index = max_active_index_; + + auto &mask = mask_; + auto &marked_for_removal = marked_for_removal_; + + // Update mask, count number of removed particles + int num_removed = 0; + parthenon::par_reduce( + PARTHENON_AUTO_LABEL, 0, max_active_index, + KOKKOS_LAMBDA(const int n, int &removed) { + if (mask(n)) { + if (marked_for_removal(n)) { + mask(n) = false; + marked_for_removal(n) = false; + removed += 1; + } } - marked_for_removal_h(n) = false; - } - } - } + }, + Kokkos::Sum(num_removed)); - Kokkos::deep_copy(mask_, mask_h); - Kokkos::deep_copy(marked_for_removal_, marked_for_removal_h); + num_active_ -= num_removed; + + UpdateEmptyIndices(); } void Swarm::Defrag() { if (GetNumActive() == 0) { return; } - // TODO(BRR) Could this algorithm be more efficient? Does it matter? - // Add 1 to convert max index to max number - std::int64_t num_free = (max_active_index_ + 1) - num_active_; - auto pmb = GetBlockPointer(); - auto from_to_indices_h = from_to_indices_.GetHostMirror(); + // Associate scratch memory + auto &scan_scratch_toread = scratch_a_; + auto &map = scratch_b_; - auto mask_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), mask_); - - for (int n = 0; n <= max_active_index_; n++) { - from_to_indices_h(n) = unset_index_; - } - - std::list new_free_indices; - - free_indices_.sort(); - - int index = max_active_index_; - int num_to_move = std::min(num_free, num_active_); - for (int n = 0; n < num_to_move; n++) { - while (mask_h(index) == false) { - index--; - } - int index_to_move_from = index; - index--; - - // Below this number "moved" particles should actually stay in place - if (index_to_move_from < num_active_) { - break; - } - int index_to_move_to = free_indices_.front(); - free_indices_.pop_front(); - new_free_indices.push_back(index_to_move_from); - from_to_indices_h(index_to_move_from) = index_to_move_to; - } + auto &mask = mask_; - // TODO(BRR) Not all these sorts may be necessary - new_free_indices.sort(); - free_indices_.merge(new_free_indices); + const int &num_active = num_active_; + parthenon::par_scan( + "Set empty indices prefix sum", 0, nmax_pool_ - num_active_ - 1, + KOKKOS_LAMBDA(const int nn, int &update, const bool &final) { + const int n = nn + num_active; + const int val = mask(n); + if (val) { + update += 1; + } + if (final) scan_scratch_toread(n) = update; + }); - from_to_indices_.DeepCopy(from_to_indices_h); + parthenon::par_for( + PARTHENON_AUTO_LABEL, 0, nmax_pool_ - 1, KOKKOS_LAMBDA(const int n) { + if (n >= num_active) { + if (mask(n)) { + map(scan_scratch_toread(n) - 1) = n; + } + mask(n) = false; + } + }); - auto from_to_indices = from_to_indices_; + // Reuse scratch memory + auto &scan_scratch_towrite = scan_scratch_toread; - auto &mask = mask_; - pmb->par_for( - PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { - if (from_to_indices(n) >= 0) { - mask(from_to_indices(n)) = mask(n); - mask(n) = false; + // Update list of empty indices + parthenon::par_scan( + "Set empty indices prefix sum", 0, num_active_ - 1, + KOKKOS_LAMBDA(const int n, int &update, const bool &final) { + const int val = !mask(n); + if (val) { + update += 1; } + if (final) scan_scratch_towrite(n) = update; }); + // Get all dynamical variables in swarm auto &int_vector = std::get()>(vectors_); auto &real_vector = std::get()>(vectors_); PackIndexMap real_imap; @@ -387,15 +402,19 @@ void Swarm::Defrag() { const int realPackDim = vreal.GetDim(2); const int intPackDim = vint.GetDim(2); - pmb->par_for( - PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { - if (from_to_indices(n) >= 0) { + // Loop over only the active number of particles, and if mask is empty, copy in particle + // using address from prefix sum + parthenon::par_for( + PARTHENON_AUTO_LABEL, 0, num_active_ - 1, KOKKOS_LAMBDA(const int n) { + if (!mask(n)) { + const int nread = map(scan_scratch_towrite(n) - 1); for (int vidx = 0; vidx < realPackDim; vidx++) { - vreal(vidx, from_to_indices(n)) = vreal(vidx, n); + vreal(vidx, n) = vreal(vidx, nread); } for (int vidx = 0; vidx < intPackDim; vidx++) { - vint(vidx, from_to_indices(n)) = vint(vidx, n); + vint(vidx, n) = vint(vidx, nread); } + mask(n) = true; } }); diff --git a/src/interface/swarm.hpp b/src/interface/swarm.hpp index a3e366789986..8eb31a3d1cd7 100644 --- a/src/interface/swarm.hpp +++ b/src/interface/swarm.hpp @@ -161,10 +161,10 @@ class Swarm { std::string info() const { return info_; } /// Expand pool size geometrically as necessary - void increasePoolMax() { setPoolMax(2 * nmax_pool_); } + void IncreasePoolMax() { SetPoolMax(2 * nmax_pool_); } /// Set max pool size - void setPoolMax(const std::int64_t nmax_pool); + void SetPoolMax(const std::int64_t nmax_pool); /// Check whether metadata bit is set bool IsSet(const MetadataFlag bit) const { return m_.IsSet(bit); } @@ -182,6 +182,9 @@ class Swarm { /// indicates gaps in the list. Real GetPackingEfficiency() const { return num_active_ / (max_active_index_ + 1); } + // Update sorted array of empty indices in the current memory pool + void UpdateEmptyIndices(); + /// Remove particles marked for removal and update internal indexing void RemoveMarkedParticles(); @@ -227,14 +230,10 @@ class Swarm { SwarmVariablePack PackVariables(const std::vector &name, PackIndexMap &vmap); - // Temporarily public - int num_particles_sent_; - bool finished_transport; - - void LoadBuffers_(const int max_indices_size); + void LoadBuffers_(); void UnloadBuffers_(); - int CountParticlesToSend_(); // Must be public for launching kernel + void CountParticlesToSend_(); // Must be public for launching kernel template const auto &GetVariableVector() const { @@ -251,10 +250,6 @@ class Swarm { void SetNeighborIndices_(); - void CountReceivedParticles_(); - void UpdateNeighborBufferReceiveIndices_(ParArray1D &neighbor_index, - ParArray1D &buffer_index); - template SwarmVariablePack PackAllVariables_(PackIndexMap &vmap); @@ -273,28 +268,25 @@ class Swarm { std::tuple, MapToParticle> maps_; - std::list free_indices_; ParArray1D mask_; ParArray1D marked_for_removal_; + ParArray1D empty_indices_; // Indices of empty slots in particle pool ParArrayND block_index_; // Neighbor index for each particle. -1 for current block. ParArrayND neighbor_indices_; // Indexing of vbvar's neighbor array. -1 for same. // k,j indices unused in 3D&2D, 2D, respectively - ParArray1D new_indices_; // Persistent array that provides the new indices when - // AddEmptyParticles is called. Always defragmented. - int new_indices_max_idx_; // Maximum valid index of new_indices_ array. - ParArray1D from_to_indices_; // Array used for sorting particles during defragment - // step (size nmax_pool + 1). - ParArray1D recv_neighbor_index_; // Neighbor indices for received particles - ParArray1D recv_buffer_index_; // Buffer indices for received particles + ParArray1D new_indices_; // Persistent array that provides the new indices when + // AddEmptyParticles is called. Always defragmented. + int new_indices_max_idx_; // Maximum valid index of new_indices_ array. + ParArray1D scratch_a_; // Scratch memory for index sorting + ParArray1D scratch_b_; // Scratch memory for index sorting constexpr static int no_block_ = -2; constexpr static int this_block_ = -1; constexpr static int unset_index_ = -1; ParArray1D num_particles_to_send_; - ParArrayND particle_indices_to_send_; - - std::vector neighbor_received_particles_; + ParArray1D buffer_counters_; + ParArray1D neighbor_received_particles_; int total_received_particles_; ParArrayND neighbor_buffer_index_; // Map from neighbor index to neighbor bufid diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 07be992ac727..650bcef09304 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -130,36 +130,6 @@ void Swarm::SetNeighborIndices_() { } } - // Draft alternative approach due to LFR utilizing mesh offset comparisons at the - // highest refinement level - // auto ll_block = pmb->loc.GetDaughter(0, 0, 0); - // int finest_level = pmb->loc.level() + 1; - // for (auto &n : pmb->neighbors) { - // std::vector dlocs; - // auto &nloc = - // n.loc; // Would need to use the location in the coordinates of the origin tree - // if (nloc.level() == finest_level) { - // dlocs.emplace_back(nloc); - // } else if (nloc.level() == finest_level) { - // dlocs = nloc.GetDaughters(ndim); - // } else if (nloc.level() == finest_level - 2) { - // auto tlocs = nloc.GetDaughters(ndim); - // for (auto &t : tlocs) { - // auto gdlocs = t.GetDaughters(ndim); - // dlocs.insert(dlocs.end(), gdlocs.begin(), gdlocs.end()); - // } - // } else { - // PARTHENON_FAIL("Proper nesting is not being respected."); - // } - // for (auto &d : dlocs) { - // const int k = d.lx3() - ll_block.lx3() + 1; - // const int j = d.lx2() - ll_block.lx2() + 1; - // const int i = d.lx1() - ll_block.lx1() + 1; - // if (i >= 0 && i <= 3 && j >= 0 && j <= 3 && k >= 0 && k <= 3) - // neighbor_indices_h(k, j, i) = n.gid; - // } - //} - neighbor_indices_.DeepCopy(neighbor_indices_h); } @@ -174,8 +144,6 @@ void Swarm::SetupPersistentMPI() { // Build up convenience array of neighbor indices SetNeighborIndices_(); - neighbor_received_particles_.resize(nbmax); - // Build device array mapping neighbor index to neighbor bufid if (pmb->neighbors.size() > 0) { ParArrayND neighbor_buffer_index("Neighbor buffer index", pmb->neighbors.size()); @@ -188,7 +156,7 @@ void Swarm::SetupPersistentMPI() { } } -int Swarm::CountParticlesToSend_() { +void Swarm::CountParticlesToSend_() { auto mask_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), mask_); auto swarm_d = GetDeviceContext(); auto pmb = GetBlockPointer(); @@ -197,10 +165,6 @@ int Swarm::CountParticlesToSend_() { // Fence to make sure particles aren't currently being transported locally // TODO(BRR) do this operation on device. pmb->exec_space.fence(); - auto num_particles_to_send_h = num_particles_to_send_.GetHostMirror(); - for (int n = 0; n < pmb->neighbors.size(); n++) { - num_particles_to_send_h(n) = 0; - } const int particle_size = GetParticleDataSize(); vbswarm->particle_size = particle_size; @@ -218,63 +182,31 @@ int Swarm::CountParticlesToSend_() { } }); - int max_indices_size = 0; - int total_noblock_particles = 0; - auto block_index_h = block_index_.GetHostMirrorAndCopy(); - for (int n = 0; n <= max_active_index_; n++) { - if (mask_h(n)) { - // This particle should be sent - if (block_index_h(n) >= 0) { - num_particles_to_send_h(block_index_h(n))++; - if (max_indices_size < num_particles_to_send_h(block_index_h(n))) { - max_indices_size = num_particles_to_send_h(block_index_h(n)); - } - } - if (block_index_h(n) == no_block_) { - total_noblock_particles++; - } - } - } - // Size-0 arrays not permitted but we don't want to short-circuit subsequent logic - // that indicates completed communications - max_indices_size = std::max(1, max_indices_size); - - // Not a ragged-right array, just for convenience - if (total_noblock_particles > 0) { - auto noblock_indices = - ParArray1D("Particles with no block", total_noblock_particles); - auto noblock_indices_h = noblock_indices.GetHostMirror(); - int counter = 0; - for (int n = 0; n <= max_active_index_; n++) { - if (mask_h(n)) { - if (block_index_h(n) == no_block_) { - noblock_indices_h(counter) = n; - counter++; + // Facilitate lambda captures + auto &block_index = block_index_; + auto &num_particles_to_send = num_particles_to_send_; + + // Zero out number of particles to send before accumulating + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, + KOKKOS_LAMBDA(const int n) { num_particles_to_send[n] = 0; }); + + parthenon::par_for( + PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) { + if (swarm_d.IsActive(n)) { + bool on_current_mesh_block = true; + swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); + + if (block_index(n) >= 0) { + Kokkos::atomic_add(&num_particles_to_send(block_index(n)), 1); + } } - } - } - noblock_indices.DeepCopy(noblock_indices_h); - } + }); - // TODO(BRR) don't allocate dynamically - particle_indices_to_send_ = - ParArrayND("Particle indices to send", nbmax, max_indices_size); - auto particle_indices_to_send_h = particle_indices_to_send_.GetHostMirror(); - std::vector counter(nbmax, 0); - for (int n = 0; n <= max_active_index_; n++) { - if (mask_h(n)) { - if (block_index_h(n) >= 0) { - particle_indices_to_send_h(block_index_h(n), counter[block_index_h(n)]) = n; - counter[block_index_h(n)]++; - } - } - } - num_particles_to_send_.DeepCopy(num_particles_to_send_h); - particle_indices_to_send_.DeepCopy(particle_indices_to_send_h); + auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy(); - num_particles_sent_ = 0; + // Resize send buffers if too small for (int n = 0; n < pmb->neighbors.size(); n++) { - // Resize buffer if too small const int bufid = pmb->neighbors[n].bufid; auto sendbuf = vbswarm->bd_var_.send[bufid]; if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) { @@ -282,13 +214,10 @@ int Swarm::CountParticlesToSend_() { vbswarm->bd_var_.send[bufid] = sendbuf; } vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size; - num_particles_sent_ += num_particles_to_send_h(n); } - - return max_indices_size; } -void Swarm::LoadBuffers_(const int max_indices_size) { +void Swarm::LoadBuffers_() { auto swarm_d = GetDeviceContext(); auto pmb = GetBlockPointer(); const int particle_size = GetParticleDataSize(); @@ -303,34 +232,45 @@ void Swarm::LoadBuffers_(const int max_indices_size) { const int realPackDim = vreal.GetDim(2); const int intPackDim = vint.GetDim(2); - // Pack index: - // [variable start] [swarm idx] + auto &x = Get(swarm_position::x::name()).Get(); + auto &y = Get(swarm_position::y::name()).Get(); + auto &z = Get(swarm_position::z::name()).Get(); + + // Zero buffer index counters + auto &buffer_counters = buffer_counters_; + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, + KOKKOS_LAMBDA(const int n) { buffer_counters[n] = 0; }); auto &bdvar = vbswarm->bd_var_; - auto num_particles_to_send = num_particles_to_send_; - auto particle_indices_to_send = particle_indices_to_send_; auto neighbor_buffer_index = neighbor_buffer_index_; + // Loop over active particles and use atomic operations to find indices into buffers if + // this particle will be sent. pmb->par_for( - PARTHENON_AUTO_LABEL, 0, max_indices_size - 1, - KOKKOS_LAMBDA(const int n) { // Max index - for (int m = 0; m < nneighbor; m++) { // Number of neighbors + PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { + if (swarm_d.IsActive(n)) { + bool on_current_mesh_block = true; + const int m = + swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); const int bufid = neighbor_buffer_index(m); - if (n < num_particles_to_send(m)) { - const int sidx = particle_indices_to_send(m, n); - int buffer_index = n * particle_size; - swarm_d.MarkParticleForRemoval(sidx); + + if (m >= 0) { + const int bid = Kokkos::atomic_fetch_add(&buffer_counters(m), 1); + int buffer_index = bid * particle_size; + swarm_d.MarkParticleForRemoval(n); for (int i = 0; i < realPackDim; i++) { - bdvar.send[bufid](buffer_index) = vreal(i, sidx); + bdvar.send[bufid](buffer_index) = vreal(i, n); buffer_index++; } for (int i = 0; i < intPackDim; i++) { - bdvar.send[bufid](buffer_index) = static_cast(vint(i, sidx)); + bdvar.send[bufid](buffer_index) = static_cast(vint(i, n)); buffer_index++; } } } }); + // Remove particles that were loaded to send to another block from this block RemoveMarkedParticles(); } @@ -339,99 +279,42 @@ void Swarm::Send(BoundaryCommSubset phase) { const int nneighbor = pmb->neighbors.size(); auto swarm_d = GetDeviceContext(); - if (nneighbor == 0) { - // TODO(BRR) Do we ever reach this branch? - // Process physical boundary conditions on "sent" particles - auto block_index_h = block_index_.GetHostMirrorAndCopy(); - auto mask_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), mask_); - - int total_sent_particles = 0; - pmb->par_reduce( - PARTHENON_AUTO_LABEL, 0, max_active_index_, - KOKKOS_LAMBDA(int n, int &total_sent_particles) { - if (swarm_d.IsActive(n)) { - if (!swarm_d.IsOnCurrentMeshBlock(n)) { - total_sent_particles++; - } - } - }, - Kokkos::Sum(total_sent_particles)); - - if (total_sent_particles > 0) { - ParArray1D new_indices("new indices", total_sent_particles); - auto new_indices_h = new_indices.GetHostMirrorAndCopy(); - int sent_particle_index = 0; - for (int n = 0; n <= max_active_index_; n++) { - if (mask_h(n)) { - if (block_index_h(n) >= 0 || block_index_h(n) == no_block_) { - new_indices_h(sent_particle_index) = n; - sent_particle_index++; - } - } - } - new_indices.DeepCopy(new_indices_h); - } - } else { - // Query particles for those to be sent - int max_indices_size = CountParticlesToSend_(); + // Query particles for those to be sent + CountParticlesToSend_(); - // Prepare buffers for send operations - LoadBuffers_(max_indices_size); + // Prepare buffers for send operations + LoadBuffers_(); - // Send buffer data - vbswarm->Send(phase); - } + // Send buffer data + vbswarm->Send(phase); } -void Swarm::CountReceivedParticles_() { +void Swarm::UnloadBuffers_() { auto pmb = GetBlockPointer(); + + // Count received particles total_received_particles_ = 0; + auto &neighbor_received_particles = neighbor_received_particles_; + auto neighbor_received_particles_h = neighbor_received_particles.GetHostMirror(); for (int n = 0; n < pmb->neighbors.size(); n++) { const int bufid = pmb->neighbors[n].bufid; if (vbswarm->bd_var_.flag[bufid] == BoundaryStatus::arrived) { PARTHENON_DEBUG_REQUIRE(vbswarm->recv_size[bufid] % vbswarm->particle_size == 0, "Receive buffer is not divisible by particle size!"); - neighbor_received_particles_[n] = + neighbor_received_particles_h(n) = vbswarm->recv_size[bufid] / vbswarm->particle_size; - total_received_particles_ += neighbor_received_particles_[n]; + total_received_particles_ += neighbor_received_particles_h(n); } else { - neighbor_received_particles_[n] = 0; + neighbor_received_particles_h(n) = 0; } } -} - -void Swarm::UpdateNeighborBufferReceiveIndices_(ParArray1D &neighbor_index, - ParArray1D &buffer_index) { - auto pmb = GetBlockPointer(); - auto neighbor_index_h = neighbor_index.GetHostMirror(); - auto buffer_index_h = - buffer_index.GetHostMirror(); // Index of each particle in its received buffer - - int id = 0; - for (int n = 0; n < pmb->neighbors.size(); n++) { - for (int m = 0; m < neighbor_received_particles_[n]; m++) { - neighbor_index_h(id) = n; - buffer_index_h(id) = m; - id++; - } - } - neighbor_index.DeepCopy(neighbor_index_h); - buffer_index.DeepCopy(buffer_index_h); -} - -void Swarm::UnloadBuffers_() { - auto pmb = GetBlockPointer(); - - CountReceivedParticles_(); auto &bdvar = vbswarm->bd_var_; + const int nbmax = vbswarm->bd_var_.nbmax; if (total_received_particles_ > 0) { auto newParticlesContext = AddEmptyParticles(total_received_particles_); - auto &recv_neighbor_index = recv_neighbor_index_; - auto &recv_buffer_index = recv_buffer_index_; - UpdateNeighborBufferReceiveIndices_(recv_neighbor_index, recv_buffer_index); auto neighbor_buffer_index = neighbor_buffer_index_; auto &int_vector = std::get()>(vectors_); @@ -443,18 +326,37 @@ void Swarm::UnloadBuffers_() { int realPackDim = vreal.GetDim(2); int intPackDim = vint.GetDim(2); - // construct map from buffer index to swarm index (or just return vector of - // indices!) const int particle_size = GetParticleDataSize(); auto swarm_d = GetDeviceContext(); + // Change meaning of neighbor_received_particles from particles per neighbor to + // cumulative particles per neighbor + int val_prev = 0; + for (int n = 0; n < nbmax; n++) { + int val_curr = neighbor_received_particles_h(n); + neighbor_received_particles_h(n) += val_prev; + val_prev += val_curr; + } + neighbor_received_particles.DeepCopy(neighbor_received_particles_h); + + auto &x = Get(swarm_position::x::name()).Get(); + auto &y = Get(swarm_position::y::name()).Get(); + auto &z = Get(swarm_position::z::name()).Get(); + pmb->par_for( PARTHENON_AUTO_LABEL, 0, newParticlesContext.GetNewParticlesMaxIndex(), // n is both new particle index and index over buffer values KOKKOS_LAMBDA(const int n) { const int sid = newParticlesContext.GetNewParticleIndex(n); - const int nid = recv_neighbor_index(n); - int bid = recv_buffer_index(n) * particle_size; + // Search for neighbor id over cumulative indices + int nid = 0; + while (n >= neighbor_received_particles(nid) && nid < nbmax - 1) { + nid++; + } + + // Convert neighbor id to buffer id + int bid = nid == 0 ? n * particle_size + : (n - neighbor_received_particles(nid - 1)) * particle_size; const int nbid = neighbor_buffer_index(nid); for (int i = 0; i < realPackDim; i++) { vreal(i, sid) = bdvar.recv[nbid](bid); diff --git a/src/interface/swarm_container.cpp b/src/interface/swarm_container.cpp index c449ef084c6a..9fc26a213e03 100644 --- a/src/interface/swarm_container.cpp +++ b/src/interface/swarm_container.cpp @@ -36,36 +36,6 @@ void SwarmContainer::Initialize(const std::shared_ptr resolved_ } } -void SwarmContainer::InitializeBoundaries(const std::shared_ptr pmb) { - if (swarmVector_.empty()) { - // No Swarms in this container, so no need to initialize boundaries - // This allows default reflecting boundary conditions to be used when no - // swarms are present in a parthenon calculation. - // NOTE SwarmContainer::Initialize must have already been called. - return; - } - - std::stringstream msg; - auto &bcs = pmb->pmy_mesh->mesh_bcs; - // Check that, if we are using user BCs, they are actually enrolled, and unsupported BCs - // are not being used - for (int iFace = 0; iFace < 6; iFace++) { - if (bcs[iFace] == BoundaryFlag::user) { - if (pmb->pmy_mesh->forest.GetTreePtr(pmb->loc.tree())->SwarmBndryFnctn[iFace] == - nullptr) { - msg << (iFace % 2 == 0 ? "i" : "o") << "x" << iFace / 2 + 1 - << " user boundary requested but provided function is null!"; - PARTHENON_FAIL(msg); - } - } else if (bcs[iFace] != BoundaryFlag::outflow && - bcs[iFace] != BoundaryFlag::periodic) { - msg << (iFace % 2 == 0 ? "i" : "o") << "x" << iFace / 2 + 1 << " boundary flag " - << static_cast(bcs[iFace]) << " not supported!"; - PARTHENON_FAIL(msg); - } - } -} - void SwarmContainer::Add(const std::vector &labelArray, const Metadata &metadata) { // generate the vector and call Add diff --git a/src/interface/swarm_container.hpp b/src/interface/swarm_container.hpp index 3e2072db3a64..1a1486b2a44c 100644 --- a/src/interface/swarm_container.hpp +++ b/src/interface/swarm_container.hpp @@ -90,8 +90,6 @@ class SwarmContainer { void Initialize(const std::shared_ptr resolved_packages, const std::shared_ptr pmb); - void InitializeBoundaries(const std::shared_ptr pmb); - void Add(std::shared_ptr swarm) { swarmVector_.push_back(swarm); swarmMap_[swarm->label()] = swarm; diff --git a/src/mesh/forest/tree.cpp b/src/mesh/forest/tree.cpp index d33fd757e353..fe1803882401 100644 --- a/src/mesh/forest/tree.cpp +++ b/src/mesh/forest/tree.cpp @@ -442,8 +442,6 @@ void Tree::EnrollBndryFncts( break; case BoundaryFlag::user: if (app_in->swarm_boundary_conditions[f] != nullptr) { - // This is checked to be non-null later in Swarm::AllocateBoundaries, in case user - // boundaries are requested but no swarms are used. SwarmBndryFnctn[f] = app_in->swarm_boundary_conditions[f]; } break; diff --git a/src/mesh/meshblock.cpp b/src/mesh/meshblock.cpp index 3f84ce48293b..0bccf068576d 100644 --- a/src/mesh/meshblock.cpp +++ b/src/mesh/meshblock.cpp @@ -144,9 +144,6 @@ void MeshBlock::Initialize(int igid, int ilid, LogicalLocation iloc, auto &real_container = meshblock_data.Get(); real_container->Initialize(shared_from_this()); - // Initialize swarm boundary condition flags - real_container->GetSwarmData()->InitializeBoundaries(shared_from_this()); - // TODO(jdolence): Should these loops be moved to Variable creation // TODO(JMM): What variables should be in vars_cc_? They are used // for counting load-balance cost. Should it be different than the diff --git a/src/tasks/tasks.hpp b/src/tasks/tasks.hpp index 91b9e103d6aa..d432f0a7ea21 100644 --- a/src/tasks/tasks.hpp +++ b/src/tasks/tasks.hpp @@ -544,6 +544,8 @@ class TaskRegion { public: TaskRegion() = delete; + TaskRegion(const TaskRegion &) = delete; // Prevent copying TaskRegions during AddRegion + // calls which is a segfault explicit TaskRegion(const int num_lists) : task_lists(num_lists) { for (int i = 0; i < num_lists; i++) task_lists[i].SetID(i); diff --git a/tst/unit/test_swarm.cpp b/tst/unit/test_swarm.cpp index f365362ddcc6..bcff16106110 100644 --- a/tst/unit/test_swarm.cpp +++ b/tst/unit/test_swarm.cpp @@ -212,8 +212,8 @@ TEST_CASE("Swarm memory management", "[Swarm]") { // Check that data was moved during defrag x_h = swarm->Get(swarm_position::x::name()).Get().GetHostMirrorAndCopy(); - REQUIRE(x_h(2) == 1.2); - REQUIRE(x_h(4) == 1.1); + REQUIRE(x_h(2) == 1.1); + REQUIRE(x_h(4) == 1.2); i_h = swarm->Get("i").Get().GetHostMirrorAndCopy(); REQUIRE(i_h(1) == 2);