From 3bc0824ecaee2216df6abe3f14d1767cc9a0a9da Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 31 Jul 2024 15:24:54 -0600 Subject: [PATCH 01/21] First kernel down --- src/interface/swarm_comms.cpp | 39 +++++------------------------------ 1 file changed, 5 insertions(+), 34 deletions(-) diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 8cd1f045b031..5a58d5a95d4b 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -218,11 +218,14 @@ int Swarm::CountParticlesToSend_() { } }); + // Facilitate lambda captures auto &block_index = block_index_; + auto &num_particles_to_send = num_particles_to_send_; + int max_indices_size = 0; parthenon::par_reduce( PARTHENON_AUTO_LABEL, 0, max_active_index, - KOKKOS_LAMBDA(const int n, const int &red) { + KOKKOS_LAMBDA(const int n, int &red) { if (swarm_d.IsActive(n)) { bool on_current_mesh_block = true; swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); @@ -234,44 +237,12 @@ int Swarm::CountParticlesToSend_() { }, Kokkos::Max(max_indices_size)); - // int max_indices_size = 0; - // int total_noblock_particles = 0; auto block_index_h = block_index_.GetHostMirrorAndCopy(); - // for (int n = 0; n <= max_active_index_; n++) { - // if (mask_h(n)) { - // // This particle should be sent - // if (block_index_h(n) >= 0) { - // num_particles_to_send_h(block_index_h(n))++; - // if (max_indices_size < num_particles_to_send_h(block_index_h(n))) { - // max_indices_size = num_particles_to_send_h(block_index_h(n)); - // } - // } - // if (block_index_h(n) == no_block_) { - // total_noblock_particles++; - // } - // } - //} + // Size-0 arrays not permitted but we don't want to short-circuit subsequent logic // that indicates completed communications max_indices_size = std::max(1, max_indices_size); - // Not a ragged-right array, just for convenience - if (total_noblock_particles > 0) { - auto noblock_indices = - ParArray1D("Particles with no block", total_noblock_particles); - auto noblock_indices_h = noblock_indices.GetHostMirror(); - int counter = 0; - for (int n = 0; n <= max_active_index_; n++) { - if (mask_h(n)) { - if (block_index_h(n) == no_block_) { - noblock_indices_h(counter) = n; - counter++; - } - } - } - noblock_indices.DeepCopy(noblock_indices_h); - } - // TODO(BRR) don't allocate dynamically particle_indices_to_send_ = ParArrayND("Particle indices to send", nbmax, max_indices_size); From e21af3b44044ad784be52d20cb6375ce05747857 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 31 Jul 2024 16:17:39 -0600 Subject: [PATCH 02/21] Further cleanup --- src/interface/swarm_comms.cpp | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 5a58d5a95d4b..89a664c987dc 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -237,28 +237,29 @@ int Swarm::CountParticlesToSend_() { }, Kokkos::Max(max_indices_size)); - auto block_index_h = block_index_.GetHostMirrorAndCopy(); + auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy(); // Size-0 arrays not permitted but we don't want to short-circuit subsequent logic // that indicates completed communications max_indices_size = std::max(1, max_indices_size); - // TODO(BRR) don't allocate dynamically - particle_indices_to_send_ = - ParArrayND("Particle indices to send", nbmax, max_indices_size); - auto particle_indices_to_send_h = particle_indices_to_send_.GetHostMirror(); - std::vector 
counter(nbmax, 0); - for (int n = 0; n <= max_active_index_; n++) { - if (mask_h(n)) { - if (block_index_h(n) >= 0) { - particle_indices_to_send_h(block_index_h(n), counter[block_index_h(n)]) = n; - counter[block_index_h(n)]++; - } - } - } - num_particles_to_send_.DeepCopy(num_particles_to_send_h); - particle_indices_to_send_.DeepCopy(particle_indices_to_send_h); + //// TODO(BRR) don't allocate dynamically + // particle_indices_to_send_ = + // ParArrayND("Particle indices to send", nbmax, max_indices_size); + // auto particle_indices_to_send_h = particle_indices_to_send_.GetHostMirror(); + // std::vector counter(nbmax, 0); + // for (int n = 0; n <= max_active_index_; n++) { + // if (mask_h(n)) { + // if (block_index_h(n) >= 0) { + // particle_indices_to_send_h(block_index_h(n), counter[block_index_h(n)]) = n; + // counter[block_index_h(n)]++; + // } + // } + //} + // num_particles_to_send_.DeepCopy(num_particles_to_send_h); + // particle_indices_to_send_.DeepCopy(particle_indices_to_send_h); + // Count total particles sent and resize buffers if too small num_particles_sent_ = 0; for (int n = 0; n < pmb->neighbors.size(); n++) { // Resize buffer if too small From 73abc93d56dd8c6bd635c0b2da24cf49a4b53593 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 31 Jul 2024 16:23:21 -0600 Subject: [PATCH 03/21] Notes --- src/interface/swarm.hpp | 2 +- src/interface/swarm_comms.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/interface/swarm.hpp b/src/interface/swarm.hpp index 5415e0be8201..a4125e196e17 100644 --- a/src/interface/swarm.hpp +++ b/src/interface/swarm.hpp @@ -297,7 +297,7 @@ class Swarm { constexpr static int unset_index_ = -1; ParArray1D num_particles_to_send_; - ParArrayND particle_indices_to_send_; + // ParArrayND particle_indices_to_send_; std::vector neighbor_received_particles_; int total_received_particles_; diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 89a664c987dc..48bd7cd3f369 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -277,6 +277,7 @@ int Swarm::CountParticlesToSend_() { } void Swarm::LoadBuffers_(const int max_indices_size) { + // TODO(BRR) rewrite without particle_indices_to_send auto swarm_d = GetDeviceContext(); auto pmb = GetBlockPointer(); const int particle_size = GetParticleDataSize(); @@ -328,6 +329,7 @@ void Swarm::Send(BoundaryCommSubset phase) { auto swarm_d = GetDeviceContext(); if (nneighbor == 0) { + PARTHENON_FAIL("Shouldn't be here... just remove this logic?"); // TODO(BRR) Do we ever reach this branch? // Process physical boundary conditions on "sent" particles auto block_index_h = block_index_.GetHostMirrorAndCopy(); From 5e084a7225fd308638e04767050c5057da1a7b42 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 13 Aug 2024 10:36:48 -0600 Subject: [PATCH 04/21] Send seems to work --- src/interface/swarm_comms.cpp | 144 ++++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 58 deletions(-) diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 48bd7cd3f369..82da892d5158 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -197,10 +197,10 @@ int Swarm::CountParticlesToSend_() { // Fence to make sure particles aren't currently being transported locally // TODO(BRR) do this operation on device. 
pmb->exec_space.fence(); - auto num_particles_to_send_h = num_particles_to_send_.GetHostMirror(); - for (int n = 0; n < pmb->neighbors.size(); n++) { - num_particles_to_send_h(n) = 0; - } + // auto num_particles_to_send_h = num_particles_to_send_.GetHostMirror(); + // for (int n = 0; n < pmb->neighbors.size(); n++) { + // num_particles_to_send_h(n) = 0; + //} const int particle_size = GetParticleDataSize(); vbswarm->particle_size = particle_size; @@ -222,6 +222,11 @@ int Swarm::CountParticlesToSend_() { auto &block_index = block_index_; auto &num_particles_to_send = num_particles_to_send_; + // Zero out number of particles to send before accumulating + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, + KOKKOS_LAMBDA(const int n) { num_particles_to_send[n] = 0; }); + int max_indices_size = 0; parthenon::par_reduce( PARTHENON_AUTO_LABEL, 0, max_active_index, @@ -266,6 +271,8 @@ int Swarm::CountParticlesToSend_() { const int bufid = pmb->neighbors[n].bufid; auto sendbuf = vbswarm->bd_var_.send[bufid]; if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) { + printf("resize buf (n: %i) bufid: %i size: %i\n", n, bufid, + num_particles_to_send_h(n)); sendbuf = BufArray1D("Buffer", num_particles_to_send_h(n) * particle_size); vbswarm->bd_var_.send[bufid] = sendbuf; } @@ -292,33 +299,83 @@ void Swarm::LoadBuffers_(const int max_indices_size) { const int realPackDim = vreal.GetDim(2); const int intPackDim = vint.GetDim(2); - // Pack index: - // [variable start] [swarm idx] + auto &x = Get(swarm_position::x::name()).Get(); + auto &y = Get(swarm_position::y::name()).Get(); + auto &z = Get(swarm_position::z::name()).Get(); + + // TODO(BRR) make this a Swarm member and initialize in constructor + ParArray1D buffer_counters("Buffer counters", NMAX_NEIGHBORS); + // Kokkos::fence(); + // printf("%s:%i\n", __FILE__, __LINE__); + + // Zero buffer index counters + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, + KOKKOS_LAMBDA(const int n) { buffer_counters[n] = 0; }); + // Kokkos::fence(); + // printf("%s:%i\n", __FILE__, __LINE__); auto &bdvar = vbswarm->bd_var_; - auto num_particles_to_send = num_particles_to_send_; - auto particle_indices_to_send = particle_indices_to_send_; auto neighbor_buffer_index = neighbor_buffer_index_; + // Loop over active particles and use atomic operations to find indices into buffers if + // this particle will be sent. pmb->par_for( - PARTHENON_AUTO_LABEL, 0, max_indices_size - 1, - KOKKOS_LAMBDA(const int n) { // Max index - for (int m = 0; m < nneighbor; m++) { // Number of neighbors + PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { + if (swarm_d.IsActive(n)) { + bool on_current_mesh_block = true; + const int m = + swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); + // TODO(BRR) need this map? 
const int bufid = neighbor_buffer_index(m); - if (n < num_particles_to_send(m)) { - const int sidx = particle_indices_to_send(m, n); - int buffer_index = n * particle_size; - swarm_d.MarkParticleForRemoval(sidx); + + if (m >= 0) { + printf("n: %i m %i\n", n, m); + const int bid = Kokkos::atomic_fetch_add(&buffer_counters(m), 1); + int buffer_index = bid * particle_size; + printf("n: %i m: %i bid: %i bufid: %i xyz: %e %e %e\n", n, m, bid, bufid, + x(n), y(n), z(n)); + swarm_d.MarkParticleForRemoval(n); for (int i = 0; i < realPackDim; i++) { - bdvar.send[bufid](buffer_index) = vreal(i, sidx); + bdvar.send[bufid](buffer_index) = vreal(i, n); buffer_index++; } for (int i = 0; i < intPackDim; i++) { - bdvar.send[bufid](buffer_index) = static_cast(vint(i, sidx)); + bdvar.send[bufid](buffer_index) = static_cast(vint(i, n)); buffer_index++; } } } }); + // Kokkos::fence(); + // printf("%s:%i\n", __FILE__, __LINE__); + + // Pack index: + // [variable start] [swarm idx] + + // auto &bdvar = vbswarm->bd_var_; + // auto num_particles_to_send = num_particles_to_send_; + // auto particle_indices_to_send = particle_indices_to_send_; + // auto neighbor_buffer_index = neighbor_buffer_index_; + // pmb->par_for( + // PARTHENON_AUTO_LABEL, 0, max_indices_size - 1, + // KOKKOS_LAMBDA(const int n) { // Max index + // for (int m = 0; m < nneighbor; m++) { // Number of neighbors + // const int bufid = neighbor_buffer_index(m); + // if (n < num_particles_to_send(m)) { + // const int sidx = particle_indices_to_send(m, n); + // int buffer_index = n * particle_size; + // swarm_d.MarkParticleForRemoval(sidx); + // for (int i = 0; i < realPackDim; i++) { + // bdvar.send[bufid](buffer_index) = vreal(i, sidx); + // buffer_index++; + // } + // for (int i = 0; i < intPackDim; i++) { + // bdvar.send[bufid](buffer_index) = static_cast(vint(i, sidx)); + // buffer_index++; + // } + // } + // } + // }); RemoveMarkedParticles(); } @@ -328,49 +385,14 @@ void Swarm::Send(BoundaryCommSubset phase) { const int nneighbor = pmb->neighbors.size(); auto swarm_d = GetDeviceContext(); - if (nneighbor == 0) { - PARTHENON_FAIL("Shouldn't be here... just remove this logic?"); - // TODO(BRR) Do we ever reach this branch? 
- // Process physical boundary conditions on "sent" particles - auto block_index_h = block_index_.GetHostMirrorAndCopy(); - auto mask_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), mask_); - - int total_sent_particles = 0; - pmb->par_reduce( - PARTHENON_AUTO_LABEL, 0, max_active_index_, - KOKKOS_LAMBDA(int n, int &total_sent_particles) { - if (swarm_d.IsActive(n)) { - if (!swarm_d.IsOnCurrentMeshBlock(n)) { - total_sent_particles++; - } - } - }, - Kokkos::Sum(total_sent_particles)); - - if (total_sent_particles > 0) { - ParArray1D new_indices("new indices", total_sent_particles); - auto new_indices_h = new_indices.GetHostMirrorAndCopy(); - int sent_particle_index = 0; - for (int n = 0; n <= max_active_index_; n++) { - if (mask_h(n)) { - if (block_index_h(n) >= 0 || block_index_h(n) == no_block_) { - new_indices_h(sent_particle_index) = n; - sent_particle_index++; - } - } - } - new_indices.DeepCopy(new_indices_h); - } - } else { - // Query particles for those to be sent - int max_indices_size = CountParticlesToSend_(); + // Query particles for those to be sent + int max_indices_size = CountParticlesToSend_(); - // Prepare buffers for send operations - LoadBuffers_(max_indices_size); + // Prepare buffers for send operations + LoadBuffers_(max_indices_size); - // Send buffer data - vbswarm->Send(phase); - } + // Send buffer data + vbswarm->Send(phase); } void Swarm::CountReceivedParticles_() { @@ -416,6 +438,11 @@ void Swarm::UnloadBuffers_() { auto &bdvar = vbswarm->bd_var_; + // Debug + auto &x = Get(swarm_position::x::name()).Get(); + auto &y = Get(swarm_position::y::name()).Get(); + auto &z = Get(swarm_position::z::name()).Get(); + if (total_received_particles_ > 0) { auto newParticlesContext = AddEmptyParticles(total_received_particles_); @@ -454,6 +481,7 @@ void Swarm::UnloadBuffers_() { vint(i, sid) = static_cast(bdvar.recv[nbid](bid)); bid++; } + printf("Received! [%i] xyz = %e %e %e\n", sid, x(sid), y(sid), z(sid)); }); } } From 5abcd5d53177c1616ab7b61fe1fb04d625b97aca Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 13 Aug 2024 10:50:14 -0600 Subject: [PATCH 05/21] Need to promote particles example to latest pattern --- src/interface/swarm.cpp | 1 + src/interface/swarm.hpp | 8 +-- src/interface/swarm_comms.cpp | 112 ++++++++-------------------------- 3 files changed, 30 insertions(+), 91 deletions(-) diff --git a/src/interface/swarm.cpp b/src/interface/swarm.cpp index 0d2cef0d12c3..f29d6b374c6c 100644 --- a/src/interface/swarm.cpp +++ b/src/interface/swarm.cpp @@ -75,6 +75,7 @@ Swarm::Swarm(const std::string &label, const Metadata &metadata, const int nmax_ recv_buffer_index_("recv_buffer_index_", nmax_pool_), scratch_a_("scratch_a_", nmax_pool_), scratch_b_("scratch_b_", nmax_pool_), num_particles_to_send_("num_particles_to_send_", NMAX_NEIGHBORS), + buffer_counters_("buffer_counters_", NMAX_NEIGHBORS), cell_sorted_("cell_sorted_", nmax_pool_), mpiStatus(true) { PARTHENON_REQUIRE_THROWS(typeid(Coordinates_t) == typeid(UniformCartesian), "SwarmDeviceContext only supports a uniform Cartesian mesh!"); diff --git a/src/interface/swarm.hpp b/src/interface/swarm.hpp index a4125e196e17..2a550d506b7b 100644 --- a/src/interface/swarm.hpp +++ b/src/interface/swarm.hpp @@ -231,13 +231,13 @@ class Swarm { PackIndexMap &vmap); // Temporarily public - int num_particles_sent_; + // TODO(BRR) Remove this and require downstream codes make their own boolean? 
bool finished_transport; - void LoadBuffers_(const int max_indices_size); + void LoadBuffers_(); void UnloadBuffers_(); - int CountParticlesToSend_(); // Must be public for launching kernel + void CountParticlesToSend_(); // Must be public for launching kernel template const auto &GetVariableVector() const { @@ -297,7 +297,7 @@ class Swarm { constexpr static int unset_index_ = -1; ParArray1D num_particles_to_send_; - // ParArrayND particle_indices_to_send_; + ParArray1D buffer_counters_; std::vector neighbor_received_particles_; int total_received_particles_; diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 82da892d5158..6c34bcd5bbee 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -188,7 +188,7 @@ void Swarm::SetupPersistentMPI() { } } -int Swarm::CountParticlesToSend_() { +void Swarm::CountParticlesToSend_() { auto mask_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), mask_); auto swarm_d = GetDeviceContext(); auto pmb = GetBlockPointer(); @@ -227,64 +227,47 @@ int Swarm::CountParticlesToSend_() { PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, KOKKOS_LAMBDA(const int n) { num_particles_to_send[n] = 0; }); - int max_indices_size = 0; - parthenon::par_reduce( - PARTHENON_AUTO_LABEL, 0, max_active_index, - KOKKOS_LAMBDA(const int n, int &red) { + // int max_indices_size = 0; + // parthenon::par_reduce( + // PARTHENON_AUTO_LABEL, 0, max_active_index, + // KOKKOS_LAMBDA(const int n, int &red) { + // if (swarm_d.IsActive(n)) { + // bool on_current_mesh_block = true; + // swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); + + // if (block_index(n) >= 0) { + // red = Kokkos::atomic_add_fetch(&num_particles_to_send(block_index(n)), 1); + // } + // } + // }, + // Kokkos::Max(max_indices_size)); + parthenon::par_for( + PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) { if (swarm_d.IsActive(n)) { bool on_current_mesh_block = true; swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); if (block_index(n) >= 0) { - red = Kokkos::atomic_add_fetch(&num_particles_to_send(block_index(n)), 1); + Kokkos::atomic_add(&num_particles_to_send(block_index(n)), 1); } } - }, - Kokkos::Max(max_indices_size)); + }); auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy(); - // Size-0 arrays not permitted but we don't want to short-circuit subsequent logic - // that indicates completed communications - max_indices_size = std::max(1, max_indices_size); - - //// TODO(BRR) don't allocate dynamically - // particle_indices_to_send_ = - // ParArrayND("Particle indices to send", nbmax, max_indices_size); - // auto particle_indices_to_send_h = particle_indices_to_send_.GetHostMirror(); - // std::vector counter(nbmax, 0); - // for (int n = 0; n <= max_active_index_; n++) { - // if (mask_h(n)) { - // if (block_index_h(n) >= 0) { - // particle_indices_to_send_h(block_index_h(n), counter[block_index_h(n)]) = n; - // counter[block_index_h(n)]++; - // } - // } - //} - // num_particles_to_send_.DeepCopy(num_particles_to_send_h); - // particle_indices_to_send_.DeepCopy(particle_indices_to_send_h); - - // Count total particles sent and resize buffers if too small - num_particles_sent_ = 0; + // Resize send buffers if too small for (int n = 0; n < pmb->neighbors.size(); n++) { - // Resize buffer if too small const int bufid = pmb->neighbors[n].bufid; auto sendbuf = vbswarm->bd_var_.send[bufid]; if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) { - printf("resize 
buf (n: %i) bufid: %i size: %i\n", n, bufid, - num_particles_to_send_h(n)); sendbuf = BufArray1D("Buffer", num_particles_to_send_h(n) * particle_size); vbswarm->bd_var_.send[bufid] = sendbuf; } vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size; - num_particles_sent_ += num_particles_to_send_h(n); } - - return max_indices_size; } -void Swarm::LoadBuffers_(const int max_indices_size) { - // TODO(BRR) rewrite without particle_indices_to_send +void Swarm::LoadBuffers_() { auto swarm_d = GetDeviceContext(); auto pmb = GetBlockPointer(); const int particle_size = GetParticleDataSize(); @@ -303,17 +286,11 @@ void Swarm::LoadBuffers_(const int max_indices_size) { auto &y = Get(swarm_position::y::name()).Get(); auto &z = Get(swarm_position::z::name()).Get(); - // TODO(BRR) make this a Swarm member and initialize in constructor - ParArray1D buffer_counters("Buffer counters", NMAX_NEIGHBORS); - // Kokkos::fence(); - // printf("%s:%i\n", __FILE__, __LINE__); - // Zero buffer index counters + auto &buffer_counters = buffer_counters_; pmb->par_for( PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, KOKKOS_LAMBDA(const int n) { buffer_counters[n] = 0; }); - // Kokkos::fence(); - // printf("%s:%i\n", __FILE__, __LINE__); auto &bdvar = vbswarm->bd_var_; auto neighbor_buffer_index = neighbor_buffer_index_; @@ -325,15 +302,11 @@ void Swarm::LoadBuffers_(const int max_indices_size) { bool on_current_mesh_block = true; const int m = swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); - // TODO(BRR) need this map? const int bufid = neighbor_buffer_index(m); if (m >= 0) { - printf("n: %i m %i\n", n, m); const int bid = Kokkos::atomic_fetch_add(&buffer_counters(m), 1); int buffer_index = bid * particle_size; - printf("n: %i m: %i bid: %i bufid: %i xyz: %e %e %e\n", n, m, bid, bufid, - x(n), y(n), z(n)); swarm_d.MarkParticleForRemoval(n); for (int i = 0; i < realPackDim; i++) { bdvar.send[bufid](buffer_index) = vreal(i, n); @@ -346,37 +319,8 @@ void Swarm::LoadBuffers_(const int max_indices_size) { } } }); - // Kokkos::fence(); - // printf("%s:%i\n", __FILE__, __LINE__); - - // Pack index: - // [variable start] [swarm idx] - - // auto &bdvar = vbswarm->bd_var_; - // auto num_particles_to_send = num_particles_to_send_; - // auto particle_indices_to_send = particle_indices_to_send_; - // auto neighbor_buffer_index = neighbor_buffer_index_; - // pmb->par_for( - // PARTHENON_AUTO_LABEL, 0, max_indices_size - 1, - // KOKKOS_LAMBDA(const int n) { // Max index - // for (int m = 0; m < nneighbor; m++) { // Number of neighbors - // const int bufid = neighbor_buffer_index(m); - // if (n < num_particles_to_send(m)) { - // const int sidx = particle_indices_to_send(m, n); - // int buffer_index = n * particle_size; - // swarm_d.MarkParticleForRemoval(sidx); - // for (int i = 0; i < realPackDim; i++) { - // bdvar.send[bufid](buffer_index) = vreal(i, sidx); - // buffer_index++; - // } - // for (int i = 0; i < intPackDim; i++) { - // bdvar.send[bufid](buffer_index) = static_cast(vint(i, sidx)); - // buffer_index++; - // } - // } - // } - // }); + // Remove particles that were loaded to send to another block from this block RemoveMarkedParticles(); } @@ -386,10 +330,10 @@ void Swarm::Send(BoundaryCommSubset phase) { auto swarm_d = GetDeviceContext(); // Query particles for those to be sent - int max_indices_size = CountParticlesToSend_(); + CountParticlesToSend_(); // Prepare buffers for send operations - LoadBuffers_(max_indices_size); + LoadBuffers_(); // Send buffer data 
vbswarm->Send(phase); @@ -438,11 +382,6 @@ void Swarm::UnloadBuffers_() { auto &bdvar = vbswarm->bd_var_; - // Debug - auto &x = Get(swarm_position::x::name()).Get(); - auto &y = Get(swarm_position::y::name()).Get(); - auto &z = Get(swarm_position::z::name()).Get(); - if (total_received_particles_ > 0) { auto newParticlesContext = AddEmptyParticles(total_received_particles_); @@ -481,7 +420,6 @@ void Swarm::UnloadBuffers_() { vint(i, sid) = static_cast(bdvar.recv[nbid](bid)); bid++; } - printf("Received! [%i] xyz = %e %e %e\n", sid, x(sid), y(sid), z(sid)); }); } } From 27b1c8e34fea5e4aabf375e21681a30a5ac0f569 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 13 Aug 2024 11:30:52 -0600 Subject: [PATCH 06/21] May have fixed particles example --- example/particles/particles.cpp | 262 ++++++++++++++++++++------------ example/particles/particles.hpp | 3 +- 2 files changed, 166 insertions(+), 99 deletions(-) diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index 8a8d47ef111a..48bc002f891b 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -229,7 +229,7 @@ TaskStatus DepositParticles(MeshBlock *pmb) { return TaskStatus::complete; } -TaskStatus CreateSomeParticles(MeshBlock *pmb, const double t0) { +TaskStatus CreateSomeParticles(MeshBlock *pmb, const Real t0) { PARTHENON_INSTRUMENT auto pkg = pmb->packages.Get("particles_package"); @@ -340,8 +340,7 @@ TaskStatus CreateSomeParticles(MeshBlock *pmb, const double t0) { return TaskStatus::complete; } -TaskStatus TransportParticles(MeshBlock *pmb, const StagedIntegrator *integrator, - const double t0) { +TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { PARTHENON_INSTRUMENT auto swarm = pmb->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); @@ -350,8 +349,6 @@ TaskStatus TransportParticles(MeshBlock *pmb, const StagedIntegrator *integrator int max_active_index = swarm->GetMaxActiveIndex(); - Real dt = integrator->dt; - auto &t = swarm->Get("t").Get(); auto &x = swarm->Get(swarm_position::x::name()).Get(); auto &y = swarm->Get(swarm_position::y::name()).Get(); @@ -466,10 +463,74 @@ TaskStatus TransportParticles(MeshBlock *pmb, const StagedIntegrator *integrator return TaskStatus::complete; } +TaskStatus CheckCompletion(MeshBlock *pmb, const Real tf) { + auto swarm = pmb->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); + + int max_active_index = swarm->GetMaxActiveIndex(); + + auto &t = swarm->Get("t").Get(); + + auto swarm_d = swarm->GetDeviceContext(); + + int num_unfinished = 0; + parthenon::par_reduce( + PARTHENON_AUTO_LABEL, 0, max_active_index, + KOKKOS_LAMBDA(const int n, int &num_unfinished) { + if (swarm_d.IsActive(n)) { + if (t(n) < tf) { + num_unfinished++; + } + } + }, + Kokkos::Sum(num_unfinished)); + + if (num_unfinished > 0) { + return TaskStatus::iterate; + } else { + return TaskStatus::complete; + } +} + +TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { + using TQ = TaskQualifier; + + TaskCollection tc; + TaskID none(0); + BlockList_t &blocks = pmesh->block_list; + + const int max_transport_iterations = 1000; + + const Real t0 = tm.time; + const Real dt = tm.dt; + + auto num_task_lists_executed_independently = blocks.size(); + TaskRegion ® = tc.AddRegion(num_task_lists_executed_independently); + + for (int i = 0; i < blocks.size(); i++) { + auto &pmb = blocks[i]; + auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); + auto &sc1 = pmb->meshblock_data.Get(); + auto &tl = reg[i]; + + auto [itl, 
push] = tl.AddSublist(none, {i, max_transport_iterations}); + auto transport = itl.AddTask(none, TransportParticles, pmb.get(), t0, dt); + auto reset_comms = + itl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get()); + auto send = itl.AddTask(reset_comms, &SwarmContainer::Send, sc.get(), + BoundaryCommSubset::all); + auto receive = + itl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); + auto complete = itl.AddTask(TQ::once_per_region | TQ::global_sync | TQ::completion, + receive, CheckCompletion, pmb.get(), t0 + dt); + } + + return tc; +} + // Custom step function to allow for looping over MPI-related tasks until complete TaskListStatus ParticleDriver::Step() { TaskListStatus status; - integrator.dt = tm.dt; + // integrator.dt = tm.dt; // TODO(BRR) remove? require first order time integrator? BlockList_t &blocks = pmesh->block_list; auto num_task_lists_executed_independently = blocks.size(); @@ -477,23 +538,26 @@ TaskListStatus ParticleDriver::Step() { // Create all the particles that will be created during the step status = MakeParticlesCreationTaskCollection().Execute(); - // Loop over repeated MPI calls until every particle is finished. This logic is - // required because long-distance particle pushes can lead to a large, unpredictable - // number of MPI sends and receives. - bool particles_update_done = false; - while (!particles_update_done) { - status = MakeParticlesUpdateTaskCollection().Execute(); - - particles_update_done = true; - for (auto &block : blocks) { - // TODO(BRR) Despite this "my_particles"-specific call, this function feels like it - // should be generalized - auto swarm = block->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); - if (!swarm->finished_transport) { - particles_update_done = false; - } - } - } + status = MakeParticlesTransportTaskCollection().Execute(); + + //// Loop over repeated MPI calls until every particle is finished. This logic is + //// required because long-distance particle pushes can lead to a large, unpredictable + //// number of MPI sends and receives. + // bool particles_update_done = false; + // while (!particles_update_done) { + // status = MakeParticlesUpdateTaskCollection().Execute(); + + // particles_update_done = true; + // for (auto &block : blocks) { + // // TODO(BRR) Despite this "my_particles"-specific call, this function feels like + // it + // // should be generalized + // auto swarm = block->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); + // if (!swarm->finished_transport) { + // particles_update_done = false; + // } + // } + //} // Use a more traditional task list for predictable post-MPI evaluations. status = MakeFinalizationTaskCollection().Execute(); @@ -504,66 +568,68 @@ TaskListStatus ParticleDriver::Step() { // TODO(BRR) This should really be in parthenon/src... 
but it can't just live in Swarm // because of the loop over blocks TaskStatus StopCommunicationMesh(const BlockList_t &blocks) { - PARTHENON_INSTRUMENT - - int num_sent_local = 0; - for (auto &block : blocks) { - auto sc = block->meshblock_data.Get()->GetSwarmData(); - auto swarm = sc->Get("my_particles"); - swarm->finished_transport = false; - num_sent_local += swarm->num_particles_sent_; - } - - int num_sent_global = num_sent_local; // potentially overwritten by following Allreduce -#ifdef MPI_PARALLEL - for (auto &block : blocks) { - auto swarm = block->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); - for (int n = 0; n < block->neighbors.size(); n++) { - NeighborBlock &nb = block->neighbors[n]; - // TODO(BRR) May want logic like this if we have non-blocking TaskRegions - // if (nb.snb.rank != Globals::my_rank) { - // if (swarm->vbswarm->bd_var_.flag[nb.bufid] != BoundaryStatus::completed) { - // return TaskStatus::incomplete; - // } - //} - - // TODO(BRR) May want to move this logic into a per-cycle initialization call - if (swarm->vbswarm->bd_var_.flag[nb.bufid] == BoundaryStatus::completed) { - swarm->vbswarm->bd_var_.req_send[nb.bufid] = MPI_REQUEST_NULL; - } - } - } - - MPI_Allreduce(&num_sent_local, &num_sent_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); -#endif // MPI_PARALLEL - - if (num_sent_global == 0) { - for (auto &block : blocks) { - auto &pmb = block; - auto sc = pmb->meshblock_data.Get()->GetSwarmData(); - auto swarm = sc->Get("my_particles"); - swarm->finished_transport = true; - } - } - - // Reset boundary statuses - for (auto &block : blocks) { - auto &pmb = block; - auto sc = pmb->meshblock_data.Get()->GetSwarmData(); - auto swarm = sc->Get("my_particles"); - for (int n = 0; n < pmb->neighbors.size(); n++) { - auto &nb = block->neighbors[n]; - swarm->vbswarm->bd_var_.flag[nb.bufid] = BoundaryStatus::waiting; - } - } - + // PARTHENON_INSTRUMENT + // + // int num_sent_local = 0; + // for (auto &block : blocks) { + // auto sc = block->meshblock_data.Get()->GetSwarmData(); + // auto swarm = sc->Get("my_particles"); + // swarm->finished_transport = false; + // num_sent_local += swarm->num_particles_sent_; + // } + // + // int num_sent_global = num_sent_local; // potentially overwritten by following + // Allreduce + //#ifdef MPI_PARALLEL + // for (auto &block : blocks) { + // auto swarm = block->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); + // for (int n = 0; n < block->neighbors.size(); n++) { + // NeighborBlock &nb = block->neighbors[n]; + // // TODO(BRR) May want logic like this if we have non-blocking TaskRegions + // // if (nb.snb.rank != Globals::my_rank) { + // // if (swarm->vbswarm->bd_var_.flag[nb.bufid] != BoundaryStatus::completed) { + // // return TaskStatus::incomplete; + // // } + // //} + // + // // TODO(BRR) May want to move this logic into a per-cycle initialization call + // if (swarm->vbswarm->bd_var_.flag[nb.bufid] == BoundaryStatus::completed) { + // swarm->vbswarm->bd_var_.req_send[nb.bufid] = MPI_REQUEST_NULL; + // } + // } + // } + // + // MPI_Allreduce(&num_sent_local, &num_sent_global, 1, MPI_INT, MPI_SUM, + // MPI_COMM_WORLD); + //#endif // MPI_PARALLEL + // + // if (num_sent_global == 0) { + // for (auto &block : blocks) { + // auto &pmb = block; + // auto sc = pmb->meshblock_data.Get()->GetSwarmData(); + // auto swarm = sc->Get("my_particles"); + // swarm->finished_transport = true; + // } + // } + // + // // Reset boundary statuses + // for (auto &block : blocks) { + // auto &pmb = block; + // auto sc = 
pmb->meshblock_data.Get()->GetSwarmData(); + // auto swarm = sc->Get("my_particles"); + // for (int n = 0; n < pmb->neighbors.size(); n++) { + // auto &nb = block->neighbors[n]; + // swarm->vbswarm->bd_var_.flag[nb.bufid] = BoundaryStatus::waiting; + // } + // } + // return TaskStatus::complete; } TaskCollection ParticleDriver::MakeParticlesCreationTaskCollection() const { TaskCollection tc; TaskID none(0); - const double t0 = tm.time; + const Real t0 = tm.time; const BlockList_t &blocks = pmesh->block_list; auto num_task_lists_executed_independently = blocks.size(); @@ -579,34 +645,34 @@ TaskCollection ParticleDriver::MakeParticlesCreationTaskCollection() const { TaskCollection ParticleDriver::MakeParticlesUpdateTaskCollection() const { TaskCollection tc; - TaskID none(0); - const double t0 = tm.time; - const BlockList_t &blocks = pmesh->block_list; + // TaskID none(0); + // const Real t0 = tm.time; + // const BlockList_t &blocks = pmesh->block_list; - auto num_task_lists_executed_independently = blocks.size(); + // auto num_task_lists_executed_independently = blocks.size(); - TaskRegion &async_region0 = tc.AddRegion(num_task_lists_executed_independently); - for (int i = 0; i < blocks.size(); i++) { - auto &pmb = blocks[i]; + // TaskRegion &async_region0 = tc.AddRegion(num_task_lists_executed_independently); + // for (int i = 0; i < blocks.size(); i++) { + // auto &pmb = blocks[i]; - auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); + // auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); - auto &tl = async_region0[i]; + // auto &tl = async_region0[i]; - auto transport_particles = - tl.AddTask(none, TransportParticles, pmb.get(), &integrator, t0); + // auto transport_particles = + // tl.AddTask(none, TransportParticles, pmb.get(), &integrator, t0); - auto send = tl.AddTask(transport_particles, &SwarmContainer::Send, sc.get(), - BoundaryCommSubset::all); - auto receive = - tl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); - } + // auto send = tl.AddTask(transport_particles, &SwarmContainer::Send, sc.get(), + // BoundaryCommSubset::all); + // auto receive = + // tl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); + //} - TaskRegion &sync_region0 = tc.AddRegion(1); - { - auto &tl = sync_region0[0]; - auto stop_comm = tl.AddTask(none, StopCommunicationMesh, blocks); - } + // TaskRegion &sync_region0 = tc.AddRegion(1); + //{ + // auto &tl = sync_region0[0]; + // auto stop_comm = tl.AddTask(none, StopCommunicationMesh, blocks); + //} return tc; } diff --git a/example/particles/particles.hpp b/example/particles/particles.hpp index 6d74d59dbbb8..7c79f4670219 100644 --- a/example/particles/particles.hpp +++ b/example/particles/particles.hpp @@ -33,7 +33,8 @@ class ParticleDriver : public EvolutionDriver { ParticleDriver(ParameterInput *pin, ApplicationInput *app_in, Mesh *pm) : EvolutionDriver(pin, app_in, pm), integrator(pin) {} TaskCollection MakeParticlesCreationTaskCollection() const; - TaskCollection MakeParticlesUpdateTaskCollection() const; + TaskCollection MakeParticlesUpdateTaskCollection() const; // TODO(BRR) remove + TaskCollection MakeParticlesTransportTaskCollection() const; TaskCollection MakeFinalizationTaskCollection() const; TaskListStatus Step(); From 3df0f265629f7821c0fb3a13619d9bec8bbee08d Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 13 Aug 2024 11:36:54 -0600 Subject: [PATCH 07/21] cycles... 
--- example/particles/parthinput.particles | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/example/particles/parthinput.particles b/example/particles/parthinput.particles index bca9ec93292e..a79f774c6850 100644 --- a/example/particles/parthinput.particles +++ b/example/particles/parthinput.particles @@ -24,10 +24,8 @@ refinement = none nx1 = 16 x1min = -0.5 x1max = 0.5 -ix1_bc = user -ox1_bc = user -# ix1_bc = periodic # Optionally use periodic boundary conditions everywhere -# ox1_bc = periodic +ix1_bc = periodic +ox1_bc = periodic nx2 = 16 x2min = -0.5 From 1e0ec29adf75951a5d62705ba4d84dbe20025085 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 13 Aug 2024 12:47:48 -0600 Subject: [PATCH 08/21] iterative tasking is only iterating on one meshblock in particles example... --- example/particles/particles.cpp | 147 ++++++-------------------------- example/particles/particles.hpp | 1 - src/interface/swarm_comms.cpp | 3 + 3 files changed, 30 insertions(+), 121 deletions(-) diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index 48bc002f891b..33ae20d2b243 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -342,6 +342,11 @@ TaskStatus CreateSomeParticles(MeshBlock *pmb, const Real t0) { TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { PARTHENON_INSTRUMENT + if (dt < 1.e-10) { + printf("dt: %e\n", dt); + } + + printf("TransportParticles gid: %i\n", pmb->gid); auto swarm = pmb->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); auto pkg = pmb->packages.Get("particles_package"); @@ -373,6 +378,8 @@ TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { auto swarm_d = swarm->GetDeviceContext(); + printf("HERE! orbit? %i\n", orbiting_particles); + // Simple particle push: push particles half the smallest zone width until they have // traveled one integrator timestep's worth of time. Particles orbit the origin. if (orbiting_particles) { @@ -444,6 +451,9 @@ TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { y(n) += v(1, n) * dt_push; z(n) += v(2, n) * dt_push; t(n) += dt_push; + // if (n < 3) { + // printf("[%i] t: %e xyz: %e %e %e\n", n, t(n), x(n), y(n), z(n)); + //} bool on_current_mesh_block = true; // This call is required to trigger internal boundary condition machinery @@ -464,6 +474,7 @@ TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { } TaskStatus CheckCompletion(MeshBlock *pmb, const Real tf) { + printf("CheckCompletion gid: %i\n", pmb->gid); auto swarm = pmb->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); int max_active_index = swarm->GetMaxActiveIndex(); @@ -483,15 +494,19 @@ TaskStatus CheckCompletion(MeshBlock *pmb, const Real tf) { } }, Kokkos::Sum(num_unfinished)); + printf("num_unfinished: %i\n", num_unfinished); if (num_unfinished > 0) { + printf("ITERATE! %i\n", pmb->gid); return TaskStatus::iterate; } else { + printf("COMPLETE! 
%i\n", pmb->gid); return TaskStatus::complete; } } TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { + printf("Well I'm here...\n"); using TQ = TaskQualifier; TaskCollection tc; @@ -503,16 +518,18 @@ TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { const Real t0 = tm.time; const Real dt = tm.dt; - auto num_task_lists_executed_independently = blocks.size(); - TaskRegion ® = tc.AddRegion(num_task_lists_executed_independently); + auto ® = tc.AddRegion(blocks.size()); for (int i = 0; i < blocks.size(); i++) { + printf("i: %i\n", i); auto &pmb = blocks[i]; auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); - auto &sc1 = pmb->meshblock_data.Get(); auto &tl = reg[i]; - auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations}); + auto pushtmp = tl.AddTask(none, TransportParticles, pmb.get(), t0, 0.); + + // auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations}); + auto [itl, push] = tl.AddSublist(pushtmp, {i, max_transport_iterations}); auto transport = itl.AddTask(none, TransportParticles, pmb.get(), t0, dt); auto reset_comms = itl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get()); @@ -520,8 +537,11 @@ TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { BoundaryCommSubset::all); auto receive = itl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); - auto complete = itl.AddTask(TQ::once_per_region | TQ::global_sync | TQ::completion, - receive, CheckCompletion, pmb.get(), t0 + dt); + // auto complete = itl.AddTask(TQ::once_per_region | TQ::global_sync | TQ::completion, + auto complete = itl.AddTask(TQ::global_sync | TQ::completion, receive, + CheckCompletion, pmb.get(), t0 + dt); + + auto pushtmp2 = tl.AddTask(push, TransportParticles, pmb.get(), t0, 0.); } return tc; @@ -538,94 +558,15 @@ TaskListStatus ParticleDriver::Step() { // Create all the particles that will be created during the step status = MakeParticlesCreationTaskCollection().Execute(); + // Transport particles iteratively until all particles reach final time status = MakeParticlesTransportTaskCollection().Execute(); - //// Loop over repeated MPI calls until every particle is finished. This logic is - //// required because long-distance particle pushes can lead to a large, unpredictable - //// number of MPI sends and receives. - // bool particles_update_done = false; - // while (!particles_update_done) { - // status = MakeParticlesUpdateTaskCollection().Execute(); - - // particles_update_done = true; - // for (auto &block : blocks) { - // // TODO(BRR) Despite this "my_particles"-specific call, this function feels like - // it - // // should be generalized - // auto swarm = block->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); - // if (!swarm->finished_transport) { - // particles_update_done = false; - // } - // } - //} - // Use a more traditional task list for predictable post-MPI evaluations. status = MakeFinalizationTaskCollection().Execute(); return status; } -// TODO(BRR) This should really be in parthenon/src... 
but it can't just live in Swarm -// because of the loop over blocks -TaskStatus StopCommunicationMesh(const BlockList_t &blocks) { - // PARTHENON_INSTRUMENT - // - // int num_sent_local = 0; - // for (auto &block : blocks) { - // auto sc = block->meshblock_data.Get()->GetSwarmData(); - // auto swarm = sc->Get("my_particles"); - // swarm->finished_transport = false; - // num_sent_local += swarm->num_particles_sent_; - // } - // - // int num_sent_global = num_sent_local; // potentially overwritten by following - // Allreduce - //#ifdef MPI_PARALLEL - // for (auto &block : blocks) { - // auto swarm = block->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); - // for (int n = 0; n < block->neighbors.size(); n++) { - // NeighborBlock &nb = block->neighbors[n]; - // // TODO(BRR) May want logic like this if we have non-blocking TaskRegions - // // if (nb.snb.rank != Globals::my_rank) { - // // if (swarm->vbswarm->bd_var_.flag[nb.bufid] != BoundaryStatus::completed) { - // // return TaskStatus::incomplete; - // // } - // //} - // - // // TODO(BRR) May want to move this logic into a per-cycle initialization call - // if (swarm->vbswarm->bd_var_.flag[nb.bufid] == BoundaryStatus::completed) { - // swarm->vbswarm->bd_var_.req_send[nb.bufid] = MPI_REQUEST_NULL; - // } - // } - // } - // - // MPI_Allreduce(&num_sent_local, &num_sent_global, 1, MPI_INT, MPI_SUM, - // MPI_COMM_WORLD); - //#endif // MPI_PARALLEL - // - // if (num_sent_global == 0) { - // for (auto &block : blocks) { - // auto &pmb = block; - // auto sc = pmb->meshblock_data.Get()->GetSwarmData(); - // auto swarm = sc->Get("my_particles"); - // swarm->finished_transport = true; - // } - // } - // - // // Reset boundary statuses - // for (auto &block : blocks) { - // auto &pmb = block; - // auto sc = pmb->meshblock_data.Get()->GetSwarmData(); - // auto swarm = sc->Get("my_particles"); - // for (int n = 0; n < pmb->neighbors.size(); n++) { - // auto &nb = block->neighbors[n]; - // swarm->vbswarm->bd_var_.flag[nb.bufid] = BoundaryStatus::waiting; - // } - // } - // - return TaskStatus::complete; -} - TaskCollection ParticleDriver::MakeParticlesCreationTaskCollection() const { TaskCollection tc; TaskID none(0); @@ -643,40 +584,6 @@ TaskCollection ParticleDriver::MakeParticlesCreationTaskCollection() const { return tc; } -TaskCollection ParticleDriver::MakeParticlesUpdateTaskCollection() const { - TaskCollection tc; - // TaskID none(0); - // const Real t0 = tm.time; - // const BlockList_t &blocks = pmesh->block_list; - - // auto num_task_lists_executed_independently = blocks.size(); - - // TaskRegion &async_region0 = tc.AddRegion(num_task_lists_executed_independently); - // for (int i = 0; i < blocks.size(); i++) { - // auto &pmb = blocks[i]; - - // auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); - - // auto &tl = async_region0[i]; - - // auto transport_particles = - // tl.AddTask(none, TransportParticles, pmb.get(), &integrator, t0); - - // auto send = tl.AddTask(transport_particles, &SwarmContainer::Send, sc.get(), - // BoundaryCommSubset::all); - // auto receive = - // tl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); - //} - - // TaskRegion &sync_region0 = tc.AddRegion(1); - //{ - // auto &tl = sync_region0[0]; - // auto stop_comm = tl.AddTask(none, StopCommunicationMesh, blocks); - //} - - return tc; -} - TaskCollection ParticleDriver::MakeFinalizationTaskCollection() const { TaskCollection tc; TaskID none(0); diff --git a/example/particles/particles.hpp b/example/particles/particles.hpp index 
7c79f4670219..21cef09a6879 100644 --- a/example/particles/particles.hpp +++ b/example/particles/particles.hpp @@ -33,7 +33,6 @@ class ParticleDriver : public EvolutionDriver { ParticleDriver(ParameterInput *pin, ApplicationInput *app_in, Mesh *pm) : EvolutionDriver(pin, app_in, pm), integrator(pin) {} TaskCollection MakeParticlesCreationTaskCollection() const; - TaskCollection MakeParticlesUpdateTaskCollection() const; // TODO(BRR) remove TaskCollection MakeParticlesTransportTaskCollection() const; TaskCollection MakeFinalizationTaskCollection() const; TaskListStatus Step(); diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 6c34bcd5bbee..2fe1f2ea5271 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -325,6 +325,7 @@ void Swarm::LoadBuffers_() { } void Swarm::Send(BoundaryCommSubset phase) { + printf("Swarm::Send gid: %i\n", GetBlockPointer()->gid); auto pmb = GetBlockPointer(); const int nneighbor = pmb->neighbors.size(); auto swarm_d = GetDeviceContext(); @@ -425,6 +426,7 @@ void Swarm::UnloadBuffers_() { } bool Swarm::Receive(BoundaryCommSubset phase) { + // printf("Swarm::Receive gid: %i\n", GetBlockPointer()->gid); auto pmb = GetBlockPointer(); const int nneighbor = pmb->neighbors.size(); @@ -450,6 +452,7 @@ bool Swarm::Receive(BoundaryCommSubset phase) { if (bdvar.flag[nb.bufid] == BoundaryStatus::arrived) { bdvar.flag[nb.bufid] = BoundaryStatus::completed; } else if (bdvar.flag[nb.bufid] == BoundaryStatus::waiting) { + // printf("block %i Waiting on %i\n", pmb->gid, nb.bufid); all_boundaries_received = false; } } From a5c276fea2e186fb40e514b6c61a9c023c4cf91f Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 13 Aug 2024 15:19:28 -0600 Subject: [PATCH 09/21] Still not working... --- example/particles/particles.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index 33ae20d2b243..613d0f8fcc40 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -526,10 +526,11 @@ TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); auto &tl = reg[i]; - auto pushtmp = tl.AddTask(none, TransportParticles, pmb.get(), t0, 0.); + // If this task is enrolled then the iterative loop will cycle + // auto pushtmp = tl.AddTask(none, TransportParticles, pmb.get(), t0, 0.); - // auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations}); - auto [itl, push] = tl.AddSublist(pushtmp, {i, max_transport_iterations}); + auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations}); + // auto [itl, push] = tl.AddSublist(pushtmp, {i, max_transport_iterations}); auto transport = itl.AddTask(none, TransportParticles, pmb.get(), t0, dt); auto reset_comms = itl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get()); @@ -537,11 +538,8 @@ TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { BoundaryCommSubset::all); auto receive = itl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); - // auto complete = itl.AddTask(TQ::once_per_region | TQ::global_sync | TQ::completion, auto complete = itl.AddTask(TQ::global_sync | TQ::completion, receive, CheckCompletion, pmb.get(), t0 + dt); - - auto pushtmp2 = tl.AddTask(push, TransportParticles, pmb.get(), t0, 0.); } return tc; From bede6c59bbc75a13c844005cb84a6dbf822c27f0 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 13 Aug 2024 20:46:00 
-0600 Subject: [PATCH 10/21] New loop seems to work --- example/particles/particles.cpp | 10 +++--- src/interface/swarm_comms.cpp | 59 ++++++++++++++++++++++++--------- 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index 613d0f8fcc40..92803a3d5dd2 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -506,7 +506,6 @@ TaskStatus CheckCompletion(MeshBlock *pmb, const Real tf) { } TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { - printf("Well I'm here...\n"); using TQ = TaskQualifier; TaskCollection tc; @@ -527,10 +526,10 @@ TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { auto &tl = reg[i]; // If this task is enrolled then the iterative loop will cycle - // auto pushtmp = tl.AddTask(none, TransportParticles, pmb.get(), t0, 0.); + auto pushtmp = tl.AddTask(none, TransportParticles, pmb.get(), t0, 0.); - auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations}); - // auto [itl, push] = tl.AddSublist(pushtmp, {i, max_transport_iterations}); + // auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations}); + auto [itl, push] = tl.AddSublist(pushtmp, {i, max_transport_iterations}); auto transport = itl.AddTask(none, TransportParticles, pmb.get(), t0, dt); auto reset_comms = itl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get()); @@ -555,12 +554,15 @@ TaskListStatus ParticleDriver::Step() { // Create all the particles that will be created during the step status = MakeParticlesCreationTaskCollection().Execute(); + PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!"); // Transport particles iteratively until all particles reach final time status = MakeParticlesTransportTaskCollection().Execute(); + PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!"); // Use a more traditional task list for predictable post-MPI evaluations. 
status = MakeFinalizationTaskCollection().Execute(); + PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!"); return status; } diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 2fe1f2ea5271..2eeb1026bd89 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -227,20 +227,6 @@ void Swarm::CountParticlesToSend_() { PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, KOKKOS_LAMBDA(const int n) { num_particles_to_send[n] = 0; }); - // int max_indices_size = 0; - // parthenon::par_reduce( - // PARTHENON_AUTO_LABEL, 0, max_active_index, - // KOKKOS_LAMBDA(const int n, int &red) { - // if (swarm_d.IsActive(n)) { - // bool on_current_mesh_block = true; - // swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); - - // if (block_index(n) >= 0) { - // red = Kokkos::atomic_add_fetch(&num_particles_to_send(block_index(n)), 1); - // } - // } - // }, - // Kokkos::Max(max_indices_size)); parthenon::par_for( PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) { if (swarm_d.IsActive(n)) { @@ -350,6 +336,7 @@ void Swarm::CountReceivedParticles_() { "Receive buffer is not divisible by particle size!"); neighbor_received_particles_[n] = vbswarm->recv_size[bufid] / vbswarm->particle_size; + printf("nid: %i bufid: %i nrecvd: %i\n", n, bufid, neighbor_received_particles_[n]); total_received_particles_ += neighbor_received_particles_[n]; } else { neighbor_received_particles_[n] = 0; @@ -405,23 +392,63 @@ void Swarm::UnloadBuffers_() { const int particle_size = GetParticleDataSize(); auto swarm_d = GetDeviceContext(); + // TODO(BRR) put neighbor_received_particles_ on device? + // Cumulative array of number of particles received by all previous neighbors + printf("total_received_particles_: %i\n", total_received_particles_); + ParArray1D nrp("nrp_d", NMAX_NEIGHBORS); + auto nrp_h = nrp.GetHostMirror(); + nrp_h(0) = 0; + for (int n = 1; n < NMAX_NEIGHBORS; n++) { + nrp_h(n) = neighbor_received_particles_[n] + nrp_h(n - 1); + if (n < 10) printf("nrp_h(%i) = %i\n", n, nrp_h(n)); + } + nrp.DeepCopy(nrp_h); + pmb->par_for( PARTHENON_AUTO_LABEL, 0, newParticlesContext.GetNewParticlesMaxIndex(), // n is both new particle index and index over buffer values KOKKOS_LAMBDA(const int n) { const int sid = newParticlesContext.GetNewParticleIndex(n); - const int nid = recv_neighbor_index(n); - int bid = recv_buffer_index(n) * particle_size; + // Get neighbor id + int nid = 0; + while (n > nrp(nid) - 1) { + nid++; + } + int bid = (n - nrp(nid)) * particle_size; + printf("n: %i nid: %i bid: %i\n", n, nid, bid); const int nbid = neighbor_buffer_index(nid); + printf("nbid: %i sid: %i\n", nbid, sid); for (int i = 0; i < realPackDim; i++) { + printf("A i: %i\n", i); vreal(i, sid) = bdvar.recv[nbid](bid); bid++; } + printf("bid: %i\n", bid); for (int i = 0; i < intPackDim; i++) { + printf("B i: %i\n", i); vint(i, sid) = static_cast(bdvar.recv[nbid](bid)); bid++; } + printf("n: %i nid: %i\n", n, nid); }); + + // pmb->par_for( + // PARTHENON_AUTO_LABEL, 0, newParticlesContext.GetNewParticlesMaxIndex(), + // // n is both new particle index and index over buffer values + // KOKKOS_LAMBDA(const int n) { + // const int sid = newParticlesContext.GetNewParticleIndex(n); + // const int nid = recv_neighbor_index(n); + // int bid = recv_buffer_index(n) * particle_size; + // const int nbid = neighbor_buffer_index(nid); + // for (int i = 0; i < realPackDim; i++) { + // vreal(i, sid) = bdvar.recv[nbid](bid); + // bid++; + // } + // for (int 
i = 0; i < intPackDim; i++) { + // vint(i, sid) = static_cast(bdvar.recv[nbid](bid)); + // bid++; + // } + // }); } } From 981cdb8b7d04fe009eb8833879f4552b7c3b8e25 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 13 Aug 2024 20:51:07 -0600 Subject: [PATCH 11/21] Cleaned up some printfs/marked unused code for deletion --- src/interface/swarm_comms.cpp | 42 ++++++++++++++--------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 2eeb1026bd89..a093434609d1 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -346,21 +346,21 @@ void Swarm::CountReceivedParticles_() { void Swarm::UpdateNeighborBufferReceiveIndices_(ParArray1D &neighbor_index, ParArray1D &buffer_index) { - auto pmb = GetBlockPointer(); - auto neighbor_index_h = neighbor_index.GetHostMirror(); - auto buffer_index_h = - buffer_index.GetHostMirror(); // Index of each particle in its received buffer + // auto pmb = GetBlockPointer(); + // auto neighbor_index_h = neighbor_index.GetHostMirror(); + // auto buffer_index_h = + // buffer_index.GetHostMirror(); // Index of each particle in its received buffer - int id = 0; - for (int n = 0; n < pmb->neighbors.size(); n++) { - for (int m = 0; m < neighbor_received_particles_[n]; m++) { - neighbor_index_h(id) = n; - buffer_index_h(id) = m; - id++; - } - } - neighbor_index.DeepCopy(neighbor_index_h); - buffer_index.DeepCopy(buffer_index_h); + // int id = 0; + // for (int n = 0; n < pmb->neighbors.size(); n++) { + // for (int m = 0; m < neighbor_received_particles_[n]; m++) { + // neighbor_index_h(id) = n; + // buffer_index_h(id) = m; + // id++; + // } + //} + // neighbor_index.DeepCopy(neighbor_index_h); + // buffer_index.DeepCopy(buffer_index_h); } void Swarm::UnloadBuffers_() { @@ -373,9 +373,9 @@ void Swarm::UnloadBuffers_() { if (total_received_particles_ > 0) { auto newParticlesContext = AddEmptyParticles(total_received_particles_); - auto &recv_neighbor_index = recv_neighbor_index_; - auto &recv_buffer_index = recv_buffer_index_; - UpdateNeighborBufferReceiveIndices_(recv_neighbor_index, recv_buffer_index); + // auto &recv_neighbor_index = recv_neighbor_index_; + // auto &recv_buffer_index = recv_buffer_index_; + // UpdateNeighborBufferReceiveIndices_(recv_neighbor_index, recv_buffer_index); auto neighbor_buffer_index = neighbor_buffer_index_; auto &int_vector = std::get()>(vectors_); @@ -394,13 +394,11 @@ void Swarm::UnloadBuffers_() { // TODO(BRR) put neighbor_received_particles_ on device? 
// Cumulative array of number of particles received by all previous neighbors - printf("total_received_particles_: %i\n", total_received_particles_); ParArray1D nrp("nrp_d", NMAX_NEIGHBORS); auto nrp_h = nrp.GetHostMirror(); nrp_h(0) = 0; for (int n = 1; n < NMAX_NEIGHBORS; n++) { nrp_h(n) = neighbor_received_particles_[n] + nrp_h(n - 1); - if (n < 10) printf("nrp_h(%i) = %i\n", n, nrp_h(n)); } nrp.DeepCopy(nrp_h); @@ -415,21 +413,15 @@ void Swarm::UnloadBuffers_() { nid++; } int bid = (n - nrp(nid)) * particle_size; - printf("n: %i nid: %i bid: %i\n", n, nid, bid); const int nbid = neighbor_buffer_index(nid); - printf("nbid: %i sid: %i\n", nbid, sid); for (int i = 0; i < realPackDim; i++) { - printf("A i: %i\n", i); vreal(i, sid) = bdvar.recv[nbid](bid); bid++; } - printf("bid: %i\n", bid); for (int i = 0; i < intPackDim; i++) { - printf("B i: %i\n", i); vint(i, sid) = static_cast(bdvar.recv[nbid](bid)); bid++; } - printf("n: %i nid: %i\n", n, nid); }); // pmb->par_for( From 6ca217c3a4637706adec9342eb996324bb460f2a Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 14 Aug 2024 09:31:18 -0600 Subject: [PATCH 12/21] New algorithm is cycling, time to clean up --- src/interface/swarm.cpp | 14 +++-- src/interface/swarm.hpp | 20 +++----- src/interface/swarm_comms.cpp | 97 ++++++++++++++--------------------- 3 files changed, 52 insertions(+), 79 deletions(-) diff --git a/src/interface/swarm.cpp b/src/interface/swarm.cpp index f29d6b374c6c..ea5c17a637f7 100644 --- a/src/interface/swarm.cpp +++ b/src/interface/swarm.cpp @@ -69,13 +69,11 @@ Swarm::Swarm(const std::string &label, const Metadata &metadata, const int nmax_ empty_indices_("empty_indices_", nmax_pool_), block_index_("block_index_", nmax_pool_), neighbor_indices_("neighbor_indices_", 4, 4, 4), - new_indices_("new_indices_", nmax_pool_), - from_to_indices_("from_to_indices_", nmax_pool_ + 1), - recv_neighbor_index_("recv_neighbor_index_", nmax_pool_), - recv_buffer_index_("recv_buffer_index_", nmax_pool_), - scratch_a_("scratch_a_", nmax_pool_), scratch_b_("scratch_b_", nmax_pool_), + new_indices_("new_indices_", nmax_pool_), scratch_a_("scratch_a_", nmax_pool_), + scratch_b_("scratch_b_", nmax_pool_), num_particles_to_send_("num_particles_to_send_", NMAX_NEIGHBORS), buffer_counters_("buffer_counters_", NMAX_NEIGHBORS), + neighbor_received_particles_("neighbor_received_particles_", NMAX_NEIGHBORS), cell_sorted_("cell_sorted_", nmax_pool_), mpiStatus(true) { PARTHENON_REQUIRE_THROWS(typeid(Coordinates_t) == typeid(UniformCartesian), "SwarmDeviceContext only supports a uniform Cartesian mesh!"); @@ -87,6 +85,9 @@ Swarm::Swarm(const std::string &label, const Metadata &metadata, const int nmax_ Add(swarm_position::y::name(), Metadata({Metadata::Real})); Add(swarm_position::z::name(), Metadata({Metadata::Real})); + // Create associated host arrays + neighbor_received_particles_h = neighbor_received_particles_.GetHostMirror(); + // Initialize index metadata num_active_ = 0; max_active_index_ = inactive_max_active_index; @@ -202,9 +203,6 @@ void Swarm::SetPoolMax(const std::int64_t nmax_pool) { Kokkos::resize(marked_for_removal_, nmax_pool); Kokkos::resize(empty_indices_, nmax_pool); Kokkos::resize(new_indices_, nmax_pool); - Kokkos::resize(from_to_indices_, nmax_pool + 1); - Kokkos::resize(recv_neighbor_index_, nmax_pool); - Kokkos::resize(recv_buffer_index_, nmax_pool); Kokkos::resize(scratch_a_, nmax_pool); Kokkos::resize(scratch_b_, nmax_pool); diff --git a/src/interface/swarm.hpp b/src/interface/swarm.hpp index 2a550d506b7b..db645cdcb22a 
100644 --- a/src/interface/swarm.hpp +++ b/src/interface/swarm.hpp @@ -255,8 +255,6 @@ class Swarm { void SetNeighborIndices_(); void CountReceivedParticles_(); - void UpdateNeighborBufferReceiveIndices_(ParArray1D &neighbor_index, - ParArray1D &buffer_index); template SwarmVariablePack PackAllVariables_(PackIndexMap &vmap); @@ -282,15 +280,11 @@ class Swarm { ParArrayND block_index_; // Neighbor index for each particle. -1 for current block. ParArrayND neighbor_indices_; // Indexing of vbvar's neighbor array. -1 for same. // k,j indices unused in 3D&2D, 2D, respectively - ParArray1D new_indices_; // Persistent array that provides the new indices when - // AddEmptyParticles is called. Always defragmented. - int new_indices_max_idx_; // Maximum valid index of new_indices_ array. - ParArray1D from_to_indices_; // Array used for sorting particles during defragment - // step (size nmax_pool + 1). - ParArray1D recv_neighbor_index_; // Neighbor indices for received particles - ParArray1D recv_buffer_index_; // Buffer indices for received particles - ParArray1D scratch_a_; // Scratch memory for index sorting - ParArray1D scratch_b_; // Scratch memory for index sorting + ParArray1D new_indices_; // Persistent array that provides the new indices when + // AddEmptyParticles is called. Always defragmented. + int new_indices_max_idx_; // Maximum valid index of new_indices_ array. + ParArray1D scratch_a_; // Scratch memory for index sorting + ParArray1D scratch_b_; // Scratch memory for index sorting constexpr static int no_block_ = -2; constexpr static int this_block_ = -1; @@ -298,8 +292,8 @@ class Swarm { ParArray1D num_particles_to_send_; ParArray1D buffer_counters_; - - std::vector neighbor_received_particles_; + ParArray1D neighbor_received_particles_; + ParArray1D neighbor_received_particles_h; int total_received_particles_; ParArrayND neighbor_buffer_index_; // Map from neighbor index to neighbor bufid diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index a093434609d1..b6c55bb9834c 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -174,8 +174,6 @@ void Swarm::SetupPersistentMPI() { // Build up convenience array of neighbor indices SetNeighborIndices_(); - neighbor_received_particles_.resize(nbmax); - // Build device array mapping neighbor index to neighbor bufid if (pmb->neighbors.size() > 0) { ParArrayND neighbor_buffer_index("Neighbor buffer index", pmb->neighbors.size()); @@ -197,10 +195,6 @@ void Swarm::CountParticlesToSend_() { // Fence to make sure particles aren't currently being transported locally // TODO(BRR) do this operation on device. 
pmb->exec_space.fence(); - // auto num_particles_to_send_h = num_particles_to_send_.GetHostMirror(); - // for (int n = 0; n < pmb->neighbors.size(); n++) { - // num_particles_to_send_h(n) = 0; - //} const int particle_size = GetParticleDataSize(); vbswarm->particle_size = particle_size; @@ -334,33 +328,17 @@ void Swarm::CountReceivedParticles_() { if (vbswarm->bd_var_.flag[bufid] == BoundaryStatus::arrived) { PARTHENON_DEBUG_REQUIRE(vbswarm->recv_size[bufid] % vbswarm->particle_size == 0, "Receive buffer is not divisible by particle size!"); - neighbor_received_particles_[n] = + neighbor_received_particles_h(n) = vbswarm->recv_size[bufid] / vbswarm->particle_size; - printf("nid: %i bufid: %i nrecvd: %i\n", n, bufid, neighbor_received_particles_[n]); - total_received_particles_ += neighbor_received_particles_[n]; + total_received_particles_ += neighbor_received_particles_h(n); + printf("n = %i recvd %i!\n", n, neighbor_received_particles_h(n)); } else { - neighbor_received_particles_[n] = 0; + neighbor_received_particles_h(n) = 0; } } -} - -void Swarm::UpdateNeighborBufferReceiveIndices_(ParArray1D &neighbor_index, - ParArray1D &buffer_index) { - // auto pmb = GetBlockPointer(); - // auto neighbor_index_h = neighbor_index.GetHostMirror(); - // auto buffer_index_h = - // buffer_index.GetHostMirror(); // Index of each particle in its received buffer - - // int id = 0; - // for (int n = 0; n < pmb->neighbors.size(); n++) { - // for (int m = 0; m < neighbor_received_particles_[n]; m++) { - // neighbor_index_h(id) = n; - // buffer_index_h(id) = m; - // id++; - // } - //} - // neighbor_index.DeepCopy(neighbor_index_h); - // buffer_index.DeepCopy(buffer_index_h); + for (int n = 0; n < NMAX_NEIGHBORS; n++) { + printf("early nrph(%i) = %i\n", n, neighbor_received_particles_h(n)); + } } void Swarm::UnloadBuffers_() { @@ -373,9 +351,6 @@ void Swarm::UnloadBuffers_() { if (total_received_particles_ > 0) { auto newParticlesContext = AddEmptyParticles(total_received_particles_); - // auto &recv_neighbor_index = recv_neighbor_index_; - // auto &recv_buffer_index = recv_buffer_index_; - // UpdateNeighborBufferReceiveIndices_(recv_neighbor_index, recv_buffer_index); auto neighbor_buffer_index = neighbor_buffer_index_; auto &int_vector = std::get()>(vectors_); @@ -394,13 +369,35 @@ void Swarm::UnloadBuffers_() { // TODO(BRR) put neighbor_received_particles_ on device? 
// Cumulative array of number of particles received by all previous neighbors - ParArray1D nrp("nrp_d", NMAX_NEIGHBORS); - auto nrp_h = nrp.GetHostMirror(); - nrp_h(0) = 0; - for (int n = 1; n < NMAX_NEIGHBORS; n++) { - nrp_h(n) = neighbor_received_particles_[n] + nrp_h(n - 1); + // ParArray1D nrp("nrp_d", NMAX_NEIGHBORS); + // Change meaning of neighbor_received_particles from particles per neighbor to + // cumulative particles per neighbor + // auto nrp_h = nrp.GetHostMirror(); + int val_prev = 0; + for (int n = 0; n < NMAX_NEIGHBORS; n++) { + printf("nrph(%i) = %i\n", n, neighbor_received_particles_h(n)); + double val_curr = neighbor_received_particles_h(n); + printf("val curr: %i\n", val_curr); + neighbor_received_particles_h(n) += val_prev; + printf("new nrph(%i) = %i\n", n, neighbor_received_particles_h(n)); + val_prev += val_curr; + // printf("nrp[%i]: %i\n", n, neighbor_received_particles_h(n)); + + // double val2 = neighbor_received_particles_h(n); + // neighbor_received_particles_h + + // val = neighbor_received_particles_h(n); + // const Real val = neighbor_received_partices_h(n); + // if (n == 0) { + // neighbor_received_particles_h + //} } - nrp.DeepCopy(nrp_h); + neighbor_received_particles_.DeepCopy(neighbor_received_particles_h); + // nrp_h(0) = 0; + // for (int n = 1; n < NMAX_NEIGHBORS; n++) { + // nrp_h(n) = neighbor_received_particles_[n] + nrp_h(n - 1); + // } + // nrp.DeepCopy(nrp_h); pmb->par_for( PARTHENON_AUTO_LABEL, 0, newParticlesContext.GetNewParticlesMaxIndex(), @@ -409,10 +406,12 @@ void Swarm::UnloadBuffers_() { const int sid = newParticlesContext.GetNewParticleIndex(n); // Get neighbor id int nid = 0; - while (n > nrp(nid) - 1) { + // while (n > nrp(nid) - 1) { + while (n > neighbor_received_particles_(nid) - 1) { nid++; } - int bid = (n - nrp(nid)) * particle_size; + // int bid = (n - nrp(nid)) * particle_size; + int bid = (n - neighbor_received_particles_(nid)) * particle_size; const int nbid = neighbor_buffer_index(nid); for (int i = 0; i < realPackDim; i++) { vreal(i, sid) = bdvar.recv[nbid](bid); @@ -423,24 +422,6 @@ void Swarm::UnloadBuffers_() { bid++; } }); - - // pmb->par_for( - // PARTHENON_AUTO_LABEL, 0, newParticlesContext.GetNewParticlesMaxIndex(), - // // n is both new particle index and index over buffer values - // KOKKOS_LAMBDA(const int n) { - // const int sid = newParticlesContext.GetNewParticleIndex(n); - // const int nid = recv_neighbor_index(n); - // int bid = recv_buffer_index(n) * particle_size; - // const int nbid = neighbor_buffer_index(nid); - // for (int i = 0; i < realPackDim; i++) { - // vreal(i, sid) = bdvar.recv[nbid](bid); - // bid++; - // } - // for (int i = 0; i < intPackDim; i++) { - // vint(i, sid) = static_cast(bdvar.recv[nbid](bid)); - // bid++; - // } - // }); } } From 0fa6d3076ab8b5dc6f91af809d0fb41d44e34d1d Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 14 Aug 2024 10:51:46 -0600 Subject: [PATCH 13/21] Fixed indexing bug... 
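The bug: once neighbor_received_particles_ is converted in place to an inclusive
prefix sum, the offset of received particle n inside neighbor nid's buffer has to
subtract the cumulative count of neighbor nid - 1 (zero for the first neighbor),
not of nid itself, which already includes that neighbor's own particles and can go
negative. A minimal host-side sketch of the intended index math, with made-up
neighbor counts and particle size (not values taken from the code):

    #include <cstdio>
    #include <vector>

    int main() {
      // Particles received per neighbor (made-up example counts)
      std::vector<int> received = {3, 0, 2, 4};
      const int nneighbor = static_cast<int>(received.size());

      // Inclusive prefix sum, mirroring the in-place conversion of
      // neighbor_received_particles_ in Swarm::UnloadBuffers_()
      std::vector<int> cumulative(nneighbor);
      int prev = 0;
      for (int i = 0; i < nneighbor; ++i) {
        cumulative[i] = prev + received[i];
        prev = cumulative[i];
      }

      const int total = prev;
      const int particle_size = 5; // Reals per particle (example value)
      for (int n = 0; n < total; ++n) {
        // First neighbor whose cumulative count exceeds n owns this particle
        int nid = 0;
        while (n >= cumulative[nid]) {
          nid++;
        }
        // Offset within that neighbor's buffer uses the previous neighbor's
        // cumulative count; subtracting cumulative[nid] (the old code) goes negative
        const int offset = (nid == 0) ? n : n - cumulative[nid - 1];
        const int bid = offset * particle_size;
        std::printf("particle %d -> neighbor %d, buffer element %d\n", n, nid, bid);
      }
      return 0;
    }
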
--- example/particles/particles.cpp | 1 + src/interface/swarm_comms.cpp | 75 +++++++++++++++++++-------------- 2 files changed, 45 insertions(+), 31 deletions(-) diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index 92803a3d5dd2..a5688e9b1bd8 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -469,6 +469,7 @@ TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { } }); } + printf("Done transporting\n"); return TaskStatus::complete; } diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index b6c55bb9834c..15553ecd9731 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -296,6 +296,7 @@ void Swarm::LoadBuffers_() { bdvar.send[bufid](buffer_index) = static_cast(vint(i, n)); buffer_index++; } + printf("SENDING xyz: %e %e %e\n", x(n), y(n), z(n)); } } }); @@ -331,14 +332,10 @@ void Swarm::CountReceivedParticles_() { neighbor_received_particles_h(n) = vbswarm->recv_size[bufid] / vbswarm->particle_size; total_received_particles_ += neighbor_received_particles_h(n); - printf("n = %i recvd %i!\n", n, neighbor_received_particles_h(n)); } else { neighbor_received_particles_h(n) = 0; } } - for (int n = 0; n < NMAX_NEIGHBORS; n++) { - printf("early nrph(%i) = %i\n", n, neighbor_received_particles_h(n)); - } } void Swarm::UnloadBuffers_() { @@ -347,6 +344,7 @@ void Swarm::UnloadBuffers_() { CountReceivedParticles_(); auto &bdvar = vbswarm->bd_var_; + const int nbmax = vbswarm->bd_var_.nbmax; if (total_received_particles_ > 0) { auto newParticlesContext = AddEmptyParticles(total_received_particles_); @@ -367,37 +365,36 @@ void Swarm::UnloadBuffers_() { const int particle_size = GetParticleDataSize(); auto swarm_d = GetDeviceContext(); - // TODO(BRR) put neighbor_received_particles_ on device? 
- // Cumulative array of number of particles received by all previous neighbors + // TODO(BRR) DEBUG // ParArray1D nrp("nrp_d", NMAX_NEIGHBORS); + // auto nrp_h = nrp.GetHostMirror(); + // nrp_h(0) = 0; + // for (int n = 1; n < NMAX_NEIGHBORS; n++) { + // // nrp_h(n) = neighbor_received_particles_[n] + nrp_h(n - 1); + // nrp_h(n) = neighbor_received_particles_h(n) + nrp_h(n - 1); + //} + // nrp.DeepCopy(nrp_h); + // Change meaning of neighbor_received_particles from particles per neighbor to // cumulative particles per neighbor - // auto nrp_h = nrp.GetHostMirror(); int val_prev = 0; - for (int n = 0; n < NMAX_NEIGHBORS; n++) { - printf("nrph(%i) = %i\n", n, neighbor_received_particles_h(n)); + for (int n = 0; n < nbmax; n++) { + printf("[%i] recvd: %i\n", n, neighbor_received_particles_h(n)); double val_curr = neighbor_received_particles_h(n); - printf("val curr: %i\n", val_curr); neighbor_received_particles_h(n) += val_prev; - printf("new nrph(%i) = %i\n", n, neighbor_received_particles_h(n)); val_prev += val_curr; - // printf("nrp[%i]: %i\n", n, neighbor_received_particles_h(n)); - - // double val2 = neighbor_received_particles_h(n); - // neighbor_received_particles_h - - // val = neighbor_received_particles_h(n); - // const Real val = neighbor_received_partices_h(n); - // if (n == 0) { - // neighbor_received_particles_h - //} + printf("[%i] nrp: %i\n", n, neighbor_received_particles_h(n)); + // neighbor recv: %i\n", n, nrp_h(n), + // neighbor_received_particles_h(n)); } neighbor_received_particles_.DeepCopy(neighbor_received_particles_h); - // nrp_h(0) = 0; - // for (int n = 1; n < NMAX_NEIGHBORS; n++) { - // nrp_h(n) = neighbor_received_particles_[n] + nrp_h(n - 1); - // } - // nrp.DeepCopy(nrp_h); + + // TODO(BRR) DEBUG + // neighbor_received_particles_.DeepCopy(nrp_h); + + auto &x = Get(swarm_position::x::name()).Get(); + auto &y = Get(swarm_position::y::name()).Get(); + auto &z = Get(swarm_position::z::name()).Get(); pmb->par_for( PARTHENON_AUTO_LABEL, 0, newParticlesContext.GetNewParticlesMaxIndex(), @@ -406,13 +403,28 @@ void Swarm::UnloadBuffers_() { const int sid = newParticlesContext.GetNewParticleIndex(n); // Get neighbor id int nid = 0; - // while (n > nrp(nid) - 1) { - while (n > neighbor_received_particles_(nid) - 1) { - nid++; + + if (n >= neighbor_received_particles_(nbmax - 1)) { + nid = nbmax - 1; + } else { + while (n >= neighbor_received_particles_(nid)) { + nid++; + } } - // int bid = (n - nrp(nid)) * particle_size; - int bid = (n - neighbor_received_particles_(nid)) * particle_size; + + // while (n > neighbor_received_particles_(nid) - 1) { + // nid++; + //} + int bid = nid == 0 + ? 
n * particle_size + : (n - neighbor_received_particles_(nid - 1)) * particle_size; const int nbid = neighbor_buffer_index(nid); + printf("[n: %i] nid: %i bid: %i (%i - %i) nbid: %i sid: %i\n", n, nid, bid, n, + neighbor_received_particles_(nid), nbid, sid); + if (bid < 0) { + printf("bid %i!\n", bid); + exit(-1); + } for (int i = 0; i < realPackDim; i++) { vreal(i, sid) = bdvar.recv[nbid](bid); bid++; @@ -421,6 +433,7 @@ void Swarm::UnloadBuffers_() { vint(i, sid) = static_cast(bdvar.recv[nbid](bid)); bid++; } + printf("RECEIVING xyz: %e %e %e\n", x(sid), y(sid), z(sid)); }); } } From a37cad21b2fc3742fb6f0e40cca177b3aebc7abd Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 14 Aug 2024 10:55:02 -0600 Subject: [PATCH 14/21] Clean up --- src/interface/swarm_comms.cpp | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 15553ecd9731..96345433c6da 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -365,33 +365,16 @@ void Swarm::UnloadBuffers_() { const int particle_size = GetParticleDataSize(); auto swarm_d = GetDeviceContext(); - // TODO(BRR) DEBUG - // ParArray1D nrp("nrp_d", NMAX_NEIGHBORS); - // auto nrp_h = nrp.GetHostMirror(); - // nrp_h(0) = 0; - // for (int n = 1; n < NMAX_NEIGHBORS; n++) { - // // nrp_h(n) = neighbor_received_particles_[n] + nrp_h(n - 1); - // nrp_h(n) = neighbor_received_particles_h(n) + nrp_h(n - 1); - //} - // nrp.DeepCopy(nrp_h); - // Change meaning of neighbor_received_particles from particles per neighbor to // cumulative particles per neighbor int val_prev = 0; for (int n = 0; n < nbmax; n++) { - printf("[%i] recvd: %i\n", n, neighbor_received_particles_h(n)); double val_curr = neighbor_received_particles_h(n); neighbor_received_particles_h(n) += val_prev; val_prev += val_curr; - printf("[%i] nrp: %i\n", n, neighbor_received_particles_h(n)); - // neighbor recv: %i\n", n, nrp_h(n), - // neighbor_received_particles_h(n)); } neighbor_received_particles_.DeepCopy(neighbor_received_particles_h); - // TODO(BRR) DEBUG - // neighbor_received_particles_.DeepCopy(nrp_h); - auto &x = Get(swarm_position::x::name()).Get(); auto &y = Get(swarm_position::y::name()).Get(); auto &z = Get(swarm_position::z::name()).Get(); @@ -401,9 +384,8 @@ void Swarm::UnloadBuffers_() { // n is both new particle index and index over buffer values KOKKOS_LAMBDA(const int n) { const int sid = newParticlesContext.GetNewParticleIndex(n); - // Get neighbor id + // Search for neighbor id over cumulative indices int nid = 0; - if (n >= neighbor_received_particles_(nbmax - 1)) { nid = nbmax - 1; } else { @@ -412,19 +394,11 @@ void Swarm::UnloadBuffers_() { } } - // while (n > neighbor_received_particles_(nid) - 1) { - // nid++; - //} + // Convert neighbor id to buffer id int bid = nid == 0 ? 
n * particle_size : (n - neighbor_received_particles_(nid - 1)) * particle_size; const int nbid = neighbor_buffer_index(nid); - printf("[n: %i] nid: %i bid: %i (%i - %i) nbid: %i sid: %i\n", n, nid, bid, n, - neighbor_received_particles_(nid), nbid, sid); - if (bid < 0) { - printf("bid %i!\n", bid); - exit(-1); - } for (int i = 0; i < realPackDim; i++) { vreal(i, sid) = bdvar.recv[nbid](bid); bid++; @@ -433,7 +407,6 @@ void Swarm::UnloadBuffers_() { vint(i, sid) = static_cast(bdvar.recv[nbid](bid)); bid++; } - printf("RECEIVING xyz: %e %e %e\n", x(sid), y(sid), z(sid)); }); } } @@ -465,7 +438,6 @@ bool Swarm::Receive(BoundaryCommSubset phase) { if (bdvar.flag[nb.bufid] == BoundaryStatus::arrived) { bdvar.flag[nb.bufid] = BoundaryStatus::completed; } else if (bdvar.flag[nb.bufid] == BoundaryStatus::waiting) { - // printf("block %i Waiting on %i\n", pmb->gid, nb.bufid); all_boundaries_received = false; } } From 5fe308c41acd84601fe95669d872ba1ec4885e0a Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 14 Aug 2024 11:20:08 -0600 Subject: [PATCH 15/21] finished_transport shouldnt be provided by swarm --- src/interface/swarm.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/interface/swarm.hpp b/src/interface/swarm.hpp index db645cdcb22a..56bc668d0e41 100644 --- a/src/interface/swarm.hpp +++ b/src/interface/swarm.hpp @@ -230,10 +230,6 @@ class Swarm { SwarmVariablePack PackVariables(const std::vector &name, PackIndexMap &vmap); - // Temporarily public - // TODO(BRR) Remove this and require downstream codes make their own boolean? - bool finished_transport; - void LoadBuffers_(); void UnloadBuffers_(); From 8e3ad9ad3abcf3f635a12c43766b559618d61683 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Mon, 19 Aug 2024 15:59:17 -0600 Subject: [PATCH 16/21] Reverting to old manual iterative tasking --- example/particles/particles.cpp | 162 +++++++++++++++++++++++++------- example/particles/particles.hpp | 2 + src/tasks/tasks.hpp | 2 + 3 files changed, 134 insertions(+), 32 deletions(-) diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index a5688e9b1bd8..fac5c72ca710 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -506,10 +506,67 @@ TaskStatus CheckCompletion(MeshBlock *pmb, const Real tf) { } } +// TaskID GetNIncomplete(TaskID dependency_in, TaskList &tl, AllReduce n_incomplete, +// MeshBlock *pmb, const Real tf) { +// using TQ = TaskQualifier; +// +// auto zero_inc = tl.AddTask( +// TQ::once_per_region | TQ::local_sync, dependency_in, +// [](AllReduce *n_inc) { +// n_inc->val = 0; +// return TaskStatus::complete; +// }, +// n_incomplete); +// auto get_n_inc_loc = tl.AddTask(TQ::local_sync, zero_inc, n_incomplete, pmb, tf) +//} + +// Custom step function to allow for looping over MPI-related tasks until complete +TaskListStatus ParticleDriver::Step() { + TaskListStatus status; + // integrator.dt = tm.dt; // TODO(BRR) remove? require first order time integrator? 
+ + BlockList_t &blocks = pmesh->block_list; + auto num_task_lists_executed_independently = blocks.size(); + + // Create all the particles that will be created during the step + status = MakeParticlesCreationTaskCollection().Execute(); + PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!"); + + // Transport particles iteratively until all particles reach final time + status = IterativeTransport(); + // status = MakeParticlesTransportTaskCollection().Execute(); + PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!"); + + // Use a more traditional task list for predictable post-MPI evaluations. + status = MakeFinalizationTaskCollection().Execute(); + PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!"); + + return status; +} + +TaskCollection ParticleDriver::MakeParticlesCreationTaskCollection() const { + TaskCollection tc; + TaskID none(0); + const Real t0 = tm.time; + const BlockList_t &blocks = pmesh->block_list; + + auto num_task_lists_executed_independently = blocks.size(); + TaskRegion &async_region0 = tc.AddRegion(num_task_lists_executed_independently); + for (int i = 0; i < blocks.size(); i++) { + auto &pmb = blocks[i]; + auto &tl = async_region0[i]; + auto create_some_particles = tl.AddTask(none, CreateSomeParticles, pmb.get(), t0); + } + + return tc; +} + TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { using TQ = TaskQualifier; TaskCollection tc; + + // std::cout << tc; TaskID none(0); BlockList_t &blocks = pmesh->block_list; @@ -520,17 +577,25 @@ TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { auto ® = tc.AddRegion(blocks.size()); + // std::cout << tc; + + AllReduce n_incomplete; + for (int i = 0; i < blocks.size(); i++) { printf("i: %i\n", i); auto &pmb = blocks[i]; auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); auto &tl = reg[i]; + printf("tl: %p\n", &tl); // If this task is enrolled then the iterative loop will cycle - auto pushtmp = tl.AddTask(none, TransportParticles, pmb.get(), t0, 0.); + // Add regular task + auto dummy = tl.AddTask(none, TransportParticles, pmb.get(), t0, 0.); + // Add task sublist + // auto dummy = tl.AddTask(none, []() {}); // auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations}); - auto [itl, push] = tl.AddSublist(pushtmp, {i, max_transport_iterations}); + auto [itl, push] = tl.AddSublist(dummy, {i, max_transport_iterations}); auto transport = itl.AddTask(none, TransportParticles, pmb.get(), t0, dt); auto reset_comms = itl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get()); @@ -538,53 +603,86 @@ TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { BoundaryCommSubset::all); auto receive = itl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); - auto complete = itl.AddTask(TQ::global_sync | TQ::completion, receive, - CheckCompletion, pmb.get(), t0 + dt); - } - return tc; -} - -// Custom step function to allow for looping over MPI-related tasks until complete -TaskListStatus ParticleDriver::Step() { - TaskListStatus status; - // integrator.dt = tm.dt; // TODO(BRR) remove? require first order time integrator? 
- - BlockList_t &blocks = pmesh->block_list; - auto num_task_lists_executed_independently = blocks.size(); + auto get_n_incomplete = + GetNIncomplete(receive, itl, &n_incomplete, pmb.get(), t0 + dt); + itl.AddTask(receive, itl, GetNIncomplete, pmb.get(), t0 + dt, &n_incomplete); + auto check_incomplete = + itl.AddTask(TC::completion, get_n_incomplete, CheckNIncomplete, n_incomplete); - // Create all the particles that will be created during the step - status = MakeParticlesCreationTaskCollection().Execute(); - PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!"); + // auto complete = itl.AddTask(TQ::global_sync | TQ::completion, receive, + // CheckCompletion, pmb.get(), t0 + dt); + } - // Transport particles iteratively until all particles reach final time - status = MakeParticlesTransportTaskCollection().Execute(); - PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!"); + std::cout << tc; - // Use a more traditional task list for predictable post-MPI evaluations. - status = MakeFinalizationTaskCollection().Execute(); - PARTHENON_REQUIRE(status == TaskListStatus::complete, "Task list failed!"); + return tc; +} - return status; +TaskStatus CheckCompletion(const BlockList_t &blocks, const Real tf) { + int num_sent_local = 0; + for (auto &block : blocks) { + auto sc = block->meshblock_data.Get()->GetSwarmData(); + auto swarm = sc->Get("my_particles"); + swarm->finished_transport = false; + num_sent_local += swarm->num_particles_sent_; + } + exit(-1); } -TaskCollection ParticleDriver::MakeParticlesCreationTaskCollection() const { +TaskCollection ParticleDriver::IterativeTransportTaskCollection(bool &done) { TaskCollection tc; TaskID none(0); - const Real t0 = tm.time; const BlockList_t &blocks = pmesh->block_list; + const int nblocks = blocks.size(); + const Real t0 = tm.time; + const Real dt = tm.dt; - auto num_task_lists_executed_independently = blocks.size(); - TaskRegion &async_region0 = tc.AddRegion(num_task_lists_executed_independently); - for (int i = 0; i < blocks.size(); i++) { + TaskRegion async_region = tc.AddRegion(nblocks); + for (int i = 0; i < nblocks; i++) { auto &pmb = blocks[i]; - auto &tl = async_region0[i]; - auto create_some_particles = tl.AddTask(none, CreateSomeParticles, pmb.get(), t0); + auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); + auto &tl = async_region[i]; + + auto transport = tl.AddTask(none, TransportParticles, pmb.get(), t0, dt); + auto reset_comms = + tl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get()); + auto send = + tl.AddTask(reset_comms, &SwarmContainer::Send, sc.get(), BoundaryCommSubset::all); + auto receive = + tl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); + } + + TaskRegion sync_region = tc.AddRegion(nblocks); + { + auto &tl = sync_region[0]; + auto check_completion = tl.AddTask(none, CheckCompletion, blocks, t0 + dt); } return tc; } +// TODO(BRR) to be replaced by iterative tasklist machinery +TaskListStatus ParticleDriver::IterativeTransport() const { + TaskListStatus status; + // const Real t0 = tm.time; + // const Real dt = tm.dt; + + // const BlockList_t &blocks = pmesh->block_list; + // const int nblocks = blocks.size(); + + bool transport_done = false; + int n_transport_iter = 0; + int n_transport_iter_max = 1000; + while (!transport_done) { + status = IterativeTransportTaskCollection(transport_done).Execute(); + + n_transport_iter++; + PARTHENON_REQUIRE(n_transport_iter < n_transport_iter_max, + "Too many transport iterations!"); + } +} + 
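// Note on the loop above: transport_done has to come from a global check that every
// particle on every rank has reached t0 + dt. CheckCompletion() as written in this
// patch is still a stub (it calls exit(-1)); the intended shape, roughly, is a
// per-block device reduction over active particles,
//
//   parthenon::par_reduce(..., KOKKOS_LAMBDA(const int n, int &unfinished) {
//     if (swarm_d.IsActive(n) && t(n) < tf) unfinished++;
//   }, Kokkos::Sum<int>(num_unfinished_block));
//
// followed by an MPI_Allreduce of the local sum,
//
//   MPI_Allreduce(&num_unfinished_local, &num_unfinished_global, 1, MPI_INT, MPI_SUM,
//                 MPI_COMM_WORLD);
//
// with transport_done set once the global count reaches zero. This is the form the
// check takes a few patches later as CountNumSent().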
TaskCollection ParticleDriver::MakeFinalizationTaskCollection() const { TaskCollection tc; TaskID none(0); diff --git a/example/particles/particles.hpp b/example/particles/particles.hpp index 21cef09a6879..2724d3d78d12 100644 --- a/example/particles/particles.hpp +++ b/example/particles/particles.hpp @@ -34,6 +34,8 @@ class ParticleDriver : public EvolutionDriver { : EvolutionDriver(pin, app_in, pm), integrator(pin) {} TaskCollection MakeParticlesCreationTaskCollection() const; TaskCollection MakeParticlesTransportTaskCollection() const; + TaskListStatus IterativeTransport() const; + TaskCollection IterativeTransportTaskCollection() const; TaskCollection MakeFinalizationTaskCollection() const; TaskListStatus Step(); diff --git a/src/tasks/tasks.hpp b/src/tasks/tasks.hpp index 91b9e103d6aa..075a5d230058 100644 --- a/src/tasks/tasks.hpp +++ b/src/tasks/tasks.hpp @@ -425,7 +425,9 @@ class TaskList { template std::pair AddSublist(TID &&dep, std::pair minmax_iters) { + printf("AddSublist!\n"); sublists.push_back(std::make_shared(dep, minmax_iters)); + printf(" sublists size: %i\n", sublists.size()); auto &tl = *sublists.back(); tl.SetID(unique_id); return std::make_pair(std::ref(tl), TaskID(tl.last_task)); From 78c79b629695ad9c8686bfade6d53fb7a3684fc9 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Mon, 19 Aug 2024 16:04:43 -0600 Subject: [PATCH 17/21] Still working... --- example/particles/particles.cpp | 117 +++++++++++++++++++------------- example/particles/particles.hpp | 2 +- 2 files changed, 70 insertions(+), 49 deletions(-) diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index fac5c72ca710..cd0c3dc8490c 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -566,71 +566,92 @@ TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { TaskCollection tc; - // std::cout << tc; - TaskID none(0); - BlockList_t &blocks = pmesh->block_list; + //// std::cout << tc; + // TaskID none(0); + // BlockList_t &blocks = pmesh->block_list; - const int max_transport_iterations = 1000; + // const int max_transport_iterations = 1000; - const Real t0 = tm.time; - const Real dt = tm.dt; + // const Real t0 = tm.time; + // const Real dt = tm.dt; - auto ® = tc.AddRegion(blocks.size()); + // auto ® = tc.AddRegion(blocks.size()); + + //// std::cout << tc; + + // AllReduce n_incomplete; + + // for (int i = 0; i < blocks.size(); i++) { + // printf("i: %i\n", i); + // auto &pmb = blocks[i]; + // auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); + // auto &tl = reg[i]; + // printf("tl: %p\n", &tl); + + // // If this task is enrolled then the iterative loop will cycle + // // Add regular task + // auto dummy = tl.AddTask(none, TransportParticles, pmb.get(), t0, 0.); + + // // Add task sublist + // // auto dummy = tl.AddTask(none, []() {}); + // // auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations}); + // auto [itl, push] = tl.AddSublist(dummy, {i, max_transport_iterations}); + // auto transport = itl.AddTask(none, TransportParticles, pmb.get(), t0, dt); + // auto reset_comms = + // itl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get()); + // auto send = itl.AddTask(reset_comms, &SwarmContainer::Send, sc.get(), + // BoundaryCommSubset::all); + // auto receive = + // itl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); + + // auto get_n_incomplete = + // GetNIncomplete(receive, itl, &n_incomplete, pmb.get(), t0 + dt); + // itl.AddTask(receive, itl, GetNIncomplete, pmb.get(), t0 + 
dt, &n_incomplete); + // auto check_incomplete = + // itl.AddTask(TC::completion, get_n_incomplete, CheckNIncomplete, n_incomplete); + + // // auto complete = itl.AddTask(TQ::global_sync | TQ::completion, receive, + // // CheckCompletion, pmb.get(), t0 + dt); + //} // std::cout << tc; - AllReduce n_incomplete; - - for (int i = 0; i < blocks.size(); i++) { - printf("i: %i\n", i); - auto &pmb = blocks[i]; - auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); - auto &tl = reg[i]; - printf("tl: %p\n", &tl); - - // If this task is enrolled then the iterative loop will cycle - // Add regular task - auto dummy = tl.AddTask(none, TransportParticles, pmb.get(), t0, 0.); - - // Add task sublist - // auto dummy = tl.AddTask(none, []() {}); - // auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations}); - auto [itl, push] = tl.AddSublist(dummy, {i, max_transport_iterations}); - auto transport = itl.AddTask(none, TransportParticles, pmb.get(), t0, dt); - auto reset_comms = - itl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get()); - auto send = itl.AddTask(reset_comms, &SwarmContainer::Send, sc.get(), - BoundaryCommSubset::all); - auto receive = - itl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); - - auto get_n_incomplete = - GetNIncomplete(receive, itl, &n_incomplete, pmb.get(), t0 + dt); - itl.AddTask(receive, itl, GetNIncomplete, pmb.get(), t0 + dt, &n_incomplete); - auto check_incomplete = - itl.AddTask(TC::completion, get_n_incomplete, CheckNIncomplete, n_incomplete); - - // auto complete = itl.AddTask(TQ::global_sync | TQ::completion, receive, - // CheckCompletion, pmb.get(), t0 + dt); - } - - std::cout << tc; - return tc; } TaskStatus CheckCompletion(const BlockList_t &blocks, const Real tf) { - int num_sent_local = 0; + int num_unfinished_local = 0; for (auto &block : blocks) { auto sc = block->meshblock_data.Get()->GetSwarmData(); auto swarm = sc->Get("my_particles"); - swarm->finished_transport = false; - num_sent_local += swarm->num_particles_sent_; + // swarm->finished_transport = false; + // num_sent_local += swarm->num_particles_sent_; + int max_active_index = swarm->GetMaxActiveIndex(); + + auto &t = swarm->Get("t").Get(); + + auto swarm_d = swarm->GetDeviceContext(); + + printf("nsl: %i\n", num_unfinished_local); + int num_unfinished_block = 0; + parthenon::par_reduce( + PARTHENON_AUTO_LABEL, 0, max_active_index, + KOKKOS_LAMBDA(const int n, int &num_unfinished) { + if (swarm_d.IsActive(n)) { + if (t(n) < tf) { + num_unfinished++; + } + } + }, + Kokkos::Sum(num_unfinished_block)); + num_unfinished_local += num_unfinished_block; + printf("nul: %i\n", num_unfinished_local); } exit(-1); + return TaskStatus::complete; } -TaskCollection ParticleDriver::IterativeTransportTaskCollection(bool &done) { +TaskCollection ParticleDriver::IterativeTransportTaskCollection(bool &done) const { TaskCollection tc; TaskID none(0); const BlockList_t &blocks = pmesh->block_list; diff --git a/example/particles/particles.hpp b/example/particles/particles.hpp index 2724d3d78d12..b4f4c4060ce4 100644 --- a/example/particles/particles.hpp +++ b/example/particles/particles.hpp @@ -35,7 +35,7 @@ class ParticleDriver : public EvolutionDriver { TaskCollection MakeParticlesCreationTaskCollection() const; TaskCollection MakeParticlesTransportTaskCollection() const; TaskListStatus IterativeTransport() const; - TaskCollection IterativeTransportTaskCollection() const; + TaskCollection IterativeTransportTaskCollection(bool &) const; TaskCollection 
MakeFinalizationTaskCollection() const; TaskListStatus Step(); From 867b46592127fa705c2484d49cf7b7d50fc476ef Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 20 Aug 2024 09:44:37 -0600 Subject: [PATCH 18/21] Starting to make progress... --- example/particles/particles.cpp | 44 +++++++++++++++++++++++++++------ example/particles/particles.hpp | 2 +- src/tasks/tasks.hpp | 2 ++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index cd0c3dc8490c..6b3c04ec1d83 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -230,6 +230,7 @@ TaskStatus DepositParticles(MeshBlock *pmb) { } TaskStatus CreateSomeParticles(MeshBlock *pmb, const Real t0) { + printf("%s:%i\n", __FILE__, __LINE__); PARTHENON_INSTRUMENT auto pkg = pmb->packages.Get("particles_package"); @@ -336,11 +337,13 @@ TaskStatus CreateSomeParticles(MeshBlock *pmb, const Real t0) { rng_pool.free_state(rng_gen); }); } + printf("%s:%i\n", __FILE__, __LINE__); return TaskStatus::complete; } TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { + printf("%s:%i\n", __FILE__, __LINE__); PARTHENON_INSTRUMENT if (dt < 1.e-10) { printf("dt: %e\n", dt); @@ -619,7 +622,7 @@ TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { return tc; } -TaskStatus CheckCompletion(const BlockList_t &blocks, const Real tf) { +TaskStatus CountNumSent(const BlockList_t &blocks, const Real tf, bool *done) { int num_unfinished_local = 0; for (auto &block : blocks) { auto sc = block->meshblock_data.Get()->GetSwarmData(); @@ -647,11 +650,27 @@ TaskStatus CheckCompletion(const BlockList_t &blocks, const Real tf) { num_unfinished_local += num_unfinished_block; printf("nul: %i\n", num_unfinished_local); } - exit(-1); + + int num_unfinished_global = num_unfinished_local; + printf("num_unfinished_global = %i\n", num_unfinished_global); +#ifdef MPI_PARALLEL + MPI_Allreduce(&num_unfinished_local, &num_unfinished_global, 1, MPI_INT, MPI_SUM, + MPI_COMM_WORLD); +#endif // MPI_PARALLEL + + if (num_unfinished_global > 0) { + printf("not done\n"); + *done = false; + } else { + printf("done\n"); + *done = true; + } + return TaskStatus::complete; } -TaskCollection ParticleDriver::IterativeTransportTaskCollection(bool &done) const { +TaskCollection ParticleDriver::IterativeTransportTaskCollection( + bool *done) const { // bool &done) const { TaskCollection tc; TaskID none(0); const BlockList_t &blocks = pmesh->block_list; @@ -659,7 +678,7 @@ TaskCollection ParticleDriver::IterativeTransportTaskCollection(bool &done) cons const Real t0 = tm.time; const Real dt = tm.dt; - TaskRegion async_region = tc.AddRegion(nblocks); + TaskRegion &async_region = tc.AddRegion(nblocks); for (int i = 0; i < nblocks; i++) { auto &pmb = blocks[i]; auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); @@ -673,12 +692,14 @@ TaskCollection ParticleDriver::IterativeTransportTaskCollection(bool &done) cons auto receive = tl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); } + printf("%s:%i\n", __FILE__, __LINE__); - TaskRegion sync_region = tc.AddRegion(nblocks); + TaskRegion &sync_region = tc.AddRegion(1); { auto &tl = sync_region[0]; - auto check_completion = tl.AddTask(none, CheckCompletion, blocks, t0 + dt); + auto check_completion = tl.AddTask(none, CountNumSent, blocks, t0 + dt, done); } + printf("%s:%i\n", __FILE__, __LINE__); return tc; } @@ -694,14 +715,21 @@ TaskListStatus ParticleDriver::IterativeTransport() const { 
bool transport_done = false; int n_transport_iter = 0; - int n_transport_iter_max = 1000; + int n_transport_iter_max = 10; + printf("%s:%i\n", __FILE__, __LINE__); while (!transport_done) { - status = IterativeTransportTaskCollection(transport_done).Execute(); + printf("%s:%i\n", __FILE__, __LINE__); + status = IterativeTransportTaskCollection(&transport_done) + .Execute(); // transport_done).Execute(); + printf("done? %i\n", static_cast(transport_done)); + printf("%s:%i\n", __FILE__, __LINE__); n_transport_iter++; PARTHENON_REQUIRE(n_transport_iter < n_transport_iter_max, "Too many transport iterations!"); } + + return status; } TaskCollection ParticleDriver::MakeFinalizationTaskCollection() const { diff --git a/example/particles/particles.hpp b/example/particles/particles.hpp index b4f4c4060ce4..e2df9400ed1d 100644 --- a/example/particles/particles.hpp +++ b/example/particles/particles.hpp @@ -35,7 +35,7 @@ class ParticleDriver : public EvolutionDriver { TaskCollection MakeParticlesCreationTaskCollection() const; TaskCollection MakeParticlesTransportTaskCollection() const; TaskListStatus IterativeTransport() const; - TaskCollection IterativeTransportTaskCollection(bool &) const; + TaskCollection IterativeTransportTaskCollection(bool *done) const; // bool &) const; TaskCollection MakeFinalizationTaskCollection() const; TaskListStatus Step(); diff --git a/src/tasks/tasks.hpp b/src/tasks/tasks.hpp index 075a5d230058..47589b142215 100644 --- a/src/tasks/tasks.hpp +++ b/src/tasks/tasks.hpp @@ -546,6 +546,8 @@ class TaskRegion { public: TaskRegion() = delete; + TaskRegion(const TaskRegion&) = delete; // Prevent copying TaskRegions during AddRegion + // calls which is a segfault explicit TaskRegion(const int num_lists) : task_lists(num_lists) { for (int i = 0; i < num_lists; i++) task_lists[i].SetID(i); From b0ed97312af25c132cfe549538c0a440c1a9c7ed Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 20 Aug 2024 09:49:56 -0600 Subject: [PATCH 19/21] Cleaned up --- example/particles/particles.cpp | 206 +++++++++++--------------------- example/particles/particles.hpp | 2 +- src/interface/swarm_comms.cpp | 3 - 3 files changed, 69 insertions(+), 142 deletions(-) diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index 6b3c04ec1d83..217881dadf34 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -230,7 +230,6 @@ TaskStatus DepositParticles(MeshBlock *pmb) { } TaskStatus CreateSomeParticles(MeshBlock *pmb, const Real t0) { - printf("%s:%i\n", __FILE__, __LINE__); PARTHENON_INSTRUMENT auto pkg = pmb->packages.Get("particles_package"); @@ -337,19 +336,12 @@ TaskStatus CreateSomeParticles(MeshBlock *pmb, const Real t0) { rng_pool.free_state(rng_gen); }); } - printf("%s:%i\n", __FILE__, __LINE__); return TaskStatus::complete; } TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { - printf("%s:%i\n", __FILE__, __LINE__); PARTHENON_INSTRUMENT - if (dt < 1.e-10) { - printf("dt: %e\n", dt); - } - - printf("TransportParticles gid: %i\n", pmb->gid); auto swarm = pmb->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); auto pkg = pmb->packages.Get("particles_package"); @@ -381,8 +373,6 @@ TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { auto swarm_d = swarm->GetDeviceContext(); - printf("HERE! orbit? %i\n", orbiting_particles); - // Simple particle push: push particles half the smallest zone width until they have // traveled one integrator timestep's worth of time. 
Particles orbit the origin. if (orbiting_particles) { @@ -472,57 +462,10 @@ TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { } }); } - printf("Done transporting\n"); return TaskStatus::complete; } -TaskStatus CheckCompletion(MeshBlock *pmb, const Real tf) { - printf("CheckCompletion gid: %i\n", pmb->gid); - auto swarm = pmb->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); - - int max_active_index = swarm->GetMaxActiveIndex(); - - auto &t = swarm->Get("t").Get(); - - auto swarm_d = swarm->GetDeviceContext(); - - int num_unfinished = 0; - parthenon::par_reduce( - PARTHENON_AUTO_LABEL, 0, max_active_index, - KOKKOS_LAMBDA(const int n, int &num_unfinished) { - if (swarm_d.IsActive(n)) { - if (t(n) < tf) { - num_unfinished++; - } - } - }, - Kokkos::Sum(num_unfinished)); - printf("num_unfinished: %i\n", num_unfinished); - - if (num_unfinished > 0) { - printf("ITERATE! %i\n", pmb->gid); - return TaskStatus::iterate; - } else { - printf("COMPLETE! %i\n", pmb->gid); - return TaskStatus::complete; - } -} - -// TaskID GetNIncomplete(TaskID dependency_in, TaskList &tl, AllReduce n_incomplete, -// MeshBlock *pmb, const Real tf) { -// using TQ = TaskQualifier; -// -// auto zero_inc = tl.AddTask( -// TQ::once_per_region | TQ::local_sync, dependency_in, -// [](AllReduce *n_inc) { -// n_inc->val = 0; -// return TaskStatus::complete; -// }, -// n_incomplete); -// auto get_n_inc_loc = tl.AddTask(TQ::local_sync, zero_inc, n_incomplete, pmb, tf) -//} - // Custom step function to allow for looping over MPI-related tasks until complete TaskListStatus ParticleDriver::Step() { TaskListStatus status; @@ -564,78 +507,83 @@ TaskCollection ParticleDriver::MakeParticlesCreationTaskCollection() const { return tc; } -TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { - using TQ = TaskQualifier; - - TaskCollection tc; - - //// std::cout << tc; - // TaskID none(0); - // BlockList_t &blocks = pmesh->block_list; - - // const int max_transport_iterations = 1000; - - // const Real t0 = tm.time; - // const Real dt = tm.dt; - - // auto ® = tc.AddRegion(blocks.size()); - - //// std::cout << tc; - - // AllReduce n_incomplete; - - // for (int i = 0; i < blocks.size(); i++) { - // printf("i: %i\n", i); - // auto &pmb = blocks[i]; - // auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); - // auto &tl = reg[i]; - // printf("tl: %p\n", &tl); - - // // If this task is enrolled then the iterative loop will cycle - // // Add regular task - // auto dummy = tl.AddTask(none, TransportParticles, pmb.get(), t0, 0.); - - // // Add task sublist - // // auto dummy = tl.AddTask(none, []() {}); - // // auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations}); - // auto [itl, push] = tl.AddSublist(dummy, {i, max_transport_iterations}); - // auto transport = itl.AddTask(none, TransportParticles, pmb.get(), t0, dt); - // auto reset_comms = - // itl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get()); - // auto send = itl.AddTask(reset_comms, &SwarmContainer::Send, sc.get(), - // BoundaryCommSubset::all); - // auto receive = - // itl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); - - // auto get_n_incomplete = - // GetNIncomplete(receive, itl, &n_incomplete, pmb.get(), t0 + dt); - // itl.AddTask(receive, itl, GetNIncomplete, pmb.get(), t0 + dt, &n_incomplete); - // auto check_incomplete = - // itl.AddTask(TC::completion, get_n_incomplete, CheckNIncomplete, n_incomplete); - - // // auto complete = itl.AddTask(TQ::global_sync | 
TQ::completion, receive, - // // CheckCompletion, pmb.get(), t0 + dt); - //} - - // std::cout << tc; - - return tc; -} +// TODO(BRR) To be used in the future, currently there appears to be a bug in iterative +// tasking +// TaskStatus CheckCompletion(MeshBlock *pmb, const Real tf) { +// auto swarm = pmb->meshblock_data.Get()->GetSwarmData()->Get("my_particles"); +// +// int max_active_index = swarm->GetMaxActiveIndex(); +// +// auto &t = swarm->Get("t").Get(); +// +// auto swarm_d = swarm->GetDeviceContext(); +// +// int num_unfinished = 0; +// parthenon::par_reduce( +// PARTHENON_AUTO_LABEL, 0, max_active_index, +// KOKKOS_LAMBDA(const int n, int &num_unfinished) { +// if (swarm_d.IsActive(n)) { +// if (t(n) < tf) { +// num_unfinished++; +// } +// } +// }, +// Kokkos::Sum(num_unfinished)); +// +// if (num_unfinished > 0) { +// return TaskStatus::iterate; +// } else { +// return TaskStatus::complete; +// } +//} +// TaskCollection ParticleDriver::MakeParticlesTransportTaskCollection() const { +// using TQ = TaskQualifier; +// +// TaskCollection tc; +// +// TaskID none(0); +// BlockList_t &blocks = pmesh->block_list; +// +// const int max_transport_iterations = 1000; +// +// const Real t0 = tm.time; +// const Real dt = tm.dt; +// +// auto ® = tc.AddRegion(blocks.size()); +// +// for (int i = 0; i < blocks.size(); i++) { +// auto &pmb = blocks[i]; +// auto &sc = pmb->meshblock_data.Get()->GetSwarmData(); +// auto &tl = reg[i]; +// +// // Add task sublist +// auto [itl, push] = tl.AddSublist(none, {i, max_transport_iterations}); +// auto transport = itl.AddTask(none, TransportParticles, pmb.get(), t0, dt); +// auto reset_comms = +// itl.AddTask(transport, &SwarmContainer::ResetCommunication, sc.get()); +// auto send = itl.AddTask(reset_comms, &SwarmContainer::Send, sc.get(), +// BoundaryCommSubset::all); +// auto receive = +// itl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); +// +// auto complete = itl.AddTask(TQ::global_sync | TQ::completion, receive, +// CheckCompletion, pmb.get(), t0 + dt); +// } +// +// return tc; +//} TaskStatus CountNumSent(const BlockList_t &blocks, const Real tf, bool *done) { int num_unfinished_local = 0; for (auto &block : blocks) { auto sc = block->meshblock_data.Get()->GetSwarmData(); auto swarm = sc->Get("my_particles"); - // swarm->finished_transport = false; - // num_sent_local += swarm->num_particles_sent_; int max_active_index = swarm->GetMaxActiveIndex(); auto &t = swarm->Get("t").Get(); auto swarm_d = swarm->GetDeviceContext(); - printf("nsl: %i\n", num_unfinished_local); int num_unfinished_block = 0; parthenon::par_reduce( PARTHENON_AUTO_LABEL, 0, max_active_index, @@ -648,29 +596,24 @@ TaskStatus CountNumSent(const BlockList_t &blocks, const Real tf, bool *done) { }, Kokkos::Sum(num_unfinished_block)); num_unfinished_local += num_unfinished_block; - printf("nul: %i\n", num_unfinished_local); } int num_unfinished_global = num_unfinished_local; - printf("num_unfinished_global = %i\n", num_unfinished_global); #ifdef MPI_PARALLEL MPI_Allreduce(&num_unfinished_local, &num_unfinished_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); #endif // MPI_PARALLEL if (num_unfinished_global > 0) { - printf("not done\n"); *done = false; } else { - printf("done\n"); *done = true; } return TaskStatus::complete; } -TaskCollection ParticleDriver::IterativeTransportTaskCollection( - bool *done) const { // bool &done) const { +TaskCollection ParticleDriver::IterativeTransportTaskCollection(bool *done) const { TaskCollection tc; TaskID none(0); const 
BlockList_t &blocks = pmesh->block_list; @@ -692,14 +635,12 @@ TaskCollection ParticleDriver::IterativeTransportTaskCollection( auto receive = tl.AddTask(send, &SwarmContainer::Receive, sc.get(), BoundaryCommSubset::all); } - printf("%s:%i\n", __FILE__, __LINE__); TaskRegion &sync_region = tc.AddRegion(1); { auto &tl = sync_region[0]; auto check_completion = tl.AddTask(none, CountNumSent, blocks, t0 + dt, done); } - printf("%s:%i\n", __FILE__, __LINE__); return tc; } @@ -707,22 +648,11 @@ TaskCollection ParticleDriver::IterativeTransportTaskCollection( // TODO(BRR) to be replaced by iterative tasklist machinery TaskListStatus ParticleDriver::IterativeTransport() const { TaskListStatus status; - // const Real t0 = tm.time; - // const Real dt = tm.dt; - - // const BlockList_t &blocks = pmesh->block_list; - // const int nblocks = blocks.size(); - bool transport_done = false; int n_transport_iter = 0; - int n_transport_iter_max = 10; - printf("%s:%i\n", __FILE__, __LINE__); + int n_transport_iter_max = 1000; while (!transport_done) { - printf("%s:%i\n", __FILE__, __LINE__); - status = IterativeTransportTaskCollection(&transport_done) - .Execute(); // transport_done).Execute(); - printf("done? %i\n", static_cast(transport_done)); - printf("%s:%i\n", __FILE__, __LINE__); + status = IterativeTransportTaskCollection(&transport_done).Execute(); n_transport_iter++; PARTHENON_REQUIRE(n_transport_iter < n_transport_iter_max, diff --git a/example/particles/particles.hpp b/example/particles/particles.hpp index e2df9400ed1d..705572675247 100644 --- a/example/particles/particles.hpp +++ b/example/particles/particles.hpp @@ -35,7 +35,7 @@ class ParticleDriver : public EvolutionDriver { TaskCollection MakeParticlesCreationTaskCollection() const; TaskCollection MakeParticlesTransportTaskCollection() const; TaskListStatus IterativeTransport() const; - TaskCollection IterativeTransportTaskCollection(bool *done) const; // bool &) const; + TaskCollection IterativeTransportTaskCollection(bool *done) const; TaskCollection MakeFinalizationTaskCollection() const; TaskListStatus Step(); diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 96345433c6da..01a505b56f16 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -296,7 +296,6 @@ void Swarm::LoadBuffers_() { bdvar.send[bufid](buffer_index) = static_cast(vint(i, n)); buffer_index++; } - printf("SENDING xyz: %e %e %e\n", x(n), y(n), z(n)); } } }); @@ -306,7 +305,6 @@ void Swarm::LoadBuffers_() { } void Swarm::Send(BoundaryCommSubset phase) { - printf("Swarm::Send gid: %i\n", GetBlockPointer()->gid); auto pmb = GetBlockPointer(); const int nneighbor = pmb->neighbors.size(); auto swarm_d = GetDeviceContext(); @@ -412,7 +410,6 @@ void Swarm::UnloadBuffers_() { } bool Swarm::Receive(BoundaryCommSubset phase) { - // printf("Swarm::Receive gid: %i\n", GetBlockPointer()->gid); auto pmb = GetBlockPointer(); const int nneighbor = pmb->neighbors.size(); From c2ef49dbd1aa48f9b341803ac7971099f5dbd82a Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 20 Aug 2024 09:54:25 -0600 Subject: [PATCH 20/21] format --- src/tasks/tasks.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tasks/tasks.hpp b/src/tasks/tasks.hpp index 47589b142215..d432f0a7ea21 100644 --- a/src/tasks/tasks.hpp +++ b/src/tasks/tasks.hpp @@ -425,9 +425,7 @@ class TaskList { template std::pair AddSublist(TID &&dep, std::pair minmax_iters) { - printf("AddSublist!\n"); sublists.push_back(std::make_shared(dep, minmax_iters)); - 
printf(" sublists size: %i\n", sublists.size()); auto &tl = *sublists.back(); tl.SetID(unique_id); return std::make_pair(std::ref(tl), TaskID(tl.last_task)); @@ -546,8 +544,8 @@ class TaskRegion { public: TaskRegion() = delete; - TaskRegion(const TaskRegion&) = delete; // Prevent copying TaskRegions during AddRegion - // calls which is a segfault + TaskRegion(const TaskRegion &) = delete; // Prevent copying TaskRegions during AddRegion + // calls which is a segfault explicit TaskRegion(const int num_lists) : task_lists(num_lists) { for (int i = 0; i < num_lists; i++) task_lists[i].SetID(i); From 012ee54525ace83381bcd3ff279078ae1dafeb4c Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 20 Aug 2024 09:59:31 -0600 Subject: [PATCH 21/21] A few leftover print statements --- example/particles/particles.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index 217881dadf34..0797d8b7eb8e 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -444,9 +444,6 @@ TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { y(n) += v(1, n) * dt_push; z(n) += v(2, n) * dt_push; t(n) += dt_push; - // if (n < 3) { - // printf("[%i] t: %e xyz: %e %e %e\n", n, t(n), x(n), y(n), z(n)); - //} bool on_current_mesh_block = true; // This call is required to trigger internal boundary condition machinery @@ -469,7 +466,9 @@ TaskStatus TransportParticles(MeshBlock *pmb, const Real t0, const Real dt) { // Custom step function to allow for looping over MPI-related tasks until complete TaskListStatus ParticleDriver::Step() { TaskListStatus status; - // integrator.dt = tm.dt; // TODO(BRR) remove? require first order time integrator? + + PARTHENON_REQUIRE(integrator.nstages == 1, + "Only first order time integration supported!"); BlockList_t &blocks = pmesh->block_list; auto num_task_lists_executed_independently = blocks.size();
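
The TaskRegion change in patch 18 above deserves a note: deleting the copy
constructor turns an accidental by-value binding of TaskCollection::AddRegion()'s
return value (which copies the freshly built region out of the collection, noted in
that patch as a segfault at execution time) into a compile error, so callers must
bind by reference, TaskRegion &region = tc.AddRegion(n). A minimal self-contained
illustration with stand-in Collection/Region types, not Parthenon's:

    #include <memory>
    #include <vector>

    // Stand-in types just to show the effect of deleting the copy constructor
    struct Region {
      explicit Region(int num_lists) : task_lists(num_lists) {}
      Region(const Region &) = delete; // as in patch 18: accidental copies no longer compile
      Region &operator=(const Region &) = delete;
      std::vector<int> task_lists;
    };

    struct Collection {
      Region &AddRegion(int num_lists) {
        regions.push_back(std::make_unique<Region>(num_lists));
        return *regions.back();
      }
      std::vector<std::unique_ptr<Region>> regions;
    };

    int main() {
      Collection tc;
      Region &ok = tc.AddRegion(4);    // binds to the region owned by the collection
      (void)ok;
      // Region bad = tc.AddRegion(4); // would copy the region out; now a compile error
      return 0;
    }

This is what forces the example driver to write TaskRegion &async_region =
tc.AddRegion(nblocks) in place of the by-value form that patch 18 replaces.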