Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor how we interface with performance instrumentation #969

Merged
merged 19 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Current develop

### Added (new features/APIs/variables/...)
- [[PR 969]](https://github.com/parthenon-hpc-lab/parthenon/pull/969) New macro-based auto-naming of profiling regions and kernels
- [[PR 981]](https://github.com/parthenon-hpc-lab/parthenon/pull/981) Add IndexSplit
- [[PR 983]](https://github.com/parthenon-hpc-lab/parthenon/pull/983) Add Contains to SparsePack
- [[PR 968]](https://github.com/parthenon-hpc-lab/parthenon/pull/968) Add per package registration of boundary conditions
Expand Down
12 changes: 5 additions & 7 deletions benchmarks/burgers/burgers_package.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ void CalculateDerived(MeshData<Real> *md) {
size_t scratch_size = 0;
constexpr int scratch_level = 0;
parthenon::par_for_outer(
DEFAULT_OUTER_LOOP_PATTERN, "CalculateDerived", DevExecSpace(), scratch_size,
DEFAULT_OUTER_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), scratch_size,
scratch_level, 0, nblocks - 1, kb.s, kb.e, jb.s, jb.e,
KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k, const int j) {
Real *out = &v(b, 0, k, j, 0);
Expand All @@ -169,7 +169,7 @@ void CalculateDerived(MeshData<Real> *md) {

// provide the routine that estimates a stable timestep for this package
Real EstimateTimestepMesh(MeshData<Real> *md) {
Kokkos::Profiling::pushRegion("Task_burgers_EstimateTimestepMesh");
PARTHENON_INSTRUMENT
Mesh *pm = md->GetMeshPointer();
IndexRange ib = md->GetBoundsI(IndexDomain::interior);
IndexRange jb = md->GetBoundsJ(IndexDomain::interior);
Expand Down Expand Up @@ -197,14 +197,13 @@ Real EstimateTimestepMesh(MeshData<Real> *md) {
},
Kokkos::Min<Real>(min_dt));

Kokkos::Profiling::popRegion(); // Task_burgers_EstimateTimestepMesh
return cfl * min_dt;
}

TaskStatus CalculateFluxes(MeshData<Real> *md) {
using parthenon::ScratchPad1D;
using parthenon::team_mbr_t;
Kokkos::Profiling::pushRegion("Task_burgers_CalculateFluxes");
PARTHENON_INSTRUMENT

auto pm = md->GetParentPointer();
const int ndim = pm->ndim;
Expand Down Expand Up @@ -236,7 +235,7 @@ TaskStatus CalculateFluxes(MeshData<Real> *md) {
size_t scratch_size = 0;
constexpr int scratch_level = 0;
parthenon::par_for_outer(
DEFAULT_OUTER_LOOP_PATTERN, "burgers::reconstruction", DevExecSpace(), scratch_size,
DEFAULT_OUTER_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), scratch_size,
scratch_level, 0, nblocks - 1, kb.s - dk, kb.e + dk, jb.s - dj, jb.e + dj,
KOKKOS_LAMBDA(team_mbr_t member, const int b, const int k, const int j) {
bool xrec = (k >= kb.s && k <= kb.e) && (j >= jb.s && j <= jb.e);
Expand Down Expand Up @@ -307,7 +306,7 @@ TaskStatus CalculateFluxes(MeshData<Real> *md) {
// now we'll solve the Riemann problems to get fluxes
scratch_size = 2 * ScratchPad1D<Real>::shmem_size(ib.e + 1);
parthenon::par_for_outer(
DEFAULT_OUTER_LOOP_PATTERN, "burgers::reconstruction", DevExecSpace(), scratch_size,
DEFAULT_OUTER_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), scratch_size,
scratch_level, 0, nblocks - 1, kb.s, kb.e + dk, jb.s, jb.e + dj,
KOKKOS_LAMBDA(team_mbr_t member, const int b, const int k, const int j) {
bool xflux = (k <= kb.e && j <= jb.e);
Expand Down Expand Up @@ -402,7 +401,6 @@ TaskStatus CalculateFluxes(MeshData<Real> *md) {
}
});

Kokkos::Profiling::popRegion(); // Task_burgers_CalculateFluxes
return TaskStatus::complete;
}

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/burgers/parthenon_app_inputs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
const auto num_vars = q.GetDim(4);

pmb->par_for(
"Burgers::ProblemGenerator", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int k, const int j, const int i) {
const Real x = coords.Xc<1>(i);
const Real y = coords.Xc<2>(j);
Expand Down
29 changes: 29 additions & 0 deletions doc/sphinx/src/instrumentation.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
.. _instrumentation:

Performance Instrumentation
===========================

Parthenon provides several macros that make instrumenting your code simple. For now,
these macros instantiate Kokkos profiling regions via calls to
``Kokkos::Profiling::pushRegion`` and ``Kokkos::Profiling::popRegion``, meaning all the
Kokkos profiling tools should work straightforwardly with Parthenon-based applications.

- ``PARTHENON_INSTRUMENT``: Instantiates an object that pushes a profiling region on
construction and pops the region on destruction. The name of the region is
auto-generated and takes the form ``"file_name::line_number::function_name"``. The region
being profiled is controlled by invoking the macro at the appropriate scope.
- ``PARTHENON_INSTRUMENT_REGION(name)``: Same as ``PARTHENON_INSTRUMENT``, but uses the
provided name instead of the auto-generated name.
- ``PARTHENON_INSTRUMENT_REGION_PUSH``: A trivial wrapper around ``pushRegion`` where
the name is auto-generated as above.
- ``PARTHENON_INSTRUMENT_REGION_POP``: A trivial wrapper around ``popRegion``.

In addition to these macros, Parthenon provides the ``PARTHENON_AUTO_LABEL`` macro which
can be used to provide a label to kernels (e.g. through the various ``par_for``
functions). The auto-generated label takes the same form as described above.

Though not required, the use of the auto-generated names is highly recommended. In
addition to avoiding possible name collisions, the auto-generated names follow a simple
structure that makes profiling results easy to post-process and analyze. For
example, the ``process_timer.py`` script that ships with Parthenon post-processes the
output of the Kokkos simple kernel timer to provide a convenient view of the data.
34 changes: 17 additions & 17 deletions example/advection/advection_package.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,7 @@ AmrTag CheckRefinement(MeshBlockData<Real> *rc) {

typename Kokkos::MinMax<Real>::value_type minmax;
pmb->par_reduce(
"advection check refinement", 0, v.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
ib.e,
PARTHENON_AUTO_LABEL, 0, v.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int n, const int k, const int j, const int i,
typename Kokkos::MinMax<Real>::value_type &lminmax) {
lminmax.min_val =
Expand Down Expand Up @@ -287,7 +286,7 @@ void PreFill(MeshBlockData<Real> *rc) {
const int out = imap.get("one_minus_advected").first;
const auto num_vars = rc->Get("advected").data.GetDim(4);
pmb->par_for(
"advection_package::PreFill", 0, num_vars - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
PARTHENON_AUTO_LABEL, 0, num_vars - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) {
v(out + n, k, j, i) = 1.0 - v(in + n, k, j, i);
});
Expand All @@ -311,7 +310,7 @@ void SquareIt(MeshBlockData<Real> *rc) {
const int out = imap.get("one_minus_advected_sq").first;
const auto num_vars = rc->Get("advected").data.GetDim(4);
pmb->par_for(
"advection_package::SquareIt", 0, num_vars - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
PARTHENON_AUTO_LABEL, 0, num_vars - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) {
v(out + n, k, j, i) = v(in + n, k, j, i) * v(in + n, k, j, i);
});
Expand All @@ -328,8 +327,8 @@ void SquareIt(MeshBlockData<Real> *rc) {
if (profile == "smooth_gaussian") {
const auto &advected = rc->Get("advected").data;
pmb->par_for(
"advection_package::SquareIt bval check", 0, num_vars - 1, kb.s, kb.e, jb.s, jb.e,
ib.s, ib.e, KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) {
PARTHENON_AUTO_LABEL, 0, num_vars - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) {
PARTHENON_REQUIRE(advected(n, k, j, i) != 0.0,
"Advected not properly initialized.");
});
Expand Down Expand Up @@ -364,8 +363,8 @@ void PostFill(MeshBlockData<Real> *rc) {
const int out37 = imap.get("one_minus_sqrt_one_minus_advected_sq_37").first;
const auto num_vars = rc->Get("advected").data.GetDim(4);
pmb->par_for(
"advection_package::PostFill", 0, num_vars - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
ib.e, KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) {
PARTHENON_AUTO_LABEL, 0, num_vars - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) {
v(out12 + n, k, j, i) = 1.0 - sqrt(v(in + n, k, j, i));
v(out37 + n, k, j, i) = 1.0 - v(out12 + n, k, j, i);
});
Expand Down Expand Up @@ -398,7 +397,8 @@ Real AdvectionHst(MeshData<Real> *md) {
const bool volume_weighting = std::is_same<T, Kokkos::Sum<Real, HostExecSpace>>::value;

pmb->par_reduce(
"AdvectionHst", 0, advected_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
PARTHENON_AUTO_LABEL, 0, advected_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lresult) {
const auto &coords = advected_pack.GetCoords(b);
// `join` is a function of the Kokkos::ReducerConcept that allows using the same
Expand Down Expand Up @@ -429,7 +429,7 @@ Real EstimateTimestepBlock(MeshBlockData<Real> *rc) {
// this is obviously overkill for this constant velocity problem
Real min_dt;
pmb->par_reduce(
"advection_package::EstimateTimestep", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int k, const int j, const int i, Real &lmin_dt) {
if (vx != 0.0)
lmin_dt = std::min(lmin_dt, coords.Dxc<X1DIR>(k, j, i) / std::abs(vx));
Expand All @@ -449,7 +449,7 @@ Real EstimateTimestepBlock(MeshBlockData<Real> *rc) {
TaskStatus CalculateFluxes(std::shared_ptr<MeshBlockData<Real>> &rc) {
using parthenon::MetadataFlag;

Kokkos::Profiling::pushRegion("Task_Advection_CalculateFluxes");
PARTHENON_INSTRUMENT
auto pmb = rc->GetBlockPointer();

IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
Expand All @@ -476,8 +476,8 @@ TaskStatus CalculateFluxes(std::shared_ptr<MeshBlockData<Real>> &rc) {
size_t scratch_size_in_bytes = parthenon::ScratchPad2D<Real>::shmem_size(nvar, nx1);
// get x-fluxes
pmb->par_for_outer(
"x1 flux", 2 * scratch_size_in_bytes, scratch_level, kb.s, kb.e, jb.s, jb.e,
KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int k, const int j) {
PARTHENON_AUTO_LABEL, 2 * scratch_size_in_bytes, scratch_level, kb.s, kb.e, jb.s,
jb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int k, const int j) {
parthenon::ScratchPad2D<Real> ql(member.team_scratch(scratch_level), nvar, nx1);
parthenon::ScratchPad2D<Real> qr(member.team_scratch(scratch_level), nvar, nx1);
// get reconstructed state on faces
Expand Down Expand Up @@ -509,8 +509,8 @@ TaskStatus CalculateFluxes(std::shared_ptr<MeshBlockData<Real>> &rc) {
// get y-fluxes
if (pmb->pmy_mesh->ndim >= 2) {
pmb->par_for_outer(
"x2 flux", 3 * scratch_size_in_bytes, scratch_level, kb.s, kb.e, jb.s, jb.e + 1,
KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int k, const int j) {
PARTHENON_AUTO_LABEL, 3 * scratch_size_in_bytes, scratch_level, kb.s, kb.e, jb.s,
jb.e + 1, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int k, const int j) {
// the overall algorithm/use of scratch pad here is clearly inefficient and kept
// just for demonstration purposes. The key point is that we cannot reuse
// reconstructed arrays for different `j` with `j` being part of the outer
Expand Down Expand Up @@ -552,7 +552,8 @@ TaskStatus CalculateFluxes(std::shared_ptr<MeshBlockData<Real>> &rc) {
// get z-fluxes
if (pmb->pmy_mesh->ndim == 3) {
pmb->par_for_outer(
"x3 flux", 3 * scratch_size_in_bytes, scratch_level, kb.s, kb.e + 1, jb.s, jb.e,
PARTHENON_AUTO_LABEL, 3 * scratch_size_in_bytes, scratch_level, kb.s, kb.e + 1,
jb.s, jb.e,
KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int k, const int j) {
// the overall algorithm/use of scratch pad here is clearly inefficient and kept
// just for demonstration purposes. The key point is that we cannot reuse
Expand Down Expand Up @@ -592,7 +593,6 @@ TaskStatus CalculateFluxes(std::shared_ptr<MeshBlockData<Real>> &rc) {
});
}

Kokkos::Profiling::popRegion(); // Task_Advection_CalculateFluxes
return TaskStatus::complete;
}

Expand Down
5 changes: 2 additions & 3 deletions example/advection/parthenon_app_inputs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
if (profile == "block") profile_type = 3;

pmb->par_for(
"Advection::ProblemGenerator", 0, num_vars - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
PARTHENON_AUTO_LABEL, 0, num_vars - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) {
if (profile_type == 0) {
Real x = cos_a2 * (coords.Xc<1>(i) * cos_a3 + coords.Xc<2>(j) * sin_a3) +
Expand All @@ -99,8 +99,7 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
// initialize some arbitrary cells in the first block that move in all 6 directions
if (profile_type == 3 && block_id == 0) {
pmb->par_for(
"Advection::ProblemGenerator bvals test", 0, 1,
KOKKOS_LAMBDA(const int /*unused*/) {
PARTHENON_AUTO_LABEL, 0, 1, KOKKOS_LAMBDA(const int /*unused*/) {
q(idx_adv, 4, 4, 4) = 10.0;
q(idx_v, 4, 4, 4) = vx;
q(idx_adv, 4, 6, 4) = 10.0;
Expand Down
2 changes: 1 addition & 1 deletion example/calculate_pi/calculate_pi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ void SetInOrOut(MeshBlockData<Real> *rc) {
// Loop bounds are set to catch the case where the edge is between the
// cell centers of the first/last real cell and the first ghost cell
pmb->par_for(
"SetInOrOut", kb.s, kb.e, jb.s - 1, jb.e + 1, ib.s - 1, ib.e + 1,
PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s - 1, jb.e + 1, ib.s - 1, ib.e + 1,
KOKKOS_LAMBDA(const int k, const int j, const int i) {
Real rsq = std::pow(coords.Xc<1>(i), 2) + std::pow(coords.Xc<2>(j), 2);
if (rsq < radius * radius) {
Expand Down
2 changes: 1 addition & 1 deletion example/kokkos_pi/kokkos_pi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ result_t naiveParFor(int n_block, int n_mesh, int n_iter, double radius) {
auto inOrOut = base->PackVariables({Metadata::Independent});
// iops = 0 fops = 11
par_for(
DEFAULT_LOOP_PATTERN, "par_for in or out", DevExecSpace(), 0,
DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0,
inOrOut.GetDim(4) - 1, nghost, inOrOut.GetDim(3) - nghost - 1, nghost,
inOrOut.GetDim(2) - nghost - 1, nghost, inOrOut.GetDim(1) - nghost - 1,
KOKKOS_LAMBDA(const int l, const int k_grid, const int j_grid,
Expand Down
4 changes: 2 additions & 2 deletions example/particle_leapfrog/particle_leapfrog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
// This hardcoded implementation should only be used in PGEN and not during runtime
// addition of particles as indices need to be taken into account.
pmb->par_for(
"CreateParticles", 0, num_particles_this_block - 1, KOKKOS_LAMBDA(const int n) {
PARTHENON_AUTO_LABEL, 0, num_particles_this_block - 1, KOKKOS_LAMBDA(const int n) {
const auto &m = ids_this_block(n);

id(n) = m; // global unique id
Expand Down Expand Up @@ -227,7 +227,7 @@ TaskStatus TransportParticles(MeshBlock *pmb, const StagedIntegrator *integrator
const Real ay = 0.0;
const Real az = 0.0;
pmb->par_for(
"Leapfrog", 0, max_active_index, KOKKOS_LAMBDA(const int n) {
PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) {
if (swarm_d.IsActive(n)) {
// drift
x(n) += v(0, n) * 0.5 * dt;
Expand Down
16 changes: 8 additions & 8 deletions example/particle_tracers/particle_tracers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ TaskStatus AdvectTracers(MeshBlock *pmb, const StagedIntegrator *integrator) {

auto swarm_d = swarm->GetDeviceContext();
pmb->par_for(
"Tracer advection", 0, max_active_index, KOKKOS_LAMBDA(const int n) {
PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) {
if (swarm_d.IsActive(n)) {
x(n) += vx * dt;
y(n) += vy * dt;
Expand Down Expand Up @@ -219,13 +219,13 @@ TaskStatus DepositTracers(MeshBlock *pmb) {
auto &tracer_dep = pmb->meshblock_data.Get()->Get("tracer_deposition").data;
// Reset particle count
pmb->par_for(
"ZeroParticleDep", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int k, const int j, const int i) { tracer_dep(k, j, i) = 0.; });

const int ndim = pmb->pmy_mesh->ndim;

pmb->par_for(
"DepositTracers", 0, swarm->GetMaxActiveIndex(), KOKKOS_LAMBDA(const int n) {
PARTHENON_AUTO_LABEL, 0, swarm->GetMaxActiveIndex(), KOKKOS_LAMBDA(const int n) {
if (swarm_d.IsActive(n)) {
int i = static_cast<int>(std::floor((x(n) - minx_i) / dx_i) + ib.s);
int j = 0;
Expand Down Expand Up @@ -269,7 +269,7 @@ TaskStatus CalculateFluxes(MeshBlockData<Real> *mbd) {

// Spatially first order upwind method
pmb->par_for(
"CalculateFluxesX1", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e + 1,
PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e + 1,
KOKKOS_LAMBDA(const int k, const int j, const int i) {
// X1
if (vx > 0.) {
Expand All @@ -282,7 +282,7 @@ TaskStatus CalculateFluxes(MeshBlockData<Real> *mbd) {
if (ndim > 1) {
auto x2flux = mbd->Get("advected").flux[X2DIR].Get<4>();
pmb->par_for(
"CalculateFluxesX2", kb.s, kb.e, jb.s, jb.e + 1, ib.s, ib.e,
PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e + 1, ib.s, ib.e,
KOKKOS_LAMBDA(const int k, const int j, const int i) {
// X2
if (vy > 0.) {
Expand All @@ -296,7 +296,7 @@ TaskStatus CalculateFluxes(MeshBlockData<Real> *mbd) {
if (ndim > 2) {
auto x3flux = mbd->Get("advected").flux[X3DIR].Get<4>();
pmb->par_for(
"CalculateFluxesX3", kb.s, kb.e + 1, jb.s, jb.e, ib.s, ib.e,
PARTHENON_AUTO_LABEL, kb.s, kb.e + 1, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int k, const int j, const int i) {
// X3
if (vz > 0.) {
Expand Down Expand Up @@ -355,7 +355,7 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
const Real kwave = 2. * M_PI / (x_max_mesh - x_min_mesh);

pmb->par_for(
"Init advected profile", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int k, const int j, const int i) {
advected(k, j, i) = advected_mean + advected_amp * sin(kwave * coords.Xc<1>(i));
});
Expand Down Expand Up @@ -387,7 +387,7 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
// This hardcoded implementation should only be used in PGEN and not during runtime
// addition of particles as indices need to be taken into account.
pmb->par_for(
"CreateParticles", 0, num_tracers_meshblock - 1, KOKKOS_LAMBDA(const int n) {
PARTHENON_AUTO_LABEL, 0, num_tracers_meshblock - 1, KOKKOS_LAMBDA(const int n) {
auto rng_gen = rng_pool.get_state();

// Rejection sample the x position
Expand Down
Loading
Loading