From 71df3bdbce519f104c8645bedd03efc9a1769bf7 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Thu, 4 Jan 2024 21:31:07 -0800 Subject: [PATCH] initialize the distributions on the GPU (#498) * initialize the distributions on the GPU * Fix Thermal: `const` operator * Python Support, Cleaning * Cleanup: Unused, Resize Fix the missing resize before redistribute. Remove the old, unusued, serial initialization logic. * Thermal Distribution: GPU Memory Remove stack memory usage and use GPU memory for pre-computed CDF. * Cosmetic Cleaning * Waterbag: Clean C++ Math Avoid promoting to double in C functions. * Cosmetic Cleaning for Clarity * Fix CUDA Compile On older NVCC * Analysis: Slightly Increase Sampling Tolerance Run on local GPU (RTX A2000). --------- Co-authored-by: Axel Huebl --- docs/source/usage/python.rst | 3 +- examples/alignment/analysis_alignment.py | 2 +- .../apochromatic/analysis_apochromatic.py | 2 +- examples/chicane/analysis_chicane.py | 4 +- examples/compression/analysis_compression.py | 4 +- examples/cyclotron/analysis_cyclotron.py | 2 +- examples/distgen/analysis_gaussian.py | 4 +- examples/distgen/analysis_kurth4d.py | 4 +- examples/distgen/analysis_semigaussian.py | 4 +- examples/expanding_beam/analysis_expanding.py | 2 +- examples/fodo/analysis_fodo.py | 4 +- examples/fodo_chromatic/analysis_fodo_chr.py | 4 +- examples/fodo_rf/analysis_fodo_rf.py | 4 +- examples/iota_lattice/analysis_iotalattice.py | 4 +- .../iota_lattice/analysis_iotalattice_sdep.py | 8 +- examples/kurth/analysis_kurth_periodic.py | 4 +- examples/multipole/analysis_multipole.py | 4 +- .../analysis_quadrupole_softedge.py | 4 +- examples/solenoid/analysis_solenoid.py | 4 +- src/initialization/InitDistribution.H | 103 ++++++ src/initialization/InitDistribution.cpp | 58 ++-- src/particles/ImpactXParticleContainer.H | 20 +- src/particles/ImpactXParticleContainer.cpp | 110 ++++--- src/particles/distribution/Gaussian.H | 9 + src/particles/distribution/KVdist.H | 9 + src/particles/distribution/Kurth4D.H | 9 + src/particles/distribution/Kurth6D.H | 9 + src/particles/distribution/None.H | 9 + src/particles/distribution/Semigaussian.H | 9 + src/particles/distribution/Thermal.H | 307 ++++++++++-------- src/particles/distribution/Triangle.H | 9 + src/particles/distribution/Waterbag.H | 37 ++- src/python/ImpactXParticleContainer.cpp | 2 - 33 files changed, 499 insertions(+), 272 deletions(-) create mode 100644 src/initialization/InitDistribution.H diff --git a/docs/source/usage/python.rst b/docs/source/usage/python.rst index 2729edcbd..96cd5a83a 100644 --- a/docs/source/usage/python.rst +++ b/docs/source/usage/python.rst @@ -237,7 +237,7 @@ Particles This class stores particles, distributed over MPI ranks. - .. py:method:: add_n_particles(lev, x, y, t, px, py, pt, qm, bchchg) + .. py:method:: add_n_particles(x, y, t, px, py, pt, qm, bchchg) Add new particles to the container for fixed s. @@ -245,7 +245,6 @@ Particles been created, meaning after the call to :py:meth:`ImpactX.init_grids` has been made in the ImpactX class. - :param lev: mesh-refinement level :param x: positions in x :param y: positions in y :param t: positions as time-of-flight in c*t diff --git a/examples/alignment/analysis_alignment.py b/examples/alignment/analysis_alignment.py index d84e95fd8..ddd4bd77b 100755 --- a/examples/alignment/analysis_alignment.py +++ b/examples/alignment/analysis_alignment.py @@ -76,7 +76,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = 1.8 * num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.0 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/apochromatic/analysis_apochromatic.py b/examples/apochromatic/analysis_apochromatic.py index 78f98e707..c893b4fd6 100755 --- a/examples/apochromatic/analysis_apochromatic.py +++ b/examples/apochromatic/analysis_apochromatic.py @@ -108,7 +108,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = 15.0 * num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 19.0 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/chicane/analysis_chicane.py b/examples/chicane/analysis_chicane.py index 07ceb5cdd..f09f3fbcc 100755 --- a/examples/chicane/analysis_chicane.py +++ b/examples/chicane/analysis_chicane.py @@ -58,7 +58,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.2 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -85,7 +85,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.2 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/compression/analysis_compression.py b/examples/compression/analysis_compression.py index 775e1a903..f1aa959b1 100755 --- a/examples/compression/analysis_compression.py +++ b/examples/compression/analysis_compression.py @@ -69,7 +69,7 @@ def get_moments(beam): print(f" gamma={initial_gamma_ref:e}") atol = 0.0 # ignored -rtol = 1.8 * num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.9 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -99,7 +99,7 @@ def get_moments(beam): atol = 0.0 # ignored -rtol = 1.8 * num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.9 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/cyclotron/analysis_cyclotron.py b/examples/cyclotron/analysis_cyclotron.py index 472f8cf74..e90dd76ec 100755 --- a/examples/cyclotron/analysis_cyclotron.py +++ b/examples/cyclotron/analysis_cyclotron.py @@ -86,7 +86,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = 1.8 * num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.0 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/distgen/analysis_gaussian.py b/examples/distgen/analysis_gaussian.py index c2c582b7d..11deb81ff 100755 --- a/examples/distgen/analysis_gaussian.py +++ b/examples/distgen/analysis_gaussian.py @@ -58,7 +58,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.3 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -85,7 +85,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.3 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/distgen/analysis_kurth4d.py b/examples/distgen/analysis_kurth4d.py index 40851889b..c2a7560b4 100755 --- a/examples/distgen/analysis_kurth4d.py +++ b/examples/distgen/analysis_kurth4d.py @@ -58,7 +58,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.3 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -85,7 +85,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.3 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/distgen/analysis_semigaussian.py b/examples/distgen/analysis_semigaussian.py index c2c582b7d..11deb81ff 100755 --- a/examples/distgen/analysis_semigaussian.py +++ b/examples/distgen/analysis_semigaussian.py @@ -58,7 +58,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.3 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -85,7 +85,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.3 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/expanding_beam/analysis_expanding.py b/examples/expanding_beam/analysis_expanding.py index cdf5ac0cf..48eecad55 100755 --- a/examples/expanding_beam/analysis_expanding.py +++ b/examples/expanding_beam/analysis_expanding.py @@ -58,7 +58,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.5 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/fodo/analysis_fodo.py b/examples/fodo/analysis_fodo.py index d4c4fc29c..3b09df05e 100755 --- a/examples/fodo/analysis_fodo.py +++ b/examples/fodo/analysis_fodo.py @@ -59,7 +59,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.2 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -86,7 +86,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.2 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/fodo_chromatic/analysis_fodo_chr.py b/examples/fodo_chromatic/analysis_fodo_chr.py index d4c4fc29c..3b09df05e 100755 --- a/examples/fodo_chromatic/analysis_fodo_chr.py +++ b/examples/fodo_chromatic/analysis_fodo_chr.py @@ -59,7 +59,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.2 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -86,7 +86,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.2 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/fodo_rf/analysis_fodo_rf.py b/examples/fodo_rf/analysis_fodo_rf.py index 32ed5f6ee..0bcd71343 100755 --- a/examples/fodo_rf/analysis_fodo_rf.py +++ b/examples/fodo_rf/analysis_fodo_rf.py @@ -58,7 +58,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.2 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -85,7 +85,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.2 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/iota_lattice/analysis_iotalattice.py b/examples/iota_lattice/analysis_iotalattice.py index f84cfffb3..d7529bd17 100755 --- a/examples/iota_lattice/analysis_iotalattice.py +++ b/examples/iota_lattice/analysis_iotalattice.py @@ -58,7 +58,7 @@ def get_moments(beam): ) atol = 0.0 # a big number -rtol = 1.5 * num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.9 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -78,7 +78,7 @@ def get_moments(beam): ) atol = 0.0 # a big number -rtol = 1.8 * num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.9 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/iota_lattice/analysis_iotalattice_sdep.py b/examples/iota_lattice/analysis_iotalattice_sdep.py index 13e457798..92998c22f 100755 --- a/examples/iota_lattice/analysis_iotalattice_sdep.py +++ b/examples/iota_lattice/analysis_iotalattice_sdep.py @@ -56,7 +56,7 @@ def read_all_files(file_pattern): print(f" meanH={meanH:e} sigH={sigH:e} meanI={meanI:e} sigI={sigI:e}") atol = 0.0 # a big number -rtol = 1.5 * num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.6 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -73,7 +73,7 @@ def read_all_files(file_pattern): print(f" meanH={meanH:e} sigH={sigH:e} meanI={meanI:e} sigI={sigI:e}") atol = 0.0 # a big number -rtol = 1.5 * num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.6 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -94,14 +94,14 @@ def read_all_files(file_pattern): Hrms = np.sqrt(sigH**2 + meanH**2) Irms = np.sqrt(sigI**2 + meanI**2) -atol = 4.0e-3 * Hrms +atol = 4.2e-3 * Hrms rtol = 0.0 # large number print() print(f" atol={atol} (ignored: rtol~={rtol})") print(f" dH_max={beam_joined['dH'].max()}") assert np.allclose(beam_joined["dH"], 0.0, rtol=rtol, atol=atol) -atol = 5.5e-3 * Irms +atol = 5.7e-3 * Irms rtol = 0.0 print() print(f" atol={atol} (ignored: rtol~={rtol})") diff --git a/examples/kurth/analysis_kurth_periodic.py b/examples/kurth/analysis_kurth_periodic.py index 88a7b7a5c..6a42288c3 100755 --- a/examples/kurth/analysis_kurth_periodic.py +++ b/examples/kurth/analysis_kurth_periodic.py @@ -58,7 +58,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = 1.5 * num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.7 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -85,7 +85,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = 1.5 * num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.7 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/multipole/analysis_multipole.py b/examples/multipole/analysis_multipole.py index 4aa2358b6..f3a87f2a3 100755 --- a/examples/multipole/analysis_multipole.py +++ b/examples/multipole/analysis_multipole.py @@ -58,7 +58,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.2 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -85,7 +85,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.5 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/quadrupole_softedge/analysis_quadrupole_softedge.py b/examples/quadrupole_softedge/analysis_quadrupole_softedge.py index c2c582b7d..9c0c60bbd 100755 --- a/examples/quadrupole_softedge/analysis_quadrupole_softedge.py +++ b/examples/quadrupole_softedge/analysis_quadrupole_softedge.py @@ -58,7 +58,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.2 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -85,7 +85,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 2.2 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/examples/solenoid/analysis_solenoid.py b/examples/solenoid/analysis_solenoid.py index 2a686f030..87c0594e3 100755 --- a/examples/solenoid/analysis_solenoid.py +++ b/examples/solenoid/analysis_solenoid.py @@ -58,7 +58,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.3 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( @@ -85,7 +85,7 @@ def get_moments(beam): ) atol = 0.0 # ignored -rtol = num_particles**-0.5 # from random sampling of a smooth distribution +rtol = 1.3 * num_particles**-0.5 # from random sampling of a smooth distribution print(f" rtol={rtol} (ignored: atol~={atol})") assert np.allclose( diff --git a/src/initialization/InitDistribution.H b/src/initialization/InitDistribution.H new file mode 100644 index 000000000..a3186b2ad --- /dev/null +++ b/src/initialization/InitDistribution.H @@ -0,0 +1,103 @@ +/* Copyright 2022-2023 The Regents of the University of California, through Lawrence + * Berkeley National Laboratory (subject to receipt of any required + * approvals from the U.S. Dept. of Energy). All rights reserved. + * + * This file is part of ImpactX. + * + * Authors: Axel Huebl, Andrew Myers + * License: BSD-3-Clause-LBNL + */ +#ifndef IMPACTX_INITIALIZATION_INITDISTRIBUTION_H +#define IMPACTX_INITIALIZATION_INITDISTRIBUTION_H + +#include "particles/ImpactXParticleContainer.H" + +#include // for AMREX_RESTRICT +#include + +#include // for std::move + + +namespace impactx::initialization +{ + /** Initialize a single particle's data using the given distribution + * + * Note: we usually would just write a C++ lambda below in ParallelFor. But, due to restrictions + * in NVCC as of 11.5, we cannot write a lambda in a lambda as we also std::visit the element + * types of our lattice elements list. + * error #3206-D: An extended __device__ lambda cannot be defined inside a generic lambda expression("operator()"). + * Thus, we fall back to writing a C++ functor here, instead of nesting two lambdas. + * + * Nvidia bug report: 3458976 + * Minimal demonstrator: https://cuda.godbolt.org/z/39e4q53Ye + * + * @tparam T_Distribution This can be a \see Gaussian, \see Waterbag, \see Kurth6D, \see Thermal etc. + */ + template + struct InitSingleParticleData + { + /** Constructor taking in pointers to particle data + * + * @param distribution the type of distribution function to call + * @param part_x the array to the particle position (x) + * @param part_y the array to the particle position (y) + * @param part_t the array to the particle position (t) + * @param part_px the array to the particle momentum (x) + * @param part_py the array to the particle momentum (y) + * @param part_pt the array to the particle momentum (t) + */ + InitSingleParticleData ( + T_Distribution distribution, + amrex::ParticleReal* AMREX_RESTRICT part_x, + amrex::ParticleReal* AMREX_RESTRICT part_y, + amrex::ParticleReal* AMREX_RESTRICT part_t, + amrex::ParticleReal* AMREX_RESTRICT part_px, + amrex::ParticleReal* AMREX_RESTRICT part_py, + amrex::ParticleReal* AMREX_RESTRICT part_pt + ) + : m_distribution(std::move(distribution)), + m_part_x(part_x), m_part_y(part_y), m_part_t(part_t), + m_part_px(part_px), m_part_py(part_py), m_part_pt(part_pt) + { + } + + InitSingleParticleData () = delete; + InitSingleParticleData (InitSingleParticleData const &) = default; + InitSingleParticleData (InitSingleParticleData &&) = default; + ~InitSingleParticleData () = default; + + /** Initialize the data for a single particle + * + * @param i particle index in the current box + * @param engine a random number engine (with associated state) + */ + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + void + operator() ( + amrex::Long i, + amrex::RandomEngine const & engine + ) const + { + m_distribution( + m_part_x[i], + m_part_y[i], + m_part_t[i], + m_part_px[i], + m_part_py[i], + m_part_pt[i], + engine + ); + } + + private: + T_Distribution const m_distribution; + amrex::ParticleReal* const AMREX_RESTRICT m_part_x; + amrex::ParticleReal* const AMREX_RESTRICT m_part_y; + amrex::ParticleReal* const AMREX_RESTRICT m_part_t; + amrex::ParticleReal* const AMREX_RESTRICT m_part_px; + amrex::ParticleReal* const AMREX_RESTRICT m_part_py; + amrex::ParticleReal* const AMREX_RESTRICT m_part_pt; + }; +} // namespace impactx::initialization + +#endif // IMPACTX_INITIALIZATION_INITDISTRIBUTION_H diff --git a/src/initialization/InitDistribution.cpp b/src/initialization/InitDistribution.cpp index 5e0dd0700..64a31ec15 100644 --- a/src/initialization/InitDistribution.cpp +++ b/src/initialization/InitDistribution.cpp @@ -7,6 +7,8 @@ * Authors: Axel Huebl, Chad Mitchell, Ji Qiang * License: BSD-3-Clause-LBNL */ +#include "initialization/InitDistribution.H" + #include "ImpactX.H" #include "particles/ImpactXParticleContainer.H" #include "particles/distribution/All.H" @@ -21,6 +23,7 @@ #include #include +#include #include @@ -56,12 +59,6 @@ namespace impactx ); } - // init particles - amrex::Vector x, y, t; - amrex::Vector px, py, pt; - amrex::ParticleReal ix, iy, it, ipx, ipy, ipt; - amrex::RandomEngine rng; - // Logic: We initialize 1/Nth of particles, independent of their // position, per MPI rank. We then measure the distribution's spatial // extent, create a grid, resize it to fit the beam, and then @@ -71,34 +68,41 @@ namespace impactx int const navg = npart / nprocs; int const nleft = npart - navg * nprocs; int npart_this_proc = (myproc < nleft) ? navg+1 : navg; - auto const rel_part_this_proc = amrex::ParticleReal(npart_this_proc) / - amrex::ParticleReal(npart); + auto const rel_part_this_proc = + amrex::ParticleReal(npart_this_proc) / amrex::ParticleReal(npart); + + // alloc data for particle attributes + amrex::Gpu::DeviceVector x, y, t; + amrex::Gpu::DeviceVector px, py, pt; + x.resize(npart_this_proc); + y.resize(npart_this_proc); + t.resize(npart_this_proc); + px.resize(npart_this_proc); + py.resize(npart_this_proc); + pt.resize(npart_this_proc); std::visit([&](auto&& distribution){ // initialize distributions distribution.initialize(bunch_charge, ref); - // alloc data for particle attributes - x.reserve(npart_this_proc); - y.reserve(npart_this_proc); - t.reserve(npart_this_proc); - px.reserve(npart_this_proc); - py.reserve(npart_this_proc); - pt.reserve(npart_this_proc); - - for (amrex::Long i = 0; i < npart_this_proc; ++i) { - distribution(ix, iy, it, ipx, ipy, ipt, rng); - x.push_back(ix); - y.push_back(iy); - t.push_back(it); - px.push_back(ipx); - py.push_back(ipy); - pt.push_back(ipt); - } + amrex::ParticleReal * const AMREX_RESTRICT x_ptr = x.data(); + amrex::ParticleReal * const AMREX_RESTRICT y_ptr = y.data(); + amrex::ParticleReal * const AMREX_RESTRICT t_ptr = t.data(); + amrex::ParticleReal * const AMREX_RESTRICT px_ptr = px.data(); + amrex::ParticleReal * const AMREX_RESTRICT py_ptr = py.data(); + amrex::ParticleReal * const AMREX_RESTRICT pt_ptr = pt.data(); + + using Distribution = std::remove_reference_t< std::remove_cv_t >; + initialization::InitSingleParticleData const init_single_particle_data( + distribution, x_ptr, y_ptr, t_ptr, px_ptr, py_ptr, pt_ptr); + amrex::ParallelForRNG(npart_this_proc, init_single_particle_data); + + // finalize distributions and deallocate temporary device global memory + amrex::Gpu::streamSynchronize(); + distribution.finalize(); }, distr); - int const lev = 0; - m_particle_container->AddNParticles(lev, x, y, t, px, py, pt, + m_particle_container->AddNParticles(x, y, t, px, py, pt, ref.qm_qeeV(), bunch_charge * rel_part_this_proc); diff --git a/src/particles/ImpactXParticleContainer.H b/src/particles/ImpactXParticleContainer.H index 7ab1abed6..4d4f7dee5 100644 --- a/src/particles/ImpactXParticleContainer.H +++ b/src/particles/ImpactXParticleContainer.H @@ -156,7 +156,6 @@ namespace impactx * or AmrCore::InitFromCheckpoint has been made in the ImpactX * class. * - * @param lev mesh-refinement level * @param x positions in x * @param y positions in y * @param t positions as time-of-flight in c*t @@ -167,15 +166,16 @@ namespace impactx * @param bchchg total charge within a bunch in C */ void - AddNParticles (int lev, - amrex::Vector const & x, - amrex::Vector const & y, - amrex::Vector const & t, - amrex::Vector const & px, - amrex::Vector const & py, - amrex::Vector const & pt, - amrex::ParticleReal const & qm, - amrex::ParticleReal const & bchchg); + AddNParticles ( + amrex::Gpu::DeviceVector const & x, + amrex::Gpu::DeviceVector const & y, + amrex::Gpu::DeviceVector const & t, + amrex::Gpu::DeviceVector const & px, + amrex::Gpu::DeviceVector const & py, + amrex::Gpu::DeviceVector const & pt, + amrex::ParticleReal qm, + amrex::ParticleReal bchchg + ); /** Register storage for lost particles * diff --git a/src/particles/ImpactXParticleContainer.cpp b/src/particles/ImpactXParticleContainer.cpp index d333b1e75..e18d53341 100644 --- a/src/particles/ImpactXParticleContainer.cpp +++ b/src/particles/ImpactXParticleContainer.cpp @@ -100,19 +100,19 @@ namespace impactx } void - ImpactXParticleContainer::AddNParticles (int lev, - amrex::Vector const & x, - amrex::Vector const & y, - amrex::Vector const & t, - amrex::Vector const & px, - amrex::Vector const & py, - amrex::Vector const & pt, - amrex::ParticleReal const & qm, - amrex::ParticleReal const & bchchg) + ImpactXParticleContainer::AddNParticles ( + amrex::Gpu::DeviceVector const & x, + amrex::Gpu::DeviceVector const & y, + amrex::Gpu::DeviceVector const & t, + amrex::Gpu::DeviceVector const & px, + amrex::Gpu::DeviceVector const & py, + amrex::Gpu::DeviceVector const & pt, + amrex::ParticleReal qm, + amrex::ParticleReal bchchg + ) { BL_PROFILE("ImpactX::AddNParticles"); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(lev == 0, "AddNParticles: only lev=0 is supported yet."); AMREX_ALWAYS_ASSERT(x.size() == y.size()); AMREX_ALWAYS_ASSERT(x.size() == t.size()); AMREX_ALWAYS_ASSERT(x.size() == px.size()); @@ -123,48 +123,60 @@ namespace impactx int const np = x.size(); auto& particle_tile = DefineAndReturnParticleTile(0, 0, 0); + auto old_np = particle_tile.numParticles(); + auto new_np = old_np + np; + particle_tile.resize(new_np); - /* Create a temporary tile to obtain data from simulation. This data - * is then copied to the permanent tile which is stored on the particle - * (particle_tile). - */ - using PinnedTile = amrex::ParticleTile< - amrex::Particle, - NArrayReal, NArrayInt, - amrex::PinnedArenaAllocator - >; - PinnedTile pinned_tile; - pinned_tile.define(NumRuntimeRealComps(), NumRuntimeIntComps()); - - for (int i = 0; i < np; i++) + // Update NextID to include particles created in this function + int pid; +#ifdef AMREX_USE_OMP +#pragma omp critical (add_beam_nextid) +#endif { - ParticleType p; - p.id() = ParticleType::NextID(); - p.cpu() = amrex::ParallelDescriptor::MyProc(); - p.pos(RealAoS::x) = x[i]; - p.pos(RealAoS::y) = y[i]; - p.pos(RealAoS::t) = t[i]; - // write position, creating cpu id, and particle id - pinned_tile.push_back(p); + pid = ParticleType::NextID(); + ParticleType::NextID(pid+np); } - - // write Real attributes (SoA) to particle initialized zero - DefineAndReturnParticleTile(0, 0, 0); - - pinned_tile.push_back_real(RealSoA::px, px); - pinned_tile.push_back_real(RealSoA::py, py); - pinned_tile.push_back_real(RealSoA::pt, pt); - pinned_tile.push_back_real(RealSoA::qm, np, qm); - pinned_tile.push_back_real(RealSoA::w, np, bchchg/ablastr::constant::SI::q_e/np); - - /* Redistributes particles to their respective tiles (spatial bucket - * sort per box over MPI ranks) - */ - auto old_np = particle_tile.numParticles(); - auto new_np = old_np + pinned_tile.numParticles(); - particle_tile.resize(new_np); - amrex::copyParticles( - particle_tile, pinned_tile, 0, old_np, pinned_tile.numParticles()); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + static_cast(pid) + static_cast(np) < amrex::LongParticleIds::LastParticleID, + "ERROR: overflow on particle id numbers"); + + const int cpuid = amrex::ParallelDescriptor::MyProc(); + + auto * AMREX_RESTRICT pstructs = particle_tile.GetArrayOfStructs()().dataPtr(); + auto & soa = particle_tile.GetStructOfArrays().GetRealData(); + amrex::ParticleReal * const AMREX_RESTRICT px_arr = soa[RealSoA::px].dataPtr(); + amrex::ParticleReal * const AMREX_RESTRICT py_arr = soa[RealSoA::py].dataPtr(); + amrex::ParticleReal * const AMREX_RESTRICT pt_arr = soa[RealSoA::pt].dataPtr(); + amrex::ParticleReal * const AMREX_RESTRICT qm_arr = soa[RealSoA::qm].dataPtr(); + amrex::ParticleReal * const AMREX_RESTRICT w_arr = soa[RealSoA::w ].dataPtr(); + + amrex::ParticleReal const * const AMREX_RESTRICT x_ptr = x.data(); + amrex::ParticleReal const * const AMREX_RESTRICT y_ptr = y.data(); + amrex::ParticleReal const * const AMREX_RESTRICT t_ptr = t.data(); + amrex::ParticleReal const * const AMREX_RESTRICT px_ptr = px.data(); + amrex::ParticleReal const * const AMREX_RESTRICT py_ptr = py.data(); + amrex::ParticleReal const * const AMREX_RESTRICT pt_ptr = pt.data(); + + amrex::ParallelFor(np, + [=] AMREX_GPU_DEVICE (int i) noexcept + { + ParticleType& p = pstructs[old_np + i]; + p.id() = pid + i; + p.cpu() = cpuid; + p.pos(RealAoS::x) = x_ptr[i]; + p.pos(RealAoS::y) = y_ptr[i]; + p.pos(RealAoS::t) = t_ptr[i]; + + px_arr[old_np+i] = px_ptr[i]; + py_arr[old_np+i] = py_ptr[i]; + pt_arr[old_np+i] = pt_ptr[i]; + qm_arr[old_np+i] = qm; + w_arr[old_np+i] = bchchg/ablastr::constant::SI::q_e/np; + }); + + // safety first: in case passed attribute arrays were temporary, we + // want to make sure the ParallelFor has ended here + amrex::Gpu::streamSynchronize(); } void diff --git a/src/particles/distribution/Gaussian.H b/src/particles/distribution/Gaussian.H index 83b19e1da..89ceb5479 100644 --- a/src/particles/distribution/Gaussian.H +++ b/src/particles/distribution/Gaussian.H @@ -60,6 +60,15 @@ namespace impactx::distribution { } + /** Close and deallocate all data and handles. + * + * Nothing to do here. + */ + void + finalize () + { + } + /** Return 1 6D particle coordinate * * @param x particle position in x diff --git a/src/particles/distribution/KVdist.H b/src/particles/distribution/KVdist.H index 65dac699c..4bf9ee94f 100644 --- a/src/particles/distribution/KVdist.H +++ b/src/particles/distribution/KVdist.H @@ -61,6 +61,15 @@ namespace impactx::distribution { } + /** Close and deallocate all data and handles. + * + * Nothing to do here. + */ + void + finalize () + { + } + /** Return 1 6D particle coordinate * * @param x particle position in x diff --git a/src/particles/distribution/Kurth4D.H b/src/particles/distribution/Kurth4D.H index 988448241..0c3102db8 100644 --- a/src/particles/distribution/Kurth4D.H +++ b/src/particles/distribution/Kurth4D.H @@ -61,6 +61,15 @@ namespace impactx::distribution { } + /** Close and deallocate all data and handles. + * + * Nothing to do here. + */ + void + finalize () + { + } + /** Return 1 6D particle coordinate * * @param x particle position in x diff --git a/src/particles/distribution/Kurth6D.H b/src/particles/distribution/Kurth6D.H index d1ed44ed4..86574ce04 100644 --- a/src/particles/distribution/Kurth6D.H +++ b/src/particles/distribution/Kurth6D.H @@ -63,6 +63,15 @@ namespace impactx::distribution { } + /** Close and deallocate all data and handles. + * + * Nothing to do here. + */ + void + finalize () + { + } + /** Return 1 6D particle coordinate * * @param x particle position in x diff --git a/src/particles/distribution/None.H b/src/particles/distribution/None.H index 79e3657e3..0fa9501ab 100644 --- a/src/particles/distribution/None.H +++ b/src/particles/distribution/None.H @@ -37,6 +37,15 @@ namespace impactx::distribution { } + /** Close and deallocate all data and handles. + * + * Nothing to do here. + */ + void + finalize () + { + } + /** Return 1 6D particle coordinate * * Sets all values to zero. diff --git a/src/particles/distribution/Semigaussian.H b/src/particles/distribution/Semigaussian.H index 9b91e568a..2f1a942cc 100644 --- a/src/particles/distribution/Semigaussian.H +++ b/src/particles/distribution/Semigaussian.H @@ -61,6 +61,15 @@ namespace impactx::distribution { } + /** Close and deallocate all data and handles. + * + * Nothing to do here. + */ + void + finalize () + { + } + /** Return 1 6D particle coordinate * * @param x particle position in x diff --git a/src/particles/distribution/Thermal.H b/src/particles/distribution/Thermal.H index b2cc31819..63169332a 100644 --- a/src/particles/distribution/Thermal.H +++ b/src/particles/distribution/Thermal.H @@ -14,6 +14,7 @@ #include +#include #include #include #include @@ -23,6 +24,7 @@ #include #include #include +#include namespace impactx @@ -36,68 +38,69 @@ namespace distribution static constexpr amrex::ParticleReal rout = 10.0; ///< final r value for numerical integration static constexpr int nsteps = 2000; ///< number of radial steps for numerical integration - struct Rprofile + amrex::ParticleReal m_f1; ///< cumulative distribution of first population + amrex::ParticleReal m_f2; ///< cumulative distribution of second population + amrex::ParticleReal m_phi1; ///< potential generated by first population + amrex::ParticleReal m_phi2; ///< potential generated by second population + amrex::ParticleReal m_p1; ///< normalization constant of first population + amrex::ParticleReal m_p2; ///< normalization constant of second population + amrex::ParticleReal m_rmin; ///< minimum r value for tabulated cdf + amrex::ParticleReal m_rmax; ///< maximum r value for tabulated cdf + int m_nbins; ///< number of radial bins for tabulated cdf + amrex::ParticleReal* m_cdf1 = nullptr; ///< tabulated cumulative distribution (first) + amrex::ParticleReal* m_cdf2 = nullptr; ///< tabulated cumulative distribution (second) + amrex::ParticleReal m_Cintensity; ///< space charge intensity parameter + amrex::ParticleReal m_bg; ///< reference value of relativistic beta*gamma + amrex::ParticleReal m_k; ///< linear focusing strength (1/meters) + amrex::ParticleReal m_T1; ///< temperature k*T of the primary (core) population + amrex::ParticleReal m_T2; ///< temperature k*T of the secondary (halo) population + amrex::ParticleReal m_w; ///< weight of the secondary (halo) population + + // GPU data + static inline amrex::Gpu::DeviceVector m_d_cdf1; + static inline amrex::Gpu::DeviceVector m_d_cdf2; + + ThermalData ( + amrex::ParticleReal kin, + amrex::ParticleReal T1in, + amrex::ParticleReal T2in, + amrex::ParticleReal p1in, + amrex::ParticleReal p2in, + amrex::ParticleReal win + ) { - amrex::ParticleReal f1; ///< cumulative distribution of first population - amrex::ParticleReal f2; ///< cumulative distribution of second population - amrex::ParticleReal phi1; ///< potential generated by first population - amrex::ParticleReal phi2; ///< potential generated by second population - amrex::ParticleReal p1; ///< normalization constant of first population - amrex::ParticleReal p2; ///< normalization constant of second population - amrex::ParticleReal rmin; ///< minimum r value for tabulated cdf - amrex::ParticleReal rmax; ///< maximum r value for tabulated cdf - int nbins; ///< number of radial bins for tabulated cdf - amrex::ParticleReal cdf1[2001]; ///< tabulated cumulative distribution (first) - amrex::ParticleReal cdf2[2001]; ///< tabulated cumulative distribution (second) - amrex::ParticleReal Cintensity; ///< space charge intensity parameter - amrex::ParticleReal bg; ///< reference value of relativistic beta*gamma - amrex::ParticleReal k; ///< linear focusing strength (1/meters) - amrex::ParticleReal T1; ///< temperature k*T of the primary (core) population - amrex::ParticleReal T2; ///< temperature k*T of the secondary (halo) population - amrex::ParticleReal w; ///< weight of the secondary (halo) population - - Rprofile (amrex::ParticleReal kin, - amrex::ParticleReal T1in, - amrex::ParticleReal T2in, - amrex::ParticleReal p1in, - amrex::ParticleReal p2in, - amrex::ParticleReal win) - { - k = kin; - T1 = T1in; - T2 = T2in; - p1 = p1in; - p2 = p2in; - w = win; - } - }; + m_k = kin; + m_T1 = T1in; + m_T2 = T2in; + m_p1 = p1in; + m_p2 = p2in; + m_w = win; + } /** Populate the radial CDF data. * * @param[in] bunch_charge the bunch charge in C * @param[in] refpart the reference particle - * @param[inout] data the data to contain the radial CDF */ - static void - generate_radial_dist (amrex::ParticleReal bunch_charge, RefPart const & refpart, Rprofile & data) - + void + generate_radial_dist (amrex::ParticleReal bunch_charge, RefPart const & refpart) { using namespace amrex::literals; // for _rt and _prt using namespace ablastr::constant::math; // for pi // Get relativistic beta*gamma amrex::ParticleReal bg = refpart.beta_gamma(); - data.bg = bg; + m_bg = bg; // Get reference particle rest energy (eV) and charge (q_e) amrex::ParticleReal Erest = refpart.mass_MeV()*1.0e6; amrex::ParticleReal q_e = refpart.charge_qe(); // Set space charge intensity - data.Cintensity = q_e*bunch_charge/(pow(bg,2)*Erest*ablastr::constant::SI::ep0); + m_Cintensity = q_e*bunch_charge/(pow(bg,2)*Erest*ablastr::constant::SI::ep0); // Set minimum and maximum radius - amrex::ParticleReal r_scale = matched_scale_radius(data); + amrex::ParticleReal r_scale = matched_scale_radius(); amrex::ParticleReal rmin = rin*r_scale; amrex::ParticleReal rmax = rout*r_scale; // amrex::PrintToFile("equilibrium_params.out") << r_scale << " " << data.Cintensity << "\n"; @@ -105,49 +108,70 @@ namespace distribution // Scale the parameters p1 and p2 amrex::ParticleReal rt2pi = sqrt(2.0_prt*pi); amrex::ParticleReal p_scale = pow(r_scale*rt2pi,-3); - data.p1 = data.p1*p_scale; - data.p2 = data.p2*p_scale; + m_p1 = m_p1*p_scale; + m_p2 = m_p2*p_scale; // store integration parameters - data.nbins = nsteps; - data.rmin = rmin; - data.rmax = rmax; + m_nbins = nsteps; + m_rmin = rmin; + m_rmax = rmax; + + // allocate CDFs + std::vector cdf1(m_nbins+1); + std::vector cdf2(m_nbins+1); + m_cdf1 = cdf1.data(); + m_cdf2 = cdf2.data(); // set initial conditions - data.f1 = 0.0_prt; - data.f2 = 0.0_prt; - data.phi1 = 0.0_prt; - data.phi2 = 0.0_prt; - data.cdf1[0] = 0.0_prt; - data.cdf2[0] = 0.0_prt; + m_f1 = 0.0_prt; + m_f2 = 0.0_prt; + m_phi1 = 0.0_prt; + m_phi2 = 0.0_prt; + m_cdf1[0] = 0.0_prt; + m_cdf2[0] = 0.0_prt; // integrate ODEs for the radial density - integrate(data,rmin,rmax,nsteps); + integrate(rmin,rmax,nsteps); // a search over normalization parameters p1, p2 can be added here + // rescale cdf to ensure the exact range [0,1] + for (int n = 0; n < m_nbins; ++n) { + m_cdf1[n] = m_cdf1[n] / m_cdf1[m_nbins]; + m_cdf2[n] = m_cdf2[n] / m_cdf2[m_nbins]; + } + + // copy CFD data to device + m_d_cdf1 = amrex::Gpu::DeviceVector(m_nbins+1); + m_d_cdf2 = amrex::Gpu::DeviceVector(m_nbins+1); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, + cdf1.begin(), cdf1.end(), + m_d_cdf1.begin()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, + cdf2.begin(), cdf2.end(), + m_d_cdf2.begin()); + amrex::Gpu::streamSynchronize(); } - static amrex::ParticleReal - matched_scale_radius(Rprofile & data) + amrex::ParticleReal + matched_scale_radius () { using namespace amrex::literals; // for _rt and _prt using namespace ablastr::constant::math; // for pi - amrex::ParticleReal k = data.k; - amrex::ParticleReal kT = (1.0_prt-data.w)*data.T1 + data.w*data.T2; - amrex::ParticleReal a = data.Cintensity/(4.0_prt*pi*5.0_prt*sqrt(5.0_prt)); + amrex::ParticleReal k = m_k; + amrex::ParticleReal kT = (1.0_prt - m_w) * m_T1 + m_w * m_T2; + amrex::ParticleReal a = m_Cintensity/(4.0_prt*pi*5.0_prt*sqrt(5.0_prt)); amrex::ParticleReal rscale = sqrt(kT + pow(a*k,2.0/3.0))/k; return rscale; } - static void + void integrate ( - Rprofile & data, - amrex::ParticleReal const in, - amrex::ParticleReal const out, - int const steps + amrex::ParticleReal in, + amrex::ParticleReal out, + int steps ) { using namespace amrex::literals; // for _rt and _prt @@ -163,12 +187,12 @@ namespace distribution for (int j=0; j < steps; ++j) { // for a second-order Strang splitting - map1(half,data,reval); - map2(tau,data,reval); - map1(half,data,reval); + map1(half, reval); + map2(tau, reval); + map1(half, reval); // store tabulated CDF - data.cdf1[j+1] = data.f1; - data.cdf2[j+1] = data.f2; + m_cdf1[j+1] = m_f1; + m_cdf2[j+1] = m_f2; // write cumulative density to file for debugging /* comment in for debugging amrex::PrintToFile("density1.out") << reval << " " << data.f1 << "\n"; @@ -180,68 +204,65 @@ namespace distribution } - static void + void map1 ( amrex::ParticleReal const tau, - Rprofile & data, amrex::ParticleReal & reval ) { using namespace amrex::literals; // for _rt and _prt using namespace ablastr::constant::math; // for pi - amrex::ParticleReal const f1 = data.f1; - amrex::ParticleReal const f2 = data.f2; - amrex::ParticleReal const phi1 = data.phi1; - amrex::ParticleReal const phi2 = data.phi2; + amrex::ParticleReal const f1 = m_f1; + amrex::ParticleReal const f2 = m_f2; + amrex::ParticleReal const phi1 = m_phi1; + amrex::ParticleReal const phi2 = m_phi2; amrex::ParticleReal const r = reval; // Apply map to update phi1, phi2, and r: reval = r + tau; - data.phi1 = phi1 + f1/(4.0_prt*pi*reval) - f1/(4.0_prt*pi*r); - data.phi2 = phi2 + f2/(4.0_prt*pi*reval) - f2/(4.0_prt*pi*r);; - data.f1 = f1; - data.f2 = f2; - + m_phi1 = phi1 + f1/(4.0_prt*pi*reval) - f1/(4.0_prt*pi*r); + m_phi2 = phi2 + f2/(4.0_prt*pi*reval) - f2/(4.0_prt*pi*r);; + m_f1 = f1; + m_f2 = f2; } - static void + void map2 ( amrex::ParticleReal const tau, - Rprofile & data, amrex::ParticleReal & reval ) { using namespace amrex::literals; // for _rt and _prt using namespace ablastr::constant::math; // for pi - amrex::ParticleReal const f1 = data.f1; - amrex::ParticleReal const f2 = data.f2; - amrex::ParticleReal const phi1 = data.phi1; - amrex::ParticleReal const phi2 = data.phi2; + amrex::ParticleReal const f1 = m_f1; + amrex::ParticleReal const f2 = m_f2; + amrex::ParticleReal const phi1 = m_phi1; + amrex::ParticleReal const phi2 = m_phi2; amrex::ParticleReal const r = reval; - amrex::ParticleReal const k = data.k; - amrex::ParticleReal const w = data.w; - amrex::ParticleReal const T1 = data.T1; - amrex::ParticleReal const T2 = data.T2; + amrex::ParticleReal const k = m_k; + amrex::ParticleReal const w = m_w; + amrex::ParticleReal const T1 = m_T1; + amrex::ParticleReal const T2 = m_T2; // Define space charge intensity parameters - amrex::ParticleReal const c1 = (1.0_prt-w)*data.Cintensity; - amrex::ParticleReal const c2 = w*data.Cintensity; + amrex::ParticleReal const c1 = (1.0_prt-w) * m_Cintensity; + amrex::ParticleReal const c2 = w * m_Cintensity; // Define intermediate quantities amrex::ParticleReal potential = 0.0_prt; potential = pow(k*r,2.0)/2.0_prt + c1*phi1 + c2*phi2; - amrex::ParticleReal Pdensity1 = data.p1*exp(-potential/T1); - amrex::ParticleReal Pdensity2 = data.p2*exp(-potential/T2); + amrex::ParticleReal Pdensity1 = m_p1*exp(-potential/T1); + amrex::ParticleReal Pdensity2 = m_p2*exp(-potential/T2); // amrex::ParticleReal Pdensity_tot = (1.0_prt-w)*Pdensity1 + w*Pdensity2; // amrex::PrintToFile("Pdensity.out") << reval << " " << Pdensity_tot << "\n"; // Apply map to update f1 and f2: - data.phi1 = phi1; - data.phi2 = phi2; - data.f1 = f1 + tau*4.0_prt*pi*pow(r,2.0)*Pdensity1; - data.f2 = f2 + tau*4.0_prt*pi*pow(r,2.0)*Pdensity2; + m_phi1 = phi1; + m_phi2 = phi2; + m_f1 = f1 + tau*4.0_prt*pi*pow(r,2.0)*Pdensity1; + m_f2 = f2 + tau*4.0_prt*pi*pow(r,2.0)*Pdensity2; reval = r; } }; @@ -270,12 +291,11 @@ namespace distribution amrex::ParticleReal halo ) : m_k(k), - m_kT(kT), + m_T1(kT), m_T2(kT_halo), m_normalize(normalize), m_normalize_halo(normalize_halo), - m_halo(halo), - m_data(ThermalData::Rprofile(m_k, m_kT, m_T2, m_normalize, m_normalize_halo, m_halo)) + m_halo(halo) { } @@ -289,7 +309,29 @@ namespace distribution void initialize (amrex::ParticleReal bunch_charge, RefPart const & ref) { // Generate the struct "data" containing the radial CDF: - distribution::ThermalData::generate_radial_dist(bunch_charge, ref, m_data); + ThermalData data(m_k, m_T1, m_T2, m_normalize, m_normalize_halo, m_halo); + data.generate_radial_dist(bunch_charge, ref); + + // share data + m_w = data.m_w; + m_T1 = data.m_T1; + m_T2 = data.m_T2; + m_rmin = data.m_rmin; + m_rmax = data.m_rmax; + m_nbins = data.m_nbins; + m_bg = data.m_bg; + m_cdf1 = data.m_d_cdf1.data(); + m_cdf2 = data.m_d_cdf2.data(); + } + + /** Close and deallocate all data and handles. + */ + void + finalize () + { + // deallocate + ThermalData::m_d_cdf1.clear(); + ThermalData::m_d_cdf2.clear(); } /** Return 1 6D particle coordinate @@ -304,13 +346,15 @@ namespace distribution */ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void operator() ( - amrex::ParticleReal & AMREX_RESTRICT x, - amrex::ParticleReal & AMREX_RESTRICT y, - amrex::ParticleReal & AMREX_RESTRICT t, - amrex::ParticleReal & AMREX_RESTRICT px, - amrex::ParticleReal & AMREX_RESTRICT py, - amrex::ParticleReal & AMREX_RESTRICT pt, - amrex::RandomEngine const& engine) { + amrex::ParticleReal & AMREX_RESTRICT x, + amrex::ParticleReal & AMREX_RESTRICT y, + amrex::ParticleReal & AMREX_RESTRICT t, + amrex::ParticleReal & AMREX_RESTRICT px, + amrex::ParticleReal & AMREX_RESTRICT py, + amrex::ParticleReal & AMREX_RESTRICT pt, + amrex::RandomEngine const & engine + ) const + { using namespace amrex::literals; // for _rt and _prt using namespace ablastr::constant::math; // for pi @@ -342,7 +386,7 @@ namespace distribution g6 = ln1*sin(2_prt*pi*u2); // Scale the last three variables to produce the momenta: - amrex::ParticleReal kT = (uhalo > m_data.w) ? m_data.T1 : m_data.T2; // select core or halo value + amrex::ParticleReal kT = (uhalo > m_w) ? m_T1 : m_T2; // select core or halo value px = sqrt(kT)*g4; py = sqrt(kT)*g5; pz = sqrt(kT)*g6; @@ -354,47 +398,42 @@ namespace distribution g3 /= norm; // Collect the radial CDF returned by generate_radial_dist: - amrex::ParticleReal rmin = m_data.rmin; - amrex::ParticleReal rmax = m_data.rmax; - int const nbins = m_data.nbins; - constexpr int length = 2001; // Ideally, int const length = nbins + 1; - amrex::ParticleReal cdf[length]; - for (int n = 0; n < length; ++n) { - cdf[n] = (uhalo > m_data.w) ? m_data.cdf1[n] : m_data.cdf2[n]; //select core or halo CDF - } - for (int n = 0; n < length; ++n) { - cdf[n] = cdf[n]/cdf[nbins]; //rescale cdf to ensure the exact range [0,1] - } + + // select core or halo CDF + amrex::ParticleReal const * cdf = (uhalo > m_w) ? m_cdf1 : m_cdf2; // Generate a radial coordinate from the CDF amrex::ParticleReal u = amrex::Random(engine); - amrex::ParticleReal *ptr = amrex::lower_bound(cdf, cdf + nbins + 1, u); - int off = amrex::max(0, (int)(ptr - cdf - 1)); + amrex::ParticleReal const * ptr = amrex::lower_bound(cdf, cdf + m_nbins + 1, u); + int const off = amrex::max(0, int(ptr - cdf - 1)); amrex::ParticleReal tv = (u - cdf[off]) / (cdf[off + 1] - cdf[off]); - amrex::ParticleReal xv = (off + tv) / (float)(nbins); - amrex::ParticleReal r = rmin + (rmax - rmin) * xv; + amrex::ParticleReal xv = (off + tv) / amrex::ParticleReal(m_nbins); + amrex::ParticleReal r = m_rmin + (m_rmax - m_rmin) * xv; // Scale to produce samples with the correct radial density: x = g1*r; y = g2*r; z = g3*r; - // Transform logitudinal variables into the laboratory frame: - t = -z/(m_data.bg); - pt = -pz*(m_data.bg); - - (void) m_k; - (void) m_kT; - (void) m_T2; - (void) m_halo; + // Transform longitudinal variables into the laboratory frame: + t = -z / m_bg; + pt = -pz * m_bg; } private: amrex::ParticleReal m_k; //! linear focusing strength (1/meters) - amrex::ParticleReal m_kT, m_T2; //! temperature of each particle population + amrex::ParticleReal m_T1, m_T2; //! temperature of each particle population amrex::ParticleReal m_normalize, m_normalize_halo; //! normalization constant of first/second population amrex::ParticleReal m_halo; //! relative weight of halo population - ThermalData::Rprofile m_data; //! struct containing radial profile data + amrex::ParticleReal m_rmin; ///< minimum r value for tabulated cdf + amrex::ParticleReal m_rmax; ///< maximum r value for tabulated cdf + int m_nbins; ///< number of radial bins for tabulated cdf + amrex::ParticleReal m_bg; ///< reference value of relativistic beta*gamma + amrex::ParticleReal m_w; ///< weight of the secondary (halo) population + + // radial profile data + amrex::ParticleReal const * m_cdf1 = nullptr; //! non-owning pointer to device core CDF + amrex::ParticleReal const * m_cdf2 = nullptr; //! non-owning pointer to device halo CDF }; } // namespace distribution diff --git a/src/particles/distribution/Triangle.H b/src/particles/distribution/Triangle.H index 45c3fbaaa..61e916d51 100644 --- a/src/particles/distribution/Triangle.H +++ b/src/particles/distribution/Triangle.H @@ -58,6 +58,15 @@ namespace impactx::distribution { } + /** Close and deallocate all data and handles. + * + * Nothing to do here. + */ + void + finalize () + { + } + /** Return 1 6D particle coordinate * * @param x particle position in x diff --git a/src/particles/distribution/Waterbag.H b/src/particles/distribution/Waterbag.H index d41ed6eb2..b86837e78 100644 --- a/src/particles/distribution/Waterbag.H +++ b/src/particles/distribution/Waterbag.H @@ -60,6 +60,15 @@ namespace impactx::distribution { } + /** Close and deallocate all data and handles. + * + * Nothing to do here. + */ + void + finalize () + { + } + /** Return 1 6D particle coordinate * * @param x particle position in x @@ -91,22 +100,22 @@ namespace impactx::distribution // Generate six standard normal random variables using Box-Muller: u1 = amrex::Random(engine); u2 = amrex::Random(engine); - ln1 = sqrt(-2_prt*log(u1)); - g1 = ln1*cos(2_prt*pi*u2); - g2 = ln1*sin(2_prt*pi*u2); + ln1 = std::sqrt(-2_prt*std::log(u1)); + g1 = ln1*std::cos(2_prt*pi*u2); + g2 = ln1*std::sin(2_prt*pi*u2); u1 = amrex::Random(engine); u2 = amrex::Random(engine); - ln1 = sqrt(-2_prt*log(u1)); - g3 = ln1*cos(2_prt*pi*u2); - g4 = ln1*sin(2_prt*pi*u2); + ln1 = std::sqrt(-2_prt*std::log(u1)); + g3 = ln1*std::cos(2_prt*pi*u2); + g4 = ln1*std::sin(2_prt*pi*u2); u1 = amrex::Random(engine); u2 = amrex::Random(engine); - ln1 = sqrt(-2_prt*log(u1)); - g5 = ln1*cos(2_prt*pi*u2); - g6 = ln1*sin(2_prt*pi*u2); + ln1 = std::sqrt(-2_prt*std::log(u1)); + g5 = ln1*std::cos(2_prt*pi*u2); + g6 = ln1*std::sin(2_prt*pi*u2); // Normalize to produce uniform samples on the unit sphere: - norm = sqrt(g1*g1+g2*g2+g3*g3+g4*g4+g5*g5+g6*g6); + norm = std::sqrt(g1*g1+g2*g2+g3*g3+g4*g4+g5*g5+g6*g6); g1 /= norm; g2 /= norm; g3 /= norm; @@ -116,7 +125,7 @@ namespace impactx::distribution // Scale to produce uniform samples in a ball (unit variance): u1 = amrex::Random(engine); - u2 = sqrt(8_prt)*pow(u1,1_prt/6); + u2 = std::sqrt(8_prt)*std::pow(u1,1_prt/6_prt); x = g1*u2; y = g2*u2; t = g3*u2; @@ -125,17 +134,17 @@ namespace impactx::distribution pt = g6*u2; // Transform to produce the desired second moments/correlations: - root = sqrt(1.0_prt-m_muxpx*m_muxpx); + root = std::sqrt(1.0_prt-m_muxpx*m_muxpx); a1 = m_sigmaX*x/root; a2 = m_sigmaPx*(-m_muxpx*x/root+px); x = a1; px = a2; - root = sqrt(1.0_prt-m_muypy*m_muypy); + root = std::sqrt(1.0_prt-m_muypy*m_muypy); a1 = m_sigmaY*y/root; a2 = m_sigmaPy*(-m_muypy*y/root+py); y = a1; py = a2; - root = sqrt(1.0_prt-m_mutpt*m_mutpt); + root = std::sqrt(1.0_prt-m_mutpt*m_mutpt); a1 = m_sigmaT*t/root; a2 = m_sigmaPt*(-m_mutpt*t/root+pt); t = a1; diff --git a/src/python/ImpactXParticleContainer.cpp b/src/python/ImpactXParticleContainer.cpp index c1a645b8a..e9f25f9e3 100644 --- a/src/python/ImpactXParticleContainer.cpp +++ b/src/python/ImpactXParticleContainer.cpp @@ -59,7 +59,6 @@ void init_impactxparticlecontainer(py::module& m) .def("add_n_particles", &ImpactXParticleContainer::AddNParticles, - py::arg("lev"), py::arg("x"), py::arg("y"), py::arg("t"), py::arg("px"), py::arg("py"), py::arg("pt"), py::arg("qm"), py::arg("bchchg"), @@ -67,7 +66,6 @@ void init_impactxparticlecontainer(py::module& m) "Note: This can only be used *after* the initialization (grids) have\n" " been created, meaning after the call to ImpactX.init_grids\n" " has been made in the ImpactX class.\n\n" - ":param lev: mesh-refinement level\n" ":param x: positions in x\n" ":param y: positions in y\n" ":param t: positions as time-of-flight in c*t\n"