Skip to content

Commit

Permalink
Merge branch 'develop' into jmm/serial-pool
Browse files Browse the repository at this point in the history
  • Loading branch information
jonahm-LANL committed Jan 6, 2025
2 parents 0af302e + d65f3e6 commit a86b271
Show file tree
Hide file tree
Showing 35 changed files with 286 additions and 371 deletions.
24 changes: 18 additions & 6 deletions .github/workflows/check-compilers.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
name: Check compilers

on: [push, pull_request]
on:
# run every day at 06:00 UTC
schedule:
- cron: '0 6 * * *'
# when triggered manually
workflow_dispatch:
# when auto merge is enabled (hack to make sure it's run before merging)
pull_request:
types: [auto_merge_enabled]

# Cancel "duplicated" workflows triggered by pushes to internal
# branches with associated PRs.
Expand All @@ -14,7 +22,7 @@ jobs:
strategy:
matrix:
cxx: ['g++', 'clang++-15']
cmake_build_type: ['Release', 'Debug']
cmake_build_type: ['Release', 'DbgNoSym']
device: ['cuda', 'host']
parallel: ['serial', 'mpi']
exclude:
Expand All @@ -23,7 +31,7 @@ jobs:
# https://github.com/lanl/parthenon/issues/630
- cxx: clang++-15
device: cuda
cmake_build_type: Debug
cmake_build_type: DbgNoSym
runs-on: ubuntu-latest
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
Expand All @@ -48,25 +56,29 @@ jobs:
strategy:
matrix:
cxx: ['hipcc']
cmake_build_type: ['Release', 'Debug']
cmake_build_type: ['Release', 'DbgNoSym']
device: ['hip']
parallel: ['serial', 'mpi']
runs-on: ubuntu-latest
container:
image: ghcr.io/parthenon-hpc-lab/rocm5.4.3-mpi-hdf5
image: ghcr.io/parthenon-hpc-lab/rocm6.2-mpi-hdf5
env:
CMAKE_GENERATOR: Ninja
steps:
- uses: actions/checkout@v2
with:
submodules: 'true'
- name: CMake
# Manually chaning the arch for this (debug) build as the
# -O0 option causes compiler issue for the navi 1030 GPU at
# compile time, see https://github.com/parthenon-hpc-lab/parthenon/pull/1191#issuecomment-2492035364
run: |
cmake -B builddir \
-DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \
-DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} \
-DMACHINE_CFG=${PWD}/cmake/machinecfg/GitHubActions.cmake \
-DMACHINE_VARIANT=${{ matrix.device }}_${{ matrix.parallel }}
-DMACHINE_VARIANT=${{ matrix.device }}_${{ matrix.parallel }} \
-DKokkos_ARCH_AMD_GFX90A=ON -DKokkos_ARCH_NAVI1030=OFF
- name: Build
run: |
cmake --build builddir --parallel 2
4 changes: 3 additions & 1 deletion .github/workflows/ci-extended.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ env:
CMAKE_BUILD_PARALLEL_LEVEL: 5 # num threads for build
MACHINE_CFG: cmake/machinecfg/CI.cmake
OMPI_MCA_mpi_common_cuda_event_max: 1000
# CUDA IPC within docker repeated seem to cause issue on the CI machine
OMPI_MCA_btl_smcuda_use_cuda_ipc: 0
# https://github.com/open-mpi/ompi/issues/4948#issuecomment-395468231
OMPI_MCA_btl_vader_single_copy_mechanism: none

Expand All @@ -34,7 +36,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/ci-short.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ env:
CMAKE_BUILD_PARALLEL_LEVEL: 5 # num threads for build
MACHINE_CFG: cmake/machinecfg/CI.cmake
OMPI_MCA_mpi_common_cuda_event_max: 1000
# CUDA IPC within docker repeated seem to cause issue on the CI machine
OMPI_MCA_btl_smcuda_use_cuda_ipc: 0
# https://github.com/open-mpi/ompi/issues/4948#issuecomment-395468231
OMPI_MCA_btl_vader_single_copy_mechanism: none

Expand All @@ -22,7 +24,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -47,7 +49,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand Down Expand Up @@ -79,7 +81,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand Down
29 changes: 17 additions & 12 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,30 @@
## Current develop

### Added (new features/APIs/variables/...)
- [[PR 1210]](https://github.com/parthenon-hpc-lab/parthenon/pull/1210) Add cycle based output
- [[PR 1103]](https://github.com/parthenon-hpc-lab/parthenon/pull/1103) Add sparsity to vector wave equation test
- [[PR 1185]](https://github.com/parthenon-hpc-lab/parthenon/pull/1185/files) Bugfix to particle defragmentation
- [[PR 1185]](https://github.com/parthenon-hpc-lab/parthenon/pull/1185) Bugfix to particle defragmentation
- [[PR 1184]](https://github.com/parthenon-hpc-lab/parthenon/pull/1184) Fix swarm block neighbor indexing in 1D, 2D
- [[PR 1183]](https://github.com/parthenon-hpc-lab/parthenon/pull/1183) Fix particle leapfrog example initialization data
- [[PR 1179]](https://github.com/parthenon-hpc-lab/parthenon/pull/1179) Make a global variable for whether simulation is a restart
- [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option
- [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small perfomance upgrades

### Changed (changing behavior/API/variables/...)
- [[PR 1216]](https://github.com/parthenon-hpc-lab/parthenon/pull/1216) Move to Kokkos 4.5
- [[PR 1191]](https://github.com/parthenon-hpc-lab/parthenon/pull/1191) Update Kokkos version to 4.4.1
- [[PR 1209]](https://github.com/parthenon-hpc-lab/parthenon/pull/1209) Ordered history output
- [[PR 1206]](https://github.com/parthenon-hpc-lab/parthenon/pull/1206) Leapfrog fix
- [[PR1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image
- [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag
- [[PR 1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image
- [[PR 1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag
- [[PR 1187]](https://github.com/parthenon-hpc-lab/parthenon/pull/1187) Make DataCollection::Add safer and generalize MeshBlockData::Initialize
- [[Issue 1165]](https://github.com/parthenon-hpc-lab/parthenon/issues/1165) Bump Kokkos submodule to 4.4.1
- [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option
- [[PR 1172]](https://github.com/parthenon-hpc-lab/parthenon/pull/1172) Make parthenon manager robust against external MPI init and finalize calls

### Fixed (not changing behavior/API/variables/...)
- [[PR 1217]](https://github.com/parthenon-hpc-lab/parthenon/pull/1217) Swarm comm debug fix
- [[PR 1215]](https://github.com/parthenon-hpc-lab/parthenon/pull/1215) Fix issue with uninitialized input parameter block data
- [[PR 1211]](https://github.com/parthenon-hpc-lab/parthenon/pull/1211) remove inline from WriteTaskGraph
- [[PR 1188]](https://github.com/parthenon-hpc-lab/parthenon/pull/1188) Fix hdf5 output issue for metadata none variables, update test.
- [[PR 1170]](https://github.com/parthenon-hpc-lab/parthenon/pull/1170) Fixed incorrect initialization of array by a const not constexpr
- [[PR 1189]](https://github.com/parthenon-hpc-lab/parthenon/pull/1189) Address CUDA MPI/ICP issue with Kokkos <=4.4.1
Expand All @@ -35,7 +40,7 @@


### Incompatibilities (i.e. breaking changes)
- [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag
- [[PR 1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag

## Release 24.08
Date: 2024-08-30
Expand Down Expand Up @@ -159,12 +164,12 @@ Date: 2024-03-21
- [[PR 973]](https://github.com/parthenon-hpc-lab/parthenon/pull/973) Multigrid performance upgrades

### Fixed (not changing behavior/API/variables/...)
- [[PR1023]](https://github.com/parthenon-hpc-lab/parthenon/pull/1023) Fix broken param of a scalar bool
- [[PR1012]](https://github.com/parthenon-hpc-lab/parthenon/pull/1012) Remove accidentally duplicated code
- [[PR992]](https://github.com/parthenon-hpc-lab/parthenon/pull/992) Allow custom PR ops with sparse pools
- [[PR988]](https://github.com/parthenon-hpc-lab/parthenon/pull/988) Fix bug in neighbor finding routine for small, periodic, refined meshes
- [[PR986]](https://github.com/parthenon-hpc-lab/parthenon/pull/986) Fix bug in sparse boundary communication BndInfo cacheing
- [[PR978]](https://github.com/parthenon-hpc-lab/parthenon/pull/978) remove erroneous sparse check
- [[PR 1023]](https://github.com/parthenon-hpc-lab/parthenon/pull/1023) Fix broken param of a scalar bool
- [[PR 1012]](https://github.com/parthenon-hpc-lab/parthenon/pull/1012) Remove accidentally duplicated code
- [[PR 992]](https://github.com/parthenon-hpc-lab/parthenon/pull/992) Allow custom PR ops with sparse pools
- [[PR 988]](https://github.com/parthenon-hpc-lab/parthenon/pull/988) Fix bug in neighbor finding routine for small, periodic, refined meshes
- [[PR 986]](https://github.com/parthenon-hpc-lab/parthenon/pull/986) Fix bug in sparse boundary communication BndInfo cacheing
- [[PR 978]](https://github.com/parthenon-hpc-lab/parthenon/pull/978) remove erroneous sparse check

### Infrastructure (changes irrelevant to downstream codes)
- [[PR 1027]](https://github.com/parthenon-hpc-lab/parthenon/pull/1027) Refactor RestartReader as abstract class
Expand Down Expand Up @@ -231,7 +236,7 @@ Date: 2023-11-16
- [[PR 901]](https://github.com/parthenon-hpc-lab/parthenon/pull/901) Implement shared element ownership model

### Removed (removing behavior/API/varaibles/...)
- [[PR 930](https://github.com/parthenon-hpc-lab/parthenon/pull/930) Remove ParthenonManager::ParthenonInit as it is error-prone and the split functions are the recommended usage.
- [[PR 930]](https://github.com/parthenon-hpc-lab/parthenon/pull/930) Remove ParthenonManager::ParthenonInit as it is error-prone and the split functions are the recommended usage.


## Release 0.8.0
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Parthenon -- a performance portable block-structured adaptive mesh refinement fr

* CMake 3.16 or greater
* C++17 compatible compiler
* Kokkos 4.0.1 or greater
* Kokkos 4.4.1 or greater

## Optional (enabling features)

Expand Down
3 changes: 3 additions & 0 deletions cmake/machinecfg/GitHubActions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ message(STATUS "Loading machine configuration for GitHub Actions CI. ")

# common options
set(NUM_MPI_PROC_TESTING "2" CACHE STRING "CI runs tests with 2 MPI ranks")
set(Kokkos_ENABLE_ROCTHRUST OFF CACHE BOOL "Temporarily disabled as the container needs to be updated to the `-complete` base image.")

set(CMAKE_CXX_FLAGS_DBGNOSYM "-O0" CACHE STRING "Debug build without symbols")

set(MACHINE_CXX_FLAGS "")
if (${MACHINE_VARIANT} MATCHES "cuda")
Expand Down
41 changes: 40 additions & 1 deletion doc/sphinx/src/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ Kokkos wrappers/abstractions
- ``par_for`` wrappers use inclusive bounds, i.e., the loop will
include the last index given
- ``ParArrayND`` arrays by default allocate on the *device* using
default precision configured
default precision configured and come with a `State` that can
be used to store additional metadata.
- ``ParArray#DRaw`` directly map to Kokkos ``Views`` that are allocated
on *device* using default precision.
- To create an array on the host with identical layout to the device
array either use

Expand Down Expand Up @@ -62,6 +65,42 @@ parallelism interface that is needed for managing memory cached in
tightly nested loops. The wrappers are documented
:ref:`here <nested par for>`.

View of Views
-------------

Special care needs to be taken when working with a ``View`` of ``Views``.

To repeat the Kokkos documenation: `Don't use them <https://kokkos.org/kokkos-core-wiki/ProgrammingGuide/View.html#can-i-make-a-view-of-views>`__

But if you have to (which is the case in some places inside Parthenon)
then follow this pattern:

.. code:: c++

ParArray1DRaw<ParArray1D<Real>> view_of_pararrays(parthenon::ViewOfViewAlloc("myname"), 10);

The ``ViewOfViewAlloc`` ensures that the ``Kokkos::SequentialHostInit`` property is added,
which results in the (inner ``View`` ) deallocators being called on the host (rather than on
the device by default).
Also note the use of the "raw" ``ParArray1DRaw``, which directly maps to a Kokkos ``View``
(that is required to process the allocation property as this interface is not exposed
in the more generic ``ParArrayND``).

Similarly, when you create a host mirror of said ``View`` of ``View`` add the additional
property for the same reason.

.. code:: c++

// explicit theoretical example -- don't use this
auto view_of_pararrays_h =
Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), view_of_pararrays);

// but instead use this interface provided by Parthenon:
auto view_of_pararrays_h = create_view_of_view_mirror(view_of_pararrays);


Note that the ``SequentialHostInit`` was only added in Kokkos 4.4.1 (which is now the default in Parthenon).

The need for reductions within function handling ``MeshBlock`` data
-------------------------------------------------------------------

Expand Down
14 changes: 12 additions & 2 deletions doc/sphinx/src/outputs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,20 @@ Outputs
Outputs from Parthenon are controlled via ``<parthenon/output*>`` blocks,
where ``*`` should be replaced by a unique integer for each block.

The frequency of outputs can be controlled for each block separately
and can be triggered by either (simulation) time or cycle, i.e.,

- ``dt = 0.1`` means that the output for the block is written every 0.1
in simulation time.
- ``dn = 100`` means that the output for the block is written every 100
cycles.

Note that only one option can be chosen for a given block.
To disable an output block without removing it from the input file set
the block's ``dt < 0.0``.
the block's ``dt < 0.0`` and ``dn < 0`` (which is also happening by default
if the paramter is not provided in the input file).

In addition to time base outputs, two additional options to trigger
In addition to time or cycle based outputs, two additional options to trigger
outputs (applies to HDF5, restart and histogram outputs) exist.

- Signaling: If ``Parthenon`` catches a signal, e.g., ``SIGALRM`` which
Expand Down
2 changes: 1 addition & 1 deletion external/Kokkos
Submodule Kokkos updated 1227 files
14 changes: 6 additions & 8 deletions scripts/docker/Dockerfile.hip-rocm
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM rocm/dev-ubuntu-20.04:5.4.3
FROM rocm/dev-ubuntu-24.04:6.2

RUN apt-get clean && apt-get update -y && \
DEBIAN_FRONTEND="noninteractive" TZ=America/New_York apt-get install -y --no-install-recommends git python3-minimal libpython3-stdlib bc hwloc wget openssh-client python3-numpy python3-h5py python3-matplotlib lcov curl cmake ninja-build openmpi-bin libopenmpi-dev && \
Expand All @@ -14,12 +14,10 @@ RUN cd /tmp && \
cd / && \
rm -rf /tmp/hdf5-1.10.8*

# "mpic++ --showme" forgets open-pal in Ubuntu 20.04 + OpenMPI 4.0.3
# https://bugs.launchpad.net/ubuntu/+source/openmpi/+bug/1941786
# https://github.com/open-mpi/ompi/issues/9317
ENV LDFLAGS="-lopen-pal"

RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10

# uid 1000 maps to the one running the container on the CI host
RUN useradd --create-home --shell /bin/bash -u 1000 -G render ci
# Latest image has default user with uid 1000 (which maps to the one running the container on the CI host
# Need to add user to the group that can access the GPU
RUN usermod -a -G render ubuntu

WORKDIR /home/ubuntu
1 change: 0 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ add_library(parthenon
outputs/restart.hpp
outputs/restart_hdf5.cpp
outputs/restart_hdf5.hpp
outputs/vtk.cpp

parthenon/driver.hpp
parthenon/package.hpp
Expand Down
3 changes: 0 additions & 3 deletions src/bvals/bvals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,6 @@ class BoundarySwarm : public BoundaryCommunication {
explicit BoundarySwarm(std::weak_ptr<MeshBlock> pmb, const std::string &label);
~BoundarySwarm() = default;

std::vector<ParArrayND<int>> vars_int;
std::vector<ParArrayND<Real>> vars_real;

// (usuallly the std::size_t unsigned integer type)
std::vector<BoundaryCommunication *>::size_type bswarm_index;

Expand Down
4 changes: 2 additions & 2 deletions src/bvals/comms/bnd_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
namespace parthenon {

void ProResCache_t::Initialize(int n_regions, StateDescriptor *pkg) {
prores_info = ParArray1D<ProResInfo>("prores_info", n_regions);
prores_info_h = Kokkos::create_mirror_view(prores_info);
prores_info = ProResInfoArr_t(ViewOfViewAlloc("prores_info"), n_regions);
prores_info_h = create_view_of_view_mirror(prores_info);
int nref_funcs = pkg->NumRefinementFuncs();
// Note that assignment of Kokkos views resets them, but
// buffer_subset_sizes is a std::vector. It must be cleared, then
Expand Down
7 changes: 4 additions & 3 deletions src/bvals/comms/bnd_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "bvals/neighbor_block.hpp"
#include "coordinates/coordinates.hpp"
#include "interface/variable_state.hpp"
#include "kokkos_abstraction.hpp"
#include "mesh/domain.hpp"
#include "mesh/forest/logical_coordinate_transformation.hpp"
#include "utils/communication_buffer.hpp"
Expand Down Expand Up @@ -127,11 +128,11 @@ struct ProResInfo {
int GetBufferSize(MeshBlock *pmb, const NeighborBlock &nb,
std::shared_ptr<Variable<Real>> v);

using BndInfoArr_t = ParArray1D<BndInfo>;
using BndInfoArr_t = ParArray1DRaw<BndInfo>;
using BndInfoArrHost_t = typename BndInfoArr_t::HostMirror;

using ProResInfoArr_t = ParArray1D<ProResInfo>;
using ProResInfoArrHost_t = typename ParArray1D<ProResInfo>::HostMirror;
using ProResInfoArr_t = ParArray1DRaw<ProResInfo>;
using ProResInfoArrHost_t = typename ProResInfoArr_t::HostMirror;
class StateDescriptor;
struct ProResCache_t {
ProResInfoArr_t prores_info{};
Expand Down
5 changes: 3 additions & 2 deletions src/bvals/comms/bvals_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "bvals/comms/bnd_info.hpp"
#include "bvals/comms/bvals_in_one.hpp"
#include "interface/variable.hpp"
#include "kokkos_abstraction.hpp"
#include "mesh/domain.hpp"
#include "mesh/mesh.hpp"
#include "mesh/meshblock.hpp"
Expand Down Expand Up @@ -215,8 +216,8 @@ inline void RebuildBufferCache(std::shared_ptr<MeshData<Real>> md, int nbound,
using namespace loops;
using namespace loops::shorthands;
BvarsSubCache_t &cache = md->GetBvarsCache().GetSubCache(BOUND_TYPE, SENDER);
cache.bnd_info = BndInfoArr_t("bnd_info", nbound);
cache.bnd_info_h = Kokkos::create_mirror_view(cache.bnd_info);
cache.bnd_info = BndInfoArr_t(ViewOfViewAlloc("bnd_info"), nbound);
cache.bnd_info_h = create_view_of_view_mirror(cache.bnd_info);

// prolongation/restriction sub-sets
// TODO(JMM): Right now I exclude fluxcorrection boundaries but if
Expand Down
Loading

0 comments on commit a86b271

Please sign in to comment.