Skip to content

Commit

Permalink
Merge pull request #1203 from CEED/jrwrigh/smartsim
Browse files Browse the repository at this point in the history
fluids: SmartSim Online Data-Driven SGS training
  • Loading branch information
jrwrigh authored Nov 27, 2023
2 parents f22a855 + 4e139b5 commit 3451ca5
Show file tree
Hide file tree
Showing 26 changed files with 1,044 additions and 86 deletions.
3 changes: 3 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ noether-cpu:
# Libraries for examples
# -- PETSc with HIP (minimal)
- export PETSC_DIR=/projects/petsc PETSC_ARCH=mpich-hip && git -C $PETSC_DIR -c safe.directory=$PETSC_DIR describe
- source /home/jawr8143/SmartSimTestingSoftware/bin/activate && export SMARTREDIS_DIR=/home/jawr8143/SmartSimTestingSoftware/smartredis/install
- echo "-------------- PETSc ---------------" && make -C $PETSC_DIR info
- make -k -j$((NPROC_CPU / NPROC_POOL)) BACKENDS="$BACKENDS_CPU" JUNIT_BATCH="cpu" junit search="petsc fluids solids"
# -- MFEM v4.2
Expand Down Expand Up @@ -114,6 +115,7 @@ noether-rocm:
# Libraries for examples
# -- PETSc with HIP (minimal)
- export PETSC_DIR=/projects/petsc PETSC_ARCH=mpich-hip && git -C $PETSC_DIR -c safe.directory=$PETSC_DIR describe
- source /home/jawr8143/SmartSimTestingSoftware/bin/activate && export SMARTREDIS_DIR=/home/jawr8143/SmartSimTestingSoftware/smartredis/install
- echo "-------------- PETSc ---------------" && make -C $PETSC_DIR info
- make -k -j$((NPROC_GPU / NPROC_POOL)) BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="hip" junit search="petsc fluids solids"
# -- MFEM v4.2
Expand Down Expand Up @@ -241,6 +243,7 @@ noether-cuda:
# Libraries for examples
# -- PETSc with CUDA (minimal)
- export PETSC_DIR=/projects/petsc PETSC_ARCH=mpich-cuda-O PETSC_OPTIONS='-use_gpu_aware_mpi 0' && git -C $PETSC_DIR -c safe.directory=$PETSC_DIR describe
- source /home/jawr8143/SmartSimTestingSoftware/bin/activate && export SMARTREDIS_DIR=/home/jawr8143/SmartSimTestingSoftware/smartredis/install
- echo "-------------- PETSc ---------------" && make -C $PETSC_DIR info
- make -k -j$((NPROC_GPU / NPROC_POOL)) JUNIT_BATCH="cuda" junit BACKENDS="$BACKENDS_GPU" search="petsc fluids solids"
# Report status
Expand Down
11 changes: 8 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,10 @@ nekexamples := $(OBJDIR)/nek-bps
petscexamples.c := $(wildcard examples/petsc/*.c)
petscexamples := $(petscexamples.c:examples/petsc/%.c=$(OBJDIR)/petsc-%)
# Fluid Dynamics Examples
fluidsexamples.c := $(sort $(wildcard examples/fluids/*.c))
fluidsexamples := $(fluidsexamples.c:examples/fluids/%.c=$(OBJDIR)/fluids-%)
fluidsexamples.c := $(sort $(wildcard examples/fluids/*.c))
fluidsexamples.py := examples/fluids/smartsim_regression_framework.py
fluidsexamples := $(fluidsexamples.c:examples/fluids/%.c=$(OBJDIR)/fluids-%)
fluidsexamples += $(fluidsexamples.py:examples/fluids/%.py=$(OBJDIR)/fluids-py-%)
# Solid Mechanics Examples
solidsexamples.c := $(sort $(wildcard examples/solids/*.c))
solidsexamples := $(solidsexamples.c:examples/solids/%.c=$(OBJDIR)/solids-%)
Expand Down Expand Up @@ -608,6 +610,9 @@ $(OBJDIR)/fluids-% : examples/fluids/%.c examples/fluids/src/*.c examples/fluids
PETSC_DIR="$(abspath $(PETSC_DIR))" OPT="$(OPT)" $*
cp examples/fluids/$* $@

# Stage a Python fluids example next to the compiled examples by copying
# examples/fluids/<name>.py to $(OBJDIR)/fluids-py-<name>.
# $(OBJDIR)/fluids-navierstokes is listed as a prerequisite so the solver is
# built before the script is staged (presumably the script launches that
# binary - TODO confirm against the script).
$(OBJDIR)/fluids-py-% : examples/fluids/%.py $(OBJDIR)/fluids-navierstokes
cp $< $@

$(OBJDIR)/solids-% : examples/solids/%.c examples/solids/%.h \
examples/solids/problems/*.c examples/solids/src/*.c \
examples/solids/include/*.h examples/solids/problems/*.h examples/solids/qfunctions/*.h \
Expand Down Expand Up @@ -824,7 +829,7 @@ print-% :
CONFIG_VARS = CC CXX FC NVCC NVCC_CXX HIPCC \
OPT CFLAGS CPPFLAGS CXXFLAGS FFLAGS NVCCFLAGS HIPCCFLAGS SYCLFLAGS \
AR ARFLAGS LDFLAGS LDLIBS LIBCXX SED \
MAGMA_DIR OCCA_DIR XSMM_DIR CUDA_DIR CUDA_ARCH MFEM_DIR PETSC_DIR NEK5K_DIR ROCM_DIR HIP_ARCH SYCL_DIR
MAGMA_DIR OCCA_DIR XSMM_DIR CUDA_DIR CUDA_ARCH MFEM_DIR PETSC_DIR NEK5K_DIR ROCM_DIR HIP_ARCH SYCL_DIR SMARTREDIS_DIR

# $(call needs_save,CFLAGS) returns true (a nonempty string) if CFLAGS
# was set on the command line or in config.mk (where it will appear as
Expand Down
17 changes: 14 additions & 3 deletions examples/fluids/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,20 @@ PROBLEMDIR := problems
src.c := navierstokes.c $(sort $(wildcard $(PROBLEMDIR)/*.c)) $(sort $(wildcard $(SRCDIR)/*.c))
src.o = $(src.c:%.c=$(OBJDIR)/%.o)

# Optional SmartRedis support.
# SMARTREDIS_DIR is the path to the SmartRedis install directory
# (example: /software/smartredis/install). When it is empty, nothing in this
# block is applied and the sources under $(SRCDIR)/smartsim are not compiled.
SMARTREDIS_DIR ?=
ifdef SMARTREDIS_DIR
  hiredis.pc  := $(SMARTREDIS_DIR)/lib/pkgconfig/hiredis.pc
  lsmartredis := -lsmartredis
  # redis++.pc is looked up under both lib/ and lib64/. Simple (:=) assignment
  # resolves the glob once here instead of on every expansion below.
  redis++.pc  := $(wildcard $(SMARTREDIS_DIR)/lib/pkgconfig/redis++.pc $(SMARTREDIS_DIR)/lib64/pkgconfig/redis++.pc)

  # Include paths and linker search paths reported by pkg-config
  CPPFLAGS += $(call pkgconf, --cflags-only-I $(hiredis.pc) $(redis++.pc))
  LDFLAGS += $(call pkgconf, --libs-only-L --libs-only-other $(hiredis.pc) $(redis++.pc))
  # Re-emit every -L<dir> with PETSc's ldflag_rpath prefix in place of -L
  LDFLAGS += $(patsubst -L%, $(call pkgconf, --variable=ldflag_rpath $(PETSc.pc))%, $(call pkgconf, --libs-only-L $(hiredis.pc) $(redis++.pc)))
  LDLIBS += $(call pkgconf, --libs-only-l $(hiredis.pc) $(redis++.pc)) $(lsmartredis)
  # SmartSim-specific sources are only built when SmartRedis is available
  src.c += $(sort $(wildcard $(SRCDIR)/smartsim/*.c))
endif

all: navierstokes

navierstokes: $(src.o) | $(PETSc.pc) $(ceed.pc)
Expand All @@ -63,9 +77,6 @@ quiet ?= $($(1))
$(OBJDIR)/%.o : %.c | $$(@D)/.DIR
$(call quiet,CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $(abspath $<)

# Rules for building the examples
#%: %.c

print: $(PETSc.pc) $(ceed.pc)
$(info CC : $(CC))
$(info CFLAGS : $(CFLAGS))
Expand Down
30 changes: 29 additions & 1 deletion examples/fluids/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ The state variables are mass density, momentum density, and energy density.

The main Navier-Stokes solver for libCEED is defined in [`navierstokes.c`](navierstokes.c) with different problem definitions according to the application of interest.

## Build instructions

Build by using:

`make`
Expand All @@ -18,6 +20,12 @@ and run with:
./navierstokes -ceed [ceed] -problem [problem type] -degree [degree]
```

If you want to do *in situ* machine-learning training, specify `SMARTREDIS_DIR` in the make command like:

```
make SMARTREDIS_DIR=~/software/smartredis/install
```

## Runtime options

% inclusion-fluids-marker
Expand Down Expand Up @@ -235,7 +243,8 @@ For the case of a square/cubic mesh, the list of face indices to be used with `-

### Boundary conditions

Boundary conditions for compressible viscous flows are notoriously tricky. Here we offer some recommendations
Boundary conditions for compressible viscous flows are notoriously tricky.
Here we offer some recommendations.

#### Inflow

Expand Down Expand Up @@ -717,6 +726,25 @@ For the Density Current, Channel, and Blasius problems, the following common com
- 0
- `m`

* - `-sgs_train_enable`
- Whether to enable *in situ* training of data-driven SGS model. Requires building with SmartRedis.
- `false`
- boolean

* - `-sgs_train_write_data_interval`
- Number of timesteps between writing training data into SmartRedis database
- `1`
-

* - `-sgs_train_overwrite_data`
- Whether new training data should overwrite old data on database
- `true`
- boolean

* - `-smartsim_collocated_database_num_ranks`
- Number of MPI ranks associated with each collocated database (i.e. ranks per node)
- `1`
-
:::

#### Gaussian Wave
Expand Down
3 changes: 3 additions & 0 deletions examples/fluids/include/petsc_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ PetscErrorCode MatGetDiag_Ceed(Mat A, Vec D);
PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y);
PetscErrorCode CreateMatShell_Ceed(OperatorApplyContext ctx, Mat *mat);

PetscErrorCode DMGetGlobalVectorInfo(DM dm, PetscInt *local_size, PetscInt *global_size, VecType *vec_type);
PetscErrorCode DMGetLocalVectorInfo(DM dm, PetscInt *local_size, PetscInt *global_size, VecType *vec_type);

PetscErrorCode VecP2C(Vec X_petsc, PetscMemType *mem_type, CeedVector x_ceed);
PetscErrorCode VecC2P(CeedVector x_ceed, PetscMemType mem_type, Vec X_petsc);
PetscErrorCode VecReadP2C(Vec X_petsc, PetscMemType *mem_type, CeedVector x_ceed);
Expand Down
28 changes: 28 additions & 0 deletions examples/fluids/include/smartsim.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (c) 2017-2023, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

#include <c_client.h>
#include <petscsys.h>
#include <sr_enums.h>

// Wrap a SmartRedis C-client call and turn a failing SRError into a PETSc error.
//
// The wrapped expression is not evaluated at all when the PETSc option
// `-smartsim_disable_calls` is true; the option is queried every time the macro runs.
// On any result other than SRNoError, a PETSc error is raised carrying the SmartRedis
// error code and the text returned by SRGetLastError().
//
// Macro-local variables carry a `_q_` suffix so they cannot shadow identifiers used
// inside the wrapped expression.
//
// Under the clang static analyzer the macro is replaced by a plain function declaration.
#if defined(__clang_analyzer__)
void PetscSmartRedisCall(SRError);
#else
#define PetscSmartRedisCall(...) \
  do { \
    SRError   ierr_smartredis_call_q_; \
    PetscBool disable_smartredis_call_q_ = PETSC_FALSE; \
    PetscStackUpdateLine; \
    PetscCall(PetscOptionsGetBool(NULL, NULL, "-smartsim_disable_calls", &disable_smartredis_call_q_, NULL)); \
    if (disable_smartredis_call_q_ == PETSC_TRUE) break; /* leaves the do/while: call skipped */ \
    ierr_smartredis_call_q_ = __VA_ARGS__; \
    if (PetscUnlikely(ierr_smartredis_call_q_ != SRNoError)) \
      SETERRQ(PETSC_COMM_SELF, ierr_smartredis_call_q_, "SmartRedis Error (Code %d): %s", (int)ierr_smartredis_call_q_, SRGetLastError()); \
  } while (0)
#endif

PetscErrorCode SmartRedisVerifyPutTensor(void *c_client, const char *name, const size_t name_length);
39 changes: 37 additions & 2 deletions examples/fluids/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ To close the problem, the subgrid stress must be defined.
For implicit LES, the subgrid stress is set to zero and the numerical properties of the discretized system are assumed to account for the effect of subgrid scale structures on the filtered solution field.
For explicit LES, it is defined by a subgrid stress model.
(sgs-dd-model)=
#### Data-driven SGS Model
The data-driven SGS model implemented here uses a small neural network to compute the SGS term.
Expand All @@ -354,8 +355,7 @@ This is done to keep consistent with legacy file compatibility.
The current data-driven model parameters are not accurate and are for regression testing only.
:::
(problem-advection)=
(differential-filtering)=
### Differential Filtering
There is the option to filter the solution field using differential filtering.
Expand Down Expand Up @@ -448,6 +448,41 @@ To match the "size" of a normal kernel to our differential kernel, we attempt to
To match the box and Gaussian filters "sizes", we use $\beta = 1/10$ and $\beta = 1/6$, respectively.
$\beta$ can be set via `-diff_filter_kernel_scaling`.
### *In Situ* Machine-Learning Model Training
Training machine-learning models normally uses *a priori* (already gathered) data stored on disk.
This is computationally inefficient, particularly as the scale of the problem grows and the data that is saved to disk reduces to a small percentage of the total data generated by a simulation.
One way of working around this is to train a model on data coming from an ongoing simulation, known as *in situ* (in place) learning.
This is implemented in the code using [SmartSim](https://www.craylabs.org/docs/overview.html).
Briefly, the fluid simulation will periodically place data for training purposes into a database that a separate process uses to train a model.
The database used by SmartSim is [Redis](https://redis.com/modules/redis-ai/) and the library to connect to the database is called [SmartRedis](https://www.craylabs.org/docs/smartredis.html).
More information about how to utilize this code in a SmartSim configuration can be found on [SmartSim's website](https://www.craylabs.org/docs/overview.html).
To use this code in a SmartSim *in situ* setup, first the code must be built with SmartRedis enabled.
This is done by specifying the installation directory of SmartRedis using the `SMARTREDIS_DIR` environment variable when building:
```
make SMARTREDIS_DIR=~/software/smartredis/install
```
#### SGS Data-Driven Model *In Situ* Training
Currently the code is only set up to do *in situ* training for the SGS data-driven model.
Training data is split into the model inputs and outputs.
The model inputs are calculated as the same model inputs in the SGS Data-Driven model described {ref}`earlier<sgs-dd-model>`.
The model outputs (or targets in the case of training) are the subgrid stresses.
Both the inputs and outputs are computed from a filtered velocity field, which is calculated via {ref}`differential-filtering`.
The settings for the differential filtering used during training are described in {ref}`differential-filtering`.
The SGS *in situ* training can be enabled using the `-sgs_train_enable` flag.
Data can be processed and placed into the database periodically.
The interval between database writes is controlled by `-sgs_train_write_data_interval`.
There's also the choice of whether to add new training data on each database write or to overwrite the old data with new data.
This is controlled by `-sgs_train_overwrite_data`.
The database may also be located on the same node as an MPI rank (collocated) or located on a separate node (distributed).
It's necessary to know how many ranks are associated with each collocated database, which is set by `-smartsim_collocated_database_num_ranks`.
(problem-advection)=
## Advection
A simplified version of system {eq}`eq-ns`, only accounting for the transport of total energy, is given by
Expand Down
2 changes: 2 additions & 0 deletions examples/fluids/navierstokes.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ int main(int argc, char **argv) {
PetscCall(NodalProjectionDataDestroy(user->grad_velo_proj));
PetscCall(SgsDDDataDestroy(user->sgs_dd_data));
PetscCall(DifferentialFilterDataDestroy(user->diff_filter));
PetscCall(SGS_DD_TrainingDataDestroy(user->sgs_dd_train));
PetscCall(SmartSimDataDestroy(user->smartsim));

// -- Vectors
PetscCallCeed(ceed, CeedVectorDestroy(&ceed_data->x_coord));
Expand Down
34 changes: 32 additions & 2 deletions examples/fluids/navierstokes.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ struct AppCtx_private {
} wall_forces;
// Subgrid Stress Model
SGSModelType sgs_model_type;
PetscBool sgs_train_enable;
// Differential Filtering
PetscBool diff_filter_monitor;
MeshTransformType mesh_transform_type;
Expand Down Expand Up @@ -195,15 +196,31 @@ typedef struct {
CeedVector sgs_nodal_ceed;
} *SgsDDData;

// State for *in situ* training of the data-driven SGS model (held in User_private as `sgs_dd_train`).
// NOTE(review): field meanings below are inferred from names and the documented
// `-sgs_train_*` options; confirm against the setup code.
typedef struct {
// DM associated with the training data
DM dm_dd_training;
// Number of components in the data-driven model inputs, and the write interval
// (presumably timesteps between database writes, `-sgs_train_write_data_interval`)
PetscInt num_comp_dd_inputs, write_data_interval;
// Operator context used to calculate the training data
OperatorApplyContext op_training_data_calc_ctx;
// Nodal projection data for the velocity gradient of the filtered solution
NodalProjectionData filtered_grad_velo_proj;
// Two dimensions of the training data array - presumably the shape passed to SmartRedis; TODO confirm ordering
size_t training_data_array_dims[2];
// Presumably mirrors `-sgs_train_overwrite_data`: overwrite old data on the database rather than add to it
PetscBool overwrite_training_data;
} *SGS_DD_TrainingData;

typedef struct {
DM dm_filter;
PetscInt num_filtered_fields;
CeedInt *num_field_components;
PetscInt field_prim_state, field_velo_prod;
OperatorApplyContext op_rhs_ctx;
KSP ksp;
PetscBool do_mms_test;
} *DiffFilterData;

// SmartSim connection state (held in User_private as `smartsim`).
typedef struct {
// Opaque client handle - presumably the SmartRedis C client; TODO confirm
void *client;
// Fixed-size buffer for a rank identifier string (at most 15 characters plus the terminator)
char rank_id_name[16];
// Presumably the number of MPI ranks associated with each collocated database (ranks per node) - confirm against option parsing
PetscInt collocated_database_num_ranks;
} *SmartSimData;

// PETSc user data
struct User_private {
MPI_Comm comm;
Expand All @@ -224,6 +241,8 @@ struct User_private {
NodalProjectionData grad_velo_proj;
SgsDDData sgs_dd_data;
DiffFilterData diff_filter;
SmartSimData smartsim;
SGS_DD_TrainingData sgs_dd_train;
};

// Units
Expand Down Expand Up @@ -441,8 +460,9 @@ PetscErrorCode TurbulenceStatisticsDestroy(User user, CeedData ceed_data);
PetscErrorCode SgsDDModelSetup(Ceed ceed, User user, CeedData ceed_data, ProblemData *problem);
PetscErrorCode SgsDDDataDestroy(SgsDDData sgs_dd_data);
PetscErrorCode SgsDDModelApplyIFunction(User user, const Vec Q_loc, Vec G_loc);
PetscErrorCode VelocityGradientProjectionSetup(Ceed ceed, User user, CeedData ceed_data, ProblemData *problem);
PetscErrorCode VelocityGradientProjectionApply(User user, Vec Q_loc, Vec VelocityGradient);
PetscErrorCode VelocityGradientProjectionSetup(Ceed ceed, User user, CeedData ceed_data, ProblemData *problem, StateVariable state_var_input,
CeedElemRestriction elem_restr_input, CeedBasis basis_input, NodalProjectionData *pgrad_velo_proj);
PetscErrorCode VelocityGradientProjectionApply(NodalProjectionData grad_velo_proj, Vec Q_loc, Vec VelocityGradient);
PetscErrorCode GridAnisotropyTensorProjectionSetupApply(Ceed ceed, User user, CeedData ceed_data, CeedElemRestriction *elem_restr_grid_aniso,
CeedVector *grid_aniso_vector);
PetscErrorCode GridAnisotropyTensorCalculateCollocatedVector(Ceed ceed, User user, CeedData ceed_data, CeedElemRestriction *elem_restr_grid_aniso,
Expand All @@ -468,4 +488,14 @@ PetscErrorCode TSMonitor_DifferentialFilter(TS ts, PetscInt steps, PetscReal sol
PetscErrorCode DifferentialFilterApply(User user, const PetscReal solution_time, const Vec Q, Vec Filtered_Solution);
PetscErrorCode DifferentialFilterMmsICSetup(ProblemData *problem);

// -----------------------------------------------------------------------------
// SGS Data-Driven Training via SmartSim
// -----------------------------------------------------------------------------
PetscErrorCode SmartSimSetup(User user);
PetscErrorCode SmartSimDataDestroy(SmartSimData smartsim);
PetscErrorCode SGS_DD_TrainingSetup(Ceed ceed, User user, CeedData ceed_data, ProblemData *problem);
PetscErrorCode TSMonitor_SGS_DD_Training(TS ts, PetscInt step_num, PetscReal solution_time, Vec Q, void *ctx);
PetscErrorCode TSPostStep_SGS_DD_Training(TS ts);
PetscErrorCode SGS_DD_TrainingDataDestroy(SGS_DD_TrainingData sgs_dd_train);

#endif // libceed_fluids_examples_navier_stokes_h
5 changes: 3 additions & 2 deletions examples/fluids/problems/sgs_dd_model.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ PetscErrorCode SgsDDModelApplyIFunction(User user, const Vec Q_loc, Vec G_loc) {

PetscFunctionBeginUser;
PetscCall(DMGetGlobalVector(user->grad_velo_proj->dm, &VelocityGradient));
PetscCall(VelocityGradientProjectionApply(user, Q_loc, VelocityGradient));
PetscCall(VelocityGradientProjectionApply(user->grad_velo_proj, Q_loc, VelocityGradient));

// -- Compute Nodal SGS tensor
PetscCall(DMGetLocalVector(sgs_dd_data->dm_sgs, &SGSNodal_loc));
Expand Down Expand Up @@ -298,7 +298,8 @@ PetscErrorCode SgsDDModelSetup(Ceed ceed, User user, CeedData ceed_data, Problem
NewtonianIdealGasContext gas;

PetscFunctionBeginUser;
PetscCall(VelocityGradientProjectionSetup(ceed, user, ceed_data, problem));
PetscCall(VelocityGradientProjectionSetup(ceed, user, ceed_data, problem, user->phys->state_var, ceed_data->elem_restr_q, ceed_data->basis_q,
&user->grad_velo_proj));

PetscCall(PetscNew(&sgsdd_ctx));

Expand Down
20 changes: 1 addition & 19 deletions examples/fluids/qfunctions/differential_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,11 @@

#include <ceed.h>

#include "differential_filter_enums.h"
#include "newtonian_state.h"
#include "newtonian_types.h"
#include "utils.h"

enum DifferentialFilterStateComponent {
DIFF_FILTER_PRESSURE,
DIFF_FILTER_VELOCITY_X,
DIFF_FILTER_VELOCITY_Y,
DIFF_FILTER_VELOCITY_Z,
DIFF_FILTER_TEMPERATURE,
DIFF_FILTER_STATE_NUM,
};

enum DifferentialFilterVelocitySquared {
DIFF_FILTER_VELOCITY_SQUARED_XX,
DIFF_FILTER_VELOCITY_SQUARED_YY,
DIFF_FILTER_VELOCITY_SQUARED_ZZ,
DIFF_FILTER_VELOCITY_SQUARED_YZ,
DIFF_FILTER_VELOCITY_SQUARED_XZ,
DIFF_FILTER_VELOCITY_SQUARED_XY,
DIFF_FILTER_VELOCITY_SQUARED_NUM,
};

enum DifferentialFilterDampingFunction { DIFF_FILTER_DAMP_NONE, DIFF_FILTER_DAMP_VAN_DRIEST, DIFF_FILTER_DAMP_MMS };

typedef struct DifferentialFilterContext_ *DifferentialFilterContext;
Expand Down
Loading

0 comments on commit 3451ca5

Please sign in to comment.