From 278738880978fac65362d2696328d563ec5b99ad Mon Sep 17 00:00:00 2001 From: David Huber <69919478+DavidHuber-NOAA@users.noreply.github.com> Date: Wed, 11 Aug 2021 10:49:17 -0400 Subject: [PATCH] Port the build and run scripts to S4. (#341) (#362) The changes to fv3gfs_dwn_nems.sh, mod_icec.sh, and trim_rh.sh are to improve run times on S4. The S4 cluster is small (80 nodes, with no more than 40 usable at a time), so the size of the post jobs was reduced to 1 node apiece. Adding the '-ncpu 1' option to the wgrib2 calls made within srun --multi-prog significantly improves run times. --- modulefiles/s4 | 34 ++++++++++++++++++++++++++++++ sorc/build_ncep_post.sh | 7 +++++- sorc/ncep_post.fd/build_upp_lib.sh | 3 +++ tests/detect_machine.sh | 4 +++- ush/fv3gfs_downstream_nems.sh | 4 ++-- ush/fv3gfs_dwn_nems.sh | 15 +++++++------ ush/mod_icec.sh | 2 +- ush/trim_rh.sh | 2 +- 8 files changed, 59 insertions(+), 12 deletions(-) create mode 100644 modulefiles/s4 diff --git a/modulefiles/s4 b/modulefiles/s4 new file mode 100644 index 000000000..3147aa294 --- /dev/null +++ b/modulefiles/s4 @@ -0,0 +1,34 @@ +#%Module###################################################################### +# David Huber 06/2021, Set up config. with the hpc-stack NCEPLIBS. 
+############################################################################## + +proc ModulesHelp { } { +puts stderr "Loads modules required for building upp" +} +module-whatis "Loads UPP prerequisites on S4" + +module load license_intel/S4 +module use /data/prod/hpc-stack/modulefiles/stack +module load hpc/1.1.0 +module load hpc-intel/18.0.4 +module load hpc-impi/18.0.4 + +module load jasper/2.0.22 +module load zlib/1.2.11 +module load png/1.6.35 + +module load hdf5/1.10.6 +module load netcdf/4.7.4 + +module load bacio/2.4.1 +module load crtm/2.3.0 +module load g2/3.4.1 +module load g2tmpl/1.10.0 +module load ip/3.3.3 +module load nemsio/2.5.2 +module load sfcio/1.4.1 +module load sigio/2.3.2 +module load sp/2.3.3 +module load w3nco/2.4.1 +module load w3emc/2.7.3 +module load wrf_io/1.1.1 diff --git a/sorc/build_ncep_post.sh b/sorc/build_ncep_post.sh index d2e36e826..93022e2f4 100755 --- a/sorc/build_ncep_post.sh +++ b/sorc/build_ncep_post.sh @@ -13,7 +13,7 @@ ##################################################################################################### #List of valid machines: -validmachines=(theia jet wcoss_dell_p3 wcoss cray-intel hera orion odin stampede) +validmachines=(theia jet wcoss_dell_p3 wcoss cray-intel hera orion odin stampede s4) function usage { echo "Usage:" @@ -37,6 +37,8 @@ if [ "$#" -eq 0 ]; then machine=wcoss_dell_p3 elif [ $mac = t -o $mac = e -o $mac = g ] ; then # For WCOSS machine=wcoss + elif [ $mac2 = s4 ] ; then # For S4 + machine=s4 elif [ $mac = l -o $mac = s ] ; then # wcoss_c (i.e. luna and surge) export machine=cray-intel elif [ $mac2 = hf ] ; then # For Hera @@ -101,6 +103,9 @@ odin) # For Odin at NSSL stampede) module purge ;; +s4) # For S4 + . 
/etc/profile + ;; *) set +x echo "ERROR: Invalid machine name specified" diff --git a/sorc/ncep_post.fd/build_upp_lib.sh b/sorc/ncep_post.fd/build_upp_lib.sh index 26c5aaace..b3a01dae3 100755 --- a/sorc/ncep_post.fd/build_upp_lib.sh +++ b/sorc/ncep_post.fd/build_upp_lib.sh @@ -19,6 +19,9 @@ elif [ $mac = v -o $mac = m ] ; then # For Dell elif [ $mac = t -o $mac = e -o $mac = g ] ; then # For WCOSS machine=wcoss . /usrx/local/Modules/default/init/bash +elif [ $mac2 = s4 ] ; then # For S4 + machine=s4 + . /etc/profile elif [ $mac = l -o $mac = s ] ; then # wcoss_c (i.e. luna and surge) export machine=cray-intel elif [ $mac = O ] ; then # For Orion diff --git a/tests/detect_machine.sh b/tests/detect_machine.sh index 35bd2b6fc..9362e5635 100755 --- a/tests/detect_machine.sh +++ b/tests/detect_machine.sh @@ -90,13 +90,15 @@ case $(hostname -f) in login2.stampede2.tacc.utexas.edu) MACHINE_ID=stampede ;; ### stampede2 login3.stampede2.tacc.utexas.edu) MACHINE_ID=stampede ;; ### stampede3 login4.stampede2.tacc.utexas.edu) MACHINE_ID=stampede ;; ### stampede4 + + s4-submit.ssec.wisc.edu) MACHINE_ID=s4 ;; ### S4 esac # Overwrite auto-detect with RT_MACHINE if set MACHINE_ID=${RT_MACHINE:-${MACHINE_ID}} # Append compiler -#if [ $MACHINE_ID = orion ] || [ $MACHINE_ID = hera ] || [ $MACHINE_ID = cheyenne ] || [ $MACHINE_ID = jet ] || [ $MACHINE_ID = gaea ] || [ $MACHINE_ID = stampede ] ; then +#if [ $MACHINE_ID = orion ] || [ $MACHINE_ID = hera ] || [ $MACHINE_ID = cheyenne ] || [ $MACHINE_ID = jet ] || [ $MACHINE_ID = gaea ] || [ $MACHINE_ID = stampede ] || [ $MACHINE_ID = s4 ]; then # MACHINE_ID=${MACHINE_ID}.${RT_COMPILER} #fi diff --git a/ush/fv3gfs_downstream_nems.sh b/ush/fv3gfs_downstream_nems.sh index 283311975..2e5c16cb6 100755 --- a/ush/fv3gfs_downstream_nems.sh +++ b/ush/fv3gfs_downstream_nems.sh @@ -99,7 +99,7 @@ fi #----------------------------------------------------- #----------------------------------------------------- -if [ $machine = WCOSS -o $machine = 
WCOSS_C -o $machine = WCOSS_DELL_P3 -o $machine = HERA -o $machine = ORION ]; then +if [ $machine = WCOSS -o $machine = WCOSS_C -o $machine = WCOSS_DELL_P3 -o $machine = HERA -o $machine = ORION -o $machine = S4 ]; then #----------------------------------------------------- #----------------------------------------------------- export nset=1 @@ -173,7 +173,7 @@ date launcher=${APRUN_DWN:-"aprun -j 1 -n 24 -N 24 -d 1 cfp"} if [ $machine = WCOSS_C -o $machine = WCOSS_DELL_P3 ] ; then $launcher $MP_CMDFILE - elif [ $machine = HERA -o $machine = ORION ] ; then + elif [ $machine = HERA -o $machine = ORION -o $machine = S4 ] ; then if [ -s $DATA/poescript_srun ]; then rm -f $DATA/poescript_srun; fi touch $DATA/poescript_srun nm=0 diff --git a/ush/fv3gfs_dwn_nems.sh b/ush/fv3gfs_dwn_nems.sh index 030217add..4190e9525 100755 --- a/ush/fv3gfs_dwn_nems.sh +++ b/ush/fv3gfs_dwn_nems.sh @@ -32,6 +32,9 @@ export opt25=":(APCP|ACPCP|PRATE|CPRAT):" export opt26=' -set_grib_max_bits 25 -fi -if ' export opt27=":(APCP|ACPCP|PRATE|CPRAT|DZDT):" export opt28=' -new_grid_interpolation budget -fi ' +if [ $machine = "S4" ]; then + export optncpu=' -ncpu 1 ' +fi export grid0p25="latlon 0:1440:0.25 90:721:-0.25" export grid0p5="latlon 0:720:0.5 90:361:-0.5" export grid1p0="latlon 0:360:1.0 90:181:-1.0" @@ -42,7 +45,7 @@ export PGBS=${PGBS:-"NO"} if [ $nset = 1 ]; then if [ "$PGBS" = "YES" ]; then - $WGRIB2 $tmpfile $opt1 $opt21 $opt22 $opt23 $opt24 $opt25 $opt26 $opt27 $opt28 \ + $WGRIB2 $optncpu $tmpfile $opt1 $opt21 $opt22 $opt23 $opt24 $opt25 $opt26 $opt27 $opt28 \ -new_grid $grid0p25 pgb2file_${fhr3}_${iproc}_0p25 \ -new_grid $grid1p0 pgb2file_${fhr3}_${iproc}_1p0 \ -new_grid $grid0p5 pgb2file_${fhr3}_${iproc}_0p5 @@ -51,7 +54,7 @@ if [ $nset = 1 ]; then $TRIMRH pgb2file_${fhr3}_${iproc}_0p5 $TRIMRH pgb2file_${fhr3}_${iproc}_1p0 #tweak sea ice cover - count=`$WGRIB2 pgb2file_${fhr3}_${iproc}_0p25 -match "LAND|ICEC" |wc -l` + count=`$WGRIB2 $optncpu pgb2file_${fhr3}_${iproc}_0p25 -match 
"LAND|ICEC" |wc -l` if [ $count -eq 2 ]; then $MODICEC pgb2file_${fhr3}_${iproc}_0p25 $MODICEC pgb2file_${fhr3}_${iproc}_0p5 @@ -63,19 +66,19 @@ if [ $nset = 1 ]; then export err=$?; err_chk fi else - $WGRIB2 $tmpfile $opt1 $opt21 $opt22 $opt23 $opt24 $opt25 $opt26 $opt27 $opt28 \ + $WGRIB2 $optncpu $tmpfile $opt1 $opt21 $opt22 $opt23 $opt24 $opt25 $opt26 $opt27 $opt28 \ -new_grid $grid0p25 pgb2file_${fhr3}_${iproc}_0p25 export err=$?; err_chk $TRIMRH pgb2file_${fhr3}_${iproc}_0p25 #tweak sea ice cover - count=`$WGRIB2 pgb2file_${fhr3}_${iproc}_0p25 -match "LAND|ICEC" |wc -l` + count=`$WGRIB2 $optncpu pgb2file_${fhr3}_${iproc}_0p25 -match "LAND|ICEC" |wc -l` if [ $count -eq 2 ]; then $MODICEC pgb2file_${fhr3}_${iproc}_0p25 fi fi elif [ $nset = 2 ]; then if [ "$PGBS" = "YES" ]; then - $WGRIB2 $tmpfile $opt1 $opt21 $opt22 $opt23 $opt24 $opt25 $opt26 $opt27 $opt28 \ + $WGRIB2 $optncpu $tmpfile $opt1 $opt21 $opt22 $opt23 $opt24 $opt25 $opt26 $opt27 $opt28 \ -new_grid $grid0p25 pgb2bfile_${fhr3}_${iproc}_0p25 \ -new_grid $grid1p0 pgb2bfile_${fhr3}_${iproc}_1p0 \ -new_grid $grid0p5 pgb2bfile_${fhr3}_${iproc}_0p5 @@ -84,7 +87,7 @@ elif [ $nset = 2 ]; then $TRIMRH pgb2bfile_${fhr3}_${iproc}_0p5 $TRIMRH pgb2bfile_${fhr3}_${iproc}_1p0 else - $WGRIB2 $tmpfile $opt1 $opt21 $opt22 $opt23 $opt24 $opt25 $opt26 $opt27 $opt28 \ + $WGRIB2 $optncpu $tmpfile $opt1 $opt21 $opt22 $opt23 $opt24 $opt25 $opt26 $opt27 $opt28 \ -new_grid $grid0p25 pgb2bfile_${fhr3}_${iproc}_0p25 export err=$?; err_chk $TRIMRH pgb2bfile_${fhr3}_${iproc}_0p25 diff --git a/ush/mod_icec.sh b/ush/mod_icec.sh index 9e7722580..bb8c22182 100755 --- a/ush/mod_icec.sh +++ b/ush/mod_icec.sh @@ -7,7 +7,7 @@ f=$1 export WGRIB2=${WGRIB2:-${NWPROD:-/nwprod}/util/exec/wgrib2} -$WGRIB2 $f \ +$WGRIB2 $optncpu $f \ -if 'LAND' -rpn 'sto_1' -fi \ -if 'ICEC' -rpn 'rcl_1:0:==:*' -fi \ -set_grib_type same \ diff --git a/ush/trim_rh.sh b/ush/trim_rh.sh index 52a6739f3..9140e9712 100755 --- a/ush/trim_rh.sh +++ b/ush/trim_rh.sh @@ 
-8,7 +8,7 @@ f=$1 export WGRIB2=${WGRIB2:-${NWPROD:-/nwprod}/util/exec/wgrib2} -$WGRIB2 $f -not_if ':RH:' -grib $f.new \ +$WGRIB2 $optncpu $f -not_if ':RH:' -grib $f.new \ -if ':RH:' -rpn "10:*:0.5:+:floor:1000:min:10:/" -set_grib_type same \ -set_scaling -1 0 -grib_out $f.new export err=$?; err_chk