From 00c40bf592867bb0ee25d0496cb1264d710d651e Mon Sep 17 00:00:00 2001
From: Engin Kayraklioglu
Date: Tue, 19 Sep 2023 17:22:20 -0700
Subject: [PATCH 1/5] Add nightly testing for ROCm 5.4

Signed-off-by: Engin Kayraklioglu
---
 util/cron/test-gpu-ex-rocm-54.bash | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100755 util/cron/test-gpu-ex-rocm-54.bash

diff --git a/util/cron/test-gpu-ex-rocm-54.bash b/util/cron/test-gpu-ex-rocm-54.bash
new file mode 100755
index 000000000000..70fbfaef5bc6
--- /dev/null
+++ b/util/cron/test-gpu-ex-rocm-54.bash
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+#
+# GPU native testing on a Cray EX (using none for CHPL_COMM)
+
+CWD=$(cd $(dirname ${BASH_SOURCE[0]}) ; pwd)
+source $CWD/common-native-gpu.bash
+source $CWD/common-hpe-cray-ex.bash
+
+module load rocm/5.4.3 # pin to rocm 5.4.3
+
+export CHPL_COMM=none
+export CHPL_LOCALE_MODEL=gpu
+export CHPL_LAUNCHER_PARTITION=bardpeak # bardpeak is the default queue
+
+export CHPL_GPU=amd # also detected by default
+
+export CHPL_NIGHTLY_TEST_DIRS="gpu/native"
+
+export CHPL_NIGHTLY_TEST_CONFIG_NAME="gpu-ex-rocm-54"
+$CWD/nightly -cron ${nightly_args}

From fe4e268505aab62396e00d1428d2e456af58588e Mon Sep 17 00:00:00 2001
From: Engin Kayraklioglu
Date: Wed, 6 Dec 2023 10:11:17 -0800
Subject: [PATCH 2/5] Use the bundled LLVM

Signed-off-by: Engin Kayraklioglu
---
 util/cron/test-gpu-ex-rocm-54.bash | 1 +
 1 file changed, 1 insertion(+)

diff --git a/util/cron/test-gpu-ex-rocm-54.bash b/util/cron/test-gpu-ex-rocm-54.bash
index 70fbfaef5bc6..b97e8cfa93bc 100755
--- a/util/cron/test-gpu-ex-rocm-54.bash
+++ b/util/cron/test-gpu-ex-rocm-54.bash
@@ -9,6 +9,7 @@ source $CWD/common-hpe-cray-ex.bash
 module load rocm/5.4.3 # pin to rocm 5.4.3
 
 export CHPL_COMM=none
+export CHPL_LLVM=bundled
 export CHPL_LOCALE_MODEL=gpu
 export CHPL_LAUNCHER_PARTITION=bardpeak # bardpeak is the default queue
 

From 8d5b661671ed383040dc70a17fbb1e358c5b11ef Mon Sep 17 00:00:00 2001
From: Engin Kayraklioglu
Date: Wed, 6 Dec 2023 10:15:02 -0800
Subject: [PATCH 3/5] Set CHPL_GPU_ARCH, too

Signed-off-by: Engin Kayraklioglu
---
 util/cron/test-gpu-ex-rocm-54.bash | 1 +
 1 file changed, 1 insertion(+)

diff --git a/util/cron/test-gpu-ex-rocm-54.bash b/util/cron/test-gpu-ex-rocm-54.bash
index b97e8cfa93bc..0ce4bbb4cc85 100755
--- a/util/cron/test-gpu-ex-rocm-54.bash
+++ b/util/cron/test-gpu-ex-rocm-54.bash
@@ -14,6 +14,7 @@ export CHPL_LOCALE_MODEL=gpu
 export CHPL_LAUNCHER_PARTITION=bardpeak # bardpeak is the default queue
 
 export CHPL_GPU=amd # also detected by default
+export CHPL_GPU_ARCH=gfx90a
 
 export CHPL_NIGHTLY_TEST_DIRS="gpu/native"
 

From 3be68f049d2fb8a88e0904daecef4f494f417d81 Mon Sep 17 00:00:00 2001
From: Engin Kayraklioglu
Date: Wed, 6 Dec 2023 12:07:14 -0800
Subject: [PATCH 4/5] Apply the patch to suppress strict prototype warnings
 when compiling ROCm headers

Signed-off-by: Engin Kayraklioglu
---
NOTE(review): this series was recovered from a whitespace-collapsed copy, so
the leading whitespace of the context lines in this hunk is reconstructed
(spaces before the continuation line, a tab before the recipe line). Confirm
against runtime/src/gpu/amd/Makefile.share, or apply with
`git apply --ignore-whitespace`.

 runtime/src/gpu/amd/Makefile.share | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/runtime/src/gpu/amd/Makefile.share b/runtime/src/gpu/amd/Makefile.share
index 0a20edadafd1..a89cd86c9fa1 100644
--- a/runtime/src/gpu/amd/Makefile.share
+++ b/runtime/src/gpu/amd/Makefile.share
@@ -23,6 +23,9 @@ GPU_OBJS = $(addprefix $(GPU_OBJDIR)/,$(addsuffix .o,$(basename $(GPU_SRCS))))
 
 RUNTIME_CXXFLAGS += -x hip --offload-arch=$(CHPL_MAKE_GPU_ARCH)
 
+# Some ROCm headers have `foo()`, suppress warnings/errors from them
+RUNTIME_CFLAGS += -Wno-strict-prototypes
+
 $(RUNTIME_OBJ_DIR)/gpu-amd-reduce.o: gpu-amd-reduce.cc \
                                          $(RUNTIME_OBJ_DIR_STAMP)
 	PATH=$(PATH):$(CHPL_MAKE_ROCM_PATH)/llvm/bin $(CXX) -c -std=c++14 $(RUNTIME_CXXFLAGS) $(RUNTIME_INCLS) -o $@ $<

From 0f470bbc596d3456eaf7091908ed27ab3a1f0f26 Mon Sep 17 00:00:00 2001
From: Engin Kayraklioglu
Date: Wed, 6 Dec 2023 14:55:50 -0800
Subject: [PATCH 5/5] Unset CHPL_LLVM_CONFIG

Signed-off-by: Engin Kayraklioglu
---
NOTE(review): the added line originally read `unset CHLP_LLVM_CONFIG`, a
transposition of CHPL_LLVM_CONFIG that leaves the real variable set and makes
this patch a no-op. The name is corrected below; the post-image blob id on the
`index` line predates that correction.

 util/cron/test-gpu-ex-rocm-54.bash | 1 +
 1 file changed, 1 insertion(+)

diff --git a/util/cron/test-gpu-ex-rocm-54.bash b/util/cron/test-gpu-ex-rocm-54.bash
index 0ce4bbb4cc85..1009f97fba69 100755
--- a/util/cron/test-gpu-ex-rocm-54.bash
+++ b/util/cron/test-gpu-ex-rocm-54.bash
@@ -10,6 +10,7 @@ module load rocm/5.4.3 # pin to rocm 5.4.3
 
 export CHPL_COMM=none
 export CHPL_LLVM=bundled
+unset CHPL_LLVM_CONFIG # we need this to avoid warnings
 export CHPL_LOCALE_MODEL=gpu
 export CHPL_LAUNCHER_PARTITION=bardpeak # bardpeak is the default queue
 