Skip to content

Commit

Permalink
Updating GitHub CI.
Browse files Browse the repository at this point in the history
  • Loading branch information
Treece Burgess committed Nov 8, 2024
1 parent 4a0a693 commit af68eb2
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 53 deletions.
59 changes: 34 additions & 25 deletions .github/workflows/ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ module load $COMPILER

cd src

if [ "$COMPONENTS" = "cuda nvml lmsensors io net powercap appio lustre coretemp rapl sde stealtime" ]; then
# Set necessary environment variables for lmsensors
MATCH_COMPONENTS="cuda nvml rocm rocm_smi powercap powercap_ppc rapl sensors_ppc infiniband net appio io lustre stealtime coretemp lmsensors mx sde"
# load necessary environment variables for the above set of MATCH_COMPONENTS
if [ "$COMPONENTS" = "$MATCH_COMPONENTS" ]; then
# lmsensors
wget https://github.com/groeck/lm-sensors/archive/V3-4-0.tar.gz
tar -zxf V3-4-0.tar.gz
cd lm-sensors-3-4-0
Expand All @@ -27,45 +29,52 @@ if [ "$COMPONENTS" = "cuda nvml lmsensors io net powercap appio lustre coretemp
export PAPI_LMSENSORS_ROOT=lm
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PAPI_LMSENSORS_ROOT/lib

# Set necessary environment variables for cuda and nvml
# rocm and rocm_smi
export PAPI_ROCM_ROOT=`ls -d /opt/rocm-*`
export PAPI_ROCMSMI_ROOT=$PAPI_ROCM_ROOT/rocm_smi

# cuda and nvml
module load cuda
export PAPI_CUDA_ROOT=$ICL_CUDA_ROOT
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PAPI_CUDA_ROOT/extras/CUPTI/lib64

fi

if [ "$COMPONENTS" = "rocm rocm_smi" ]; then
export PAPI_ROCM_ROOT=`ls -d /opt/rocm-*`
export PAPI_ROCMSMI_ROOT=$PAPI_ROCM_ROOT/rocm_smi
fi
#MATCH_COMPONENTS="rocm rocm_smi"
# load necessary environment variables for testing of the rocm and rocm_smi components
#if [ "$COMPONENTS" = "$MATCH_COMPONENTS" ]; then
# export PAPI_ROCM_ROOT=`ls -d /opt/rocm-*`
# export PAPI_ROCMSMI_ROOT=$PAPI_ROCM_ROOT/rocm_smi
#fi

# test linking with or without --with-shlib-tools
# Build the configure command line once so both variants share the same
# base options; every expansion is quoted so an empty or space-containing
# value (e.g. the component list) is passed as exactly one argument.
CONFIGURE_ARGS=(--with-debug="$DEBUG" --enable-warnings --with-components="$COMPONENTS")
if [ "$SHLIB" = "with" ]; then
    # only the "with" variant links the utilities/tests via shlib tools
    CONFIGURE_ARGS+=(--with-shlib-tools)
fi
./configure "${CONFIGURE_ARGS[@]}"

#touch /tmp/component_names.txt
#for component in $COMPONENTS; do
# echo "Name: $component" >> /tmp/component_names.txt
#done

CHECK_COMPONENTS=$(echo $COMPONENTS | sed 's/ /|/g')
echo $CHECK_COMPONENTS

make -j4

# run PAPI utilities
echo "Running papi_component_avail: "
utils/papi_component_avail

# Make sure every component in the list is active
utils/papi_component_avail | grep -A1000 'Active components' | grep -E $CHECK_COMPONENTS

#if [ "$COMPONENTS" != "cuda nvml" ]; then
# echo Testing
# ./run_tests.sh
#fi
# check list of active components
# Collect the component names printed after the 'Active components' header
# of papi_component_avail into one space-separated string.
ACTIVE_COMPONENTS=$(utils/papi_component_avail | grep -A1000 'Active components' | grep "Name:" | sed 's/Name: //' | awk '{print $1}' | paste -sd ' ' -)
EXPECTED_ACTIVE_COMPONENTS="perf_event perf_event_uncore cuda nvml powercap net appio io stealtime coretemp lmsensors sde sysdetect"
# Compare against the variable (not a second hard-coded list) so the check
# and the report below always agree on what is expected.
if [ "$ACTIVE_COMPONENTS" != "$EXPECTED_ACTIVE_COMPONENTS" ]; then
    # List each expected component that is absent from the active list.
    # Matching on space-padded strings avoids partial-name hits
    # (e.g. "perf_event" inside "perf_event_uncore").
    DISABLED_COMPONENTS=""
    for component in $EXPECTED_ACTIVE_COMPONENTS; do
        case " $ACTIVE_COMPONENTS " in
            *" $component "*) ;;
            *) DISABLED_COMPONENTS="${DISABLED_COMPONENTS}${component}"$'\n' ;;
        esac
    done
    printf 'Components are disabled that should be active, these are:\n%s' "$DISABLED_COMPONENTS"
    # Also show what was actually reported, in case the mismatch is caused by
    # ordering or by unexpected extra components rather than a missing one.
    printf 'Active components reported: %s\n' "$ACTIVE_COMPONENTS"
    exit 1
fi

if [ "$SHLIB" = "with" ]; then
echo "Running single component test for active components"
./run_tests_shlib.sh TESTS_QUIET
# without '--with-shlib-tools' in ./configure
if [ "$SHLIB" = "without" ]; then
echo "Running full test suite for active components"
./run_tests.sh
# with '--with-shlib-tools' in ./configure
else
echo "Running single component test for active components"
./run_tests_shlib.sh TESTS_QUIET
fi
30 changes: 3 additions & 27 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,42 +13,18 @@ jobs:
papi_component:
strategy:
matrix:
components: [cuda nvml lmsensors io net powercap appio lustre coretemp rapl sde stealtime]
components: [cuda nvml rocm rocm_smi powercap powercap_ppc rapl sensors_ppc infiniband net appio io lustre stealtime coretemp lmsensors mx sde]
debug: [yes, no]
shlib: [with, without]
# machine: [cpu_intel, gpu_amd, gpu_intel]
fail-fast: false
#runs-on: ${{matrix.machine}}
runs-on: [self-hosted, cpu_intel, gpu_nvidia]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- name: Test
run: .github/workflows/ci.sh "${{matrix.components}}" ${{matrix.debug}} ${{matrix.shlib}}
papi_component_amd:
strategy:
matrix:
components: [rocm rocm_smi]
debug: [yes, no]
shlib: [with, without]
fail-fast: false
runs-on: [self-hosted, gpu_amd]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- name: Test
run: .github/workflows/ci.sh "${{matrix.components}}" ${{matrix.debug}} ${{matrix.shlib}}
papi_component_intel_gpu:
strategy:
matrix:
components: [intel_gpu]
debug: [yes, no]
shlib: [with, without]
fail-fast: false
runs-on: [self-hosted, gpu_intel]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- name: Test
run: .github/workflows/ci.sh "${{matrix.components}}" ${{matrix.debug}} ${{matrix.shlib}}
papi_spack:
runs-on: cpu
timeout-minutes: 60
Expand Down
18 changes: 17 additions & 1 deletion src/ctests/all_native_events.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,20 @@ main( int argc, char **argv )
PAPI_event_info_t info, info1;
const PAPI_hw_info_t *hwinfo = NULL;
const PAPI_component_info_t* cmpinfo;
char enableCuda[PAPI_MIN_STR_LEN] = "no";
int event_code;
int numcmp, cid;
int quiet;

/* Set quiet variable */
quiet=tests_quiet( argc, argv );

for (i = 0; i < argc; i++) {
if (strncmp(argv[i], "--enable_cuda_evts=", 19) == 0) {
strcpy(enableCuda, argv[i] + 19);
}
}

/* Init PAPI library */
retval = PAPI_library_init( PAPI_VER_CURRENT );
if ( retval != PAPI_VER_CURRENT ) {
Expand All @@ -107,7 +114,7 @@ main( int argc, char **argv )
numcmp = PAPI_num_components( );

int rocm_id = PAPI_get_component_index("rocm");

int cuda_id = PAPI_get_component_index("cuda");
/* Loop through all components */
for( cid = 0; cid < numcmp; cid++ ) {

Expand All @@ -117,6 +124,15 @@ main( int argc, char **argv )
continue;
}

/* possible option to fix the long run time of this file for CI, by completely skipping cuda tests */
// if (cid == cuda_id)
// continue;

/* possible option to fix the long run time of this file for CI, by using an option */
if (strcmp(enableCuda, "no") == 0 && cid == cuda_id) {
continue;
}

cmpinfo = PAPI_get_component_info( cid );
if (cmpinfo == NULL) {
test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 2 );
Expand Down

0 comments on commit af68eb2

Please sign in to comment.