Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MPS benchmarking scripts and test workflow #244

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .github/workflows/multi_process_script_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Builds the project with CUDA enabled inside the CUDA container image and
# then runs the multi-process benchmark driver once per backend.
name: MPS Test

on:
  push:
  pull_request:
    branches: [main]

  workflow_dispatch:

jobs:
  builds:
    runs-on: ubuntu-latest
    container:
      image: ghcr.io/acts-project/ubuntu2004_cuda:v30
    steps:
      # Submodules and LFS objects are fetched together with the sources.
      - uses: actions/checkout@v2
        with:
          submodules: true
          lfs: true
      - name: Configure
        run: cmake -S ${GITHUB_WORKSPACE} -B build -DCMAKE_BUILD_TYPE=Release -DTRACCC_BUILD_CUDA=ON
      - name: Build
        run: cmake --build build
      # auto_benchmark.sh calls its helper scripts through relative paths, so
      # it is started from inside multi_process/. -r0 selects the cuda backend.
      - name: Auto Benchmark Cuda
        run: |
          cd multi_process
          ./auto_benchmark.sh -p1 -t1 -c1 -e1 -g1 -r0 -d"${GITHUB_WORKSPACE}/data"
      # Same invocation with -r1, which selects the cpu backend.
      - name: Auto Benchmark CPU
        run: |
          cd multi_process
          ./auto_benchmark.sh -p1 -t1 -c1 -e1 -g1 -r1 -d"${GITHUB_WORKSPACE}/data"

50 changes: 50 additions & 0 deletions multi_process/auto_benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash
#
# Benchmark driver: for a given number of events per process, run the selected
# backend script once for every process count from 1 up to max_proc.
#
# Usage: ./auto_benchmark.sh [-p max_proc] [-e max_events] [-c cores]
#                            [-t threads] [-g gpus] [-d data_path] [-r run_cpu]
#
# Run it from the directory that contains benchmark_cpu.sh and
# benchmark_cuda.sh: both are invoked through relative paths.
#
# Exit status: 0 when every run succeeded, otherwise the status of the first
# failing run (the driver stops at the first failure); 2 on a usage error.

max_proc=16     # number of processes upper bound
max_events=150  # maximum number of events per process
increment=10    # event steps
cores=1         # number of physical cores
threads=1       # number of threads per physical core
gpu=1           # number of gpus
path='../data'  # data path
run_cpu=0       # 1-cpu and 0-cuda
result=0        # status of the most recent run; 0 if no run was started

usage() {
  echo "Usage: $0 [-p max_proc] [-e max_events] [-c cores] [-t threads]" \
    "[-g gpus] [-d data_path] [-r run_cpu]" >&2
}

while getopts r:c:t:p:e:g:d: flag; do
  case "${flag}" in
    c) cores=${OPTARG} ;;
    t) threads=${OPTARG} ;;
    p) max_proc=${OPTARG} ;;
    e) max_events=${OPTARG} ;;
    g) gpu=${OPTARG} ;;
    d) path=${OPTARG} ;;
    r) run_cpu=${OPTARG} ;;
    *)
      usage
      exit 2
      ;;
  esac
done

# benchmark_cuda.sh starts the CUDA MPS control daemon, so ask it to quit on
# every exit path of a cuda run, including the early exit after a failure.
# benchmark_cpu.sh never starts the daemon, so cpu runs have nothing to stop.
if [[ "$run_cpu" == 0 ]]; then
  trap 'echo quit | nvidia-cuda-mps-control' EXIT
fi

echo "$max_proc $max_events"
# NOTE(review): the loop starts at max_events, so it runs exactly once with
# i == max_events and 'increment' never takes effect. The CI workflow relies
# on '-e1' producing a single run, so this is kept as is; confirm whether a
# sweep (e.g. starting at $increment) was intended.
for ((i = max_events; i <= max_events; i += increment)); do
  echo "starting to benchmark with $i events per process"
  for ((j = 1; j <= max_proc; j += 1)); do
    echo "starting new run with $j processes"
    if [[ "$run_cpu" != 0 ]]; then
      ./benchmark_cpu.sh -p"$path" -n"$j" -e"$i" -c"$cores" -t"$threads"
      result=$?
    else
      ./benchmark_cuda.sh -p"$path" -n"$j" -e"$i" -c"$cores" -t"$threads" -g"$gpu"
      result=$?
    fi
    if [[ "$result" != 0 ]]; then
      exit "$result"
    fi
    sleep 1
  done
done

exit "$result"
46 changes: 46 additions & 0 deletions multi_process/benchmark_cpu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
#
# Run one warm-up instance of traccc_seq_example, then num_proc concurrent
# instances, and log the wall-clock time of the concurrent phase through
# log_data.py (backend tag "cpu").
#
# Usage: ./benchmark_cpu.sh [-n num_proc] [-e events] [-c cores] [-t threads]
#                           [-p data_path]
#
# Exit status: the warm-up status if it failed, otherwise the status of the
# first failing benchmark process, otherwise 0; 2 on a usage error.

num_proc=1   # number of processes expected to run concurrently
events=1     # number of event each process will compute
cores=1      # number of cores (sockets)
threads=1    # number of threads per core
datapath=""  # data dir

while getopts n:e:c:t:p: flag; do
  case "${flag}" in
    n) num_proc=${OPTARG} ;;
    e) events=${OPTARG} ;;
    c) cores=${OPTARG} ;;
    t) threads=${OPTARG} ;;
    p) datapath=${OPTARG} ;;
    *)
      echo "Usage: $0 [-n num_proc] [-e events] [-c cores] [-t threads] [-p data_path]" >&2
      exit 2
      ;;
  esac
done

echo "$datapath"
echo "number of processes : $num_proc"
echo "number of events : $events"
export TRACCC_TEST_DATA_DIR=$datapath

# Command line shared by the warm-up run and every benchmark process.
seq_example=(
  ../build/bin/traccc_seq_example
  --detector_file=tml_detector/trackml-detector.csv
  --digitization_config_file=tml_detector/default-geometric-config-generic.json
  --input_directory=tml_full/ttbar_mu200/
  "--events=$events"
  --input-binary
)

# warm up run
"${seq_example[@]}" &
wait "$!"
result=$?
echo "result : $result"
# end warm up run

# Start the clock only after the warm-up so the logged time covers just the
# concurrent phase, the same way benchmark_cuda.sh measures it.
Tstart=$(date "+%s.%3N")

pids=()
for ((i = 0; i < num_proc; i++)); do
  # get processor id (only reported; the process is not pinned to it)
  p=$((i % (cores * threads)))
  echo " processor id $p"
  # end get processor id
  "${seq_example[@]}" &
  pids+=("$!")
done

# Wait for each process individually: a bare 'wait' discards the children's
# exit statuses, which would hide a crashed benchmark process.
for pid in "${pids[@]}"; do
  wait "$pid"
  status=$?
  if [[ "$status" != 0 && "$result" == 0 ]]; then
    result=$status
  fi
done

Tend=$(date "+%s.%3N")
elapsed=$(echo "scale=3; $Tend - $Tstart" | bc)
python3 log_data.py "$num_proc" "$events" "$elapsed" "$cores" "$threads" cpu
echo "Elapsed: $elapsed s"
exit "$result"
62 changes: 62 additions & 0 deletions multi_process/benchmark_cuda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash
#
# Start the CUDA MPS control daemon, run one warm-up instance of
# traccc_seq_example_cuda, then num_proc concurrent instances spread
# round-robin over the GPUs, and log the wall-clock time of the concurrent
# phase through log_data.py (backend tag "cuda").
#
# Usage: ./benchmark_cuda.sh [-n num_proc] [-e events] [-c cores] [-t threads]
#                            [-p data_path] [-g num_gpus]
#
# Exit status: the daemon's status if it could not be started; otherwise the
# warm-up status if it failed, otherwise the status of the first failing
# benchmark process, otherwise 0; 2 on a usage error.
# The daemon is left running on exit; auto_benchmark.sh sends it "quit".

num_proc=1   # number of processes expected to run concurrently
events=1     # number of event each process will compute
cores=1      # number of cores (sockets)
threads=1    # number of threads per core
datapath=""  # data dir
numgpus=1    # number of gpus

while getopts n:e:c:t:p:g: flag; do
  case "${flag}" in
    n) num_proc=${OPTARG} ;;
    e) events=${OPTARG} ;;
    c) cores=${OPTARG} ;;
    t) threads=${OPTARG} ;;
    p) datapath=${OPTARG} ;;
    g) numgpus=${OPTARG} ;;
    *)
      echo "Usage: $0 [-n num_proc] [-e events] [-c cores] [-t threads] [-p data_path] [-g num_gpus]" >&2
      exit 2
      ;;
  esac
done

echo "$datapath"
echo "number of processes : $num_proc"
echo "number of events : $events"
export TRACCC_TEST_DATA_DIR=$datapath

# enable cuda mps
# NOTE(review): this runs on every invocation, and auto_benchmark.sh invokes
# this script once per process count; confirm that starting the daemon while
# one is already running still returns 0, otherwise only the first run works.
nvidia-cuda-mps-control -d
mps_ret=$?
if [[ "$mps_ret" -ne 0 ]]; then
  echo "Failed to enabled CUDA mps : $mps_ret"
  exit "$mps_ret"
fi
echo "Enabled CUDA mps"
# end enable cuda mps

# Command line shared by the warm-up run and every benchmark process.
seq_example_cuda=(
  ../build/bin/traccc_seq_example_cuda
  --detector_file=tml_detector/trackml-detector.csv
  --digitization_config_file=tml_detector/default-geometric-config-generic.json
  --input_directory=tml_full/ttbar_mu200/
  "--events=$events"
  --input-binary
)

# warmup / test run (always on device 0, outside the timed section)
CUDA_VISIBLE_DEVICES=0 "${seq_example_cuda[@]}" &
wait "$!"
result=$?
echo "result : $result"
# end warm up/ test run

Tstart=$(date "+%s.%3N")
pids=()
for ((i = 0; i < num_proc; i++)); do
  # processor id is only reported; the process is not pinned to it
  p=$((i % (cores * threads)))
  echo " processor id $p"
  # get gpu id: processes are distributed round-robin over the GPUs
  gpu_id=$((i % numgpus))
  echo " gpu $gpu_id"
  # end get gpu id

  # start job
  CUDA_VISIBLE_DEVICES=$gpu_id "${seq_example_cuda[@]}" &
  pids+=("$!")
done

# Wait for each process individually: a bare 'wait' discards the children's
# exit statuses, which would hide a crashed benchmark process.
for pid in "${pids[@]}"; do
  wait "$pid"
  status=$?
  if [[ "$status" != 0 && "$result" == 0 ]]; then
    result=$status
  fi
done

Tend=$(date "+%s.%3N")
elapsed=$(echo "scale=3; $Tend - $Tstart" | bc)
python3 log_data.py "$num_proc" "$events" "$elapsed" "$cores" "$threads" cuda
echo "Elapsed: $elapsed s"
exit "$result"
17 changes: 17 additions & 0 deletions multi_process/log_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from csv import writer
import sys

def append_list_as_row(list_of_elem, file_name="mps_throughput.csv"):
    """Append ``list_of_elem`` as one CSV row at the end of ``file_name``.

    The file is opened in append mode, so it is created when it does not
    exist yet and rows already present are left untouched.
    """
    # newline='' lets the csv module control the line endings itself.
    with open(file_name, 'a+', newline='') as csv_file:
        writer(csv_file).writerow(list_of_elem)



# Command-line entry point.
# Usage: log_data.py NUM_PROC EVENTS ELAPSED CORES THREADS BACKEND
# The first five arguments are written verbatim as one CSV row; BACKEND
# ('cuda' or 'cpu') selects the output file.
if len(sys.argv) < 7:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("usage: log_data.py NUM_PROC EVENTS ELAPSED CORES THREADS {cuda|cpu}")

backend = sys.argv[6]
if backend == 'cuda':
    append_list_as_row(sys.argv[1:6], "mps_throughput_cuda.csv")
elif backend == 'cpu':
    append_list_as_row(sys.argv[1:6], "mps_throughput_cpu.csv")
else:
    # An unknown backend used to be ignored silently, dropping the measurement.
    sys.exit("log_data.py: unknown backend '%s' (expected 'cuda' or 'cpu')" % backend)