acts-project · Chamodya-ka · Sep 14, 2022 · Sep 21, 2022 · Sep 29, 2022 · Sep 29, 2022
diff --git a/.github/workflows/multi_process_script_test.yml b/.github/workflows/multi_process_script_test.yml
@@ -0,0 +1,41 @@
+# CI smoke test for the multi-process benchmark scripts in multi_process/.
+# Builds the project with CUDA enabled inside the CUDA container image, then
+# runs auto_benchmark.sh once per backend with every size option set to 1.
+name: MPS Test
+
+on:
+  push:
+  pull_request:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  builds:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/acts-project/ubuntu2004_cuda:v30
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true
+          lfs: true
+      - name: Configure
+        run: |
+          cmake -S "${GITHUB_WORKSPACE}" -B build -DCMAKE_BUILD_TYPE=Release -DTRACCC_BUILD_CUDA=ON
+      - name: Build
+        run: |
+          cmake --build build
+      # auto_benchmark.sh options: -p max processes, -t threads per core,
+      # -c cores, -e events per process, -g GPUs, -d data directory,
+      # -r backend selector (0 = CUDA, anything else = CPU).
+      # NOTE(review): with -r0 the scripts start nvidia-cuda-mps-control and run
+      # the CUDA example, which presumably needs a GPU on the runner - confirm.
+      - name: Auto Benchmark Cuda
+        run: |
+          cd multi_process
+          ./auto_benchmark.sh -p1 -t1 -c1 -e1 -g1 -r0 -d"${GITHUB_WORKSPACE}/data"
+      - name: Auto Benchmark CPU
+        run: |
+          cd multi_process
+          ./auto_benchmark.sh -p1 -t1 -c1 -e1 -g1 -r1 -d"${GITHUB_WORKSPACE}/data"
diff --git a/multi_process/auto_benchmark.sh b/multi_process/auto_benchmark.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+#
+# Sweep driver for the multi-process throughput benchmark.
+#
+# For each process count j = 1..max_proc, runs benchmark_cpu.sh (-r != 0) or
+# benchmark_cuda.sh (-r 0) with max_events events per process. Stops at the
+# first run that exits non-zero and propagates that status.
+#
+# Options:
+#   -p N     upper bound on the number of concurrent processes (default 16)
+#   -e N     number of events per process (default 150)
+#   -c N     number of physical cores (default 1)
+#   -t N     number of threads per physical core (default 1)
+#   -g N     number of GPUs, used by CUDA runs only (default 1)
+#   -d PATH  data directory (default ../data)
+#   -r N     0 benchmarks CUDA, any other value benchmarks the CPU (default 0)
+
+max_proc=16     # number of processes upper bound
+max_events=150  # maximum number of events per process
+increment=10    # event step size
+cores=1         # number of physical cores
+threads=1       # number of threads per physical core
+gpu=1           # number of gpus
+path='../data'  # data path
+run_cpu=0       # 0 = cuda, anything else = cpu
+result=0        # exit status of the most recent benchmark run
+
+while getopts r:c:t:p:e:g:d: flag;
+do
+    case "${flag}" in
+        c) cores=${OPTARG};;
+        t) threads=${OPTARG};;
+        p) max_proc=${OPTARG};;
+        e) max_events=${OPTARG};;
+        g) gpu=${OPTARG};;
+        d) path=${OPTARG};;
+        r) run_cpu=${OPTARG};;
+    esac
+done
+
+# Shut down the CUDA MPS control daemon that benchmark_cuda.sh starts.
+# There is nothing to stop after CPU-only runs.
+stop_mps() {
+    if [ "$run_cpu" = 0 ]; then
+        echo quit | nvidia-cuda-mps-control
+    fi
+}
+
+echo "$max_proc $max_events";
+# NOTE(review): the event loop starts at max_events, so it executes exactly once
+# and $increment never takes effect; confirm whether a sweep from $increment up
+# to max_events was intended.
+for((i=max_events;i<=max_events;i+=increment))
+do
+    echo "starting to benchmark with $i events";
+    for((j=1;j<=max_proc;j+=1))
+    do
+        echo "starting new run with $j processes";
+        if [ "$run_cpu" != 0 ];then
+            ./benchmark_cpu.sh -p"$path" -n"$j" -e"$i" -c"$cores" -t"$threads"
+            result=$?
+        else
+            ./benchmark_cuda.sh -p"$path" -n"$j" -e"$i" -c"$cores" -t"$threads" -g"$gpu"
+            result=$?
+        fi
+        if [ "$result" != 0 ];then
+            # Do not leave the MPS daemon behind when aborting the sweep.
+            stop_mps
+            exit "$result"
+        fi
+        sleep 1
+    done
+done
+stop_mps
+exit "$result"
diff --git a/multi_process/benchmark_cpu.sh b/multi_process/benchmark_cpu.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Runs num_proc concurrent instances of traccc_seq_example and passes the
+# wall-clock time of the concurrent phase to log_data.py.
+#
+# Options:
+#   -n N     number of processes to run concurrently (default 1)
+#   -e N     number of events each process computes (default 1)
+#   -c N     number of cores (default 1)
+#   -t N     number of threads per core (default 1)
+#   -p PATH  data directory, exported as TRACCC_TEST_DATA_DIR
+#
+# Exits non-zero when the warm-up run or any concurrent process fails.
+
+num_proc=1     # number of processes expected to run concurrently
+events=1       # number of events each process will compute
+cores=1        # number of cores (sockets)
+threads=1      # number of threads per core
+datapath=""    # data dir
+while getopts n:e:c:t:p: flag;
+do
+    case "${flag}" in
+        n) num_proc=${OPTARG};;
+        e) events=${OPTARG};;
+        c) cores=${OPTARG};;
+        t) threads=${OPTARG};;
+        p) datapath=${OPTARG};;
+    esac
+done
+echo "$datapath"
+echo "number of processes : $num_proc";
+echo "number of events : $events";
+export TRACCC_TEST_DATA_DIR="$datapath"
+
+# Command line of the CPU example; shared by the warm-up and the timed runs.
+example_cmd=(
+    ../build/bin/traccc_seq_example
+    --detector_file=tml_detector/trackml-detector.csv
+    --digitization_config_file=tml_detector/default-geometric-config-generic.json
+    --input_directory=tml_full/ttbar_mu200/
+    --events="$events"
+    --input-binary
+)
+
+# warm up run
+"${example_cmd[@]}"
+result=$?
+echo "result : $result"
+# end warm up run
+
+# Start the clock only after the warm-up, as benchmark_cuda.sh does.
+Tstart=$(date "+%s.%3N")
+pids=()
+for((i=0;i<num_proc;i++))
+do
+    # processor id is only reported; the process is not pinned to it
+    p=$((i % (cores * threads)))
+    echo " processor id $p";
+    "${example_cmd[@]}" &
+    pids+=("$!")
+done
+# Wait on each process individually: a bare `wait` always returns 0 and would
+# hide failures of the concurrent runs.
+for pid in "${pids[@]}"
+do
+    wait "$pid" || result=$?
+done
+Tend=$(date "+%s.%3N")
+elapsed=$(echo "scale=3; $Tend - $Tstart" | bc)
+python3 log_data.py "$num_proc" "$events" "$elapsed" "$cores" "$threads" cpu
+echo "Elapsed: $elapsed s"
+exit "$result"
diff --git a/multi_process/benchmark_cuda.sh b/multi_process/benchmark_cuda.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Enables CUDA MPS, then runs num_proc concurrent instances of
+# traccc_seq_example_cuda (assigned to GPUs round-robin) and passes the
+# wall-clock time of the concurrent phase to log_data.py.
+#
+# Options:
+#   -n N     number of processes to run concurrently (default 1)
+#   -e N     number of events each process computes (default 1)
+#   -c N     number of cores (default 1)
+#   -t N     number of threads per core (default 1)
+#   -g N     number of GPUs (default 1)
+#   -p PATH  data directory, exported as TRACCC_TEST_DATA_DIR
+#
+# Exits with the MPS start-up status if the daemon cannot be started; otherwise
+# exits non-zero when the warm-up run or any concurrent process fails.
+# This script does not stop the MPS daemon it starts.
+
+num_proc=1     # number of processes expected to run concurrently
+events=1       # number of events each process will compute
+cores=1        # number of cores (sockets)
+threads=1      # number of threads per core
+datapath=""    # data dir
+numgpus=1      # number of gpus
+
+while getopts n:e:c:t:p:g: flag;
+do
+    case "${flag}" in
+        n) num_proc=${OPTARG};;
+        e) events=${OPTARG};;
+        c) cores=${OPTARG};;
+        t) threads=${OPTARG};;
+        p) datapath=${OPTARG};;
+        g) numgpus=${OPTARG};;
+    esac
+done
+echo "$datapath"
+echo "number of processes : $num_proc";
+echo "number of events : $events";
+export TRACCC_TEST_DATA_DIR="$datapath"
+
+# enable cuda mps
+nvidia-cuda-mps-control -d
+mps_ret=$?
+if [ "$mps_ret" -ne 0 ]; then
+    echo "Failed to enable CUDA mps : $mps_ret"
+    exit "$mps_ret"
+fi
+echo "Enabled CUDA mps"
+# end enable cuda mps
+
+# Command line of the CUDA example; shared by the warm-up and the timed runs.
+example_cmd=(
+    ../build/bin/traccc_seq_example_cuda
+    --detector_file=tml_detector/trackml-detector.csv
+    --digitization_config_file=tml_detector/default-geometric-config-generic.json
+    --input_directory=tml_full/ttbar_mu200/
+    --events="$events"
+    --input-binary
+)
+
+# warmup / test run on GPU 0, outside the timed section
+CUDA_VISIBLE_DEVICES=0 "${example_cmd[@]}"
+result=$?
+echo "result : $result"
+# end warm up/ test run
+
+Tstart=$(date "+%s.%3N")
+pids=()
+for((i=0;i<num_proc;i++))
+do
+    # processor id is only reported; the process is not pinned to it
+    p=$((i % (cores * threads)))
+    echo " processor id $p";
+    # get gpu id: spread the processes over the GPUs round-robin
+    gpu_id=$((i % numgpus))
+    echo " gpu $gpu_id";
+    # end get gpu id
+
+    # start job
+    CUDA_VISIBLE_DEVICES=$gpu_id "${example_cmd[@]}" &
+    pids+=("$!")
+done
+# Wait on each process individually: a bare `wait` always returns 0 and would
+# hide failures of the concurrent runs.
+for pid in "${pids[@]}"
+do
+    wait "$pid" || result=$?
+done
+Tend=$(date "+%s.%3N")
+elapsed=$(echo "scale=3; $Tend - $Tstart" | bc)
+python3 log_data.py "$num_proc" "$events" "$elapsed" "$cores" "$threads" cuda
+echo "Elapsed: $elapsed s"
+exit "$result"
diff --git a/multi_process/log_data.py b/multi_process/log_data.py
@@ -0,0 +1,17 @@
+from csv import writer
+import sys
+
+def append_list_as_row(list_of_elem, file_name="mps_throughput.csv"):
+    """Append ``list_of_elem`` to ``file_name`` as a single CSV row.
+
+    The file is opened in append mode, so existing rows are kept.
+    """
+    with open(file_name, 'a+', newline='') as csv_file:
+        writer(csv_file).writerow(list_of_elem)
+
+
+# Script entry point: append argv[1:6] as one row to mps_throughput_<argv[6]>.csv.
+if __name__ == "__main__":
+    if len(sys.argv) < 7 or sys.argv[6] not in ("cpu", "cuda"):
+        sys.exit("usage: log_data.py NUM_PROC EVENTS ELAPSED CORES THREADS cpu|cuda")
+    append_list_as_row(sys.argv[1:6], "mps_throughput_%s.csv" % sys.argv[6])