update examples

Signed-off-by: Wenxin Zhang <[email protected]>
opea-project · May 13, 2024 · 93cccf9 · 93cccf9
1 parent e999c21
commit 93cccf9
Show file tree

Hide file tree

Showing 5 changed files with 15 additions and 11 deletions.
diff --git a/.github/workflows/scripts/models/collect_log.sh b/.github/workflows/scripts/models/collect_log.sh
@@ -21,6 +21,8 @@ PATTERN='[-a-zA-Z0-9_]*='
 PERF_STABLE_CHECK=true
 for i in "$@"; do
     case $i in
+        --datasets*)
+            datasets=`echo $i | sed "s/${PATTERN}//"`;;
         --device=*)
             device=`echo $i | sed "s/${PATTERN}//"`;;
         --model=*)
@@ -32,14 +34,14 @@ for i in "$@"; do
     esac
 done
 
-output_file="/GenAIEval/${device}/${model}/${device}-${model}-${tasks}.log"
+log_file="/GenAIEval/${device}/${model}/${device}-${model}-${tasks}-${datasets}.log"
 $BOLD_YELLOW && echo "-------- Collect logs --------" && $RESET
 
 echo "working in"
 pwd
-if [[ ! -f ${output_file} ]]; then
-    echo "${device};${model};${tasks};;${logfile}" >> ${WORKSPACE}/summary.log
+if [[ ! -f ${log_file} ]]; then
+    echo "${device};${model};${tasks};${datasets};;${logfile}" >> ${WORKSPACE}/summary.log
 else
-    acc=$(grep -Po "Accuracy .* is:\\s+(\\d+(\\.\\d+)?)" ${acc_log_name} | head -n 1 | sed 's/.*://;s/[^0-9.]//g')
-    echo "${device};${model};${tasks};${acc};${logfile}" >> ${WORKSPACE}/summary.log
+    acc=$(grep -Po "Accuracy .* is:\\s+(\\d+(\\.\\d+)?)" ${log_file} | head -n 1 | sed 's/.*://;s/[^0-9.]//g')
+    echo "${device};${model};${tasks};${datasets};${acc};${logfile}" >> ${WORKSPACE}/summary.log
 fi
diff --git a/.github/workflows/scripts/models/model_test.sh b/.github/workflows/scripts/models/model_test.sh
@@ -42,9 +42,9 @@ $BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET
 main() {
     case ${tasks} in
         "text-generation")
-            working_dir="/GenAIEval/evaluation/lm_evaluation_harness";;
+            working_dir="/GenAIEval/GenAIEval/evaluation/lm_evaluation_harness/examples";;
         "code-generation")
-            working_dir="/GenAIEval/evaluation/bigcode_evaluation_harness";;
+            working_dir="/GenAIEval/GenAIEval/evaluation/bigcode_evaluation_harness/examples";;
         *)
             echo "Not suppotted task"; exit 1;;
     esac
@@ -62,11 +62,13 @@ function prepare() {
     else
         echo "Not found requirements.txt file."
     fi
+    if [[ ${device} == "hpu" ]]; then
+        pip install --upgrade-strategy eager optimum[habana]
+    fi
 }
 
 function run_benchmark() {
     cd ${working_dir}
-    pip install --upgrade-strategy eager optimum[habana]
     overall_log="${log_dir}/${device}-${model}-${tasks}-${datasets}.log"
     python main.py \
         --model hf \

diff --git a/GenAIEval/evaluation/bigcode_evaluation_harness/main.py b/GenAIEval/evaluation/bigcode_evaluation_harness/examples/main.py
diff --git a/GenAIEval/evaluation/lm_evaluation_harness/main.py b/GenAIEval/evaluation/lm_evaluation_harness/examples/main.py
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ For evaluating the models on text-generation tasks, we follow the [lm-evaluation
 ```shell
 
 # pip install --upgrade-strategy eager optimum[habana]
-cd GenAIEval/evaluation/lm_evaluation_harness
+cd GenAIEval/evaluation/lm_evaluation_harness/examples
 python main.py \
     --model gaudi-hf \
     --model_args pretrained=EleutherAI/gpt-j-6B \
@@ -29,7 +29,7 @@ python main.py \
 ##### CPU
 ```shell
 
-cd GenAIEval/evaluation/lm_evaluation_harness
+cd GenAIEval/evaluation/lm_evaluation_harness/examples
 python main.py \
     --model hf \
     --model_args pretrained=EleutherAI/gpt-j-6B \
@@ -57,7 +57,7 @@ For evaluating the models on coding tasks or specifically coding LLMs, we follow
 #### command line usage
 
 ```shell
-cd GenAIEval/evaluation/bigcode_evaluation_harness
+cd GenAIEval/evaluation/bigcode_evaluation_harness/examples
 python main.py \
     --model "codeparrot/codeparrot-small" \
     --tasks "humaneval" \