From fdebf3f8d7c1f6facf8636475a9a0181bf175baa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com>
Date: Sun, 21 Apr 2024 14:07:43 -0700
Subject: [PATCH] test: update gpu test for llama3 (#204)

Signed-off-by: Sertac Ozercan
---
 .github/workflows/test-docker-gpu.yaml | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test-docker-gpu.yaml b/.github/workflows/test-docker-gpu.yaml
index c3c285f1..3bb5d3be 100644
--- a/.github/workflows/test-docker-gpu.yaml
+++ b/.github/workflows/test-docker-gpu.yaml
@@ -47,8 +47,22 @@ jobs:
       - name: run test model
         run: docker run --name testmodel -d --rm -p 8080:8080 --gpus all testmodel:test
 
-      - name: run test
-        if: matrix.backend != 'mamba'
+      - name: run test (gguf)
+        if: matrix.backend == 'llama-cuda'
+        run: |
+          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+            "model": "llama-3-8b-chat",
+            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+          }')
+          echo $result
+
+          choices=$(echo "$result" | jq '.choices')
+          if [ -z "$choices" ]; then
+            exit 1
+          fi
+
+      - name: run test (exl2/gptq)
+        if: matrix.backend == 'exllama2-gptq' || matrix.backend == 'exllama2-exl2'
         run: |
           result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
             "model": "llama-2-7b-chat",
@@ -61,7 +75,7 @@ jobs:
             exit 1
           fi
 
-      - name: run test
+      - name: run test (mamba)
         if: matrix.backend == 'mamba'
         run: |
           result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{