docker-test-gpu #134
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GPU smoke test for the aikit docker images.
name: docker-test-gpu

# Runs only when started manually (Actions UI, API or CLI).
on:
  workflow_dispatch:

# GITHUB_TOKEN gets read-only access to every permission scope.
permissions: read-all
jobs:
  # Builds the aikit image and one test model image per backend, starts the
  # model container with GPU access, and smoke-tests the OpenAI-compatible
  # chat completions endpoint it exposes on port 8080.
  test:
    runs-on: self-hosted
    timeout-minutes: 240
    strategy:
      # Keep testing the remaining backends when one of them fails.
      fail-fast: false
      # Run one matrix leg at a time. NOTE(review): presumably because every
      # leg uses the same container name and host port 8080 -- confirm.
      max-parallel: 1
      matrix:
        backend:
          - llama-cuda
          # - exllama
          - exllama2-gptq
          - exllama2-exl2
          # - mamba
    steps:
      # Remove regular files and dot-files left in the workspace; failures
      # are deliberately ignored.
      - name: cleanup workspace
        run: |
          rm -rf ./* || true
          rm -rf ./.??* || true

      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332  # v4.1.7

      # use default docker driver builder with containerd image store for local aikit image
      # these must be set up on the runner before running this test
      - run: docker buildx use default

      - name: build aikit
        run: |
          docker buildx build . -t aikit:test \
            --load --provenance=false --progress plain

      # The aikitfile is selected by backend name.
      - name: build test model
        run: |
          docker buildx build . -t testmodel:test \
            -f test/aikitfile-${{ matrix.backend }}.yaml \
            --load --provenance=false --progress plain

      - name: list images
        run: docker images

      # Detached and auto-removed on stop; the later steps refer to the
      # container by its fixed name "testmodel".
      - name: run test model
        run: docker run --name testmodel -d --rm -p 8080:8080 --gpus all testmodel:test

      # Each test step below sends one chat completion request and fails
      # unless the response carries a non-empty "choices" array. curl retries
      # on any error (up to 10 times) and --fail turns HTTP error statuses
      # into a non-zero exit.
      - name: run test (gguf)
        if: matrix.backend == 'llama-cuda'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors \
            http://127.0.0.1:8080/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3-8b-instruct",
              "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
            }')
          echo "$result"
          # jq prints the literal "null" for a missing key, so testing its
          # output for emptiness cannot detect a missing field; use jq's exit
          # status (-e) instead. Invalid JSON also makes jq exit non-zero.
          if ! echo "$result" | jq -e '(.choices | type) == "array" and (.choices | length) > 0' > /dev/null; then
            echo "response does not contain any choices" >&2
            exit 1
          fi

      - name: run test (exl2/gptq)
        if: matrix.backend == 'exllama2-gptq' || matrix.backend == 'exllama2-exl2' || matrix.backend == 'exllama'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors \
            http://127.0.0.1:8080/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-2-7b-chat",
              "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
            }')
          echo "$result"
          # See "run test (gguf)": rely on jq's exit status, not on its output
          # being empty, because a missing key is printed as "null".
          if ! echo "$result" | jq -e '(.choices | type) == "array" and (.choices | length) > 0' > /dev/null; then
            echo "response does not contain any choices" >&2
            exit 1
          fi

      - name: run test (mamba)
        if: matrix.backend == 'mamba'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors \
            http://127.0.0.1:8080/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "mamba-chat",
              "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
            }')
          echo "$result"
          # See "run test (gguf)": rely on jq's exit status, not on its output
          # being empty, because a missing key is printed as "null".
          if ! echo "$result" | jq -e '(.choices | type) == "array" and (.choices | length) > 0' > /dev/null; then
            echo "response does not contain any choices" >&2
            exit 1
          fi

      # docker logs replays the container's stderr on its own stderr, so both
      # streams are redirected into the log file.
      - name: save logs
        if: always()
        run: docker logs testmodel > /tmp/docker-${{ matrix.backend }}.log 2>&1

      - run: docker stop testmodel
        if: always()

      # Best-effort cleanup of images, containers and volumes on the runner.
      - run: docker system prune -a -f --volumes || true
        if: always()

      - name: publish test artifacts
        if: always()
        uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b  # v4.3.4
        with:
          name: test-${{ matrix.backend }}
          path: |
            /tmp/*.log