From 56c1b594bfb60544c982c9f40fdbd4b9f935a18c Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 2 Dec 2024 03:47:25 +0000
Subject: [PATCH] feat: add qwq 32b preview

Signed-off-by: Sertac Ozercan
---
 .github/workflows/update-models-self.yaml |  1 +
 README.md                                 |  4 ++++
 models/qwq-32b-preview.yaml               | 19 +++++++++++++++++++
 scripts/parse-models.sh                   |  3 +--
 website/docs/premade-models.md            |  2 ++
 5 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 models/qwq-32b-preview.yaml

diff --git a/.github/workflows/update-models-self.yaml b/.github/workflows/update-models-self.yaml
index bb326614..ea35b7bb 100644
--- a/.github/workflows/update-models-self.yaml
+++ b/.github/workflows/update-models-self.yaml
@@ -23,6 +23,7 @@ jobs:
           - llama-3.1-70b-instruct
           - mixtral-8x7b-instruct
           - codestral-22b
+          - qwq-32b-preview
     runs-on: self-hosted
     timeout-minutes: 360
     steps:
diff --git a/README.md b/README.md
index 521398ab..c42c2d47 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,8 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
+| QwQ | | 32B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/qwq:32b` | `qwq-32b-preview` | [Apache 2.0](https://huggingface.co/Qwen/QwQ-32B-Preview/blob/main/LICENSE) |
+
 
 ### NVIDIA CUDA
 
@@ -114,8 +116,10 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
+| QwQ | | 32B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/qwq:32b` | `qwq-32b-preview` | [Apache 2.0](https://huggingface.co/Qwen/QwQ-32B-Preview/blob/main/LICENSE) |
 | 📸 Flux 1 Dev | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev` | `flux-1-dev` | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |
 
+
 ### Apple Silicon (experimental)
 
 > [!NOTE]
diff --git a/models/qwq-32b-preview.yaml b/models/qwq-32b-preview.yaml
new file mode 100644
index 00000000..b5e3cac6
--- /dev/null
+++ b/models/qwq-32b-preview.yaml
@@ -0,0 +1,19 @@
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+debug: true
+runtime: cuda
+models:
+  - name: qwq-32b-preview
+    source: https://huggingface.co/lmstudio-community/QwQ-32B-Preview-GGUF/resolve/main/QwQ-32B-Preview-Q4_K_M.gguf
+    sha256: 8389413ff15eabdeae824faa78ca433e7cd61a93a6bee4fb0e916cdb727efcda
+config: |
+  - name: qwq-32b-preview
+    backend: llama
+    parameters:
+      model: QwQ-32B-Preview-Q4_K_M.gguf
+    context_size: 8192
+    repeat_penalty: 1.05
+    flash_attention: true
+    f16: true
+    mmap: true
+    system_prompt: "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."
diff --git a/scripts/parse-models.sh b/scripts/parse-models.sh
index 873e10ae..b969763c 100755
--- a/scripts/parse-models.sh
+++ b/scripts/parse-models.sh
@@ -23,11 +23,10 @@ extract_model_type() {
     echo "$1" | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p'
 }
 
-for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "llama-3.2-1b-instruct" "llama-3.2-3b-instruct" "phi-3-3.8b" "phi-3.5-3.8b-instruct" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev"; do
+for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "llama-3.2-1b-instruct" "llama-3.2-3b-instruct" "phi-3-3.8b" "phi-3.5-3.8b-instruct" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev" "qwq-32b-preview"; do
     echo "Model: $MODEL"
     echo "  Name: $(extract_model_name "$MODEL")"
     echo "  Size: $(extract_model_size "$MODEL")"
     echo "  Type: $(extract_model_type "$MODEL")"
     echo
 done
-
diff --git a/website/docs/premade-models.md b/website/docs/premade-models.md
index 6bd3b29d..121c3445 100644
--- a/website/docs/premade-models.md
+++ b/website/docs/premade-models.md
@@ -23,6 +23,7 @@ Depending on your CPU capabilities, AIKit will automatically select the most opt
 | 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
+| QwQ | | 32B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/qwq:32b` | `qwq-32b-preview` | [Apache 2.0](https://huggingface.co/Qwen/QwQ-32B-Preview/blob/main/LICENSE) |
 
 ## NVIDIA CUDA
 
@@ -36,6 +37,7 @@ Depending on your CPU capabilities, AIKit will automatically select the most opt
 | 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
+| QwQ | | 32B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/qwq:32b` | `qwq-32b-preview` | [Apache 2.0](https://huggingface.co/Qwen/QwQ-32B-Preview/blob/main/LICENSE) |
 | 📸 Flux 1 Dev | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev` | `flux-1-dev` | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |
 
 :::note
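
---

For local validation, the new model definition can be built and smoke-tested roughly as follows. This is a sketch rather than part of the patch: the buildx invocation follows AIKit's create-your-own-image flow, the image tag qwq-32b-preview-test is an arbitrary placeholder, and a Q4_K_M 32B checkpoint needs roughly 20 GB of GPU memory.

    # Build an image from the new model definition (the #syntax line in
    # models/qwq-32b-preview.yaml selects the aikit BuildKit frontend).
    docker buildx build . -t qwq-32b-preview-test -f models/qwq-32b-preview.yaml --load

    # Run it and query the OpenAI-compatible chat completions endpoint.
    docker run -d --rm --gpus all -p 8080:8080 qwq-32b-preview-test
    curl http://localhost:8080/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d '{"model": "qwq-32b-preview", "messages": [{"role": "user", "content": "Think step by step: what is 17 * 23?"}]}'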
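The pinned sha256 can also be checked against the upstream GGUF independently of the build, using standard coreutils; the URL below is the source field from the YAML:

    curl -LO https://huggingface.co/lmstudio-community/QwQ-32B-Preview-GGUF/resolve/main/QwQ-32B-Preview-Q4_K_M.gguf
    echo "8389413ff15eabdeae824faa78ca433e7cd61a93a6bee4fb0e916cdb727efcda  QwQ-32B-Preview-Q4_K_M.gguf" | sha256sum -c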
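One expected quirk in the scripts/parse-models.sh output: none of the patterns in extract_model_type (flux dev, chat, instruct) match "qwq-32b-preview", so the Type field prints empty, which is consistent with the blank Type column in the tables above. Assuming the name and size extractors (not shown in this hunk) split the model string on the size token, the new entry should print roughly:

    $ ./scripts/parse-models.sh
    ...
    Model: qwq-32b-preview
      Name: qwq
      Size: 32b
      Type: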