chore: add gemma 1.1 model #149

Merged
merged 8 commits on May 26, 2024
1 change: 1 addition & 0 deletions .github/workflows/patch-models.yaml
@@ -24,6 +24,7 @@ jobs:
- ghcr.io/sozercan/llama3:70b
- ghcr.io/sozercan/mixtral:8x7b
- ghcr.io/sozercan/phi3:3.8b
- ghcr.io/sozercan/gemma1.1:2b
steps:
- uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
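This entry adds the published Gemma image to the list the patch workflow rebuilds. As a rough illustration only, once published the image could be pulled and served locally; the registry path comes straight from this matrix entry, while the port, the GPU flag, and the OpenAI-compatible server default are assumptions about how aikit images typically run, not something this PR specifies:

# pull the image added to the matrix above (path taken from this diff)
docker pull ghcr.io/sozercan/gemma1.1:2b
# start it; port 8080 and GPU passthrough are assumed defaults, not confirmed by this PR
docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma1.1:2b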
1 change: 1 addition & 0 deletions .github/workflows/update-models.yaml
@@ -24,6 +24,7 @@ jobs:
- llama-2-13b-chat
- llama-3-8b-instruct
- phi-3-3.8b
- gemma-1.1-2b-instruct
runs-on: ubuntu-latest
timeout-minutes: 360
steps:
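The model name added here, gemma-1.1-2b-instruct, matches the new config file introduced below, so the update workflow can rebuild it alongside the existing models. For context, a hedged sketch of querying a running instance through an OpenAI-compatible chat endpoint; the localhost address, port, and /v1/chat/completions path are assumptions about the server aikit images typically expose, and only the model name comes from this diff:

# send a chat completion request to a locally running gemma-1.1-2b-instruct container (assumed endpoint)
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gemma-1.1-2b-instruct", "messages": [{"role": "user", "content": "Explain what a GGUF file is in one sentence."}]}'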
39 changes: 39 additions & 0 deletions models/gemma-1.1-2b-instruct.yaml
@@ -0,0 +1,39 @@
#syntax=ghcr.io/sozercan/aikit:latest
apiVersion: v1alpha1
debug: true
runtime: cuda
models:
- name: gemma-1.1-2b-instruct
source: https://huggingface.co/lmstudio-community/gemma-1.1-2b-it-GGUF/resolve/main/gemma-1.1-2b-it-Q4_K_M.gguf
sha256: cc2118e1d780fa33582738d8c99223d62c8734b06ef65076c01618d484d081d4
promptTemplates:
- name: chatMsg
template: |
<start_of_turn>user
{{if .Content }}{{ .Content }}{{ end }}<end_of_turn>
- name: chat
template: |
{{ .Input }}
<start_of_turn>model
- name: completion
template: |
{{ .Input }}
config: |
- name: gemma-1.1-2b-instruct
backend: llama
parameters:
model: gemma-1.1-2b-it-Q4_K_M.gguf
context_size: 8192
template:
chat_message: chatMsg
chat: chat
completion: completion
repeat_penalty: 1
stopwords:
- \"<start_of_turn>\"
- \"<end_of_turn>\"
- \"<eos>\"
gpu_layers: 35
f16: true
batch: 512
mmap: true
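Because the first line of the file names the aikit BuildKit frontend via #syntax, the config can be built into an image directly with BuildKit. A minimal sketch, assuming a checkout of this repository and Docker with buildx available (the output tag is illustrative, not part of this PR):

# build an image from the new model config using the frontend named on the #syntax line
docker buildx build . -t gemma-1.1-2b-instruct:latest \
  -f models/gemma-1.1-2b-instruct.yaml --load

The resulting image bundles the Q4_K_M GGUF weights referenced by the source URL and serves them with the llama backend settings above (8192-token context, 35 GPU layers, f16 enabled).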