From b92e3a69d52b4f555a1e02d81df98eed555d6814 Mon Sep 17 00:00:00 2001 From: Sertac Ozercan Date: Wed, 13 Dec 2023 07:23:48 +0000 Subject: [PATCH] update Signed-off-by: Sertac Ozercan --- .github/workflows/test-docker-sd.yaml | 89 +++++++++++++++++++++++++++ README.md | 11 ++-- examples/stablediffusion.yaml | 50 +++++++++++++++ pkg/aikit2llb/convert.go | 32 ++++++---- 4 files changed, 165 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/test-docker-sd.yaml create mode 100644 examples/stablediffusion.yaml diff --git a/.github/workflows/test-docker-sd.yaml b/.github/workflows/test-docker-sd.yaml new file mode 100644 index 00000000..0413dd5c --- /dev/null +++ b/.github/workflows/test-docker-sd.yaml @@ -0,0 +1,89 @@ +name: docker-test-stablediffusion + +on: + push: + branches: + - main + paths-ignore: + - '**.md' + pull_request: + branches: + - main + paths-ignore: + - '**.md' + +permissions: read-all + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 240 + steps: + - name: Harden Runner + uses: step-security/harden-runner@eb238b55efaa70779f274895e782ed17c84f2895 # v2.6.1 + with: + egress-policy: audit + + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: cleanup disk space + run: | + docker system prune -f -a --volumes + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + + # need containerd image store for testing local images + - uses: crazy-max/ghaction-setup-docker@69b561f709cbd934060981d481ccfc316cfb77b7 # v2.2.0 + with: + daemon-config: | + { + "debug": true, + "features": { + "containerd-snapshotter": true + } + } + - uses: crazy-max/ghaction-github-runtime@b3a9207c0e1ef41f4cf215303c976869d0c2c1c4 # v3.0.0 + + - name: build aikit + run: | + docker buildx build . 
-t aikit:test \
+            --load --progress plain \
+            --cache-from=type=gha,scope=aikit \
+            --cache-to=type=gha,scope=aikit,mode=max
+
+      - name: build test model
+        run: |
+          docker buildx build . -t testmodel:test \
+            -f test/aikitfile-sd.yaml \
+            --load --progress plain \
+            --cache-from=type=gha,scope=testmodel \
+            --cache-to=type=gha,scope=testmodel,mode=max
+
+      - name: list images
+        run: docker images
+
+      - name: run test model
+        run: docker run --name testmodel -d -p 8080:8080 testmodel:test
+
+      - name: install e2e dependencies
+        run: make test-e2e-dependencies
+
+      - name: run test
+        run: |
+          curl http://localhost:8080/v1/images/generations -H "Content-Type: application/json" -d '{
+            "model": "stablediffusion",
+            "prompt": "A cute baby sea otter", "size": "256x256"}'
+
+      - name: save logs
+        if: always()
+        run: |
+          docker logs testmodel > docker.log
+
+      - name: publish logs
+        if: always()
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
+        with:
+          name: docker-logs
+          path: docker.log
diff --git a/README.md b/README.md
index aff353c2..53f9cc5c 100644
--- a/README.md
+++ b/README.md
@@ -17,8 +17,9 @@ AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. 
Local - 🤏 Minimal image size, resulting in less vulnerabilities and smaller attack surface with a custom [distroless](https://github.com/GoogleContainerTools/distroless)-based image - 🚀 [Easy to use declarative configuration](./docs/specs.md) - ✨ OpenAI API compatible to use with any OpenAI API compatible client -- 🖼️ [Multi-modal model support](#vision-with-llava) -- 🚢 Kubernetes deployment ready +- 📸 [Multi-modal model support](#vision-with-llava) +- 🖼️ Image generation support with Stable Diffusion +- 🚢 [Kubernetes deployment ready](#kubernetes-deployment) - 📦 Supports multiple models with a single image - 🖥️ [Supports GPU-accelerated inferencing with NVIDIA GPUs](#nvidia) - 🔐 [Signed images for `aikit` and pre-made models](./docs/cosign.md) @@ -27,15 +28,15 @@ AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. Local ### Building an image with a Llama 2 model -[](https://asciinema.org/a/J9bitkONKPvedSfU1RkrmVEhD) +[](https://asciinema.org/a/J9bitkONKPvedSfU1RkrmVEhD) ### Inference -[](https://asciinema.org/a/DYh5bCQMNPSis1whhsfPeMOoM) +[](https://asciinema.org/a/DYh5bCQMNPSis1whhsfPeMOoM) ### Vision with LLaVA -[](https://asciinema.org/a/626553) +[](https://asciinema.org/a/626553) > see [llava.yaml](./examples//llava.yaml) for the configuration used in the demo diff --git a/examples/stablediffusion.yaml b/examples/stablediffusion.yaml new file mode 100644 index 00000000..4a7de950 --- /dev/null +++ b/examples/stablediffusion.yaml @@ -0,0 +1,50 @@ +#syntax=ghcr.io/sozercan/aikit:latest +apiVersion: v1alpha1 +debug: true +backends: + - stablediffusion +models: + - name: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" + sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" + - name: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" + sha256: 
"cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" + - name: "stablediffusion_assets/AutoencoderKL-base-fp16.param" + sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" + - name: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" + sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-encoder-512-512-fp16.bin" + - name: "stablediffusion_assets/AutoencoderKL-fp16.bin" + sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-fp16.bin" + - name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" + sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/FrozenCLIPEmbedder-fp16.bin" + - name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" + sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" + - name: "stablediffusion_assets/log_sigmas.bin" + sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" + source: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" + - name: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" + sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" + source: 
"https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" + - name: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" + sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" + - name: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" + sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" + - name: "stablediffusion_assets/UNetModel-MHA-fp16.bin" + sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/UNetModel-MHA-fp16.bin" + - name: "stablediffusion_assets/vocab.txt" + sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" +config: | + - name: stablediffusion + backend: stablediffusion + parameters: + model: stablediffusion_assets diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/convert.go index 1fc6e7ff..8cf71af2 100644 --- a/pkg/aikit2llb/convert.go +++ b/pkg/aikit2llb/convert.go @@ -22,31 +22,39 @@ const ( func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) { var merge llb.State - s := llb.Image(debianSlim) - s, merge = copyModels(c, s) - s, merge = addLocalAI(c, s, merge) + state := llb.Image(debianSlim) + base := getBaseImage(c) + + state, merge = copyModels(c, base, state) + state, merge = addLocalAI(c, state, merge) + + // install cuda if runtime is nvidia if c.Runtime == utils.RuntimeNVIDIA { - merge = installCuda(s, merge) + merge = installCuda(state, merge) } + + // install opencv and friends if stable diffusion backend is being 
used for b := range c.Backends { if strings.Contains(c.Backends[b], "stablediffusion") { - merge = installOpenCV(s, merge) + merge = installOpenCV(state, merge) } } + imageCfg := NewImageConfig(c) return merge, imageCfg } -func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) { - var base llb.State +func getBaseImage(c *config.Config) llb.State { for b := range c.Backends { if strings.Contains(c.Backends[b], "stablediffusion") { - // due to too many dependencies, we are using debian slim as base for stable diffusion - base = llb.Image(debianSlim) - } else { - base = llb.Image(distrolessBase) + // due to too many dependencies, using debian slim as base for stable diffusion + return llb.Image(debianSlim) } } + return llb.Image(distrolessBase) +} + +func copyModels(c *config.Config, base llb.State, s llb.State) (llb.State, llb.State) { savedState := s // create config file if defined @@ -162,7 +170,7 @@ func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb. opts = append(opts, llb.Chmod(0o755)) localAI := llb.HTTP(localAIURL, opts...) s = s.File( - llb.Copy(localAI, "local-ai", "/usr/bin"), + llb.Copy(localAI, "local-ai", "/usr/bin/local-ai"), llb.WithCustomName("Copying "+fileNameFromURL(localAIURL)+" to /usr/bin"), //nolint: goconst )