Skip to content

Commit

Permalink
feat: apple silicon support (#433)
Browse files Browse the repository at this point in the history
Signed-off-by: Sertac Ozercan <[email protected]>
  • Loading branch information
sozercan authored Nov 25, 2024
1 parent e1697c0 commit 95c9d54
Show file tree
Hide file tree
Showing 15 changed files with 260 additions and 44 deletions.
1 change: 1 addition & 0 deletions .github/dependabot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ updates:
directory: /
schedule:
interval: "weekly"

- package-ecosystem: docker
directory: /charts/aikit
schedule:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@ jobs:
- name: lint
uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1
with:
version: v1.60.3
version: v1.62.0
32 changes: 27 additions & 5 deletions .github/workflows/release-base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ jobs:
release-base:
runs-on: ubuntu-latest
timeout-minutes: 360
strategy:
fail-fast: true
matrix:
runtime:
- base
- applesilicon
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
Expand All @@ -38,18 +44,34 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Set runtime variables for matrix
  run: |
    # Variables set with `export` only live for this step's shell. The
    # "Build and push" step reads them through the `env` context, so they
    # must be persisted by appending KEY=VALUE lines to $GITHUB_ENV.
    if [ "${{ matrix.runtime }}" == "base" ]; then
      {
        echo "PLATFORMS=linux/amd64,linux/arm64"
        echo "FILE=Dockerfile.base"
        echo "TAG=ghcr.io/sozercan/base:latest"
        echo "CACHE_FROM=type=gha,scope=base"
        echo "CACHE_TO=type=gha,scope=base,mode=max"
      } >> "$GITHUB_ENV"
    elif [ "${{ matrix.runtime }}" == "applesilicon" ]; then
      {
        echo "PLATFORMS=linux/arm64"
        echo "FILE=Dockerfile.base-applesilicon"
        echo "TAG=ghcr.io/sozercan/applesilicon/base:latest"
        echo "CACHE_FROM=type=gha,scope=base-applesilicon"
        echo "CACHE_TO=type=gha,scope=base-applesilicon,mode=max"
      } >> "$GITHUB_ENV"
    else
      # Fail loudly instead of building with empty tags/platforms.
      echo "unsupported runtime: ${{ matrix.runtime }}" >&2
      exit 1
    fi
- name: Build and push
uses: docker/build-push-action@4f58ea79222b3b9dc2c8bbdd6debcef730109a75 # v6.9.0
id: build-and-push
with:
push: true
tags: ghcr.io/sozercan/base:latest
cache-from: type=gha,scope=base
cache-to: type=gha,scope=base,mode=max
sbom: true
provenance: true
platforms: linux/amd64,linux/arm64
file: Dockerfile.base
tags: ${{ env.TAG }}
cache-from: ${{ env.CACHE_FROM }}
cache-to: ${{ env.CACHE_TO }}
platforms: ${{ env.PLATFORMS }}
file: ${{ env.FILE }}

- name: Sign the images with GitHub OIDC Token
env:
Expand Down
70 changes: 70 additions & 0 deletions .github/workflows/test-podman-applesilicon.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
name: podman-test-gpu

on:
workflow_dispatch:

permissions: read-all

jobs:
test:
runs-on: self-hosted
timeout-minutes: 240
steps:
- name: cleanup workspace
run: |
rm -rf ./* || true
rm -rf ./.??* || true
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

# use default docker driver builder with containerd image store for local aikit image
# these must be setup before running this test
- run: docker buildx use default

- name: build aikit
run: |
docker buildx build . -t aikit:test \
--load --provenance=false --progress plain
- name: build test model
run: |
docker buildx build . -t testmodel:test \
-f test/aikitfile-llama.yaml \
--load --provenance=false --progress plain \
--build-arg="runtime=applesilicon"
- name: list images
run: docker images

- name: run test model
run: podman run --name testmodel -d --rm --device /dev/dri -p 8080:8080 --pull always testmodel:test

- name: run test (gguf)
  run: |
    # Query the OpenAI-compatible chat endpoint served by the test container.
    result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
      "model": "llama-3.2-1b-instruct",
      "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
    }')
    echo "$result"
    # `jq '.choices'` prints the literal string "null" when the key is absent,
    # so an emptiness check on its output can never fail. `jq -e` sets a
    # non-zero exit status when the final value is false or null (and on
    # invalid JSON), which makes a missing or empty `choices` fail the step.
    if ! echo "$result" | jq -e '.choices | length > 0' > /dev/null; then
      exit 1
    fi
- name: save logs
if: always()
run: podman logs testmodel > /tmp/podman-gpu.log

- run: podman stop testmodel
if: always()

- run: podman system prune -a -f --volumes || true
if: always()

- name: publish test artifacts
if: always()
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: test-podman-gpu
path: |
/tmp/*.log
45 changes: 34 additions & 11 deletions .github/workflows/update-models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
workflow_dispatch:
inputs:
staging:
description: 'push to test registry'
description: "push to test registry"
required: false
default: false
type: boolean
Expand All @@ -15,17 +15,23 @@ permissions:
id-token: write

jobs:
update-models:
update-models:
strategy:
fail-fast: false
matrix:
model:
- llama-3.2-1b-instruct
- llama-3.2-3b-instruct
- llama-3.1-8b-instruct
- phi-3.5-3.8b-instruct
- gemma-2-2b-instruct
- flux-1-dev
- llama-3.2-1b-instruct
- llama-3.2-3b-instruct
- llama-3.1-8b-instruct
- phi-3.5-3.8b-instruct
- gemma-2-2b-instruct
- flux-1-dev
runtime:
- cuda
- applesilicon
exclude:
- model: flux-1-dev # requires cuda runtime
runtime: applesilicon
runs-on: ubuntu-latest
timeout-minutes: 360
steps:
Expand Down Expand Up @@ -96,23 +102,40 @@ jobs:
fi
if ${{ inputs.staging }}; then
export REGISTRY=ghcr.io/sozercan/test
if [ ${{ matrix.runtime }} == "applesilicon" ]; then
export REGISTRY=ghcr.io/sozercan/test/applesilicon
else
export REGISTRY=ghcr.io/sozercan/test
fi
else
export REGISTRY=ghcr.io/sozercan
if [ ${{ matrix.runtime }} == "applesilicon" ]; then
export REGISTRY=ghcr.io/sozercan/applesilicon
else
export REGISTRY=ghcr.io/sozercan
fi
fi
export PLATFORMS="linux/amd64,linux/arm64"
if [ ${{ matrix.model }} == "flux-1-dev" ]; then
export PLATFORMS="linux/amd64"
fi
if [ ${{ matrix.runtime }} == "applesilicon" ]; then
export PLATFORMS="linux/arm64"
fi
if [ ${{ matrix.runtime }} == "applesilicon" ]; then
export BUILD_ARGS="--build-arg runtime=applesilicon"
else
export BUILD_ARGS=""
fi
docker buildx build . \
-t ${REGISTRY}/${MODEL_NAME}:${MODEL_SIZE} \
-t ${REGISTRY}/${MODEL_NAME}:${MODEL_SIZE}${MODEL_TYPE} \
-f models/${{ matrix.model }}.yaml \
--push --progress plain \
--sbom=true --provenance=true \
--platform ${PLATFORMS}
--platform ${PLATFORMS} ${BUILD_ARGS}
echo "DIGEST=$(cosign triangulate ${REGISTRY}/${MODEL_NAME}:${MODEL_SIZE} --type digest)" >> $GITHUB_ENV
- name: Sign the images with GitHub OIDC Token
Expand Down
6 changes: 5 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ repos:
hooks:
- id: gitleaks
- repo: https://github.com/golangci/golangci-lint
rev: v1.61.0
rev: v1.62.0
hooks:
- id: golangci-lint
- repo: https://github.com/jumanjihouse/pre-commit-hooks
Expand All @@ -16,3 +16,7 @@ repos:
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/crate-ci/typos
rev: v1.27.3
hooks:
- id: typos
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ FROM golang:1.23-bookworm@sha256:3f3b9daa3de608f3e869cd2ff8baf21555cf0fca9fd3425
ARG LDFLAGS
COPY . /go/src/github.com/sozercan/aikit
WORKDIR /go/src/github.com/sozercan/aikit
RUN CGO_ENABLED=0 go build -o /aikit -ldflags "${LDFLAGS} -extldflags '-static'" ./cmd/frontend
RUN CGO_ENABLED=0 go build -o /aikit -ldflags "${LDFLAGS} -w -s -extldflags '-static'" ./cmd/frontend

FROM scratch
COPY --from=builder /aikit /bin/aikit
Expand Down
12 changes: 12 additions & 0 deletions Dockerfile.base-applesilicon
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Version of the patched mesa-krunkit Vulkan driver package to install.
ARG MESA_VERSION=23.3.5-102

FROM fedora:39@sha256:d63d63fe593749a5e8dbc8152427d40bbe0ece53d884e00e5f3b44859efa5077
# An ARG declared before FROM is outside the build stage and is not visible to
# later instructions. Redeclare it without a value so the default above is
# inherited by the RUN instruction below.
ARG MESA_VERSION
USER 0

# Install the patched mesa-krunkit drivers
RUN dnf -y install dnf-plugins-core && \
    dnf -y copr enable slp/mesa-krunkit && \
    dnf -y install \
    mesa-vulkan-drivers-${MESA_VERSION} \
    vulkan-loader-devel vulkan-tools vulkan-loader && \
    dnf clean all
25 changes: 18 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
VERSION := v0.14.0

REGISTRY ?= ghcr.io/sozercan
KIND_VERSION ?= 0.23.0
KUBERNETES_VERSION ?= 1.30.1
HELM_VERSION ?= 3.15.1
KIND_VERSION ?= 0.25.0
KUBERNETES_VERSION ?= 1.31.2
HELM_VERSION ?= 3.16.3
TAG ?= test
OUTPUT_TYPE ?= type=docker
TEST_IMAGE_NAME ?= testmodel
TEST_FILE ?= test/aikitfile-llama.yaml
RUNTIME ?= ""
PLATFORMS ?= linux/amd64,linux/arm64

GIT_COMMIT := $(shell git rev-list --abbrev-commit --tags --max-count=1)
GIT_TAG := $(shell git describe --abbrev=0 --tags ${GIT_COMMIT} 2>/dev/null || true)
Expand All @@ -19,17 +21,22 @@ lint:

.PHONY: build-aikit
build-aikit:
docker buildx build . -t ${REGISTRY}/aikit:${TAG} --output=${OUTPUT_TYPE} --build-arg LDFLAGS=${LDFLAGS} \
docker buildx build . -t ${REGISTRY}/aikit:${TAG} --output=${OUTPUT_TYPE} \
--build-arg LDFLAGS=${LDFLAGS} \
--progress=plain

.PHONY: build-test-model
build-test-model:
docker buildx build . -t ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG} -f ${TEST_FILE} --output=${OUTPUT_TYPE} \
--progress=plain --provenance=false
docker buildx build . -t ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG} -f ${TEST_FILE} \
--progress=plain --provenance=false \
--output=${OUTPUT_TYPE} \
--build-arg runtime=${RUNTIME} \
--platform ${PLATFORMS}

# Build the multi-arch distroless base image and push it to the registry.
# The .PHONY name must match the target name, otherwise the target is not phony.
.PHONY: push-distroless-base
push-distroless-base:
docker buildx build . -t sozercan/aikit-base:latest -f Dockerfile.base --platform linux/amd64,linux/arm64 \
docker buildx build . -t sozercan/aikit-base:latest -f Dockerfile.base \
--platform linux/amd64,linux/arm64 \
--sbom=true --push

.PHONY: run-test-model
Expand All @@ -40,6 +47,10 @@ run-test-model:
run-test-model-gpu:
docker run --rm -p 8080:8080 --gpus all ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG}

.PHONY: run-test-model-applesilicon
run-test-model-applesilicon:
podman run --rm -p 8080:8080 --device /dev/dri ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG}

.PHONY: test
test:
go test -v ./... -race -coverprofile=coverage.txt -covermode=atomic
Expand Down
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ If it doesn't include a specific model, you can always [create your own images](

## CPU

> [!NOTE]
> AIKit supports both AMD64 and ARM64 CPUs. You can run the same command on either architecture, and Docker will automatically pull the correct image for your CPU.
> Depending on your CPU capabilities, AIKit will automatically select the most optimized instruction set.
| Model | Optimization | Parameters | Command | Model Name | License |
| --------------- | ------------ | ---------- | ---------------------------------------------------------------- | ------------------------ | ---------------------------------------------------------------------------------- |
| 🦙 Llama 3.2 | Instruct | 1B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.2:1b` | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
Expand Down Expand Up @@ -110,6 +114,21 @@ If it doesn't include a specific model, you can always [create your own images](
| ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |
| 📸 Flux 1 Dev | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev` | `flux-1-dev` | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |

### Apple Silicon (experimental)

> [!NOTE]
> To enable GPU acceleration on Apple Silicon, please see [Podman Desktop documentation](https://podman-desktop.io/docs/podman/gpu).
> Apple Silicon is an _experimental_ runtime and it may change in the future. This runtime is specific to Apple Silicon only, and it will not work as expected on other architectures, including Intel Macs.
> Only `gguf` models are supported on Apple Silicon.
| Model | Optimization | Parameters | Command | Model Name | License |
| ----------- | ------------ | ---------- | --------------------------------------------------------------------------------------------- | ----------------------- | ---------------------------------------------------------------------------------- |
| 🦙 Llama 3.2 | Instruct | 1B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.2:1b` | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
| 🦙 Llama 3.2 | Instruct | 3B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.2:3b` | `llama-3.2-3b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
| 🦙 Llama 3.1 | Instruct | 8B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
| 🅿️ Phi 3.5 | Instruct | 3.8B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
| 🔡 Gemma 2 | Instruct | 2B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |

## What's next?

👉 For more information and how to fine tune models or create your own images, please see [AIKit website](https://sozercan.github.io/aikit/)!
Loading

0 comments on commit 95c9d54

Please sign in to comment.