diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml index 0ffa7c56..e97b2e76 100644 --- a/.github/workflows/pre-release.yaml +++ b/.github/workflows/pre-release.yaml @@ -59,7 +59,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push - uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 # v5.1.0 + uses: docker/build-push-action@af5a7ed5ba88268d5278f7203fb52cd833f66d6e # v5.2.0 id: build-and-push with: push: true diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 24aec544..6bab705f 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -67,7 +67,7 @@ jobs: tags: type=semver,pattern={{raw}} - name: Build and push - uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 # v5.1.0 + uses: docker/build-push-action@af5a7ed5ba88268d5278f7203fb52cd833f66d6e # v5.2.0 id: build-and-push with: push: true diff --git a/.github/workflows/test-docker-gpu.yaml b/.github/workflows/test-docker-gpu.yaml index 5fc7b8a9..d62aba98 100644 --- a/.github/workflows/test-docker-gpu.yaml +++ b/.github/workflows/test-docker-gpu.yaml @@ -27,6 +27,7 @@ jobs: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 # use default docker driver builder with containerd image store for local aikit image + # these must be setup before running this test - run: docker buildx use default - name: build aikit diff --git a/.github/workflows/test-finetune.yaml b/.github/workflows/test-finetune.yaml index 9ee64e2a..b2f5d52c 100644 --- a/.github/workflows/test-finetune.yaml +++ b/.github/workflows/test-finetune.yaml @@ -9,6 +9,12 @@ jobs: test: runs-on: self-hosted timeout-minutes: 360 + strategy: + fail-fast: false + max-parallel: 1 + matrix: + targets: + - unsloth steps: - name: cleanup workspace run: | @@ -16,6 +22,8 @@ jobs: rm -rf ./.??* || true - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + - run: nvidia-smi + # use default docker driver builder with containerd image store for local aikit image # must have insecure security entitlement for finetuning # these must be setup before running this test @@ -23,23 +31,24 @@ jobs: - name: build aikit run: | - docker build . -t aikit:test \ + docker --debug build . -t aikit:test \ --load --provenance=false --progress plain - name: build finetuned model run: | - docker build --allow security.insecure \ - --file test/aikitfile-unsloth.yaml --output _output \ - --target unsloth --progress plain . + docker --debug build --allow security.insecure \ + --file test/aikitfile-${{ matrix.targets }}.yaml --output _output \ + --target ${{ matrix.targets }} --progress plain . - name: check if finetuned model exists - run: test -f _output/model-q4_k_m.gguf - + run: | + ls -al _output + test -f _output/model-q4_k_m.gguf + - name: build custom model - working-directory: _output run: | - docker build . 
-t custommodel:test \
-            -f test/aikitfile-unsloth-custom.yaml \
+          docker --debug build _output --tag custommodel:test \
+            --file test/aikitfile-${{ matrix.targets }}-custom.yaml \
             --load --provenance=false --progress plain

       - name: list images
@@ -52,7 +61,7 @@ jobs:
         run: |
           result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
             "model": "custom",
-            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+            "messages": [{"role": "user", "content": "Compose a haiku about cats"}]
           }')
           echo $result
@@ -71,7 +80,7 @@ jobs:
       - run: docker system prune -a -f --volumes
         if: always()

-      - run: clean up output
+      - name: clean up output
         if: always()
         run: rm -rf _output
diff --git a/.gitignore b/.gitignore
index 9108c945..50b83935 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,5 @@ go.work
 bin

 coverage.txt
+
+_output
diff --git a/Dockerfile b/Dockerfile
index a310962e..bd99e0d4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,8 @@
-FROM golang:1.22-bookworm@sha256:925fe3fa28ba428cf67a7947ae838f8a1523117b40e3e6b5106c378e3f97fa29 as builder
+FROM golang:1.22-bookworm@sha256:6699d2852712f090399ccd4e8dfd079b4d55376f3ab3aff5b2dc8b7b1c11e27e as builder
+ARG LDFLAGS
 COPY . /go/src/github.com/sozercan/aikit
 WORKDIR /go/src/github.com/sozercan/aikit
-RUN CGO_ENABLED=0 go build -o /aikit --ldflags '-extldflags "-static"' ./cmd/frontend
+RUN CGO_ENABLED=0 go build -o /aikit -ldflags "${LDFLAGS} -extldflags '-static'" ./cmd/frontend

 FROM scratch
 COPY --from=builder /aikit /bin/aikit
diff --git a/Makefile b/Makefile
index bb5005c1..193ee820 100644
--- a/Makefile
+++ b/Makefile
@@ -6,13 +6,17 @@ OUTPUT_TYPE ?= type=docker
 TEST_IMAGE_NAME ?= testmodel
 TEST_FILE ?= test/aikitfile-llama.yaml

+GIT_COMMIT := $(shell git rev-list --abbrev-commit --tags --max-count=1)
+GIT_TAG := $(shell git describe --abbrev=0 --tags ${GIT_COMMIT} 2>/dev/null || true)
+LDFLAGS := "-X github.com/sozercan/aikit/pkg/version.Version=$(GIT_TAG:%=%)"
+
 .PHONY: lint
 lint:
	golangci-lint run -v ./... --timeout 5m

 .PHONY: build-aikit
 build-aikit:
-	docker buildx build . -t ${REGISTRY}/aikit:${TAG} --output=${OUTPUT_TYPE}
+	docker buildx build . -t ${REGISTRY}/aikit:${TAG} --output=${OUTPUT_TYPE} --build-arg LDFLAGS=${LDFLAGS}

 .PHONY: build-test-model
 build-test-model:
diff --git a/demo/demo.sh b/demo/demo.sh
new file mode 100755
index 00000000..932a2261
--- /dev/null
+++ b/demo/demo.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+. third_party/demo-magic/demo-magic.sh
+
+clear
+DEMO_PROMPT="${GREEN}➜ ${COLOR_RESET}"
+
+echo "✨ In this demo, we are going to start by fine-tuning a model and then deploy it as a minimal container!"
+
+echo ""
+
+echo "👷 First, we are going to create a new builder"
+
+echo ""
+
+pei "docker buildx create --name aikit-builder --use --buildkitd-flags '--allow-insecure-entitlement security.insecure'"
+
+echo ""
+
+echo "🗃️ Create a configuration for the fine-tuning. We are going to use a Mistral model and fine-tune it with the OpenHermes dataset."
+
+cat > aikit-finetune.yaml << EOF
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+baseModel: unsloth/mistral-7b-instruct-v0.2-bnb-4bit
+datasets:
+  - source: "teknium/openhermes"
+    type: "alpaca"
+config:
+  unsloth:
+EOF
+
+echo ""
+
+pei "bat aikit-finetune.yaml"
+
+echo ""
+
+echo "🎵 Starting the fine-tuning process using the configuration file above; the fine-tuned model will be saved in the _output folder."
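+# note: the '--allow security.insecure' flag on the next build works only
+# because the builder above was created with the
+# '--allow-insecure-entitlement security.insecure' buildkitd flag; the
+# fine-tuning target needs it to set up the NVIDIA devices during the build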
+
+echo ""
+
+pei "docker buildx build --allow security.insecure --file 'aikit-finetune.yaml' --output '_output' --target unsloth --progress plain ."
+
+echo ""
+
+echo "✅ We have finished fine-tuning the model. Let's look at the output..."
+
+echo ""
+
+pei "ls -al _output"
+
+echo ""
+
+echo "📦 Now that we have a fine-tuned model, we can deploy it as a minimal container."
+
+echo ""
+
+echo "📃 We'll start by creating a basic inference configuration file for the deployment."
+
+cat > aikit-inference.yaml << EOF
+#syntax=ghcr.io/sozercan/aikit:latest
+debug: true
+apiVersion: v1alpha1
+runtime: cuda
+models:
+  - name: mistral-finetuned
+    source: aikit-model-q4_k_m.gguf
+    promptTemplates:
+      - name: instruct
+        template: |
+          Below is an instruction that describes a task. Write a response that appropriately completes the request. Keep your responses concise.
+
+          ### Instruction:
+          {{.Input}}
+
+          ### Response:
+config: |
+  - name: mistral-finetuned
+    parameters:
+      model: aikit-model-q4_k_m.gguf
+    context_size: 4096
+    gpu_layers: 35
+    f16: true
+    batch: 512
+    mmap: true
+    template:
+      chat: instruct
+EOF
+
+pei "bat aikit-inference.yaml"
+
+echo ""
+
+echo "🏗️ We can now build a minimal container for the model using the configuration file."
+
+echo ""
+
+pei "docker buildx build -t mistral-finetuned -f aikit-inference.yaml --load --progress plain _output"
+
+echo ""
+
+echo "🏃 We have finished building the minimal container. Let's start the container and test it."
+
+echo ""
+
+pei "docker run --name mistral-finetuned -d --rm -p 8080:8080 --gpus all mistral-finetuned"
+
+echo ""
+
+echo "🧪 We can now test the container using a sample query. Since this is OpenAI API compatible, you can use it as a drop-in replacement for any application that uses the OpenAI API."
+
+echo ""
+
+pei "curl http://localhost:8080/v1/chat/completions -H \"Content-Type: application/json\" -d '{\"model\": \"mistral-finetuned\", \"messages\": [{\"role\": \"user\", \"content\": \"Generate a list of 10 words that start with ab\"}]}'"
+
+echo ""
+
+echo "🙌 We have successfully deployed the fine-tuned model as a minimal container and verified it! We can now stop the container if we wish."
+
+echo ""
+
+pei "docker stop mistral-finetuned"
+
+echo ""
+
+echo "❤️ In this demo, we have shown how to fine-tune a model and deploy it as a minimal container using AIKit. Thank you for watching!"
+
+echo ""
+
+# pei "docker buildx rm aikit-builder"
diff --git a/demo/third_party/demo-magic/README.md b/demo/third_party/demo-magic/README.md
new file mode 100644
index 00000000..2101058a
--- /dev/null
+++ b/demo/third_party/demo-magic/README.md
@@ -0,0 +1,148 @@
+# Demo Magic
+
+demo-magic.sh is a handy shell script that enables you to script repeatable demos in a bash environment so you don't have to type as you present. Rather than trying to type commands when presenting you simply script them and let demo-magic.sh run them for you.
+
+## Features
+- Simulates typing. It looks like you are actually typing out commands
+- Allows you to actually run commands or pretend to do so.
+- Can hide commands from presentation. Useful for behind the scenes stuff that doesn't need to be shown.
+
+## Functions
+
+### pe
+Print and Execute. This function will simulate typing whatever you give it. It will then pause until you press ENTER. After your keypress it will run the command.
+
+```bash
+#!/bin/bash
+
+pe "ls -l"
+```
+
+### p
+Print only. This function will simulate typing whatever you give it.
It will not run the command. After typing it will pause until you press ENTER. After your keypress it will move on to the next instruction in your script. + +```bash +#!/bin/bash + +p "ls -l" +``` + +### wait +Waits for the user to press ENTER. + +If `PROMPT_TIMEOUT` is defined and > 0 the demo will automatically proceed after the amount of seconds has passed. + +```bash +#!/bin/bash + +# Will wait until user presses enter +PROMPT_TIMEOUT=0 +wait + +# Will wait max 5 seconds until user presses +PROMPT_TIMEOUT=5 +wait + +``` + +### cmd +Enters script into interactive mode and allows newly typed commands to be executed within the script +``` +#!/bin/bash + +cmd +``` + +## Getting Started +Create a shell script and include demo-magic.sh + +```bash +#!/bin/bash + +######################## +# include the magic +######################## +. demo-magic.sh + +# hide the evidence +clear + +# Put your stuff here +``` + +Then use the handy functions to run through your demo. + +## Command line usage +demo-magic.sh exposes 3 options out of the box to your script. +- `-d` - disable simulated typing. Useful for debugging +- `-h` - prints the usage text +- `-n` - set no default waiting after `p` and `pe` functions +- `-w` - set no wait timeout after `p` and `pe` functions + +```bash +$ ./my-demo.sh -h + +Usage: ./my-demo.sh [options] + + Where options is one or more of: + -h Prints Help text + -d Debug mode. Disables simulated typing + -n No wait + -w Waits max the given amount of seconds before proceeding with demo (e.g. `-w5`) +``` + +## Useful Tricks + +### Faking network connections +Network connections during demos are often unreliable. Try and fake whatever commands would rely on a network connection. For example: Instead of trying to install node modules in a node.js application you can fake it. You can install the node_modules at home on your decent network. Then rename the directory and pretend to install it later by symlinking. If you want to be thorough you can capture the output of npm install into a log file then cat it out later to simulate the install. + +```bash +#!/bin/bash + +######################## +# include the magic +######################## +. demo-magic.sh + +# hide the evidence +clear + +# this command is typed and executed +pe "cd my-app" + +# this command is merely typed. Not executed +p "npm install" + +# this command runs behind the scenes +ln -s cached_node_modules node_modules + +# cat out a log file that captures a previous successful node modules install +cat node-modules-install.log + +# now type and run the command to start your app +pe "node index.js" +``` + +### No waiting +The -n _no wait_ option can be useful if you want to print and execute multiple commands. + +```bash +# include demo-magic +. demo-magic.sh -n + +# add multiple commands +pe 'git status' +pe 'git log --oneline --decorate -n 20' +``` + +However this will oblige you to define your waiting points manually e.g. +```bash +... 
+# define waiting points +pe 'git status' +pe 'git log --oneline --decorate -n 20' +wait +pe 'git pull' +pe 'git log --oneline --decorate -n 20' +wait +``` diff --git a/demo/third_party/demo-magic/demo-magic.sh b/demo/third_party/demo-magic/demo-magic.sh new file mode 100644 index 00000000..9120cabe --- /dev/null +++ b/demo/third_party/demo-magic/demo-magic.sh @@ -0,0 +1,228 @@ +#!/usr/bin/env bash + +############################################################################### +# +# demo-magic.sh +# +# Copyright (c) 2015-2022 Paxton Hare +# +# This script lets you script demos in bash. It runs through your demo script +# when you press ENTER. It simulates typing and runs commands. +# +############################################################################### + +# the speed to simulate typing the text +TYPE_SPEED=20 + +# no wait after "p" or "pe" +NO_WAIT=false + +# if > 0, will pause for this amount of seconds before automatically proceeding with any p or pe +PROMPT_TIMEOUT=0 + +# don't show command number unless user specifies it +SHOW_CMD_NUMS=false + + +# handy color vars for pretty prompts +BLACK="\033[0;30m" +BLUE="\033[0;34m" +GREEN="\033[0;32m" +GREY="\033[0;90m" +CYAN="\033[0;36m" +RED="\033[0;31m" +PURPLE="\033[0;35m" +BROWN="\033[0;33m" +WHITE="\033[0;37m" +BOLD="\033[1m" +COLOR_RESET="\033[0m" + +C_NUM=0 + +# prompt and command color which can be overriden +DEMO_PROMPT="$ " +DEMO_CMD_COLOR=$BOLD +DEMO_COMMENT_COLOR=$GREY + +## +# prints the script usage +## +function usage() { + echo -e "" + echo -e "Usage: $0 [options]" + echo -e "" + echo -e " Where options is one or more of:" + echo -e " -h Prints Help text" + echo -e " -d Debug mode. Disables simulated typing" + echo -e " -n No wait" + echo -e " -w Waits max the given amount of seconds before " + echo -e " proceeding with demo (e.g. '-w5')" + echo -e "" +} + +## +# wait for user to press ENTER +# if $PROMPT_TIMEOUT > 0 this will be used as the max time for proceeding automatically +## +function wait() { + if [[ "$PROMPT_TIMEOUT" == "0" ]]; then + read -rs + else + read -rst "$PROMPT_TIMEOUT" + fi +} + +## +# print command only. 
Useful for when you want to pretend to run a command
+#
+# takes 1 param - the string command to print
+#
+# usage: p "ls -l"
+#
+##
+function p() {
+  if [[ ${1:0:1} == "#" ]]; then
+    cmd=$DEMO_COMMENT_COLOR$1$COLOR_RESET
+  else
+    cmd=$DEMO_CMD_COLOR$1$COLOR_RESET
+  fi
+
+  # render the prompt
+  x=$(PS1="$DEMO_PROMPT" "$BASH" --norc -i </dev/null 2>&1 | sed -n '${s/^\(.*\)exit$/\1/p;}')
+
+  # show command number is selected
+  if $SHOW_CMD_NUMS; then
+   printf "[$((++C_NUM))] $x"
+  else
+   printf "$x"
+  fi
+
+  # wait for the user to press a key before typing the command
+  if [ $NO_WAIT = false ]; then
+    wait
+  fi
+
+  if [[ -z $TYPE_SPEED ]]; then
+    echo -en "$cmd"
+  else
+    echo -en "$cmd" | pv -qL $[$TYPE_SPEED+(-2 + RANDOM%5)];
+  fi
+
+  # wait for the user to press a key before moving on
+  if [ $NO_WAIT = false ]; then
+    wait
+  fi
+  echo ""
+}
+
+##
+# Prints and executes a command
+#
+# takes 1 parameter - the string command to run
+#
+# usage: pe "ls -l"
+#
+##
+function pe() {
+  # print the command
+  p "$@"
+  run_cmd "$@"
+}
+
+##
+# print and executes a command immediately
+#
+# takes 1 parameter - the string command to run
+#
+# usage: pei "ls -l"
+#
+##
+function pei {
+  NO_WAIT=true pe "$@"
+}
+
+##
+# Enters script into interactive mode
+#
+# and allows newly typed commands to be executed within the script
+#
+# usage : cmd
+#
+##
+function cmd() {
+  # render the prompt
+  x=$(PS1="$DEMO_PROMPT" "$BASH" --norc -i </dev/null 2>&1 | sed -n '${s/^\(.*\)exit$/\1/p;}')
+  printf "$x\033[0m"
+  read command
+  run_cmd "${command}"
+}
+
+function run_cmd() {
+  function handle_cancel() {
+    printf ""
+  }
+
+  trap handle_cancel SIGINT
+  stty -echoctl
+  eval $@
+  stty echoctl
+  trap - SIGINT
+}
+
+
+function check_pv() {
+  command -v pv >/dev/null 2>&1 || {
+
+    echo ""
+    echo -e "${RED}##############################################################"
+    echo "# HOLD IT!! I require pv for simulated typing but it's " >&2
+    echo "# not installed. Aborting." >&2;
+    echo -e "${RED}##############################################################"
+    echo ""
+    echo -e "${COLOR_RESET}Disable simulated typing: "
+    echo ""
+    echo -e "    unset TYPE_SPEED"
+    echo ""
+    echo "Installing pv:"
+    echo ""
+    echo "  Mac:    $ brew install pv"
+    echo ""
+    echo "  Other:  https://www.ivarch.com/programs/pv.shtml"
+    echo ""
+    exit 1;
+  }
+}
+
+#
+# handle some default params
+# -h for help
+# -d for disabling simulated typing
+#
+while getopts ":dhncw:" opt; do
+  case $opt in
+    h)
+      usage
+      exit 1
+      ;;
+    d)
+      unset TYPE_SPEED
+      ;;
+    n)
+      NO_WAIT=true
+      ;;
+    c)
+      SHOW_CMD_NUMS=true
+      ;;
+    w)
+      PROMPT_TIMEOUT=$OPTARG
+      ;;
+  esac
+done
+
+##
+# Do not check for pv. This trusts the user to not set TYPE_SPEED later in the
+# demo in which case an error will occur if pv is not installed.
+## +if [[ -n "$TYPE_SPEED" ]]; then + check_pv +fi diff --git a/demo/third_party/demo-magic/license.txt b/demo/third_party/demo-magic/license.txt new file mode 100644 index 00000000..1f1f232e --- /dev/null +++ b/demo/third_party/demo-magic/license.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Paxton Hare + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/go.mod b/go.mod index e3c1f926..b503f257 100644 --- a/go.mod +++ b/go.mod @@ -4,12 +4,12 @@ go 1.21 require ( github.com/containerd/containerd v1.7.13 - github.com/moby/buildkit v0.13.0-rc3 + github.com/moby/buildkit v0.13.0 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.0 github.com/pkg/errors v0.9.1 github.com/sirupsen/logrus v1.9.3 - google.golang.org/grpc v1.62.0 + google.golang.org/grpc v1.62.1 gopkg.in/yaml.v2 v2.4.0 ) @@ -40,7 +40,7 @@ require ( github.com/moby/sys/signal v0.7.0 // indirect github.com/secure-systems-lab/go-securesystemslib v0.8.0 // indirect github.com/shibumi/go-pathspec v1.3.0 // indirect - github.com/tonistiigi/fsutil v0.0.0-20240223190444-7a889f53dbf6 // indirect + github.com/tonistiigi/fsutil v0.0.0-20240301111122-7525a1af2bb5 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.48.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.48.0 // indirect go.opentelemetry.io/otel v1.23.1 // indirect diff --git a/go.sum b/go.sum index 6ab83433..299666c9 100644 --- a/go.sum +++ b/go.sum @@ -90,8 +90,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/moby/buildkit v0.13.0-rc3 h1:Dns5Ixnv4OH1MyLZy7S4G2m+MLwp2YEqrPPzEzfw7Tw= -github.com/moby/buildkit v0.13.0-rc3/go.mod h1:5pRtk7Wuv929XRIp9tqPdq07mrnBpXAUoOYYfOj0nhA= +github.com/moby/buildkit v0.13.0 h1:reVR1Y+rbNIUQ9jf0Q1YZVH5a/nhOixZsl+HJ9qQEGI= +github.com/moby/buildkit v0.13.0/go.mod h1:aNmNQKLBFYAOFuzQjR3VA27/FijlvtBD1pjNwTSN37k= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= @@ -130,8 +130,8 @@ github.com/stretchr/testify v1.4.0/go.mod 
h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/tonistiigi/fsutil v0.0.0-20240223190444-7a889f53dbf6 h1:v9u6pmdUkarXL/1S/6LGcG9wsiBLd9N/WyJq/Y9WPcg= -github.com/tonistiigi/fsutil v0.0.0-20240223190444-7a889f53dbf6/go.mod h1:vbbYqJlnswsbJqWUcJN8fKtBhnEgldDrcagTgnBVKKM= +github.com/tonistiigi/fsutil v0.0.0-20240301111122-7525a1af2bb5 h1:oZS8KCqAg62sxJkEq/Ppzqrb6EooqzWtL8Oaex7bc5c= +github.com/tonistiigi/fsutil v0.0.0-20240301111122-7525a1af2bb5/go.mod h1:vbbYqJlnswsbJqWUcJN8fKtBhnEgldDrcagTgnBVKKM= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= @@ -228,8 +228,8 @@ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= -google.golang.org/grpc v1.62.0 h1:HQKZ/fa1bXkX1oFOvSjmZEUL8wLSaZTjCcLAlmZRtdk= -google.golang.org/grpc v1.62.0/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= +google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= +google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= diff --git a/pkg/aikit/config/finetune_specs.go b/pkg/aikit/config/finetune_specs.go new file mode 100644 index 00000000..26148de1 --- /dev/null +++ b/pkg/aikit/config/finetune_specs.go @@ -0,0 +1,40 @@ +package config + +type FineTuneConfig struct { + APIVersion string `yaml:"apiVersion"` + Target string `yaml:"target"` + BaseModel string `yaml:"baseModel"` + Datasets []Dataset `yaml:"datasets"` + Config FineTuneConfigSpec `yaml:"config"` + Output FineTuneOutputSpec `yaml:"output"` +} + +type FineTuneConfigSpec struct { + Unsloth FineTuneConfigUnslothSpec `yaml:"unsloth"` +} + +type Dataset struct { + Source string `yaml:"source"` + Type string `yaml:"type"` +} + +type FineTuneConfigUnslothSpec struct { + Packing bool `yaml:"packing"` + MaxSeqLength int `yaml:"maxSeqLength"` + LoadIn4bit bool `yaml:"loadIn4bit"` + BatchSize int `yaml:"batchSize"` + GradientAccumulationSteps int `yaml:"gradientAccumulationSteps"` + WarmupSteps int `yaml:"warmupSteps"` + MaxSteps int `yaml:"maxSteps"` + LearningRate float64 `yaml:"learningRate"` + LoggingSteps int `yaml:"loggingSteps"` + Optimizer string `yaml:"optimizer"` + WeightDecay float64 `yaml:"weightDecay"` + LrSchedulerType string `yaml:"lrSchedulerType"` + Seed int `yaml:"seed"` +} + +type FineTuneOutputSpec struct { + Quantize string `yaml:"quantize"` + Name string `yaml:"name"` +} diff --git a/pkg/aikit/config/inference_specs.go b/pkg/aikit/config/inference_specs.go new file mode 100644 index 00000000..dea66810 --- /dev/null +++ b/pkg/aikit/config/inference_specs.go @@ -0,0 +1,22 @@ +package config + 
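+// InferenceConfig is the aikitfile schema for building an inference image:
+// the runtime (for example, cuda), optional backends, the models to bundle,
+// and an optional inline LocalAI configuration written to /config.yaml.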
+type InferenceConfig struct { + APIVersion string `yaml:"apiVersion"` + Debug bool `yaml:"debug"` + Runtime string `yaml:"runtime"` + Backends []string `yaml:"backends"` + Models []Model `yaml:"models"` + Config string `yaml:"config"` +} + +type Model struct { + Name string `yaml:"name"` + Source string `yaml:"source"` + SHA256 string `yaml:"sha256"` + PromptTemplates []PromptTemplate `yaml:"promptTemplates"` +} + +type PromptTemplate struct { + Name string `yaml:"name"` + Template string `yaml:"template"` +} diff --git a/pkg/aikit/config/specs.go b/pkg/aikit/config/specs.go index af769fd0..91507538 100644 --- a/pkg/aikit/config/specs.go +++ b/pkg/aikit/config/specs.go @@ -5,31 +5,19 @@ import ( yaml "gopkg.in/yaml.v2" ) -func NewFromBytes(b []byte) (*Config, error) { - c := &Config{} - if err := yaml.Unmarshal(b, c); err != nil { - return nil, errors.Wrap(err, "unmarshal config") +func NewFromBytes(b []byte) (*InferenceConfig, *FineTuneConfig, error) { + inferenceConfig := &InferenceConfig{} + fineTuneConfig := &FineTuneConfig{} + var err error + err = yaml.Unmarshal(b, inferenceConfig) + if err == nil { + return inferenceConfig, nil, nil } - return c, nil -} - -type Config struct { - APIVersion string `yaml:"apiVersion"` - Debug bool `yaml:"debug,omitempty"` - Runtime string `yaml:"runtime,omitempty"` - Backends []string `yaml:"backends,omitempty"` - Models []Model `yaml:"models"` - Config string `yaml:"config,omitempty"` -} -type Model struct { - Name string `yaml:"name"` - Source string `yaml:"source"` - SHA256 string `yaml:"sha256,omitempty"` - PromptTemplates []PromptTemplate `yaml:"promptTemplates,omitempty"` -} + err = yaml.Unmarshal(b, fineTuneConfig) + if err == nil { + return nil, fineTuneConfig, nil + } -type PromptTemplate struct { - Name string `yaml:"name,omitempty"` - Template string `yaml:"template,omitempty"` + return nil, nil, errors.Wrap(err, "unmarshal config") } diff --git a/pkg/aikit/config/specs_test.go b/pkg/aikit/config/specs_test.go index 3daf88a3..0e434f24 100644 --- a/pkg/aikit/config/specs_test.go +++ b/pkg/aikit/config/specs_test.go @@ -14,7 +14,7 @@ func TestNewFromBytes(t *testing.T) { tests := []struct { name string args args - want *Config + want *InferenceConfig wantErr bool }{ { @@ -29,7 +29,7 @@ models: - name: test source: foo `)}, - want: &Config{ + want: &InferenceConfig{ APIVersion: utils.APIv1alpha1, Runtime: utils.RuntimeCPUAVX512, Backends: []string{ @@ -56,13 +56,13 @@ foo } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := NewFromBytes(tt.args.b) + infCfg, _, err := NewFromBytes(tt.args.b) if (err != nil) != tt.wantErr { t.Errorf("NewFromBytes() error = %v, wantErr %v", err, tt.wantErr) return } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("NewFromBytes() = %v, want %v", got, tt.want) + if !reflect.DeepEqual(infCfg, tt.want) { + t.Errorf("NewFromBytes() = %v, want %v", infCfg, tt.want) } }) } diff --git a/pkg/aikit2llb/finetune/convert.go b/pkg/aikit2llb/finetune/convert.go new file mode 100644 index 00000000..de67c938 --- /dev/null +++ b/pkg/aikit2llb/finetune/convert.go @@ -0,0 +1,76 @@ +package finetune + +import ( + "fmt" + + "github.com/moby/buildkit/client/llb" + "github.com/moby/buildkit/util/system" + "github.com/sozercan/aikit/pkg/aikit/config" + "github.com/sozercan/aikit/pkg/utils" + "gopkg.in/yaml.v2" +) + +const ( + // https://github.com/unslothai/unsloth/releases/tag/February-2024 + unslothCommitSHA = "dbba69b085b9d6049b57b48b882af7e9f29df5b2" + nvidiaMknod = "mknod --mode 666 /dev/nvidiactl c 
195 255 && mknod --mode 666 /dev/nvidia-uvm c 235 0 && mknod --mode 666 /dev/nvidia-uvm-tools c 235 1 && mknod --mode 666 /dev/nvidia0 c 195 0 && nvidia-smi" + sourceVenv = ". .venv/bin/activate" +) + +func Aikit2LLB(c *config.FineTuneConfig) llb.State { + env := map[string]string{ + "PATH": system.DefaultPathEnv("linux") + ":/usr/local/cuda/bin", + "NVIDIA_REQUIRE_CUDA": "cuda>=12.0", + "NVIDIA_DRIVER_CAPABILITIES": "compute,utility", + "NVIDIA_VISIBLE_DEVICES": "all", + "LD_LIBRARY_PATH": "/usr/local/cuda/lib64", + } + + state := llb.Image(utils.CudaDevel) + for k, v := range env { + state = state.AddEnv(k, v) + } + + // installing dependencies + // due to buildkit run limitations, we need to install nvidia drivers and driver version must match the host + state = state.Run(utils.Sh("apt-get update && apt-get install -y --no-install-recommends python3-dev python3 python3-pip python-is-python3 git wget kmod && cd /root && VERSION=$(cat /proc/driver/nvidia/version | sed -n 's/.*NVIDIA UNIX x86_64 Kernel Module \\([0-9]\\+\\.[0-9]\\+\\.[0-9]\\+\\).*/\\1/p') && wget --no-verbose https://download.nvidia.com/XFree86/Linux-x86_64/$VERSION/NVIDIA-Linux-x86_64-$VERSION.run && chmod +x NVIDIA-Linux-x86_64-$VERSION.run && ./NVIDIA-Linux-x86_64-$VERSION.run -x && rm NVIDIA-Linux-x86_64-$VERSION.run && /root/NVIDIA-Linux-x86_64-$VERSION/nvidia-installer -a -s --skip-depmod --no-dkms --no-nvidia-modprobe --no-questions --no-systemd --no-x-check --no-kernel-modules --no-kernel-module-source && rm -rf /root/NVIDIA-Linux-x86_64-$VERSION")).Root() + + // write config to /config.yaml + cfg, err := yaml.Marshal(c) + if err != nil { + panic(err) + } + state = state.Run(utils.Shf("echo -n \"%s\" > /config.yaml", string(cfg))).Root() + + var scratch llb.State + if c.Target == utils.TargetUnsloth { + // installing unsloth and its dependencies + // uv does not support installing xformers via unsloth pyproject + state = state.Run(utils.Shf("pip install --upgrade pip uv && uv venv --system-site-packages && %[1]s && uv pip install packaging torch==2.1.0 ipython ninja packaging bitsandbytes setuptools wheel psutil && uv pip install flash-attn --no-build-isolation && python -m pip install 'unsloth[cu121_ampere] @ git+https://github.com/unslothai/unsloth.git@%[2]s'", sourceVenv, unslothCommitSHA)).Root() + + // TODO: uncomment before release + // version := version.Version + // unslothScriptURL := fmt.Sprintf("https://raw.githubusercontent.com/sozercan/aikit/%s/pkg/finetune/target_unsloth.py", version) + unslothScriptURL := "https://raw.githubusercontent.com/sozercan/aikit/finetune/pkg/finetune/target_unsloth.py" + var opts []llb.HTTPOption + opts = append(opts, llb.Chmod(0o755)) + unslothScript := llb.HTTP(unslothScriptURL, opts...) 
+ state = state.File( + llb.Copy(unslothScript, utils.FileNameFromURL(unslothScriptURL), "/"), + llb.WithCustomName("Copying "+utils.FileNameFromURL(unslothScriptURL)), + ) + + // setup nvidia devices and run unsloth + // due to buildkit run limitations, we need to create the devices manually and run unsloth in the same command + state = state.Run(utils.Shf("%[1]s && %[2]s && python -m target_unsloth", nvidiaMknod, sourceVenv), llb.Security(llb.SecurityModeInsecure)).Root() + + // copy gguf to scratch which will be the output + const inputFile = "*.gguf" + copyOpts := []llb.CopyOption{} + copyOpts = append(copyOpts, &llb.CopyInfo{AllowWildcard: true}) + outputFile := fmt.Sprintf("%s-%s.gguf", c.Output.Name, c.Output.Quantize) + scratch = llb.Scratch().File(llb.Copy(state, inputFile, outputFile, copyOpts...)) + } + + return scratch +} diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/inference/convert.go similarity index 51% rename from pkg/aikit2llb/convert.go rename to pkg/aikit2llb/inference/convert.go index 69790e77..2e490d41 100644 --- a/pkg/aikit2llb/convert.go +++ b/pkg/aikit2llb/inference/convert.go @@ -1,4 +1,4 @@ -package aikit2llb +package inference import ( "fmt" @@ -14,7 +14,6 @@ import ( ) const ( - debianSlim = "docker.io/library/debian:12-slim" distrolessBase = "gcr.io/distroless/cc-debian12:latest" localAIVersion = "v2.9.0" @@ -22,9 +21,9 @@ const ( cudaVersion = "12-3" ) -func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) { +func Aikit2LLB(c *config.InferenceConfig) (llb.State, *specs.Image) { var merge llb.State - state := llb.Image(debianSlim) + state := llb.Image(utils.DebianSlim) base := getBaseImage(c) state, merge = copyModels(c, base, state) @@ -52,52 +51,65 @@ func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) { return merge, imageCfg } -func getBaseImage(c *config.Config) llb.State { +func getBaseImage(c *config.InferenceConfig) llb.State { if len(c.Backends) > 0 { - return llb.Image(debianSlim) + return llb.Image(utils.DebianSlim) } return llb.Image(distrolessBase) } -func copyModels(c *config.Config, base llb.State, s llb.State) (llb.State, llb.State) { +func copyModels(c *config.InferenceConfig, base llb.State, s llb.State) (llb.State, llb.State) { savedState := s for _, model := range c.Models { - var opts []llb.HTTPOption - opts = append(opts, llb.Filename(fileNameFromURL(model.Source))) - if model.SHA256 != "" { - digest := digest.NewDigestFromEncoded(digest.SHA256, model.SHA256) - opts = append(opts, llb.Checksum(digest)) - } + // check if model source is a URL or a local path + _, err := url.ParseRequestURI(model.Source) + if err == nil { + var opts []llb.HTTPOption + opts = append(opts, llb.Filename(utils.FileNameFromURL(model.Source))) + if model.SHA256 != "" { + digest := digest.NewDigestFromEncoded(digest.SHA256, model.SHA256) + opts = append(opts, llb.Checksum(digest)) + } - m := llb.HTTP(model.Source, opts...) + m := llb.HTTP(model.Source, opts...) 
+ + var modelPath string + if strings.Contains(model.Name, "/") { + modelPath = "/models/" + path.Dir(model.Name) + "/" + utils.FileNameFromURL(model.Source) + } else { + modelPath = "/models/" + utils.FileNameFromURL(model.Source) + } - var modelPath string - if strings.Contains(model.Name, "/") { - modelPath = "/models/" + path.Dir(model.Name) + "/" + fileNameFromURL(model.Source) + var copyOpts []llb.CopyOption + copyOpts = append(copyOpts, &llb.CopyInfo{ + CreateDestPath: true, + }) + s = s.File( + llb.Copy(m, utils.FileNameFromURL(model.Source), modelPath, copyOpts...), + llb.WithCustomName("Copying "+utils.FileNameFromURL(model.Source)+" to "+modelPath), //nolint: goconst + ) } else { - modelPath = "/models/" + fileNameFromURL(model.Source) + var copyOpts []llb.CopyOption + copyOpts = append(copyOpts, &llb.CopyInfo{ + CreateDestPath: true, + }) + s = s.File( + llb.Copy(llb.Local("context"), model.Source, "/models/", copyOpts...), + llb.WithCustomName("Copying "+utils.FileNameFromURL(model.Source)+" to "+"/models"), //nolint: goconst + ) } - var copyOpts []llb.CopyOption - copyOpts = append(copyOpts, &llb.CopyInfo{ - CreateDestPath: true, - }) - s = s.File( - llb.Copy(m, fileNameFromURL(model.Source), modelPath, copyOpts...), - llb.WithCustomName("Copying "+fileNameFromURL(model.Source)+" to "+modelPath), //nolint: goconst - ) - // create prompt templates if defined for _, pt := range model.PromptTemplates { if pt.Name != "" && pt.Template != "" { - s = s.Run(shf("echo -n \"%s\" > /models/%s.tmpl", pt.Template, pt.Name)).Root() + s = s.Run(utils.Shf("echo -n \"%s\" > /models/%s.tmpl", pt.Template, pt.Name)).Root() } } } // create config file if defined if c.Config != "" { - s = s.Run(shf("echo -n \"%s\" > /config.yaml", c.Config)).Root() + s = s.Run(utils.Shf("echo -n \"%s\" > /config.yaml", c.Config)).Root() } diff := llb.Diff(savedState, s) @@ -105,34 +117,26 @@ func copyModels(c *config.Config, base llb.State, s llb.State) (llb.State, llb.S return s, merge } -func fileNameFromURL(urlString string) string { - parsedURL, err := url.Parse(urlString) - if err != nil { - panic(err) - } - return path.Base(parsedURL.Path) -} - -func installCuda(c *config.Config, s llb.State, merge llb.State) (llb.State, llb.State) { +func installCuda(c *config.InferenceConfig, s llb.State, merge llb.State) (llb.State, llb.State) { cudaKeyringURL := "https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb" cudaKeyring := llb.HTTP(cudaKeyringURL) s = s.File( - llb.Copy(cudaKeyring, fileNameFromURL(cudaKeyringURL), "/"), - llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst + llb.Copy(cudaKeyring, utils.FileNameFromURL(cudaKeyringURL), "/"), + llb.WithCustomName("Copying "+utils.FileNameFromURL(cudaKeyringURL)), //nolint: goconst ) - s = s.Run(sh("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root() + s = s.Run(utils.Sh("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root() savedState := s // running apt-get update twice due to nvidia repo - s = s.Run(sh("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root() + s = s.Run(utils.Sh("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root() // install cuda libraries if len(c.Backends) == 0 { - s = s.Run(shf("apt-get install -y --no-install-recommends libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root() + s = s.Run(utils.Shf("apt-get 
install -y --no-install-recommends libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root() // using a distroless base image here // convert debian package metadata status file to distroless status.d directory // clean up apt directories - s = s.Run(bashf("apt-get install -y --no-install-recommends libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean && mkdir -p /var/lib/dpkg/status.d && description_flag=false; while IFS= read -r line || [[ -n $line ]]; do if [[ $line == Package:* ]]; then pkg_name=$(echo $line | cut -d' ' -f2); elif [[ $line == Maintainer:* ]]; then maintainer=$(echo $line | cut -d' ' -f2-); if [[ $maintainer == 'cudatools ' ]]; then pkg_file=/var/lib/dpkg/status.d/${pkg_name}; echo 'Package: '$pkg_name > $pkg_file; echo $line >> $pkg_file; else pkg_file=''; fi; elif [[ -n $pkg_file ]]; then if [[ $line == Description:* ]]; then description_flag=true; elif [[ $line == '' ]]; then description_flag=false; elif ! $description_flag; then echo $line >> $pkg_file; fi; fi; done < /var/lib/dpkg/status && find /var/lib/dpkg -mindepth 1 ! -regex '^/var/lib/dpkg/status\\.d\\(/.*\\)?' -delete && rm -r /var/lib/apt", cudaVersion)).Root() + s = s.Run(utils.Bashf("apt-get install -y --no-install-recommends libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean && mkdir -p /var/lib/dpkg/status.d && description_flag=false; while IFS= read -r line || [[ -n $line ]]; do if [[ $line == Package:* ]]; then pkg_name=$(echo $line | cut -d' ' -f2); elif [[ $line == Maintainer:* ]]; then maintainer=$(echo $line | cut -d' ' -f2-); if [[ $maintainer == 'cudatools ' ]]; then pkg_file=/var/lib/dpkg/status.d/${pkg_name}; echo 'Package: '$pkg_name > $pkg_file; echo $line >> $pkg_file; else pkg_file=''; fi; elif [[ -n $pkg_file ]]; then if [[ $line == Description:* ]]; then description_flag=true; elif [[ $line == '' ]]; then description_flag=false; elif ! $description_flag; then echo $line >> $pkg_file; fi; fi; done < /var/lib/dpkg/status && find /var/lib/dpkg -mindepth 1 ! -regex '^/var/lib/dpkg/status\\.d\\(/.*\\)?' 
-delete && rm -r /var/lib/apt", cudaVersion)).Root() } // installing dev dependencies used for exllama @@ -144,12 +148,12 @@ func installCuda(c *config.Config, s llb.State, merge llb.State) (llb.State, llb } exllamaDeps := fmt.Sprintf("apt-get install -y --no-install-recommends cuda-cudart-dev-%[1]s cuda-crt-%[1]s libcusparse-dev-%[1]s libcublas-dev-%[1]s libcusolver-dev-%[1]s cuda-nvcc-%[1]s %[2]s && apt-get clean", cudaVersion, exllama2Dep) - s = s.Run(sh(exllamaDeps)).Root() + s = s.Run(utils.Sh(exllamaDeps)).Root() } if c.Backends[b] == utils.BackendMamba { mambaDeps := fmt.Sprintf("apt-get install -y --no-install-recommends cuda-crt-%[1]s cuda-cudart-dev-%[1]s cuda-nvcc-%[1]s && apt-get clean", cudaVersion) - s = s.Run(sh(mambaDeps)).Root() + s = s.Run(utils.Sh(mambaDeps)).Root() } } @@ -157,7 +161,7 @@ func installCuda(c *config.Config, s llb.State, merge llb.State) (llb.State, llb return s, llb.Merge([]llb.State{merge, diff}) } -func installExllama(c *config.Config, s llb.State, merge llb.State) llb.State { +func installExllama(c *config.InferenceConfig, s llb.State, merge llb.State) llb.State { backend := utils.BackendExllama exllamaRepo := "https://github.com/turboderp/exllama" exllamaTag := "master" @@ -170,13 +174,13 @@ func installExllama(c *config.Config, s llb.State, merge llb.State) llb.State { } savedState := s - s = s.Run(sh("apt-get update && apt-get install --no-install-recommends -y git ca-certificates python3-pip python3-dev g++ && apt-get clean"), llb.IgnoreCache).Root() + s = s.Run(utils.Sh("apt-get update && apt-get install --no-install-recommends -y git ca-certificates python3-pip python3-dev g++ && apt-get clean"), llb.IgnoreCache).Root() // clone localai exllama backend only s = cloneLocalAI(s, backend) // clone exllama to localai exllama backend path and install python dependencies - s = s.Run(shf("git clone --depth 1 %[1]s --branch %[2]s /tmp/%[3]s && mv /tmp/%[3]s/* /tmp/localai/backend/python/%[3]s && rm -rf /tmp/%[3]s && cd /tmp/localai/backend/python/%[3]s && rm -rf .git && pip3 install grpcio protobuf typing-extensions sympy mpmath setuptools numpy --break-system-packages && pip3 install -r /tmp/localai/backend/python/%[3]s/requirements.txt --break-system-packages", exllamaRepo, exllamaTag, backend)).Root() + s = s.Run(utils.Shf("git clone --depth 1 %[1]s --branch %[2]s /tmp/%[3]s && mv /tmp/%[3]s/* /tmp/localai/backend/python/%[3]s && rm -rf /tmp/%[3]s && cd /tmp/localai/backend/python/%[3]s && rm -rf .git && pip3 install grpcio protobuf typing-extensions sympy mpmath setuptools numpy --break-system-packages && pip3 install -r /tmp/localai/backend/python/%[3]s/requirements.txt --break-system-packages", exllamaRepo, exllamaTag, backend)).Root() diff := llb.Diff(savedState, s) return llb.Merge([]llb.State{merge, diff}) @@ -185,11 +189,11 @@ func installExllama(c *config.Config, s llb.State, merge llb.State) llb.State { func installMamba(s llb.State, merge llb.State) llb.State { savedState := s // libexpat1 is requirement but git is not. 
however libexpat1 is a dependency of git - s = s.Run(sh("apt-get install --no-install-recommends -y git python3 python3-dev python3-pip libssl3 openssl && apt-get clean"), llb.IgnoreCache).Root() + s = s.Run(utils.Sh("apt-get install --no-install-recommends -y git python3 python3-dev python3-pip libssl3 openssl && apt-get clean"), llb.IgnoreCache).Root() s = cloneLocalAI(s, utils.BackendMamba) - s = s.Run(shf("pip3 install packaging numpy torch==2.1.0 grpcio protobuf --break-system-packages && pip3 install causal-conv1d==1.0.0 mamba-ssm==1.0.1 --break-system-packages")).Root() + s = s.Run(utils.Shf("pip3 install packaging numpy torch==2.1.0 grpcio protobuf --break-system-packages && pip3 install causal-conv1d==1.0.0 mamba-ssm==1.0.1 --break-system-packages")).Root() diff := llb.Diff(savedState, s) return llb.Merge([]llb.State{merge, diff}) @@ -198,11 +202,11 @@ func installMamba(s llb.State, merge llb.State) llb.State { func installOpenCV(s llb.State, merge llb.State) llb.State { savedState := s // adding debian 11 (bullseye) repo due to opencv 4.5 requirement - s = s.Run(sh("echo 'deb http://deb.debian.org/debian bullseye main' | tee -a /etc/apt/sources.list")).Root() + s = s.Run(utils.Sh("echo 'deb http://deb.debian.org/debian bullseye main' | tee -a /etc/apt/sources.list")).Root() // pinning libdap packages to bullseye version due to symbol error libdapVersion := "3.20.7-6" libPath := "/usr/lib/x86_64-linux-gnu" - s = s.Run(shf("apt-get update && mkdir -p /tmp/generated/images && apt-get install -y libopencv-imgcodecs4.5 libgomp1 libdap27=%[1]s libdapclient6v5=%[1]s && apt-get clean && ln -s %[2]s/libopencv_core.so.4.5 %[2]s/libopencv_core.so.4.5d && ln -s %[2]s/libopencv_imgcodecs.so.4.5 %[2]s/libopencv_imgcodecs.so.4.5d", libdapVersion, libPath), llb.IgnoreCache).Root() + s = s.Run(utils.Shf("apt-get update && mkdir -p /tmp/generated/images && apt-get install -y libopencv-imgcodecs4.5 libgomp1 libdap27=%[1]s libdapclient6v5=%[1]s && apt-get clean && ln -s %[2]s/libopencv_core.so.4.5 %[2]s/libopencv_core.so.4.5d && ln -s %[2]s/libopencv_imgcodecs.so.4.5 %[2]s/libopencv_imgcodecs.so.4.5d", libdapVersion, libPath), llb.IgnoreCache).Root() diff := llb.Diff(savedState, s) merge = llb.Merge([]llb.State{merge, diff}) @@ -222,7 +226,7 @@ func installOpenCV(s llb.State, merge llb.State) llb.State { return merge } -func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb.State) { +func addLocalAI(c *config.InferenceConfig, s llb.State, merge llb.State) (llb.State, llb.State) { savedState := s var localAIURL string switch c.Runtime { @@ -242,7 +246,7 @@ func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb. localAI := llb.HTTP(localAIURL, opts...) s = s.File( llb.Copy(localAI, "local-ai", "/usr/bin/local-ai"), - llb.WithCustomName("Copying "+fileNameFromURL(localAIURL)+" to /usr/bin"), //nolint: goconst + llb.WithCustomName("Copying "+utils.FileNameFromURL(localAIURL)+" to /usr/bin"), //nolint: goconst ) diff := llb.Diff(savedState, s) @@ -250,17 +254,5 @@ func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb. 
} func cloneLocalAI(s llb.State, backend string) llb.State { - return s.Run(shf("git clone --filter=blob:none --no-checkout %[1]s /tmp/localai/ && cd /tmp/localai && git sparse-checkout init --cone && git sparse-checkout set backend/python/%[2]s && git checkout %[3]s && rm -rf .git", localAIRepo, backend, localAIVersion)).Root() -} - -func shf(cmd string, v ...interface{}) llb.RunOption { - return llb.Args([]string{"/bin/sh", "-c", fmt.Sprintf(cmd, v...)}) -} - -func sh(cmd string) llb.RunOption { - return llb.Args([]string{"/bin/sh", "-c", cmd}) -} - -func bashf(cmd string, v ...interface{}) llb.RunOption { - return llb.Args([]string{"/bin/bash", "-c", fmt.Sprintf(cmd, v...)}) + return s.Run(utils.Shf("git clone --filter=blob:none --no-checkout %[1]s /tmp/localai/ && cd /tmp/localai && git sparse-checkout init --cone && git sparse-checkout set backend/python/%[2]s && git checkout %[3]s && rm -rf .git", localAIRepo, backend, localAIVersion)).Root() } diff --git a/pkg/aikit2llb/image.go b/pkg/aikit2llb/inference/image.go similarity index 92% rename from pkg/aikit2llb/image.go rename to pkg/aikit2llb/inference/image.go index 10d70140..61fe18c2 100644 --- a/pkg/aikit2llb/image.go +++ b/pkg/aikit2llb/inference/image.go @@ -1,4 +1,4 @@ -package aikit2llb +package inference import ( "github.com/moby/buildkit/util/system" @@ -7,7 +7,7 @@ import ( "github.com/sozercan/aikit/pkg/utils" ) -func NewImageConfig(c *config.Config) *specs.Image { +func NewImageConfig(c *config.InferenceConfig) *specs.Image { img := emptyImage(c) cmd := []string{} if c.Debug { @@ -22,7 +22,7 @@ func NewImageConfig(c *config.Config) *specs.Image { return img } -func emptyImage(c *config.Config) *specs.Image { +func emptyImage(c *config.InferenceConfig) *specs.Image { img := &specs.Image{ Platform: specs.Platform{ Architecture: "amd64", diff --git a/pkg/build/build.go b/pkg/build/build.go index f39926a3..864bf852 100644 --- a/pkg/build/build.go +++ b/pkg/build/build.go @@ -13,7 +13,8 @@ import ( "github.com/moby/buildkit/frontend/gateway/client" "github.com/pkg/errors" "github.com/sozercan/aikit/pkg/aikit/config" - "github.com/sozercan/aikit/pkg/aikit2llb" + "github.com/sozercan/aikit/pkg/aikit2llb/finetune" + "github.com/sozercan/aikit/pkg/aikit2llb/inference" "github.com/sozercan/aikit/pkg/utils" ) @@ -21,20 +22,59 @@ const ( LocalNameDockerfile = "dockerfile" keyFilename = "filename" defaultDockerfileName = "aikitfile.yaml" + target = "target" + output = "output" ) func Build(ctx context.Context, c client.Client) (*client.Result, error) { - cfg, err := getAikitfileConfig(ctx, c) + inferenceCfg, finetuneCfg, err := getAikitfileConfig(ctx, c) if err != nil { return nil, errors.Wrap(err, "getting aikitfile") } - err = validateConfig(cfg) + if finetuneCfg != nil { + return buildFineTune(ctx, c, finetuneCfg) + } else if inferenceCfg != nil { + return buildInference(ctx, c, inferenceCfg) + } + + return nil, nil +} + +func buildFineTune(ctx context.Context, c client.Client, cfg *config.FineTuneConfig) (*client.Result, error) { + err := validateFinetuneConfig(cfg) + if err != nil { + return nil, errors.Wrap(err, "validating aikitfile") + } + + // set defaults for unsloth and finetune config + if cfg.Target == utils.TargetUnsloth { + cfg = defaultsUnslothConfig(cfg) + } + cfg = defaultsFineTune(cfg) + + st := finetune.Aikit2LLB(cfg) + + def, err := st.Marshal(ctx) + if err != nil { + return nil, errors.Wrapf(err, "failed to marshal local source") + } + res, err := c.Solve(ctx, client.SolveRequest{ + Definition: def.ToPB(), + }) + 
if err != nil {
+		return nil, errors.Wrapf(err, "failed to resolve dockerfile")
+	}
+	return res, nil
+}
+
+func buildInference(ctx context.Context, c client.Client, cfg *config.InferenceConfig) (*client.Result, error) {
+	err := validateInferenceConfig(cfg)
 	if err != nil {
 		return nil, errors.Wrap(err, "validating aikitfile")
 	}

-	st, img := aikit2llb.Aikit2LLB(cfg)
+	st, img := inference.Aikit2LLB(cfg)

 	def, err := st.Marshal(ctx)
 	if err != nil {
@@ -63,7 +103,7 @@ func Build(ctx context.Context, c client.Client) (*client.Result, error) {
 	return res, nil
 }

-func getAikitfileConfig(ctx context.Context, c client.Client) (*config.Config, error) {
+func getAikitfileConfig(ctx context.Context, c client.Client) (*config.InferenceConfig, *config.FineTuneConfig, error) {
 	opts := c.BuildOpts().Opts
 	filename := opts[keyFilename]
 	if filename == "" {
@@ -84,7 +124,7 @@ func getAikitfileConfig(ctx context.Context, c client.Client) (*config.Config, e

 	def, err := src.Marshal(ctx)
 	if err != nil {
-		return nil, errors.Wrapf(err, "failed to marshal local source")
+		return nil, nil, errors.Wrapf(err, "failed to marshal local source")
 	}

 	var dtDockerfile []byte
@@ -92,30 +132,120 @@ func getAikitfileConfig(ctx context.Context, c client.Client) (*config.Config, e
 		Definition: def.ToPB(),
 	})
 	if err != nil {
-		return nil, errors.Wrapf(err, "failed to resolve dockerfile")
+		return nil, nil, errors.Wrapf(err, "failed to resolve dockerfile")
 	}
 	ref, err := res.SingleRef()
 	if err != nil {
-		return nil, err
+		return nil, nil, err
 	}
 	dtDockerfile, err = ref.ReadFile(ctx, client.ReadRequest{
 		Filename: filename,
 	})
 	if err != nil {
-		return nil, errors.Wrapf(err, "failed to read dockerfile")
+		return nil, nil, errors.Wrapf(err, "failed to read dockerfile")
 	}

-	cfg, err := config.NewFromBytes(dtDockerfile)
+	inferenceCfg, finetuneCfg, err := config.NewFromBytes(dtDockerfile)
 	if err != nil {
-		return nil, errors.Wrap(err, "getting config")
+		return nil, nil, errors.Wrap(err, "getting config")
+	}
+	if finetuneCfg != nil {
+		target, ok := opts[target]
+		if !ok {
+			target = utils.TargetUnsloth
+		}
+		finetuneCfg.Target = target
+
+		if opts[output] == "" {
+			return nil, nil, errors.New("--output is required for finetune.
please specify a directory to save the finetuned model") + } + } + + return inferenceCfg, finetuneCfg, nil +} + +func validateFinetuneConfig(c *config.FineTuneConfig) error { + supportedFineTuneTargets := []string{utils.TargetUnsloth} + + if c.APIVersion == "" { + return errors.New("apiVersion is not defined") + } + + if c.APIVersion != utils.APIv1alpha1 { + return errors.Errorf("apiVersion %s is not supported", c.APIVersion) + } + + if !slices.Contains(supportedFineTuneTargets, c.Target) { + return errors.Errorf("target %s is not supported", c.Target) + } + + if len(c.Datasets) == 0 { + return errors.New("no datasets defined") + } + + if len(c.Datasets) > 1 { + return errors.New("only one dataset is supported at this time") + } + + // only alpaca dataset is supported at this time + for _, d := range c.Datasets { + if d.Type != utils.DatasetAlpaca { + return errors.Errorf("dataset type %s is not supported", d.Type) + } } + return nil +} - return cfg, nil +func defaultsUnslothConfig(c *config.FineTuneConfig) *config.FineTuneConfig { + if c.Config.Unsloth.MaxSeqLength == 0 { + c.Config.Unsloth.MaxSeqLength = 2048 + } + if c.Config.Unsloth.BatchSize == 0 { + c.Config.Unsloth.BatchSize = 2 + } + if c.Config.Unsloth.GradientAccumulationSteps == 0 { + c.Config.Unsloth.GradientAccumulationSteps = 4 + } + if c.Config.Unsloth.WarmupSteps == 0 { + c.Config.Unsloth.WarmupSteps = 10 + } + if c.Config.Unsloth.MaxSteps == 0 { + c.Config.Unsloth.MaxSteps = 60 + } + if c.Config.Unsloth.LearningRate == 0 { + c.Config.Unsloth.LearningRate = 0.0002 + } + if c.Config.Unsloth.LoggingSteps == 0 { + c.Config.Unsloth.LoggingSteps = 1 + } + if c.Config.Unsloth.Optimizer == "" { + c.Config.Unsloth.Optimizer = "adamw_8bit" + } + if c.Config.Unsloth.WeightDecay == 0 { + c.Config.Unsloth.WeightDecay = 0.01 + } + if c.Config.Unsloth.LrSchedulerType == "" { + c.Config.Unsloth.LrSchedulerType = "linear" + } + if c.Config.Unsloth.Seed == 0 { + c.Config.Unsloth.Seed = 42 + } + return c +} + +func defaultsFineTune(c *config.FineTuneConfig) *config.FineTuneConfig { + if c.Output.Quantize == "" { + c.Output.Quantize = "q4_k_m" + } + if c.Output.Name == "" { + c.Output.Name = "aikit-model" + } + return c } -func validateConfig(c *config.Config) error { +func validateInferenceConfig(c *config.InferenceConfig) error { if c.APIVersion == "" { return errors.New("apiVersion is not defined") } diff --git a/pkg/build/build_test.go b/pkg/build/build_test.go index ec3900ec..f92df040 100644 --- a/pkg/build/build_test.go +++ b/pkg/build/build_test.go @@ -1,6 +1,7 @@ package build import ( + "reflect" "testing" "github.com/sozercan/aikit/pkg/aikit/config" @@ -8,7 +9,7 @@ import ( func Test_validateConfig(t *testing.T) { type args struct { - c *config.Config + c *config.InferenceConfig } tests := []struct { name string @@ -17,19 +18,19 @@ func Test_validateConfig(t *testing.T) { }{ { name: "no config", - args: args{c: &config.Config{}}, + args: args{c: &config.InferenceConfig{}}, wantErr: true, }, { name: "unsupported api version", - args: args{c: &config.Config{ + args: args{c: &config.InferenceConfig{ APIVersion: "v10", }}, wantErr: true, }, { name: "invalid runtime", - args: args{c: &config.Config{ + args: args{c: &config.InferenceConfig{ APIVersion: "v1", Runtime: "foo", }}, @@ -37,14 +38,14 @@ func Test_validateConfig(t *testing.T) { }, { name: "no models", - args: args{c: &config.Config{ + args: args{c: &config.InferenceConfig{ APIVersion: "v1alpha1", }}, wantErr: true, }, { name: "valid backend", - args: args{c: &config.Config{ + 
args: args{c: &config.InferenceConfig{ APIVersion: "v1alpha1", Runtime: "cuda", Backends: []string{"exllama"}, @@ -59,7 +60,7 @@ func Test_validateConfig(t *testing.T) { }, { name: "invalid backend", - args: args{c: &config.Config{ + args: args{c: &config.InferenceConfig{ APIVersion: "v1alpha1", Backends: []string{"foo"}, Models: []config.Model{ @@ -73,7 +74,7 @@ func Test_validateConfig(t *testing.T) { }, { name: "valid backend but no cuda runtime", - args: args{c: &config.Config{ + args: args{c: &config.InferenceConfig{ APIVersion: "v1alpha1", Backends: []string{"exllama"}, Models: []config.Model{ @@ -87,7 +88,7 @@ func Test_validateConfig(t *testing.T) { }, { name: "invalid backend combination 1", - args: args{c: &config.Config{ + args: args{c: &config.InferenceConfig{ APIVersion: "v1alpha1", Runtime: "cuda", Backends: []string{"exllama", "exllama2"}, @@ -102,7 +103,7 @@ func Test_validateConfig(t *testing.T) { }, { name: "invalid backend combination 2", - args: args{c: &config.Config{ + args: args{c: &config.InferenceConfig{ APIVersion: "v1alpha1", Runtime: "cuda", Backends: []string{"exllama", "stablediffusion"}, @@ -118,9 +119,186 @@ func Test_validateConfig(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if err := validateConfig(tt.args.c); (err != nil) != tt.wantErr { + if err := validateInferenceConfig(tt.args.c); (err != nil) != tt.wantErr { t.Errorf("validateConfig() error = %v, wantErr %v", err, tt.wantErr) } }) } } + +func Test_validateFineTuneConfig(t *testing.T) { + type args struct { + c *config.FineTuneConfig + } + tests := []struct { + name string + args args + wantErr bool + }{ + { + name: "no config", + args: args{c: &config.FineTuneConfig{}}, + wantErr: true, + }, + { + name: "unsupported api version", + args: args{c: &config.FineTuneConfig{ + APIVersion: "v10", + }}, + wantErr: true, + }, + { + name: "invalid target", + args: args{c: &config.FineTuneConfig{ + APIVersion: "v1alpha1", + Target: "foo", + }}, + wantErr: true, + }, + { + name: "no datasets", + args: args{c: &config.FineTuneConfig{ + APIVersion: "v1alpha1", + Target: "unsloth", + }}, + wantErr: true, + }, + { + name: "invalid dataset type", + args: args{c: &config.FineTuneConfig{ + APIVersion: "v1alpha1", + Target: "unsloth", + Datasets: []config.Dataset{ + { + Source: "foo", + Type: "bar", + }, + }, + }}, + wantErr: true, + }, + { + name: "valid dataset type", + args: args{c: &config.FineTuneConfig{ + APIVersion: "v1alpha1", + Target: "unsloth", + Datasets: []config.Dataset{ + { + Source: "foo", + Type: "alpaca", + }, + }, + }}, + wantErr: false, + }, + { + name: "multiple datasets", + args: args{c: &config.FineTuneConfig{ + APIVersion: "v1alpha1", + Target: "unsloth", + Datasets: []config.Dataset{ + { + Source: "foo", + Type: "alpaca", + }, + { + Source: "bar", + Type: "alpaca", + }, + }, + }}, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := validateFinetuneConfig(tt.args.c); (err != nil) != tt.wantErr { + t.Errorf("validateFineTuneConfig() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +func Test_defaultsUnslothConfig(t *testing.T) { + type args struct { + c *config.FineTuneConfig + } + tests := []struct { + name string + args args + want *config.FineTuneConfig + }{ + { + name: "no config", + args: args{c: &config.FineTuneConfig{}}, + want: &config.FineTuneConfig{ + Config: config.FineTuneConfigSpec{ + Unsloth: config.FineTuneConfigUnslothSpec{ + Packing: false, + MaxSeqLength: 2048, + LoadIn4bit: 
false, + BatchSize: 2, + GradientAccumulationSteps: 4, + WarmupSteps: 10, + MaxSteps: 60, + LearningRate: 0.0002, + LoggingSteps: 1, + Optimizer: "adamw_8bit", + WeightDecay: 0.01, + LrSchedulerType: "linear", + Seed: 42, + }, + }, + }, + }, + { + name: "with config", + args: args{c: &config.FineTuneConfig{ + Config: config.FineTuneConfigSpec{ + Unsloth: config.FineTuneConfigUnslothSpec{ + Packing: true, + MaxSeqLength: 1024, + LoadIn4bit: true, + BatchSize: 4, + GradientAccumulationSteps: 8, + WarmupSteps: 20, + MaxSteps: 120, + LearningRate: 0.0004, + LoggingSteps: 2, + Optimizer: "adamw_16bit", + WeightDecay: 0.02, + LrSchedulerType: "cosine", + Seed: 24, + }, + }, + }}, + want: &config.FineTuneConfig{ + Config: config.FineTuneConfigSpec{ + Unsloth: config.FineTuneConfigUnslothSpec{ + Packing: true, + MaxSeqLength: 1024, + LoadIn4bit: true, + BatchSize: 4, + GradientAccumulationSteps: 8, + WarmupSteps: 20, + MaxSteps: 120, + LearningRate: 0.0004, + LoggingSteps: 2, + Optimizer: "adamw_16bit", + WeightDecay: 0.02, + LrSchedulerType: "cosine", + Seed: 24, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + defaultsUnslothConfig(tt.args.c) + if !reflect.DeepEqual(tt.args.c, tt.want) { + t.Errorf("defaultsUnslothConfig() = %v, want %v", tt.args.c, tt.want) + } + }) + } +} diff --git a/pkg/finetune/target_unsloth.py b/pkg/finetune/target_unsloth.py new file mode 100644 index 00000000..0d01bbdf --- /dev/null +++ b/pkg/finetune/target_unsloth.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 + +from unsloth import FastLanguageModel +import torch +from trl import SFTTrainer +from transformers import TrainingArguments +from datasets import load_dataset +import yaml + +with open('config.yaml', 'r') as config_file: + try: + data = yaml.safe_load(config_file) + print(data) + except yaml.YAMLError as exc: + print(exc) + +cfg = data.get('config').get('unsloth') +max_seq_length = cfg.get('maxSeqLength') + +model, tokenizer = FastLanguageModel.from_pretrained( + model_name=data.get('baseModel'), + max_seq_length=max_seq_length, + dtype=None, + load_in_4bit=True, +) + +model = FastLanguageModel.get_peft_model( + model, + r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128 + target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", + "gate_proj", "up_proj", "down_proj",], + lora_alpha = 16, + lora_dropout = 0, # Supports any, but = 0 is optimized + bias = "none", # Supports any, but = "none" is optimized + use_gradient_checkpointing = True, + random_state = 3407, + use_rslora = False, # We support rank stabilized LoRA + loftq_config = None, # And LoftQ +) + +# TODO: right now, this is hardcoded for alpaca. use the dataset type here in the future to make this customizable +alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. + +### Instruction: +{} + +### Input: +{} + +### Response: +{}""" + +EOS_TOKEN = tokenizer.eos_token +def formatting_prompts_func(examples): + instructions = examples["instruction"] + inputs = examples["input"] + outputs = examples["output"] + texts = [] + for instruction, input, output in zip(instructions, inputs, outputs): + # Must add EOS_TOKEN, otherwise your generation will go on forever! 
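+        # Each example is rendered into the alpaca prompt template as a single
+        # string (instruction, optional input, expected response + EOS); the
+        # trainer below reads these strings from the "text" column.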
+        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
+        texts.append(text)
+    return {"text": texts}
+
+source = data.get('datasets')[0]['source']
+
+if source.startswith('http'):
+    dataset = load_dataset("json", data_files={"train": source}, split="train")
+else:
+    dataset = load_dataset(source, split="train")
+
+dataset = dataset.map(formatting_prompts_func, batched=True)
+
+trainer = SFTTrainer(
+    model=model,
+    train_dataset=dataset,
+    dataset_text_field="text",
+    max_seq_length=max_seq_length,
+    tokenizer=tokenizer,
+    dataset_num_proc=2,
+    packing=cfg.get('packing'),  # can make training 5x faster for short sequences
+    args=TrainingArguments(
+        per_device_train_batch_size=cfg.get('batchSize'),
+        gradient_accumulation_steps=cfg.get('gradientAccumulationSteps'),
+        warmup_steps=cfg.get('warmupSteps'),
+        max_steps=cfg.get('maxSteps'),
+        learning_rate=cfg.get('learningRate'),
+        fp16=not torch.cuda.is_bf16_supported(),
+        bf16=torch.cuda.is_bf16_supported(),
+        logging_steps=cfg.get('loggingSteps'),
+        optim=cfg.get('optimizer'),
+        weight_decay=cfg.get('weightDecay'),
+        lr_scheduler_type=cfg.get('lrSchedulerType'),
+        seed=cfg.get('seed'),
+        output_dir="outputs",
+    ),
+)
+trainer.train()
+
+output = data.get('output')
+model.save_pretrained_gguf(output.get('name'), tokenizer,
+                           quantization_method=output.get('quantize'))
diff --git a/pkg/utils/const.go b/pkg/utils/const.go
index 47e961c2..8adb4cc9 100644
--- a/pkg/utils/const.go
+++ b/pkg/utils/const.go
@@ -11,5 +11,12 @@ const (
 	BackendExllamaV2 = "exllama2"
 	BackendMamba     = "mamba"
 
+	TargetUnsloth = "unsloth"
+
+	DatasetAlpaca = "alpaca"
+	APIv1alpha1   = "v1alpha1"
+
+	DebianSlim = "docker.io/library/debian:12-slim"
+	CudaDevel  = "nvcr.io/nvidia/cuda:12.3.2-devel-ubuntu22.04"
 )
diff --git a/pkg/utils/util.go b/pkg/utils/util.go
new file mode 100644
index 00000000..16fd4dd8
--- /dev/null
+++ b/pkg/utils/util.go
@@ -0,0 +1,29 @@
+package utils
+
+import (
+	"fmt"
+	"net/url"
+	"path"
+
+	"github.com/moby/buildkit/client/llb"
+)
+
+// FileNameFromURL returns the base name of the URL path; it panics if the URL cannot be parsed.
+func FileNameFromURL(urlString string) string {
+	parsedURL, err := url.Parse(urlString)
+	if err != nil {
+		panic(err)
+	}
+	return path.Base(parsedURL.Path)
+}
+
+// Sh runs the command with /bin/sh -c.
+func Sh(cmd string) llb.RunOption {
+	return llb.Args([]string{"/bin/sh", "-c", cmd})
+}
+
+// Shf formats the command and runs it with /bin/sh -c.
+func Shf(cmd string, v ...interface{}) llb.RunOption {
+	return llb.Args([]string{"/bin/sh", "-c", fmt.Sprintf(cmd, v...)})
+}
+
+// Bashf formats the command and runs it with /bin/bash -c.
+func Bashf(cmd string, v ...interface{}) llb.RunOption {
+	return llb.Args([]string{"/bin/bash", "-c", fmt.Sprintf(cmd, v...)})
+}
diff --git a/pkg/aikit2llb/convert_test.go b/pkg/utils/util_test.go
similarity index 74%
rename from pkg/aikit2llb/convert_test.go
rename to pkg/utils/util_test.go
index 5ac4e8d1..7d035d36 100644
--- a/pkg/aikit2llb/convert_test.go
+++ b/pkg/utils/util_test.go
@@ -1,10 +1,10 @@
-package aikit2llb
+package utils
 
 import (
 	"testing"
 )
 
-func Test_fileNameFromURL(t *testing.T) {
+func Test_FileNameFromURL(t *testing.T) {
 	type args struct {
 		urlString string
 	}
@@ -31,8 +31,8 @@ func Test_fileNameFromURL(t *testing.T) {
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			if got := fileNameFromURL(tt.args.urlString); got != tt.want {
-				t.Errorf("fileNameFromURL() = %v, want %v", got, tt.want)
+			if got := FileNameFromURL(tt.args.urlString); got != tt.want {
+				t.Errorf("FileNameFromURL() = %v, want %v", got, tt.want)
 			}
 		})
 	}
diff --git a/pkg/version/version.go b/pkg/version/version.go
new file mode 100644
index 00000000..6fb8cbf4
---
/dev/null +++ b/pkg/version/version.go @@ -0,0 +1,4 @@ +package version + +// Version is the aikit version. +var Version string diff --git a/test/aikitfile-dev.yaml b/test/aikitfile-dev.yaml index 913b75d0..cd55c4b7 100644 --- a/test/aikitfile-dev.yaml +++ b/test/aikitfile-dev.yaml @@ -1,4 +1,4 @@ -#syntax=ghcr.io/sozercan/aikit:dev +#syntax=ghcr.io/sozercan/aikit/inference:dev apiVersion: v1alpha1 debug: true models: diff --git a/test/aikitfile-unsloth-custom.yaml b/test/aikitfile-unsloth-custom.yaml new file mode 100644 index 00000000..89320401 --- /dev/null +++ b/test/aikitfile-unsloth-custom.yaml @@ -0,0 +1,31 @@ +#syntax=aikit:test +apiVersion: v1alpha1 +debug: true +runtime: cuda +models: + - name: custom + source: model-q4_k_m.gguf + promptTemplates: + - name: instruct + template: | + Below is an instruction that describes a task. Write a response that appropriately completes the request. + + ### Instruction: + {{.Input}} + + ### Response: +config: | + - name: custom + backend: llama + parameters: + top_k: 80 + temperature: 0.8 + top_p: 0.7 + model: model-q4_k_m.gguf + context_size: 4096 + gpu_layers: 35 + f16: true + batch: 512 + mmap: + template: + chat: instruct diff --git a/test/aikitfile-unsloth.yaml b/test/aikitfile-unsloth.yaml new file mode 100644 index 00000000..9a7f16ba --- /dev/null +++ b/test/aikitfile-unsloth.yaml @@ -0,0 +1,24 @@ +#syntax=aikit:test +apiVersion: v1alpha1 +baseModel: unsloth/llama-2-7b-bnb-4bit +datasets: + - source: "yahma/alpaca-cleaned" + type: alpaca +config: + unsloth: + packing: false + maxSeqLength: 2048 + loadIn4bit: true + batchSize: 2 + gradientAccumulationSteps: 4 + warmupSteps: 10 + maxSteps: 60 + learningRate: 0.0002 + loggingSteps: 1 + optimizer: adamw_8bit + weightDecay: 0.01 + lrSchedulerType: linear + seed: 42 +output: + quantize: q4_k_m + name: model diff --git a/website/docs/create-images.md b/website/docs/create-images.md index 93839188..95162c74 100644 --- a/website/docs/create-images.md +++ b/website/docs/create-images.md @@ -17,7 +17,7 @@ models: ``` :::tip -This is the simplest way to get started to build an image. For full `aikitfile` specification, see [specs](docs/specs.md). +This is the simplest way to get started to build an image. For full `aikitfile` inference specifications, see [Inference API Specifications](docs/specs-inference.md). ::: First, create a buildx buildkit instance. Alternatively, if you are using Docker v24 with [containerd image store](https://docs.docker.com/storage/containerd/) enabled, you can skip this step. @@ -57,4 +57,4 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] }' {"created":1701236489,"object":"chat.completion","id":"dd1ff40b-31a7-4418-9e32-42151ab6875a","model":"llama-2-7b-chat","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"\nKubernetes is a container orchestration system that automates the deployment, scaling, and management of containerized applications in a microservices architecture."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}} -``` \ No newline at end of file +``` diff --git a/website/docs/exllama2.md b/website/docs/exllama2.md index 5a1fb135..fcbf9eb5 100644 --- a/website/docs/exllama2.md +++ b/website/docs/exllama2.md @@ -2,8 +2,7 @@ title: Exllama v2 (GPTQ and EXL2) --- -[ExLlamaV2](https://github.com/turboderp/exllamav2 -) is an inference library for running local LLMs on modern consumer GPUs. 
+[ExLlamaV2](https://github.com/turboderp/exllamav2) is an inference library for running local LLMs on modern consumer GPUs.
 
 This backend:
 - provides support for GPTQ and EXL2 models
diff --git a/website/docs/fine-tune.md b/website/docs/fine-tune.md
new file mode 100644
index 00000000..a391c4f0
--- /dev/null
+++ b/website/docs/fine-tune.md
@@ -0,0 +1,83 @@
+---
+title: Fine Tuning
+---
+
+Fine tuning adapts pre-trained models to domain-specific data. At this time, AIKit fine tuning is supported only on NVIDIA GPUs.
+
+:::note
+Due to current BuildKit and NVIDIA limitations, your host GPU driver version must match the driver that AIKit will install into the container during build.
+
+To find your host GPU driver version, you can run `nvidia-smi` or `cat /proc/driver/nvidia/version`.
+
+For a list of driver versions supported by AIKit, please refer to https://download.nvidia.com/XFree86/Linux-x86_64/
+
+If your host GPU driver version is not in that list, you'll need to install a version that is. You don't need to install the drivers from that location; only the versions need to match.
+
+This may be optimized in the future to remove this requirement, if possible.
+:::
+
+## Getting Started
+
+To get started, you need to create a builder that can access host GPU devices.
+
+Create a builder with the following configuration:
+
+```bash
+docker buildx create --name aikit-builder --use --buildkitd-flags '--allow-insecure-entitlement security.insecure'
+```
+
+## Targets and Configuration
+
+AIKit is designed to support multiple fine tuning targets. At this time, [Unsloth](https://github.com/unslothai/unsloth) is the only supported target, but support for other fine tuning implementations may be added in the future.
+
+### Unsloth
+
+Create a YAML file with your configuration. For example, a minimal configuration looks like:
+
+```yaml
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+baseModel: unsloth/llama-2-7b-bnb-4bit # base model to be fine tuned. this can be any model from Huggingface. for unsloth optimized base models, see https://huggingface.co/unsloth
+datasets:
+  - source: "yahma/alpaca-cleaned" # dataset to be used for fine tuning. this can be a Huggingface dataset or a URL pointing to a JSON file
+    type: alpaca # type of dataset. only alpaca is supported at this time.
+```
+
+For the full configuration, please refer to [Fine Tuning API Specifications](./specs-finetune.md).
+
+:::note
+Please refer to the [Unsloth documentation](https://github.com/unslothai/unsloth) for more information about Unsloth configuration.
+:::
+
+## Build
+
+Build using the following command, replacing `--target` with the fine tuning implementation of your choice (`unsloth` is the only supported option at this time), `--file` with the path to your configuration YAML, and `--output` with the output directory for the fine-tuned model.
+
+```bash
+docker buildx build --builder aikit-builder --allow security.insecure --file "/path/to/config.yaml" --output "/path/to/output" --target unsloth --progress plain .
+```
+
+Depending on your setup and configuration, the build process may take some time. At the end of the build, the fine-tuned model is automatically quantized with the specified format and written to the path specified by `--output`.
+
+The output will be a `GGUF` model file named after the output name and quantization format from the configuration, and you can sanity-check it as shown below.
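+
+As a quick sanity check, note that the file name follows the `<name>-<quantize>.gguf` pattern. The following is a minimal sketch, assuming the default output name (`aikit-model`), the default `q4_k_m` quantization, and `_output` as the `--output` directory:
+
+```bash
+# Minimal sketch: verify the fine-tuned GGUF produced by the build.
+# Assumes --output "_output" and the default name "aikit-model" with the
+# default "q4_k_m" quantization; adjust to match your aikitfile.
+MODEL="_output/aikit-model-q4_k_m.gguf"
+if [ -f "$MODEL" ]; then
+    echo "fine-tuned model ready: $MODEL ($(du -h "$MODEL" | cut -f1))"
+else
+    echo "model not found; check the build logs" >&2
+    exit 1
+fi
+```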
+For example, listing the output directory shows the quantized model:
+
+```bash
+$ ls -al _output
+-rw-r--r-- 1 sozercan sozercan 7161089856 Mar  3 00:19 aikit-model-q4_k_m.gguf
+```
+
+## What's next?
+
+๐Ÿ‘‰ Now that you have a fine-tuned model output as a GGUF file, refer to [Creating Model Images](./create-images.md) to learn how to create an image with AIKit to serve your fine-tuned model!
+
+## Troubleshooting
+
+### Build fails with `failed to solve: DeadlineExceeded: context deadline exceeded`
+
+This is a known issue with BuildKit and might be related to disk speed. For more information, please see https://github.com/moby/buildkit/issues/4327
+
+### Build fails with `ERROR 404: Not Found.`
+
+This is due to mismatched host and container GPU driver versions. Please refer to the note at the top of this page for more information.
+
+If you are on Windows Subsystem for Linux (WSL), WSL doesn't expose the host driver version information at `/proc/driver/nvidia/version`. Due to this limitation, WSL is not supported at this time.
diff --git a/website/docs/intro.md b/website/docs/intro.md
index 3ae9a2e8..b9dc0f50 100644
--- a/website/docs/intro.md
+++ b/website/docs/intro.md
@@ -3,15 +3,23 @@ title: Introduction
 slug: /
 ---
 
-AIKit is a quick, easy, and local or cloud-agnostic way to get started to host and deploy large language models (LLMs) for inference. No GPU, internet access or additional tools are needed to get started except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
+AIKit is a one-stop shop for quickly hosting, deploying, building, and fine-tuning large language models (LLMs).
 
-AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), [Chatbot-UI](https://github.com/sozercan/chatbot-ui) and many more, to send requests to open-source LLMs powered by AIKit!
+AIKit offers two main capabilities:
+
+- **Inference**: AIKit uses [LocalAI](https://localai.io/), which supports a wide range of inference capabilities and formats. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), [Chatbot-UI](https://github.com/sozercan/chatbot-ui) and many more, to send requests to open-source LLMs!
+
+- **Fine Tuning**: AIKit uses [Unsloth](https://github.com/unslothai/unsloth) for a fast, memory-efficient, and easy fine-tuning experience.
+
+To get started, please see [Quick Start](quick-start.md)!
 
 ## Features
 
-- ๐Ÿณ No GPU, Internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
+- ๐Ÿ’ก No GPU or Internet access is required for inference!
+- ๐Ÿณ No additional tools are needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
- ๐Ÿค Minimal image size, resulting in less vulnerabilities and smaller attack surface with a custom [distroless](https://github.com/GoogleContainerTools/distroless)-based image -- ๐Ÿš€ [Easy to use declarative configuration](specs.md) +- ๐ŸŽต [Fine tune support](fine-tune.md) +- ๐Ÿš€ Easy to use declarative configuration for [inference](specs-inference.md) and [fine tuning](specs-finetune.md) - โœจ OpenAI API compatible to use with any OpenAI API compatible client - ๐Ÿ“ธ [Multi-modal model support](vision.md) - ๐Ÿ–ผ๏ธ Image generation support with Stable Diffusion @@ -21,5 +29,3 @@ AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. Local - ๐Ÿ–ฅ๏ธ [Supports GPU-accelerated inferencing with NVIDIA GPUs](gpu.md) - ๐Ÿ” [Signed images for `aikit` and pre-made models](cosign.md) - ๐ŸŒˆ Support for non-proprietary and self-hosted container registries to store model images - -To get started, please see [Quick Start](quick-start.md)! diff --git a/website/docs/quick-start.md b/website/docs/quick-start.md index a13dcbe5..a50e6fd2 100644 --- a/website/docs/quick-start.md +++ b/website/docs/quick-start.md @@ -19,4 +19,12 @@ Output should be similar to: `{"created":1701236489,"object":"chat.completion","id":"dd1ff40b-31a7-4418-9e32-42151ab6875a","model":"llama-2-7b-chat","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"\nKubernetes is a container orchestration system that automates the deployment, scaling, and management of containerized applications in a microservices architecture."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` -That's it! ๐ŸŽ‰ API is OpenAI compatible so this is a drop-in replacement for any OpenAI API compatible client. \ No newline at end of file +That's it! ๐ŸŽ‰ API is OpenAI compatible so this is a drop-in replacement for any OpenAI API compatible client. + +## What's next? + +๐Ÿ‘‰ If you are interested in other pre-made models (such as Mistral or Mixtral), please refer to [Pre-made models](./premade-models.md). + +๐Ÿ‘‰ If you are interested in learning more about how to create your own custom model images, please refer to [Creating Model Images](./create-images.md). + +๐Ÿ‘‰ If you are interested in fine tuning a model with domain-specific knowledge, please refer to [Fine Tuning](./fine-tune.md). diff --git a/website/docs/specs-finetune.md b/website/docs/specs-finetune.md new file mode 100644 index 00000000..d6bc75c8 --- /dev/null +++ b/website/docs/specs-finetune.md @@ -0,0 +1,61 @@ +--- +title: Fine Tuning API Specifications +--- + +## v1alpha1 + +```yaml +#syntax=ghcr.io/sozercan/aikit:latest +apiVersion: # required. only v1alpha1 is supported at the moment +baseModel: # required. any base model from Huggingface. for unsloth, see for 4bit pre-quantized models: https://huggingface.co/unsloth +datasets: + - source: # required. this can be a Huggingface dataset repo or a URL pointing to a JSON file + type: # required. can be "alpaca". only alpaca is supported at the moment +config: + unsloth: + packing: # optional. defaults to false. can make training 5x faster for short sequences. + maxSeqLength: # optional. defaults to 2048 + loadIn4bit: # optional. defaults to true + batchSize: # optional. default to 2 + gradientAccumulationSteps: # optional. defaults to 4 + warmupSteps: # optional. defaults to 10 + maxSteps: # optional. defaults to 60 + learningRate: # optional. defaults to 0.0002 + loggingSteps: # optional. defaults to 1 + optimizer: # optional. 
+    weightDecay: # optional. defaults to 0.01
+    lrSchedulerType: # optional. defaults to linear
+    seed: # optional. defaults to 42
+output:
+  quantize: # optional. defaults to q4_k_m. for unsloth, see https://github.com/unslothai/unsloth/wiki#saving-to-gguf for allowed quantization methods
+  name: # optional. defaults to "aikit-model"
+```
+
+Example:
+
+```yaml
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+baseModel: unsloth/mistral-7b-instruct-v0.2-bnb-4bit
+datasets:
+  - source: "yahma/alpaca-cleaned"
+    type: alpaca
+config:
+  unsloth:
+    packing: false
+    maxSeqLength: 2048
+    loadIn4bit: true
+    batchSize: 2
+    gradientAccumulationSteps: 4
+    warmupSteps: 10
+    maxSteps: 60
+    learningRate: 0.0002
+    loggingSteps: 1
+    optimizer: adamw_8bit
+    weightDecay: 0.01
+    lrSchedulerType: linear
+    seed: 42
+output:
+  quantize: q4_k_m
+  name: model
+```
diff --git a/website/docs/specs.md b/website/docs/specs-inference.md
similarity index 94%
rename from website/docs/specs.md
rename to website/docs/specs-inference.md
index 15ad0f35..df151aeb 100644
--- a/website/docs/specs.md
+++ b/website/docs/specs-inference.md
@@ -1,5 +1,5 @@
 ---
-title: API Specifications
+title: Inference API Specifications
 ---
 
 ## v1alpha1
@@ -11,7 +11,7 @@ runtime: # optional. defaults to avx. can be "avx", "avx2", "avx512", "cuda"
 backends: # optional. list of additional backends. can be "stablediffusion", "exllama" or "exllama2"
 models: # required. list of models to build
 - name: # required. name of the model
-  source: # required. source of the model. must be a url
+  source: # required. source of the model. can be a url or a local file
   sha256: # optional. sha256 hash of the model file
   promptTemplates: # optional. list of prompt templates for a model
   - name: # required. name of the template
diff --git a/website/sidebars.js b/website/sidebars.js
index edb21c19..c5f99cf3 100644
--- a/website/sidebars.js
+++ b/website/sidebars.js
@@ -31,7 +31,7 @@ const sidebars = {
       collapsed: false,
       items: [
        'create-images',
-       'specs',
+       'fine-tune',
        'vision',
        'gpu',
        'kubernetes',
@@ -40,7 +40,16 @@
     },
     {
       type: 'category',
-      label: 'Supported Backends',
+      label: 'Specifications',
+      collapsed: false,
+      items: [
+        'specs-inference',
+        'specs-finetune',
+      ],
+    },
+    {
+      type: 'category',
+      label: 'Inference Supported Backends',
       collapsed: false,
       items: [
         'llama-cpp',