diff --git a/.github/workflows/test-docker-gpu.yaml b/.github/workflows/test-docker-gpu.yaml index b8f57473..985c03aa 100644 --- a/.github/workflows/test-docker-gpu.yaml +++ b/.github/workflows/test-docker-gpu.yaml @@ -15,7 +15,7 @@ jobs: matrix: backend: # - llama-cuda - # - exllama # https://github.com/sozercan/aikit/issues/94 + - exllama # https://github.com/sozercan/aikit/issues/94 - exllama2-gptq - exllama2-exl2 - mamba diff --git a/pkg/aikit2llb/inference/exllama.go b/pkg/aikit2llb/inference/exllama.go index 5e20be9e..06b3b93f 100644 --- a/pkg/aikit2llb/inference/exllama.go +++ b/pkg/aikit2llb/inference/exllama.go @@ -21,13 +21,9 @@ func installExllama(c *config.InferenceConfig, s llb.State, merge llb.State) llb savedState := s s = s.Run(utils.Sh("apt-get update && apt-get install --no-install-recommends -y bash git ca-certificates python3-pip python3-dev python3-venv python-is-python3 make g++ curl && curl -LsSf https://astral.sh/uv/install.sh | sh && pip install grpcio-tools --break-system-packages && apt-get clean"), llb.IgnoreCache).Root() - // clone localai exllama backend only s = cloneLocalAI(s) - s = s.Run(utils.Bashf("source $HOME/.cargo/env && export BUILD_TYPE=cublas && cd /tmp/localai/backend/python/%[1]s && make exllama2", backend)).Root() - - // clone exllama to localai exllama backend path and install python dependencies - // s = s.Run(utils.Bashf("git clone --depth 1 %[1]s --branch %[2]s /tmp/%[3]s && mv /tmp/%[3]s/* /tmp/localai/backend/python/%[3]s && rm -rf /tmp/%[3]s && cd /tmp/localai/backend/python/%[3]s && rm -rf .git && source $HOME/.cargo/env && python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto && uv venv && source .venv/bin/activate && ls -al && uv pip install --no-build-isolation --requirement requirements-install.txt && EXLLAMA_NOCOMPILE= uv pip install --no-build-isolation --requirement requirements.txt", exllamaRepo, exllamaTag, backend)).Root() + s = s.Run(utils.Bashf("source $HOME/.cargo/env && export BUILD_TYPE=cublas && cd /tmp/localai/backend/python/%[1]s && make %[1]s", backend)).Root() diff := llb.Diff(savedState, s) return llb.Merge([]llb.State{merge, diff}) diff --git a/pkg/aikit2llb/inference/image.go b/pkg/aikit2llb/inference/image.go index 43a923aa..49c716cd 100644 --- a/pkg/aikit2llb/inference/image.go +++ b/pkg/aikit2llb/inference/image.go @@ -42,6 +42,7 @@ func emptyImage(c *config.InferenceConfig) *specs.Image { "NVIDIA_DRIVER_CAPABILITIES=compute,utility", "NVIDIA_VISIBLE_DEVICES=all", "LD_LIBRARY_PATH=/usr/local/cuda/lib64", + "BUILD_TYPE=cublas", } if c.Runtime == utils.RuntimeNVIDIA { img.Config.Env = append(img.Config.Env, cudaEnv...) @@ -51,14 +52,13 @@ func emptyImage(c *config.InferenceConfig) *specs.Image { switch c.Backends[b] { case utils.BackendExllama, utils.BackendExllamaV2: exllamaEnv := []string{ - "BUILD_TYPE=cublas", - "EXTERNAL_GRPC_BACKENDS=exllama:/tmp/localai/backend/python/exllama/backend.py,exllama2:/tmp/localai/backend/python/exllama2/run.sh", + "EXTERNAL_GRPC_BACKENDS=exllama:/tmp/localai/backend/python/exllama/run.sh,exllama2:/tmp/localai/backend/python/exllama2/run.sh", "CUDA_HOME=/usr/local/cuda", } img.Config.Env = append(img.Config.Env, exllamaEnv...) case utils.BackendMamba: mambaEnv := []string{ - "EXTERNAL_GRPC_BACKENDS=mamba:/tmp/localai/backend/python/mamba/backend.py", + "EXTERNAL_GRPC_BACKENDS=mamba:/tmp/localai/backend/python/mamba/run.sh", "CUDA_HOME=/usr/local/cuda", } img.Config.Env = append(img.Config.Env, mambaEnv...) diff --git a/pkg/aikit2llb/inference/mamba.go b/pkg/aikit2llb/inference/mamba.go index 49a41e7f..c8522b6d 100644 --- a/pkg/aikit2llb/inference/mamba.go +++ b/pkg/aikit2llb/inference/mamba.go @@ -1,8 +1,6 @@ package inference import ( - "fmt" - "github.com/moby/buildkit/client/llb" "github.com/sozercan/aikit/pkg/utils" ) @@ -10,39 +8,12 @@ import ( func installMamba(s llb.State, merge llb.State) llb.State { savedState := s // libexpat1 is requirement but git is not. however libexpat1 is a dependency of git - s = s.Run(utils.Sh("apt-get install --no-install-recommends -y git python3 python3-dev python3-pip libssl3 openssl && apt-get clean"), llb.IgnoreCache).Root() + s = s.Run(utils.Sh("apt-get install --no-install-recommends -y git python3 python3-dev python3-pip python3-venv python-is-python3 libssl3 openssl && apt-get clean"), llb.IgnoreCache).Root() s = cloneLocalAI(s) - s = s.Run(utils.Shf("pip3 install packaging numpy torch==2.1.0 grpcio protobuf --break-system-packages && pip3 install causal-conv1d==1.0.0 mamba-ssm==1.0.1 --break-system-packages")).Root() + s = s.Run(utils.Bashf("source $HOME/.cargo/env && export BUILD_TYPE=cublas && cd /tmp/localai/backend/python/%[1]s && make %[1]s", utils.BackendMamba)).Root() diff := llb.Diff(savedState, s) return llb.Merge([]llb.State{merge, diff}) } - -func installOpenCV(s llb.State, merge llb.State) llb.State { - savedState := s - // adding debian 11 (bullseye) repo due to opencv 4.5 requirement - s = s.Run(utils.Sh("echo 'deb http://deb.debian.org/debian bullseye main' | tee -a /etc/apt/sources.list")).Root() - // pinning libdap packages to bullseye version due to symbol error - libdapVersion := "3.20.7-6" - libPath := "/usr/lib/x86_64-linux-gnu" - s = s.Run(utils.Shf("apt-get update && mkdir -p /tmp/generated/images && apt-get install -y libopencv-imgcodecs4.5 libgomp1 libdap27=%[1]s libdapclient6v5=%[1]s && apt-get clean && ln -s %[2]s/libopencv_core.so.4.5 %[2]s/libopencv_core.so.4.5d && ln -s %[2]s/libopencv_imgcodecs.so.4.5 %[2]s/libopencv_imgcodecs.so.4.5d", libdapVersion, libPath), llb.IgnoreCache).Root() - diff := llb.Diff(savedState, s) - merge = llb.Merge([]llb.State{merge, diff}) - - sdURL := fmt.Sprintf("https://sertaccdn.azureedge.net/localai/%s/stablediffusion", localAIVersion) - var opts []llb.HTTPOption - opts = append(opts, llb.Filename("stablediffusion")) - opts = append(opts, llb.Chmod(0o755)) - var copyOpts []llb.CopyOption - copyOpts = append(copyOpts, &llb.CopyInfo{ - CreateDestPath: true, - }) - sd := llb.HTTP(sdURL, opts...) - merge = merge.File( - llb.Copy(sd, "stablediffusion", "/tmp/localai/backend_data/backend-assets/grpc/stablediffusion", copyOpts...), - llb.WithCustomName("Copying stable diffusion backend"), //nolint: goconst - ) - return merge -} diff --git a/pkg/aikit2llb/inference/stablediffusion.go b/pkg/aikit2llb/inference/stablediffusion.go new file mode 100644 index 00000000..5a40e537 --- /dev/null +++ b/pkg/aikit2llb/inference/stablediffusion.go @@ -0,0 +1,35 @@ +package inference + +import ( + "fmt" + + "github.com/moby/buildkit/client/llb" + "github.com/sozercan/aikit/pkg/utils" +) + +func installOpenCV(s llb.State, merge llb.State) llb.State { + savedState := s + // adding debian 11 (bullseye) repo due to opencv 4.5 requirement + s = s.Run(utils.Sh("echo 'deb http://deb.debian.org/debian bullseye main' | tee -a /etc/apt/sources.list")).Root() + // pinning libdap packages to bullseye version due to symbol error + libdapVersion := "3.20.7-6" + libPath := "/usr/lib/x86_64-linux-gnu" + s = s.Run(utils.Shf("apt-get update && mkdir -p /tmp/generated/images && apt-get install -y libopencv-imgcodecs4.5 libgomp1 libdap27=%[1]s libdapclient6v5=%[1]s && apt-get clean && ln -s %[2]s/libopencv_core.so.4.5 %[2]s/libopencv_core.so.4.5d && ln -s %[2]s/libopencv_imgcodecs.so.4.5 %[2]s/libopencv_imgcodecs.so.4.5d", libdapVersion, libPath), llb.IgnoreCache).Root() + diff := llb.Diff(savedState, s) + merge = llb.Merge([]llb.State{merge, diff}) + + sdURL := fmt.Sprintf("https://sertaccdn.azureedge.net/localai/%s/stablediffusion", localAIVersion) + var opts []llb.HTTPOption + opts = append(opts, llb.Filename("stablediffusion")) + opts = append(opts, llb.Chmod(0o755)) + var copyOpts []llb.CopyOption + copyOpts = append(copyOpts, &llb.CopyInfo{ + CreateDestPath: true, + }) + sd := llb.HTTP(sdURL, opts...) + merge = merge.File( + llb.Copy(sd, "stablediffusion", "/tmp/localai/backend_data/backend-assets/grpc/stablediffusion", copyOpts...), + llb.WithCustomName("Copying stable diffusion backend"), //nolint: goconst + ) + return merge +}