Skip to content

Commit

Permalink
feat: package vllm into the preset model image
Browse files Browse the repository at this point in the history
Signed-off-by: jerryzhuang <[email protected]>
  • Loading branch information
zhuangqh committed Oct 24, 2024
1 parent 8906190 commit 99e397d
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 15 deletions.
42 changes: 28 additions & 14 deletions docker/presets/models/tfs/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,23 @@ ARG MODEL_TYPE
ARG VERSION

# Set the working directory
WORKDIR /workspace/tfs
WORKDIR /workspace

# Write the version to a file
RUN echo $VERSION > /workspace/tfs/version.txt
RUN pip install --no-cache-dir virtualenv

# First, copy just the preset files and install dependencies
# This is done before copying the code to utilize Docker's layer caching and
# avoid reinstalling dependencies unless the requirements file changes.
# Inference
# 1. Huggingface transformers
# dependencies
COPY kaito/presets/inference/${MODEL_TYPE}/requirements.txt /workspace/tfs/inference-requirements.txt
RUN pip install --no-cache-dir -r inference-requirements.txt

COPY kaito/presets/inference/${MODEL_TYPE}/inference_api.py /workspace/tfs/inference_api.py

# Fine Tuning
COPY kaito/presets/tuning/${MODEL_TYPE}/requirements.txt /workspace/tfs/tuning-requirements.txt
RUN pip install --no-cache-dir -r tuning-requirements.txt
RUN virtualenv tfs && \
. tfs/bin/activate && \
pip install --no-cache-dir -r /workspace/tfs/inference-requirements.txt && \
pip install --no-cache-dir -r /workspace/tfs/tuning-requirements.txt && \
deactivate

# Copy the inference and tuning scripts
COPY kaito/presets/inference/${MODEL_TYPE}/inference_api.py /workspace/tfs/inference_api.py
COPY kaito/presets/tuning/${MODEL_TYPE}/cli.py /workspace/tfs/cli.py
COPY kaito/presets/tuning/${MODEL_TYPE}/fine_tuning.py /workspace/tfs/fine_tuning.py
COPY kaito/presets/tuning/${MODEL_TYPE}/parser.py /workspace/tfs/parser.py
Expand All @@ -31,5 +30,20 @@ COPY kaito/presets/tuning/${MODEL_TYPE}/dataset.py /workspace/tfs/dataset.py
# Copy the metrics server
COPY kaito/presets/tuning/${MODEL_TYPE}/metrics/metrics_server.py /workspace/tfs/metrics_server.py

# Copy the entire model weights to the weights directory
COPY ${WEIGHTS_PATH} /workspace/tfs/weights
# 2. vLLM
COPY kaito/presets/inference/vllm/requirements.txt /workspace/vllm/inference-requirements.txt
RUN virtualenv vllm && \
. vllm/bin/activate && \
pip install --no-cache-dir -r /workspace/vllm/inference-requirements.txt && \
deactivate

COPY kaito/presets/inference/vllm/inference_api.py /workspace/vllm/inference_api.py

# 3. Model weights
COPY kaito/docker/presets/models/tfs/entrypoint /workspace/entrypoint
COPY ${WEIGHTS_PATH} /workspace/weights
RUN echo $VERSION > /workspace/version.txt && \
ln -s /workspace/weights /workspace/tfs/weights && \
ln -s /workspace/weights /workspace/vllm/weights

ENTRYPOINT ["/workspace/entrypoint"]
43 changes: 43 additions & 0 deletions docker/presets/models/tfs/entrypoint
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash
# Container entrypoint for the preset model image.
#
# Usage: entrypoint [backend] -- [command]
#   Selects an inference backend ("vllm" or "transformers"), activates the
#   matching virtualenv under the current directory, cd's into it, and then
#   runs the command given after the "--" separator.

if [[ "$1" == "--help" || "$1" == "-h" ]]; then
  echo "Usage: $0 [backend] -- [command]"
  echo "Available backends:"
  echo "  vllm          Use the vllm backend"
  echo "  transformers  Use the huggingface/transformers backend"
  echo "Options:"
  echo "  -h, --help    Show this help message and exit"
  exit 0
fi

# Remember the requested backend, then discard every argument up to and
# including the "--" separator; whatever remains in "$@" is the command.
backend="$1"
while [[ $# -gt 0 && "$1" != "--" ]]; do
  shift
done
if [[ $# -gt 0 ]]; then
  shift
fi
if [[ $# -eq 0 ]]; then
  echo "No command provided"
  exit 1
fi

# Map the user-facing backend name to the virtualenv directory name.
case "$backend" in
  "vllm")
    backend="vllm"
    ;;
  "transformers")
    # The transformers environment lives in the "tfs" virtualenv.
    backend="tfs"
    ;;
  *)
    echo "Unknown backend: $backend"
    exit 1
    ;;
esac

echo "Switch to backend: $backend"
# NOTE(review): these relative paths assume the cwd is /workspace (the image
# WORKDIR) — confirm before reusing this script outside the image.
. "$backend"/bin/activate
cd "$backend"

# Run the command after "--" with exec (not eval) so it replaces this shell
# as PID 1: it then receives signals (e.g. SIGTERM from `docker stop`)
# directly, and the original argument vector is preserved without the
# re-parsing/quoting hazards of eval.
exec "$@"
3 changes: 2 additions & 1 deletion presets/models/supported_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ models:
type: text-generation
version: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/commit/d269012bea6fbe38ce7752c8940fea010eea3383
runtime: tfs
tag: 0.0.2
tag: 0.0.3
# Tag history:
# 0.0.3 - Add vllm inference backend
# 0.0.2 - Add Logging & Metrics Server
# 0.0.1 - Initial Release

Expand Down

0 comments on commit 99e397d

Please sign in to comment.