Add integration tests for vllm
Signed-off-by: Radoslav Dimitrov <[email protected]>
rdimitrov committed Jan 29, 2025
1 parent 5135e51 commit 0d56e43
Showing 4 changed files with 122 additions and 46 deletions.
64 changes: 61 additions & 3 deletions .github/workflows/integration-tests.yml
@@ -58,10 +58,10 @@ jobs:
echo "Running container from image: $DOCKER_IMAGE"
# Run the container
docker run --name $CONTAINER_NAME -d -p 8989:8989 -p 9090:9090 \
-p 8990:8990 \
docker run --name $CONTAINER_NAME -d --network host \
-v "$(pwd)"/codegate_volume:/app/codegate_volume \
-e CODEGATE_APP_LOG_LEVEL=DEBUG \
-e CODEGATE_VLLM_URL=http://localhost:8000 \
--restart unless-stopped $DOCKER_IMAGE
# Confirm the container started
@@ -146,7 +146,60 @@ jobs:
run: |
poetry run python tests/integration/integration_tests.py
- name: Print the container logs (useful for debugging)
- name: Build and run the vllm container
run: |
git clone https://github.com/vllm-project/vllm.git
cd vllm
docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
docker run -d --name vllm \
--network="host" \
vllm-cpu-env --model Qwen/Qwen2.5-Coder-1.5B-Instruct
- name: Verify the vllm container is running
run: |
echo -e "\nVerify the vllm container is serving\n"
docker ps -f name=vllm
echo "Loop until the endpoint responds successfully"
while ! curl --silent --fail --get "http://localhost:8000/ping" >/dev/null; do
echo "Ping not available yet. Retrying in 2 seconds..."
sleep 2
done
echo -e "\nPing is now available!\n"
echo -e "\nVerify the completions endpoint works\n"
curl http://localhost:8000/v1/completions -H "Content-Type: application/json" -d '{
"model": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
"prompt": ["How to make pizza"],
"max_tokens": 100,
"temperature": 0
}'
echo -e "\nVerify the chat/completions endpoint works\n"
curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
"messages": [
{"role": "system", "content": "You are a coding assistant."},
{"role": "user", "content": "Hello"}
],
"temperature": 0,
"max_tokens": 4096,
"extra_body": {}
}'
# Print a new line and then the message in a single echo
echo -e "\nPrint the vllm container logs\n"
docker logs vllm
- name: Run integration tests - vllm
env:
CODEGATE_PROVIDERS: "vllm"
run: |
poetry run python tests/integration/integration_tests.py
- name: Print the CodeGate container logs (useful for debugging)
if: always()
run: |
docker logs $CONTAINER_NAME
@@ -159,3 +212,8 @@ jobs:
echo "DB contents:"
ls -la codegate_volume/db
docker exec $CONTAINER_NAME ls -la /app/codegate_volume/db
- name: Print the vllm container logs (useful for debugging)
if: always()
run: |
docker logs vllm
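For reference, the readiness check and smoke test that the new workflow steps perform with curl can also be expressed as a short Python script. This is a minimal sketch, not part of the commit: it assumes vLLM listens on localhost:8000 and serves Qwen/Qwen2.5-Coder-1.5B-Instruct as configured above, and the function names are ad hoc.

```python
# Minimal sketch: wait for the local vLLM server to come up, then issue one
# non-streaming completion request, mirroring the curl-based workflow steps.
import time

import httpx

BASE_URL = "http://localhost:8000"
MODEL = "Qwen/Qwen2.5-Coder-1.5B-Instruct"


def wait_for_vllm(timeout_s: int = 600) -> None:
    """Poll /ping until vLLM answers, like the workflow's retry loop."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            if httpx.get(f"{BASE_URL}/ping", timeout=2).status_code == 200:
                return
        except httpx.HTTPError:
            pass  # server not ready yet
        time.sleep(2)
    raise TimeoutError("vLLM did not become ready in time")


def smoke_test_completion() -> dict:
    """Send the same completions payload the workflow sends with curl."""
    resp = httpx.post(
        f"{BASE_URL}/v1/completions",
        json={
            "model": MODEL,
            "prompt": ["How to make pizza"],
            "max_tokens": 100,
            "temperature": 0,
        },
        timeout=120,
    )
    resp.raise_for_status()
    return resp.json()


if __name__ == "__main__":
    wait_for_vllm()
    print(smoke_test_completion())
```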
7 changes: 1 addition & 6 deletions src/codegate/providers/vllm/adapter.py
@@ -102,6 +102,7 @@ def _has_chat_ml_format(data: Dict) -> bool:
content = input_chat_request["messages"][0]["content"]
if isinstance(content, str) and "<|im_start|>" in content:
return True
return False

def normalize(self, data: Dict) -> ChatCompletionRequest:
"""
@@ -117,12 +118,6 @@ def normalize(self, data: Dict) -> ChatCompletionRequest:
if not model_name.startswith("hosted_vllm/"):
normalized_data["model"] = f"hosted_vllm/{model_name}"

# Ensure the base_url ends with /v1 if provided
if "base_url" in normalized_data:
base_url = normalized_data["base_url"].rstrip("/")
if not base_url.endswith("/v1"):
normalized_data["base_url"] = f"{base_url}/v1"

ret_data = normalized_data
if self._has_chat_ml_format(normalized_data):
ret_data = self._chat_ml_normalizer.normalize(normalized_data)
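The adapter change above adds an explicit `return False`, so `_has_chat_ml_format` no longer falls through to `None` for non-ChatML payloads, and drops the `/v1` base-URL handling (which moves into the provider below). A standalone sketch of the detection check follows; the sample payloads are illustrative only, not taken from the test suite.

```python
# Standalone sketch of the ChatML detection shown above: the first message's
# content is checked for the "<|im_start|>" marker.
from typing import Dict


def has_chat_ml_format(data: Dict) -> bool:
    messages = data.get("messages") or []
    if not messages:
        return False
    content = messages[0].get("content")
    if isinstance(content, str) and "<|im_start|>" in content:
        return True
    return False


chatml_payload = {"messages": [{"role": "user", "content": "<|im_start|>user\nHello<|im_end|>"}]}
plain_payload = {"messages": [{"role": "user", "content": "Hello"}]}

assert has_chat_ml_format(chatml_payload) is True
assert has_chat_ml_format(plain_payload) is False
```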
87 changes: 55 additions & 32 deletions src/codegate/providers/vllm/provider.py
@@ -1,4 +1,5 @@
import json
from urllib.parse import urljoin

import httpx
import structlog
@@ -31,6 +32,19 @@ def __init__(
def provider_route_name(self) -> str:
return "vllm"

def _get_base_url(self) -> str:
"""
Get the base URL from config with proper formatting
"""
config = Config.get_config()
base_url = config.provider_urls.get("vllm") if config else ""
if base_url:
base_url = base_url.rstrip("/")
# Add /v1 if not present
if not base_url.endswith("/v1"):
base_url = f"{base_url}/v1"
return base_url

def models(self):
resp = httpx.get(f"{self.base_url}/v1/models")
jsonresp = resp.json()
@@ -40,60 +54,69 @@ def models(self):
def _setup_routes(self):
"""
Sets up the /chat/completions route for the provider as expected by the
OpenAI API. Extracts the API key from the "Authorization" header and
passes it to the completion handler.
OpenAI API. Makes the API key optional in the "Authorization" header.
"""

@self.router.get(f"/{self.provider_route_name}/models")
async def get_models(authorization: str = Header(..., description="Bearer token")):
if not authorization.startswith("Bearer "):
raise HTTPException(status_code=401, detail="Invalid authorization header")

token = authorization.split(" ")[1]
config = Config.get_config()
if config:
base_url = config.provider_urls.get("vllm")
else:
base_url = ""

async with httpx.AsyncClient() as client:
response = await client.get(
f"{base_url}/v1/models", headers={"Authorization": f"Bearer {token}"}
async def get_models(
authorization: str | None = Header(None, description="Optional Bearer token")
):
base_url = self._get_base_url()
headers = {}

if authorization:
if not authorization.startswith("Bearer "):
raise HTTPException(
status_code=401, detail="Invalid authorization header format"
)
token = authorization.split(" ")[1]
headers["Authorization"] = f"Bearer {token}"

try:
models_url = urljoin(base_url, "v1/models")
async with httpx.AsyncClient() as client:
response = await client.get(models_url, headers=headers)
response.raise_for_status()
return response.json()
except httpx.HTTPError as e:
logger = structlog.get_logger("codegate")
logger.error("Error fetching vLLM models", error=str(e))
raise HTTPException(
status_code=e.response.status_code if hasattr(e, "response") else 500,
detail=str(e),
)
response.raise_for_status()
return response.json()

@self.router.post(f"/{self.provider_route_name}/chat/completions")
@self.router.post(f"/{self.provider_route_name}/completions")
async def create_completion(
request: Request,
authorization: str = Header(..., description="Bearer token"),
authorization: str | None = Header(None, description="Optional Bearer token"),
):
if not authorization.startswith("Bearer "):
raise HTTPException(status_code=401, detail="Invalid authorization header")
api_key = None
if authorization:
if not authorization.startswith("Bearer "):
raise HTTPException(
status_code=401, detail="Invalid authorization header format"
)
api_key = authorization.split(" ")[1]

api_key = authorization.split(" ")[1]
body = await request.body()
data = json.loads(body)

# Add the vLLM base URL to the request
config = Config.get_config()
if config:
data["base_url"] = config.provider_urls.get("vllm")
else:
data["base_url"] = ""
base_url = self._get_base_url()
data["base_url"] = base_url

is_fim_request = self._is_fim_request(request, data)
try:
# Pass the potentially None api_key to complete
stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
except Exception as e:
#  check if we have an status code there
# Check if we have a status code there
if hasattr(e, "status_code"):
logger = structlog.get_logger("codegate")
logger.error("Error in VLLMProvider completion", error=str(e))
raise HTTPException(status_code=e.status_code, detail=str(e))
raise e

raise HTTPException(status_code=e.status_code, detail=str(e)) # type: ignore
else:
# just continue raising the exception
raise e
return self._completion_handler.create_response(stream)
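The new `_get_base_url` helper centralizes the `/v1` suffix handling that used to live in the adapter, and the models route now builds its URL with `urljoin`. A small sketch of that normalization, with illustrative input URLs (not part of the commit):

```python
# Sketch of the base-URL handling introduced in _get_base_url: strip a trailing
# slash and append /v1 when it is missing, then join the models path.
from urllib.parse import urljoin


def normalize_vllm_base_url(base_url: str) -> str:
    if not base_url:
        return ""
    base_url = base_url.rstrip("/")
    if not base_url.endswith("/v1"):
        base_url = f"{base_url}/v1"
    return base_url


for raw in ("http://localhost:8000", "http://localhost:8000/", "http://localhost:8000/v1"):
    base = normalize_vllm_base_url(raw)
    # Because the normalized path has no trailing slash, urljoin replaces the
    # final "v1" segment with "v1/models", which still yields .../v1/models.
    print(base, "->", urljoin(base, "v1/models"))
# All three inputs resolve to http://localhost:8000/v1/models
```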
10 changes: 5 additions & 5 deletions tests/integration/testcases.yaml
@@ -1,6 +1,6 @@
headers:
vllm:
Authorization: Bearer ENV_VLLM_KEY
Content-Type: application/json
openai:
Authorization: Bearer ENV_OPENAI_KEY
ollama:
@@ -160,20 +160,20 @@ testcases:
"role":"user"
}
],
"model":"Qwen/Qwen2.5-Coder-14B-Instruct",
"model":"Qwen/Qwen2.5-Coder-1.5B-Instruct",
"stream":true,
"temperature":0
}
likes: |
Hello! How can I assist you today? If you have any questions about software security, package analysis, or need guidance on secure coding practices, feel free to ask.
Hello! How can I assist you today?
vllm_fim:
name: VLLM FIM
provider: vllm
url: http://127.0.0.1:8989/vllm/completions
data: |
{
"model": "Qwen/Qwen2.5-Coder-14B",
"model": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
"max_tokens": 4096,
"temperature": 0,
"stream": true,
@@ -332,4 +332,4 @@ testcases:
print(response.status_code)
print(response.json())
```
```
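The test cases now target the smaller Qwen/Qwen2.5-Coder-1.5B-Instruct model and no longer send an Authorization header for vLLM. A rough sketch of replaying the vllm_fim request shape by hand against a local CodeGate instance follows; the prompt and the streamed chunk fields are assumptions for illustration, not values from the test suite.

```python
# Rough sketch: send a streaming completion through CodeGate's vLLM route,
# matching the vllm_fim test case's endpoint and parameters. Assumes the
# response streams OpenAI-style SSE "data:" chunks.
import json

import httpx

URL = "http://127.0.0.1:8989/vllm/completions"
payload = {
    "model": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
    "max_tokens": 4096,
    "temperature": 0,
    "stream": True,
    "prompt": "def hello_world():",  # placeholder prompt, not the test suite's
}

with httpx.stream("POST", URL, json=payload, timeout=120) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if not line.startswith("data:"):
            continue
        data = line[len("data:"):].strip()
        if data == "[DONE]":
            break
        chunk = json.loads(data)
        # Field names assume OpenAI-style completion chunks.
        print(chunk["choices"][0].get("text", ""), end="")
```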
