Enable the integration tests for llamacpp (#868)
* Enable the integration tests for llamacpp

Signed-off-by: Radoslav Dimitrov <[email protected]>
rdimitrov authored Feb 1, 2025
1 parent a428992 commit 4dedb4e
Showing 4 changed files with 42 additions and 19 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/integration-tests.yml
@@ -51,6 +51,11 @@ jobs:
echo "Loaded image:"
docker images
- name: Download the Qwen2.5-Coder-0.5B-Instruct-GGUF model
run: |
# This is needed for the llamacpp integration tests
wget -P ./codegate_volume/models https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-0.5b-instruct-q5_k_m.gguf
- name: Run container from the loaded image
run: |
# Get the image name
@@ -235,6 +240,12 @@ jobs:
run: |
poetry run python tests/integration/integration_tests.py
- name: Run integration tests - llamacpp
env:
CODEGATE_PROVIDERS: "llamacpp"
run: |
poetry run python tests/integration/integration_tests.py
- name: Print the CodeGate container logs (useful for debugging)
if: always()
run: |
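
The model name used by the tests has to line up with the file downloaded above: the llamacpp provider resolves the request's "model" field to "<model_base_path>/<model>.gguf" (see the completion handler change below). A quick sanity-check sketch, assuming the container's model_base_path points at ./codegate_volume/models:

from pathlib import Path

# Assumed values: the workflow's download target and the model name used in
# tests/integration/testcases.yaml.
model_base_path = "./codegate_volume/models"
model = "qwen2.5-coder-0.5b-instruct-q5_k_m"

# Same path construction as the llamacpp completion handler below.
model_path = Path(f"{model_base_path}/{model}.gguf")
assert model_path.exists(), f"missing GGUF model: {model_path}"
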
10 changes: 8 additions & 2 deletions src/codegate/providers/llamacpp/completion_handler.py
@@ -59,19 +59,25 @@ async def execute_completion(
"""
model_path = f"{Config.get_config().model_base_path}/{request['model']}.gguf"

# Create a copy of the request dict and remove stream_options
# Reason - Request error as JSON:
# {'error': "Llama.create_completion() got an unexpected keyword argument 'stream_options'"}
request_dict = dict(request)
request_dict.pop("stream_options", None)

if is_fim_request:
response = await self.inference_engine.complete(
model_path,
Config.get_config().chat_model_n_ctx,
Config.get_config().chat_model_n_gpu_layers,
**request,
**request_dict,
)
else:
response = await self.inference_engine.chat(
model_path,
Config.get_config().chat_model_n_ctx,
Config.get_config().chat_model_n_gpu_layers,
**request,
**request_dict,
)

return convert_to_async_iterator(response) if stream else response
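
The copy-and-pop above exists because llama-cpp-python's Llama.create_completion() does not accept the OpenAI-style "stream_options" field and raises "unexpected keyword argument 'stream_options'" when it is forwarded. A minimal sketch of the same idea, using a hypothetical request dict:

# Hypothetical OpenAI-style request; "stream_options" is fine for OpenAI
# clients but is not a Llama.create_completion() parameter.
request = {
    "prompt": "def hello():",
    "stream": True,
    "stream_options": {"include_usage": True},
}

# Copy first so the original request stays untouched, then drop the
# unsupported key before forwarding the remaining kwargs.
request_dict = dict(request)
request_dict.pop("stream_options", None)
# inference_engine.complete(model_path, n_ctx, n_gpu_layers, **request_dict)
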
26 changes: 17 additions & 9 deletions tests/integration/integration_tests.py
@@ -67,17 +67,25 @@ def parse_response_message(response, streaming=True):
if "DONE" in decoded_line or "message_stop" in decoded_line:
break

decoded_line = decoded_line.replace("data:", "")
decoded_line = decoded_line.replace("data:", "").strip()
json_line = json.loads(decoded_line)

message_content = None
# Handle both chat and FIM responses
if "choices" in json_line:
if "finish_reason" in json_line["choices"][0]:
choice = json_line["choices"][0]
# Break if the conversation is over
if choice.get("finish_reason") == "stop":
break
if "delta" in json_line["choices"][0]:
message_content = json_line["choices"][0]["delta"].get("content", "")
elif "text" in json_line["choices"][0]:
message_content = json_line["choices"][0].get("text", "")
# Handle chat responses
if "delta" in choice:
delta = choice["delta"]
if "content" in delta and delta["content"] is not None:
message_content = delta["content"]
# Handle FIM responses
elif "text" in choice:
text = choice["text"]
if text is not None:
message_content = text
elif "delta" in json_line:
message_content = json_line["delta"].get("text", "")
elif "message" in json_line:
@@ -87,7 +95,6 @@ def parse_response_message(response, streaming=True):

if message_content is not None:
response_message += message_content

else:
if "choices" in response.json():
response_message = response.json()["choices"][0]["message"].get("content", "")
@@ -97,7 +104,8 @@
except Exception as e:
logger.exception("An error occurred: %s", e)

return response_message
# Remove any trailing newlines and return
return response_message.strip()

@staticmethod
def replace_env_variables(input_string, env):
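
To make the new branching concrete, here are two illustrative streamed lines (one chat-style "delta" chunk, one FIM-style "text" chunk) in the shapes the updated parser handles; the contents are made up for the example, not captured server output:

import json

# Illustrative lines only; real responses carry more fields.
chat_line = 'data: {"choices": [{"delta": {"content": "Hello"}, "finish_reason": null}]}'
fim_line = 'data: {"choices": [{"text": "print(\\"Hello, World!\\")", "finish_reason": null}]}'

for decoded_line in (chat_line, fim_line):
    json_line = json.loads(decoded_line.replace("data:", "").strip())
    choice = json_line["choices"][0]
    if "delta" in choice:          # chat responses
        print("chat delta:", choice["delta"].get("content"))
    elif "text" in choice:         # FIM responses
        print("FIM text:", choice["text"])
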
14 changes: 6 additions & 8 deletions tests/integration/testcases.yaml
@@ -6,6 +6,7 @@ headers:
ollama:
Content-Type: application/json
llamacpp:
Content-Type: application/json
anthropic:
x-api-key: ENV_ANTHROPIC_KEY
copilot:
@@ -68,7 +69,7 @@ testcases:
"role":"user"
}
],
"model":"qwen2.5-coder-1.5b-instruct-q5_k_m",
"model":"qwen2.5-coder-0.5b-instruct-q5_k_m",
"stream":true,
"temperature":0
}
@@ -81,18 +82,15 @@
url: http://127.0.0.1:8989/llamacpp/completions
data: |
{
"model": "qwen2.5-coder-1.5b-instruct-q5_k_m",
"model": "qwen2.5-coder-0.5b-instruct-q5_k_m",
"max_tokens": 4096,
"temperature": 0,
"stream": true,
"stop": ["<|endoftext|>", "<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>", "<|fim_pad|>", "<|repo_name|>", "<|file_sep|>", "<|im_start|>", "<|im_end|>", "/src/", "#- coding: utf-8", "```"],
"prompt":"<|fim_prefix|>\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n <|fim_suffix|>\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n<|fim_middle|>"
"stop": ["<|endoftext|>", "<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>", "<|fim_pad|>", "<|repo_name|>", "<|file_sep|>", "<|im_start|>", "<|im_end|>", "/src/", "#- coding: utf-8", "```", "def test"],
"prompt":"# Do not add comments\n<|fim_prefix|>\n# codegate/greet.py\ndef print_hello():\n <|fim_suffix|>\n\n\nprint_hello()\n<|fim_middle|>"
}
likes: |
url = 'http://localhost:8080'
headers = {'Authorization': f'Bearer {key}'}
response = requests.get(url, headers=headers)
return response.json()
print("Hello, World!")
openai_chat:
name: OpenAI Chat
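
Roughly, the llamacpp FIM test case above amounts to the request below; this is a sketch that assumes the runner simply POSTs the test case's data to its url and checks that the expected snippet shows up in the parsed completion (the stop list is omitted here for brevity):

import requests

url = "http://127.0.0.1:8989/llamacpp/completions"
payload = {
    "model": "qwen2.5-coder-0.5b-instruct-q5_k_m",
    "max_tokens": 4096,
    "temperature": 0,
    "stream": True,
    "prompt": (
        "# Do not add comments\n<|fim_prefix|>\n# codegate/greet.py\n"
        "def print_hello():\n    <|fim_suffix|>\n\n\nprint_hello()\n<|fim_middle|>"
    ),
}

response = requests.post(url, json=payload, headers={"Content-Type": "application/json"}, stream=True)
# The parsed, streamed completion is expected to contain: print("Hello, World!")
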
