From 27b0c5b29824a70512fc19b1314983a8a626061c Mon Sep 17 00:00:00 2001
From: mitya
Date: Fri, 20 Dec 2024 12:10:08 +0100
Subject: [PATCH 1/5] remove old openai models, group models by provider

---
 refact_known_models/passthrough.py | 73 ++++++++++++------------
 1 file changed, 29 insertions(+), 44 deletions(-)

diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py
index d94f8af2..3fdcffa6 100644
--- a/refact_known_models/passthrough.py
+++ b/refact_known_models/passthrough.py
@@ -1,6 +1,7 @@
 # refer to https://docs.litellm.ai/docs/providers/
 
 passthrough_mini_db = {
+    # OpenAI models
     "gpt-4o": {
         "backend": "litellm",
         "provider": "openai",
@@ -12,28 +13,41 @@
         "pp1000t_generated": 15_000, # $15.00 / 1M tokens (2024 may)
         "filter_caps": ["chat", "tools", "completion"],
     },
-    "gpt-4-turbo": {
+    "gpt-4o-2024-05-13": {
         "backend": "litellm",
         "provider": "openai",
-        "tokenizer_path": "Xenova/gpt-4",
-        "resolve_as": "gpt-4-turbo",
+        "tokenizer_path": "Xenova/gpt-4o",
+        "resolve_as": "gpt-4o-2024-05-13",
         "T": 128_000,
         "T_out": 4096,
-        "pp1000t_prompt": 10_000,
-        "pp1000t_generated": 30_000, # $30.00 / 1M tokens (2024 may)
+        "pp1000t_prompt": 5_000,
+        "pp1000t_generated": 15_000, # $15.00 / 1M tokens
         "filter_caps": ["chat", "tools", "completion"],
     },
-    "gpt-3.5-turbo": {
+    "gpt-4o-2024-08-06": {
         "backend": "litellm",
         "provider": "openai",
-        "tokenizer_path": "Xenova/gpt-3.5-turbo-16k",
-        "resolve_as": "gpt-3.5-turbo-1106",
-        "T": 16_000,
+        "tokenizer_path": "Xenova/gpt-4o",
+        "resolve_as": "gpt-4o-2024-08-06",
+        "T": 128_000,
         "T_out": 4096,
-        "pp1000t_prompt": 1000,
-        "pp1000t_generated": 2000,
+        "pp1000t_prompt": 2_500,
+        "pp1000t_generated": 10_000, # $10.00 / 1M tokens
+        "filter_caps": ["chat", "tools", "completion"]
+    },
+    "gpt-4o-mini": {
+        "backend": "litellm",
+        "provider": "openai",
+        "tokenizer_path": "Xenova/gpt-4o",
+        "resolve_as": "gpt-4o-mini-2024-07-18",
+        "T": 128_000,
+        "T_out": 4096,
+        "pp1000t_prompt": 150,
+        "pp1000t_generated": 600, # $0.60 / 1M tokens
         "filter_caps": ["chat", "tools", "completion"],
     },
+
+    # Anthropic models
     "claude-3-5-sonnet": {
         "backend": "litellm",
         "provider": "anthropic",
@@ -78,39 +92,6 @@
         "pp1000t_generated": 15_000,
         "filter_caps": ["chat", "tools", "completion"],
     },
-    "gpt-4o-2024-05-13": {
-        "backend": "litellm",
-        "provider": "openai",
-        "tokenizer_path": "Xenova/gpt-4o",
-        "resolve_as": "gpt-4o-2024-05-13",
-        "T": 128_000,
-        "T_out": 4096,
-        "pp1000t_prompt": 5_000,
-        "pp1000t_generated": 15_000, # $15.00 / 1M tokens
-        "filter_caps": ["chat", "tools", "completion"],
-    },
-    "gpt-4o-2024-08-06": {
-        "backend": "litellm",
-        "provider": "openai",
-        "tokenizer_path": "Xenova/gpt-4o",
-        "resolve_as": "gpt-4o-2024-08-06",
-        "T": 128_000,
-        "T_out": 4096,
-        "pp1000t_prompt": 2_500,
-        "pp1000t_generated": 10_000, # $15.00 / 1M tokens
-        "filter_caps": ["chat", "tools", "completion"]
-    },
-    "gpt-4o-mini": {
-        "backend": "litellm",
-        "provider": "openai",
-        "tokenizer_path": "Xenova/gpt-4o",
-        "resolve_as": "gpt-4o-mini-2024-07-18",
-        "T": 128_000,
-        "T_out": 4096,
-        "pp1000t_prompt": 150,
-        "pp1000t_generated": 600, # $0.60 / 1M tokens
-        "filter_caps": ["chat", "tools", "completion"],
-    },
     "claude-3-5-sonnet-20241022": {
         "backend": "litellm",
         "provider": "anthropic",
@@ -122,6 +103,8 @@
         "pp1000t_generated": 15_000, # $15.00 / 1M tokens (2024 oct)
         "filter_caps": ["chat", "tools", "completion"],
     },
+
+    # Groq models
     "groq-llama-3.1-8b": {
         "backend": "litellm",
         "provider": "groq",
@@ -188,6 +171,8 @@
         "pp1000t_generated": 600, # TODO: don't know the price
         "filter_caps": ["chat", "completion"],
     },
+
+    # Cerebras models
     "cerebras-llama3.1-8b": {
         "backend": "litellm",
         "provider": "cerebras",
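Note on the pricing fields above: judging from the inline comments, the pp1000t_* values appear to be micro-dollars per 1,000 tokens, i.e. pp1000t_generated == 15_000 corresponds to $15.00 per 1M generated tokens. A minimal sketch of how a billing helper might consume a passthrough_mini_db record under that assumption (estimate_cost_usd is a hypothetical name, not part of the patch):

    def estimate_cost_usd(rec: dict, prompt_tokens: int, generated_tokens: int) -> float:
        # 1e-9 dollars-per-token scale inferred from the comments:
        # pp1000t_generated == 15_000 is annotated "$15.00 / 1M tokens"
        prompt_usd = prompt_tokens * rec["pp1000t_prompt"] / 1_000_000_000
        generated_usd = generated_tokens * rec["pp1000t_generated"] / 1_000_000_000
        return prompt_usd + generated_usd

    # e.g. 10_000 prompt + 1_000 generated tokens on gpt-4o-mini:
    # 10_000 * 150 / 1e9 + 1_000 * 600 / 1e9 = $0.0021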
don't know the price "filter_caps": ["chat", "completion"], }, + + # Cerebras models "cerebras-llama3.1-8b": { "backend": "litellm", "provider": "cerebras", From 47dd9fa2765054071139a5ba86a44a161cd164e5 Mon Sep 17 00:00:00 2001 From: mitya Date: Fri, 20 Dec 2024 15:26:45 +0100 Subject: [PATCH 2/5] do not double models --- refact_webgui/webgui/selfhost_fastapi_completions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py index 6fbfa0ec..47e39125 100644 --- a/refact_webgui/webgui/selfhost_fastapi_completions.py +++ b/refact_webgui/webgui/selfhost_fastapi_completions.py @@ -270,7 +270,7 @@ def _caps_base_data(self) -> Dict[str, Any]: "telemetry_basic_dest": "/stats/telemetry-basic", "telemetry_corrected_snippets_dest": "/stats/telemetry-snippets", "telemetry_basic_retrieve_my_own": "/stats/rh-stats", - "running_models": [r for r in [*running['completion'], *running['chat']]], + "running_models": list(set(r for r in [*running['completion'], *running['chat']])), "code_completion_default_model": code_completion_default_model, "multiline_code_completion_default_model": multiline_code_completion_default_model, "code_chat_default_model": code_chat_default_model, From a675fefd8adcb9b2d3933a11d8f4b8e039bfb338 Mon Sep 17 00:00:00 2001 From: mitya Date: Fri, 20 Dec 2024 15:38:48 +0100 Subject: [PATCH 3/5] local models first --- refact_utils/finetune/utils.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/refact_utils/finetune/utils.py b/refact_utils/finetune/utils.py index 69558235..51874c03 100644 --- a/refact_utils/finetune/utils.py +++ b/refact_utils/finetune/utils.py @@ -103,6 +103,20 @@ def _add_results_for_passthrough_provider(provider: str) -> None: if 'completion' in v.get('filter_caps', []): result['completion'].append(k) + for k, v in data.get("model_assign", {}).items(): + if model_dict := [d for d in data['models'] if d['name'] == k]: + model_dict = model_dict[0] + + add_result(k, model_dict) + + if not model_dict.get('has_finetune'): + continue + + finetune_info = model_dict.get('finetune_info', []) or [] + for run in finetune_info: + val = f"{k}:{run['run_id']}:{run['checkpoint']}" + add_result(val, model_dict) + if data.get("openai_api_enable"): _add_results_for_passthrough_provider('openai') @@ -121,20 +135,6 @@ def _add_results_for_passthrough_provider(provider: str) -> None: if data.get('xai_api_enable'): _add_results_for_passthrough_provider('xai') - for k, v in data.get("model_assign", {}).items(): - if model_dict := [d for d in data['models'] if d['name'] == k]: - model_dict = model_dict[0] - - add_result(k, model_dict) - - if not model_dict.get('has_finetune'): - continue - - finetune_info = model_dict.get('finetune_info', []) or [] - for run in finetune_info: - val = f"{k}:{run['run_id']}:{run['checkpoint']}" - add_result(val, model_dict) - return result From dbe93a88f3b5b51475411fe62d1ecfae5f52b8b7 Mon Sep 17 00:00:00 2001 From: mitya Date: Mon, 23 Dec 2024 15:23:40 +0100 Subject: [PATCH 4/5] caps version logic for 3rdparty --- .../webgui/selfhost_fastapi_completions.py | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py index 47e39125..9625a1cc 100644 --- a/refact_webgui/webgui/selfhost_fastapi_completions.py +++ 
From dbe93a88f3b5b51475411fe62d1ecfae5f52b8b7 Mon Sep 17 00:00:00 2001
From: mitya
Date: Mon, 23 Dec 2024 15:23:40 +0100
Subject: [PATCH 4/5] caps version logic for 3rdparty

---
 .../webgui/selfhost_fastapi_completions.py | 27 ++++++++++++++-----
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py
index 47e39125..9625a1cc 100644
--- a/refact_webgui/webgui/selfhost_fastapi_completions.py
+++ b/refact_webgui/webgui/selfhost_fastapi_completions.py
@@ -455,6 +455,8 @@ async def _generate_embeddings(self, account: str, inputs: Union[str, List[str]]
 
     async def _embeddings_style_openai(self, post: EmbeddingsStyleOpenAI, authorization: str = Header(None)):
         account = await self._account_from_bearer(authorization)
+        # TODO: we'll implement caps_version logic later
+
         data = [
             {
                 "embedding": res["embedding"],
@@ -511,6 +513,8 @@ def compose_usage_dict(model_dict, prompt_tokens_n, generated_tokens_n) -> Dict[
            return usage_dict
 
         _account = await self._account_from_bearer(authorization)
+        caps_version = self._caps_version
+
         messages = []
         for m in (i.dict() for i in post.messages):
             # drop tool_calls if empty, otherwise litellm tokenizing won't work
@@ -519,6 +523,16 @@ def compose_usage_dict(model_dict, prompt_tokens_n, generated_tokens_n) -> Dict[
             messages.append(m)
 
         prefix, postfix = "data: ", "\n\n"
+
+        def _patch_caps_version(data: Dict) -> Dict:
+            return {
+                **data,
+                "caps_version": caps_version,
+            }
+
+        def _wrap_output(output: str) -> str:
+            return prefix + output + postfix
+
         model_dict = self._model_assigner.models_db_with_passthrough.get(post.model, {})
 
         async def litellm_streamer():
@@ -546,19 +560,19 @@ async def litellm_streamer():
                     except json.JSONDecodeError:
                         data = {"choices": [{"finish_reason": finish_reason}]}
-                    yield prefix + json.dumps(data) + postfix
+                    yield _wrap_output(json.dumps(_patch_caps_version(data)))
 
                 final_msg = {"choices": []}
                 usage_dict = compose_usage_dict(model_dict, prompt_tokens_n, generated_tokens_n)
                 final_msg.update(usage_dict)
-                yield prefix + json.dumps(final_msg) + postfix
+                yield _wrap_output(json.dumps(_patch_caps_version(final_msg)))
 
                 # NOTE: DONE needed by refact-lsp server
-                yield prefix + "[DONE]" + postfix
+                yield _wrap_output("[DONE]")
             except BaseException as e:
                 err_msg = f"litellm error (1): {e}"
                 log(err_msg)
-                yield prefix + json.dumps({"error": err_msg}) + postfix
+                yield _wrap_output(json.dumps(_patch_caps_version({"error": err_msg})))
 
         async def litellm_non_streamer():
             generated_tokens_n = 0
@@ -613,11 +627,11 @@ async def chat_completion_streamer():
                         data["choices"][0]["finish_reason"] = None
                 except json.JSONDecodeError:
                     data = {"choices": [{"finish_reason": finish_reason}]}
-                yield prefix + json.dumps(data) + postfix
+                yield _wrap_output(json.dumps(_patch_caps_version(data)))
         except aiohttp.ClientConnectorError as e:
             err_msg = f"LSP server is not ready yet: {e}"
             log(err_msg)
-            yield prefix + json.dumps({"error": err_msg}) + postfix
+            yield _wrap_output(json.dumps(_patch_caps_version({"error": err_msg})))
 
         if model_dict.get('backend') == 'litellm':
             model_name = model_dict.get('resolve_as', post.model)
@@ -629,6 +643,7 @@
                 prompt_tokens_n += litellm.token_counter(model_name, text=json.dumps(post.tools))
             response_streamer = litellm_streamer() if post.stream else litellm_non_streamer()
         else:
+            # TODO: unused refact-lsp logic, remove ASAP
             response_streamer = chat_completion_streamer()
 
         return StreamingResponse(response_streamer, media_type="text/event-stream")
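For reference, after patch 4 every SSE chunk on the litellm streaming path carries the caps version at the top level, while the [DONE] sentinel is still sent as a bare string, so clients have to special-case it before JSON-decoding. A self-contained illustration of the resulting wire format (the caps_version value 42 is a placeholder; the server reads self._caps_version):

    import json

    caps_version = 42  # placeholder value for illustration

    def _patch_caps_version(data: dict) -> dict:
        return {**data, "caps_version": caps_version}

    def _wrap_output(output: str) -> str:
        return "data: " + output + "\n\n"

    chunk = {"choices": [{"delta": {"content": "hi"}, "finish_reason": None}]}
    print(_wrap_output(json.dumps(_patch_caps_version(chunk))), end="")
    # prints: data: {"choices": [{"delta": {"content": "hi"}, "finish_reason": null}], "caps_version": 42}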
From e720dfa6cc3ff03e4466e9d47fc77c9f479073bc Mon Sep 17 00:00:00 2001
From: mitya
Date: Mon, 23 Dec 2024 15:34:05 +0100
Subject: [PATCH 5/5] litellm non-streamer patch with caps version

---
 refact_webgui/webgui/selfhost_fastapi_completions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py
index 9625a1cc..8388bedb 100644
--- a/refact_webgui/webgui/selfhost_fastapi_completions.py
+++ b/refact_webgui/webgui/selfhost_fastapi_completions.py
@@ -598,11 +598,11 @@ async def litellm_non_streamer():
                     data.update(usage_dict)
             except json.JSONDecodeError:
                 data = {"choices": [{"finish_reason": finish_reason}]}
-            yield json.dumps(data)
+            yield json.dumps(_patch_caps_version(data))
         except BaseException as e:
             err_msg = f"litellm error (2): {e}"
             log(err_msg)
-            yield json.dumps({"error": err_msg})
+            yield json.dumps(_patch_caps_version({"error": err_msg}))
 
         async def chat_completion_streamer():
             post_url = "http://127.0.0.1:8001/v1/chat"
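With patch 5 the non-streaming litellm path reports caps_version as well, so a client can watch any chat response for capability changes and re-fetch its cached caps when the version moves. A hypothetical consumer-side hook (all names here are assumptions, not an existing refact API):

    def make_caps_watcher(refresh):
        # calls refresh() whenever a response reports a caps_version
        # different from the last one seen
        last = None

        def on_response(payload: dict) -> None:
            nonlocal last
            version = payload.get("caps_version")
            if version is not None and version != last:
                last = version
                refresh()

        return on_response

    watcher = make_caps_watcher(lambda: print("caps changed, re-fetch caps"))
    watcher({"choices": [], "caps_version": 1})  # triggers refresh
    watcher({"choices": [], "caps_version": 1})  # no-op, same version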