diff --git a/presets/inference/text-generation/api_spec.json b/presets/inference/text-generation/api_spec.json index 480fa97e4..d6448d15d 100644 --- a/presets/inference/text-generation/api_spec.json +++ b/presets/inference/text-generation/api_spec.json @@ -5,60 +5,96 @@ "version": "0.1.0" }, "paths": { - "/": { + "/health": { "get": { - "summary": "Home Endpoint", - "description": "A simple endpoint that indicates the server is running.\nNo parameters are required. Returns a message indicating the server status.", - "operationId": "home__get", + "summary": "Health", + "description": "Health check.", + "operationId": "health_health_get", "responses": { "200": { "description": "Successful Response", "content": { "application/json": { - "schema": { - "$ref": "#/components/schemas/HomeResponse" - } + "schema": {} } } } } } }, - "/healthz": { - "get": { - "summary": "Health Check Endpoint", - "operationId": "health_check_healthz_get", + "/tokenize": { + "post": { + "summary": "Tokenize", + "operationId": "tokenize_tokenize_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/TokenizeCompletionRequest" + }, + { + "$ref": "#/components/schemas/TokenizeChatRequest" + } + ], + "title": "Request" + } + } + }, + "required": true + }, "responses": { "200": { "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/HealthStatus" - }, - "example": { - "status": "Healthy" + "$ref": "#/components/schemas/HTTPValidationError" } } } + } + } + } + }, + "/detokenize": { + "post": { + "summary": "Detokenize", + "operationId": "detokenize_detokenize_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DetokenizeRequest" + } + } }, - "500": { - "description": "Error Response", + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", "content": { "application/json": { - "examples": { - "model_uninitialized": { - "summary": "Model not initialized", - "value": { - "detail": "Model not initialized" - } - }, - "pipeline_uninitialized": { - "summary": "Pipeline not initialized", - "value": { - "detail": "Pipeline not initialized" - } - } + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" } } } @@ -66,75 +102,47 @@ } } }, - "/chat": { + "/v1/models": { + "get": { + "summary": "Show Available Models", + "operationId": "show_available_models_v1_models_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + } + }, + "/version": { + "get": { + "summary": "Show Version", + "operationId": "show_version_version_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + } + }, + "/v1/chat/completions": { "post": { - "summary": "Chat Endpoint", - "description": "Processes chat requests, generating text based on the specified pipeline (text generation or conversational).\nValidates required parameters based on the pipeline and returns the generated text.", - "operationId": "generate_text_chat_post", + "summary": "Create Chat Completion", + "operationId": 
"create_chat_completion_v1_chat_completions_post", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UnifiedRequestModel" - }, - "examples": { - "text_generation_example": { - "summary": "Text Generation Example", - "description": "An example of a text generation request.", - "value": { - "prompt": "Tell me a joke", - "return_full_text": true, - "clean_up_tokenization_spaces": false, - "generate_kwargs": { - "max_length": 200, - "min_length": 0, - "do_sample": true, - "early_stopping": false, - "num_beams": 1, - "temperature": 1, - "top_k": 10, - "top_p": 1, - "typical_p": 1, - "repetition_penalty": 1, - "eos_token_id": 11 - } - } - }, - "conversation_example": { - "summary": "Conversation Example", - "description": "An example of a conversational request.", - "value": { - "messages": [ - { - "role": "user", - "content": "What is your favourite condiment?" - }, - { - "role": "assistant", - "content": "Well, im quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever im cooking up in the kitchen!" - }, - { - "role": "user", - "content": "Do you have mayonnaise recipes?" - } - ], - "return_full_text": true, - "clean_up_tokenization_spaces": false, - "generate_kwargs": { - "max_length": 200, - "min_length": 0, - "do_sample": true, - "early_stopping": false, - "num_beams": 1, - "temperature": 1, - "top_k": 10, - "top_p": 1, - "typical_p": 1, - "repetition_penalty": 1, - "eos_token_id": 11 - } - } - } + "$ref": "#/components/schemas/ChatCompletionRequest" } } }, @@ -145,47 +153,45 @@ "description": "Successful Response", "content": { "application/json": { - "schema": {}, - "examples": { - "text_generation": { - "summary": "Text Generation Response", - "value": { - "Result": "Generated text based on the prompt." - } - }, - "conversation": { - "summary": "Conversation Response", - "value": { - "Result": "Response to the last message in the conversation." 
- } - } - } + "schema": {} } } }, - "400": { + "422": { "description": "Validation Error", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ErrorResponse" - }, - "examples": { - "missing_prompt": { - "summary": "Missing Prompt", - "value": { - "detail": "Text generation parameter prompt required" - } - }, - "missing_messages": { - "summary": "Missing Messages", - "value": { - "detail": "Conversational parameter messages required" - } - } + "$ref": "#/components/schemas/HTTPValidationError" } } } + } + } + } + }, + "/v1/completions": { + "post": { + "summary": "Create Completion", + "operationId": "create_completion_v1_completions_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CompletionRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } }, "422": { "description": "Validation Error", @@ -196,13 +202,39 @@ } } } + } + } + } + }, + "/v1/embeddings": { + "post": { + "summary": "Create Embedding", + "operationId": "create_embedding_v1_embeddings_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EmbeddingRequest" + } + } }, - "500": { - "description": "Internal Server Error", + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ErrorResponse" + "$ref": "#/components/schemas/HTTPValidationError" } } } @@ -210,61 +242,36 @@ } } }, - "/metrics": { + "/healthz": { "get": { - "summary": "Metrics Endpoint", - "description": "Provides system metrics, including GPU details if available, or CPU and memory usage otherwise.\nUseful for monitoring the resource utilization of the server running the ML models.", - "operationId": "get_metrics_metrics_get", + "summary": "Health Check Endpoint", + "description": "Health check.", + "operationId": "health_check_healthz_get", "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/MetricsResponse" + "$ref": "#/components/schemas/HealthStatus" }, - "examples": { - "gpu_metrics": { - "summary": "Example when GPUs are available", - "value": { - "gpu_info": [ - { - "id": "GPU-1234", - "name": "GeForce GTX 950", - "load": "25.00%", - "temperature": "55 C", - "memory": { - "used": "1.00 GB", - "total": "2.00 GB" - } - } - ] - } - }, - "cpu_metrics": { - "summary": "Example when only CPU is available", - "value": { - "cpu_info": { - "load_percentage": 20, - "physical_cores": 4, - "total_cores": 8, - "memory": { - "used": "4.00 GB", - "total": "16.00 GB" - } - } - } - } + "example": { + "status": "Healthy" } } } }, "500": { - "description": "Internal Server Error", + "description": "Error Response", "content": { "application/json": { - "schema": { - "$ref": "#/components/schemas/ErrorResponse" + "examples": { + "model_uninitialized": { + "summary": "Model not initialized", + "value": { + "detail": "Model not initialized" + } + } } } } @@ -275,148 +282,1661 @@ }, "components": { "schemas": { - "CPUInfo": { + "AudioURL": { "properties": { - "load_percentage": { - "type": "number", - "title": "Load Percentage" + "url": { + "type": "string", + "title": "Url" + } + }, + "type": "object", + 
"required": [ + "url" + ], + "title": "AudioURL" + }, + "BaseModel": { + "properties": {}, + "type": "object", + "title": "BaseModel" + }, + "ChatCompletionAssistantMessageParam": { + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ], + "const": "assistant", + "title": "Role" }, - "physical_cores": { - "type": "integer", - "title": "Physical Cores" + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/ChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionContentPartRefusalParam" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Content" }, - "total_cores": { - "type": "integer", - "title": "Total Cores" + "function_call": { + "anyOf": [ + { + "$ref": "#/components/schemas/FunctionCall" + }, + { + "type": "null" + } + ] + }, + "name": { + "type": "string", + "title": "Name" + }, + "refusal": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refusal" }, - "memory": { - "$ref": "#/components/schemas/MemoryInfo" + "tool_calls": { + "items": { + "$ref": "#/components/schemas/ChatCompletionMessageToolCallParam" + }, + "type": "array", + "title": "Tool Calls" } }, "type": "object", "required": [ - "load_percentage", - "physical_cores", - "total_cores", - "memory" + "role" ], - "title": "CPUInfo" + "title": "ChatCompletionAssistantMessageParam" }, - "ErrorResponse": { + "ChatCompletionContentPartAudioParam": { "properties": { - "detail": { + "audio_url": { + "$ref": "#/components/schemas/AudioURL" + }, + "type": { "type": "string", - "title": "Detail" + "enum": [ + "audio_url" + ], + "const": "audio_url", + "title": "Type" } }, "type": "object", "required": [ - "detail" + "audio_url", + "type" ], - "title": "ErrorResponse" + "title": "ChatCompletionContentPartAudioParam" }, - "GPUInfo": { + "ChatCompletionContentPartImageParam": { "properties": { - "id": { - "type": "string", - "title": "Id" - }, - "name": { - "type": "string", - "title": "Name" - }, - "load": { - "type": "string", - "title": "Load" + "image_url": { + "$ref": "#/components/schemas/ImageURL" }, - "temperature": { + "type": { "type": "string", - "title": "Temperature" - }, - "memory": { - "$ref": "#/components/schemas/MemoryInfo" + "enum": [ + "image_url" + ], + "const": "image_url", + "title": "Type" } }, "type": "object", "required": [ - "id", - "name", - "load", - "temperature", - "memory" + "image_url", + "type" ], - "title": "GPUInfo" + "title": "ChatCompletionContentPartImageParam" }, - "GenerateKwargs": { + "ChatCompletionContentPartRefusalParam": { "properties": { - "max_length": { - "type": "integer", - "title": "Max Length", - "default": 200 - }, - "min_length": { - "type": "integer", - "title": "Min Length", - "default": 0 + "refusal": { + "type": "string", + "title": "Refusal" }, - "do_sample": { - "type": "boolean", - "title": "Do Sample", + "type": { + "type": "string", + "enum": [ + "refusal" + ], + "const": "refusal", + "title": "Type" + } + }, + "type": "object", + "required": [ + "refusal", + "type" + ], + "title": "ChatCompletionContentPartRefusalParam" + }, + "ChatCompletionContentPartTextParam": { + "properties": { + "text": { + "type": "string", + "title": "Text" + }, + "type": { + "type": "string", + "enum": [ + "text" + ], + "const": "text", + "title": "Type" + } + }, + "type": "object", + "required": [ + "text", + "type" + ], + "title": "ChatCompletionContentPartTextParam" + }, + 
"ChatCompletionFunctionMessageParam": { + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Content" + }, + "name": { + "type": "string", + "title": "Name" + }, + "role": { + "type": "string", + "enum": [ + "function" + ], + "const": "function", + "title": "Role" + } + }, + "type": "object", + "required": [ + "content", + "name", + "role" + ], + "title": "ChatCompletionFunctionMessageParam" + }, + "ChatCompletionMessageToolCallParam": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "function": { + "$ref": "#/components/schemas/Function" + }, + "type": { + "type": "string", + "enum": [ + "function" + ], + "const": "function", + "title": "Type" + } + }, + "type": "object", + "required": [ + "id", + "function", + "type" + ], + "title": "ChatCompletionMessageToolCallParam" + }, + "ChatCompletionNamedFunction": { + "properties": { + "name": { + "type": "string", + "title": "Name" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "name" + ], + "title": "ChatCompletionNamedFunction" + }, + "ChatCompletionNamedToolChoiceParam": { + "properties": { + "function": { + "$ref": "#/components/schemas/ChatCompletionNamedFunction" + }, + "type": { + "type": "string", + "enum": [ + "function" + ], + "const": "function", + "title": "Type", + "default": "function" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "function" + ], + "title": "ChatCompletionNamedToolChoiceParam" + }, + "ChatCompletionRequest": { + "properties": { + "messages": { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/ChatCompletionSystemMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionUserMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionAssistantMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionToolMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionFunctionMessageParam" + }, + { + "$ref": "#/components/schemas/CustomChatCompletionMessageParam" + } + ] + }, + "type": "array", + "title": "Messages" + }, + "model": { + "type": "string", + "title": "Model" + }, + "frequency_penalty": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Frequency Penalty", + "default": 0.0 + }, + "logit_bias": { + "anyOf": [ + { + "additionalProperties": { + "type": "number" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Logit Bias" + }, + "logprobs": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Logprobs", + "default": false + }, + "top_logprobs": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Top Logprobs", + "default": 0 + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tokens" + }, + "n": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "N", + "default": 1 + }, + "presence_penalty": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Presence Penalty", + "default": 0.0 + }, + "response_format": { + "anyOf": [ + { + "$ref": "#/components/schemas/ResponseFormat" + }, + { + "type": "null" + } + ] + }, + "seed": { + "anyOf": [ + { + "type": "integer", + "maximum": 9.223372036854776e+18, + "minimum": -9.223372036854776e+18 + }, + { + "type": "null" + } + ], + "title": "Seed" + }, + "stop": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "type": 
"string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stop" + }, + "stream": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Stream", + "default": false + }, + "stream_options": { + "anyOf": [ + { + "$ref": "#/components/schemas/StreamOptions" + }, + { + "type": "null" + } + ] + }, + "temperature": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Temperature", + "default": 0.7 + }, + "top_p": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Top P", + "default": 1.0 + }, + "tools": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ChatCompletionToolsParam" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tools" + }, + "tool_choice": { + "anyOf": [ + { + "type": "string", + "enum": [ + "none" + ], + "const": "none" + }, + { + "type": "string", + "enum": [ + "auto" + ], + "const": "auto" + }, + { + "$ref": "#/components/schemas/ChatCompletionNamedToolChoiceParam" + }, + { + "type": "null" + } + ], + "title": "Tool Choice", + "default": "none" + }, + "parallel_tool_calls": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Parallel Tool Calls", + "default": false + }, + "user": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "User" + }, + "best_of": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Best Of" + }, + "use_beam_search": { + "type": "boolean", + "title": "Use Beam Search", + "default": false + }, + "top_k": { + "type": "integer", + "title": "Top K", + "default": -1 + }, + "min_p": { + "type": "number", + "title": "Min P", + "default": 0.0 + }, + "repetition_penalty": { + "type": "number", + "title": "Repetition Penalty", + "default": 1.0 + }, + "length_penalty": { + "type": "number", + "title": "Length Penalty", + "default": 1.0 + }, + "stop_token_ids": { + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stop Token Ids" + }, + "include_stop_str_in_output": { + "type": "boolean", + "title": "Include Stop Str In Output", + "default": false + }, + "ignore_eos": { + "type": "boolean", + "title": "Ignore Eos", + "default": false + }, + "min_tokens": { + "type": "integer", + "title": "Min Tokens", + "default": 0 + }, + "skip_special_tokens": { + "type": "boolean", + "title": "Skip Special Tokens", + "default": true + }, + "spaces_between_special_tokens": { + "type": "boolean", + "title": "Spaces Between Special Tokens", + "default": true + }, + "truncate_prompt_tokens": { + "anyOf": [ + { + "type": "integer", + "minimum": 1.0 + }, + { + "type": "null" + } + ], + "title": "Truncate Prompt Tokens" + }, + "prompt_logprobs": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Prompt Logprobs" + }, + "echo": { + "type": "boolean", + "title": "Echo", + "description": "If true, the new message will be prepended with the last message if they belong to the same role.", + "default": false + }, + "add_generation_prompt": { + "type": "boolean", + "title": "Add Generation Prompt", + "description": "If true, the generation prompt will be added to the chat template. 
This is a parameter used by chat template in tokenizer config of the model.", + "default": true + }, + "continue_final_message": { + "type": "boolean", + "title": "Continue Final Message", + "description": "If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to \"prefill\" part of the model's response for it. Cannot be used at the same time as `add_generation_prompt`.", + "default": false + }, + "add_special_tokens": { + "type": "boolean", + "title": "Add Special Tokens", + "description": "If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).", + "default": false + }, + "documents": { + "anyOf": [ + { + "items": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Documents", + "description": "A list of dicts representing documents that will be accessible to the model if it is performing RAG (retrieval-augmented generation). If the template does not support RAG, this argument will have no effect. We recommend that each document should be a dict containing \"title\" and \"text\" keys." + }, + "chat_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Chat Template", + "description": "A Jinja template to use for this conversion. As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one." + }, + "chat_template_kwargs": { + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Chat Template Kwargs", + "description": "Additional kwargs to pass to the template renderer. Will be accessible by the chat template." + }, + "guided_json": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + }, + { + "$ref": "#/components/schemas/BaseModel" + }, + { + "type": "null" + } + ], + "title": "Guided Json", + "description": "If specified, the output will follow the JSON schema." + }, + "guided_regex": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Guided Regex", + "description": "If specified, the output will follow the regex pattern." + }, + "guided_choice": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Guided Choice", + "description": "If specified, the output will be exactly one of the choices." + }, + "guided_grammar": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Guided Grammar", + "description": "If specified, the output will follow the context free grammar." + }, + "guided_decoding_backend": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Guided Decoding Backend", + "description": "If specified, will override the default guided decoding backend of the server for this specific request. If set, must be either 'outlines' / 'lm-format-enforcer'" + }, + "guided_whitespace_pattern": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Guided Whitespace Pattern", + "description": "If specified, will override the default whitespace pattern for guided json decoding." 
+ }, + "priority": { + "type": "integer", + "title": "Priority", + "description": "The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.", + "default": 0 + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "messages", + "model" + ], + "title": "ChatCompletionRequest" + }, + "ChatCompletionSystemMessageParam": { + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/components/schemas/ChatCompletionContentPartTextParam" + }, + "type": "array" + } + ], + "title": "Content" + }, + "role": { + "type": "string", + "enum": [ + "system" + ], + "const": "system", + "title": "Role" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "type": "object", + "required": [ + "content", + "role" + ], + "title": "ChatCompletionSystemMessageParam" + }, + "ChatCompletionToolMessageParam": { + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/components/schemas/ChatCompletionContentPartTextParam" + }, + "type": "array" + } + ], + "title": "Content" + }, + "role": { + "type": "string", + "enum": [ + "tool" + ], + "const": "tool", + "title": "Role" + }, + "tool_call_id": { + "type": "string", + "title": "Tool Call Id" + } + }, + "type": "object", + "required": [ + "content", + "role", + "tool_call_id" + ], + "title": "ChatCompletionToolMessageParam" + }, + "ChatCompletionToolsParam": { + "properties": { + "type": { + "type": "string", + "enum": [ + "function" + ], + "const": "function", + "title": "Type", + "default": "function" + }, + "function": { + "$ref": "#/components/schemas/FunctionDefinition" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "function" + ], + "title": "ChatCompletionToolsParam" + }, + "ChatCompletionUserMessageParam": { + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/ChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionContentPartImageParam" + } + ] + }, + "type": "array" + } + ], + "title": "Content" + }, + "role": { + "type": "string", + "enum": [ + "user" + ], + "const": "user", + "title": "Role" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "type": "object", + "required": [ + "content", + "role" + ], + "title": "ChatCompletionUserMessageParam" + }, + "CompletionRequest": { + "properties": { + "model": { + "type": "string", + "title": "Model" + }, + "prompt": { + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "items": { + "items": { + "type": "integer" + }, + "type": "array" + }, + "type": "array" + }, + { + "type": "string" + }, + { + "items": { + "type": "string" + }, + "type": "array" + } + ], + "title": "Prompt" + }, + "best_of": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Best Of" + }, + "echo": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Echo", + "default": false + }, + "frequency_penalty": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Frequency Penalty", + "default": 0.0 + }, + "logit_bias": { + "anyOf": [ + { + "additionalProperties": { + "type": "number" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Logit Bias" + }, + "logprobs": { + "anyOf": [ + { + "type": 
"integer" + }, + { + "type": "null" + } + ], + "title": "Logprobs" + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tokens", + "default": 16 + }, + "n": { + "type": "integer", + "title": "N", + "default": 1 + }, + "presence_penalty": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Presence Penalty", + "default": 0.0 + }, + "seed": { + "anyOf": [ + { + "type": "integer", + "maximum": 9.223372036854776e+18, + "minimum": -9.223372036854776e+18 + }, + { + "type": "null" + } + ], + "title": "Seed" + }, + "stop": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stop" + }, + "stream": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Stream", + "default": false + }, + "stream_options": { + "anyOf": [ + { + "$ref": "#/components/schemas/StreamOptions" + }, + { + "type": "null" + } + ] + }, + "suffix": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Suffix" + }, + "temperature": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Temperature", + "default": 1.0 + }, + "top_p": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Top P", + "default": 1.0 + }, + "user": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "User" + }, + "use_beam_search": { + "type": "boolean", + "title": "Use Beam Search", + "default": false + }, + "top_k": { + "type": "integer", + "title": "Top K", + "default": -1 + }, + "min_p": { + "type": "number", + "title": "Min P", + "default": 0.0 + }, + "repetition_penalty": { + "type": "number", + "title": "Repetition Penalty", + "default": 1.0 + }, + "length_penalty": { + "type": "number", + "title": "Length Penalty", + "default": 1.0 + }, + "stop_token_ids": { + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Stop Token Ids" + }, + "include_stop_str_in_output": { + "type": "boolean", + "title": "Include Stop Str In Output", + "default": false + }, + "ignore_eos": { + "type": "boolean", + "title": "Ignore Eos", + "default": false + }, + "min_tokens": { + "type": "integer", + "title": "Min Tokens", + "default": 0 + }, + "skip_special_tokens": { + "type": "boolean", + "title": "Skip Special Tokens", + "default": true + }, + "spaces_between_special_tokens": { + "type": "boolean", + "title": "Spaces Between Special Tokens", + "default": true + }, + "truncate_prompt_tokens": { + "anyOf": [ + { + "type": "integer", + "minimum": 1.0 + }, + { + "type": "null" + } + ], + "title": "Truncate Prompt Tokens" + }, + "allowed_token_ids": { + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Allowed Token Ids" + }, + "prompt_logprobs": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Prompt Logprobs" + }, + "add_special_tokens": { + "type": "boolean", + "title": "Add Special Tokens", + "description": "If true (the default), special tokens (e.g. 
BOS) will be added to the prompt.", "default": true }, - "early_stopping": { - "type": "boolean", - "title": "Early Stopping", - "default": false + "response_format": { + "anyOf": [ + { + "$ref": "#/components/schemas/ResponseFormat" + }, + { + "type": "null" + } + ], + "description": "Similar to chat completion, this parameter specifies the format of output. Only {'type': 'json_object'} or {'type': 'text' } is supported." + }, + "guided_json": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + }, + { + "$ref": "#/components/schemas/BaseModel" + }, + { + "type": "null" + } + ], + "title": "Guided Json", + "description": "If specified, the output will follow the JSON schema." + }, + "guided_regex": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Guided Regex", + "description": "If specified, the output will follow the regex pattern." + }, + "guided_choice": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Guided Choice", + "description": "If specified, the output will be exactly one of the choices." + }, + "guided_grammar": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Guided Grammar", + "description": "If specified, the output will follow the context free grammar." + }, + "guided_decoding_backend": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Guided Decoding Backend", + "description": "If specified, will override the default guided decoding backend of the server for this specific request. If set, must be one of 'outlines' / 'lm-format-enforcer'" + }, + "guided_whitespace_pattern": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Guided Whitespace Pattern", + "description": "If specified, will override the default whitespace pattern for guided json decoding." + }, + "priority": { + "type": "integer", + "title": "Priority", + "description": "The priority of the request (lower means earlier handling; default: 0). 
Any priority other than 0 will raise an error if the served model does not use priority scheduling.", + "default": 0 + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "model", + "prompt" + ], + "title": "CompletionRequest" + }, + "CustomChatCompletionContentPartParam": { + "properties": { + "type": { + "type": "string", + "title": "Type" + } + }, + "additionalProperties": true, + "type": "object", + "required": [ + "type" + ], + "title": "CustomChatCompletionContentPartParam" + }, + "CustomChatCompletionMessageParam": { + "properties": { + "role": { + "type": "string", + "title": "Role" + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/ChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionContentPartImageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionContentPartAudioParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionContentPartRefusalParam" + }, + { + "$ref": "#/components/schemas/CustomChatCompletionContentPartParam" + } + ] + }, + "type": "array" + } + ], + "title": "Content" }, - "num_beams": { - "type": "integer", - "title": "Num Beams", - "default": 1 + "name": { + "type": "string", + "title": "Name" }, - "temperature": { - "type": "number", - "title": "Temperature", - "default": 1 + "tool_call_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Tool Call Id" }, - "top_k": { - "type": "integer", - "title": "Top K", - "default": 10 + "tool_calls": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ChatCompletionMessageToolCallParam" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Calls" + } + }, + "type": "object", + "required": [ + "role" + ], + "title": "CustomChatCompletionMessageParam", + "description": "Enables custom roles in the Chat Completion API." 
+ }, + "DetokenizeRequest": { + "properties": { + "model": { + "type": "string", + "title": "Model" }, - "top_p": { - "type": "number", - "title": "Top P", - "default": 1 + "tokens": { + "items": { + "type": "integer" + }, + "type": "array", + "title": "Tokens" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "model", + "tokens" + ], + "title": "DetokenizeRequest" + }, + "EmbeddingRequest": { + "properties": { + "model": { + "type": "string", + "title": "Model" }, - "typical_p": { - "type": "number", - "title": "Typical P", - "default": 1 + "input": { + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "items": { + "items": { + "type": "integer" + }, + "type": "array" + }, + "type": "array" + }, + { + "type": "string" + }, + { + "items": { + "type": "string" + }, + "type": "array" + } + ], + "title": "Input" }, - "repetition_penalty": { - "type": "number", - "title": "Repetition Penalty", - "default": 1 + "encoding_format": { + "type": "string", + "enum": [ + "float", + "base64" + ], + "title": "Encoding Format", + "default": "float" }, - "pad_token_id": { - "type": "integer", - "title": "Pad Token Id" + "dimensions": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Dimensions" + }, + "user": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "User" + }, + "truncate_prompt_tokens": { + "anyOf": [ + { + "type": "integer", + "minimum": 1.0 + }, + { + "type": "null" + } + ], + "title": "Truncate Prompt Tokens" + }, + "additional_data": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "title": "Additional Data" }, - "eos_token_id": { + "priority": { "type": "integer", - "title": "Eos Token Id", - "default": 11 + "title": "Priority", + "description": "The priority of the request (lower means earlier handling; default: 0). 
Any priority other than 0 will raise an error if the served model does not use priority scheduling.", + "default": 0 } }, + "additionalProperties": false, "type": "object", - "title": "GenerateKwargs", - "example": { - "max_length": 200, - "temperature": 0.7, - "top_p": 0.9, - "additional_param": "Example value" - } + "required": [ + "model", + "input" + ], + "title": "EmbeddingRequest" + }, + "Function": { + "properties": { + "arguments": { + "type": "string", + "title": "Arguments" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "type": "object", + "required": [ + "arguments", + "name" + ], + "title": "Function" + }, + "FunctionCall": { + "properties": { + "arguments": { + "type": "string", + "title": "Arguments" + }, + "name": { + "type": "string", + "title": "Name" + } + }, + "type": "object", + "required": [ + "arguments", + "name" + ], + "title": "FunctionCall" + }, + "FunctionDefinition": { + "properties": { + "name": { + "type": "string", + "title": "Name" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "parameters": { + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Parameters" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "name" + ], + "title": "FunctionDefinition" }, "HTTPValidationError": { "properties": { @@ -445,121 +1965,214 @@ ], "title": "HealthStatus" }, - "HomeResponse": { + "ImageURL": { "properties": { - "message": { + "url": { + "type": "string", + "title": "Url" + }, + "detail": { "type": "string", - "title": "Message", - "example": "Server is running" + "enum": [ + "auto", + "low", + "high" + ], + "title": "Detail" } }, "type": "object", "required": [ - "message" + "url" ], - "title": "HomeResponse" + "title": "ImageURL" }, - "MemoryInfo": { + "JsonSchemaResponseFormat": { "properties": { - "used": { + "name": { "type": "string", - "title": "Used" + "title": "Name" }, - "total": { - "type": "string", - "title": "Total" + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "schema": { + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Schema" + }, + "strict": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Strict" } }, + "additionalProperties": false, "type": "object", "required": [ - "used", - "total" + "name" ], - "title": "MemoryInfo" + "title": "JsonSchemaResponseFormat" }, - "Message": { + "ResponseFormat": { "properties": { - "role": { + "type": { "type": "string", - "title": "Role" + "enum": [ + "text", + "json_object", + "json_schema" + ], + "title": "Type" }, - "content": { - "type": "string", - "title": "Content" + "json_schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "type": "null" + } + ] } }, + "additionalProperties": false, "type": "object", "required": [ - "role", - "content" + "type" ], - "title": "Message" + "title": "ResponseFormat" }, - "MetricsResponse": { + "StreamOptions": { "properties": { - "gpu_info": { - "items": { - "$ref": "#/components/schemas/GPUInfo" - }, - "type": "array", - "title": "Gpu Info" + "include_usage": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Include Usage", + "default": true }, - "cpu_info": { - "$ref": "#/components/schemas/CPUInfo" + "continuous_usage_stats": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], 
+ "title": "Continuous Usage Stats", + "default": true } }, + "additionalProperties": false, "type": "object", - "title": "MetricsResponse" + "title": "StreamOptions" }, - "UnifiedRequestModel": { + "TokenizeChatRequest": { "properties": { - "prompt": { + "model": { "type": "string", - "title": "Prompt", - "description": "Prompt for text generation. Required for text-generation pipeline. Do not use with 'messages'." + "title": "Model" + }, + "messages": { + "items": { + "anyOf": [ + { + "$ref": "#/components/schemas/ChatCompletionSystemMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionUserMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionAssistantMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionToolMessageParam" + }, + { + "$ref": "#/components/schemas/ChatCompletionFunctionMessageParam" + }, + { + "$ref": "#/components/schemas/CustomChatCompletionMessageParam" + } + ] + }, + "type": "array", + "title": "Messages" }, - "return_full_text": { + "add_generation_prompt": { "type": "boolean", - "title": "Return Full Text", - "description": "Return full text if True, else only added text", + "title": "Add Generation Prompt", "default": true }, - "clean_up_tokenization_spaces": { + "continue_final_message": { "type": "boolean", - "title": "Clean Up Tokenization Spaces", - "description": "Clean up extra spaces in text output", + "title": "Continue Final Message", "default": false }, - "prefix": { + "add_special_tokens": { + "type": "boolean", + "title": "Add Special Tokens", + "default": false + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "model", + "messages" + ], + "title": "TokenizeChatRequest" + }, + "TokenizeCompletionRequest": { + "properties": { + "model": { "type": "string", - "title": "Prefix", - "description": "Prefix added to prompt" + "title": "Model" }, - "handle_long_generation": { + "prompt": { "type": "string", - "title": "Handle Long Generation", - "description": "Strategy to handle long generation" - }, - "generate_kwargs": { - "allOf": [ - { - "$ref": "#/components/schemas/GenerateKwargs" - } - ], - "title": "Generate Kwargs", - "description": "Additional kwargs for generate method" + "title": "Prompt" }, - "messages": { - "items": { - "$ref": "#/components/schemas/Message" - }, - "type": "array", - "title": "Messages", - "description": "Messages for conversational model. Required for conversational pipeline. Do not use with 'prompt'." 
+ "add_special_tokens": { + "type": "boolean", + "title": "Add Special Tokens", + "default": true } }, + "additionalProperties": false, "type": "object", - "title": "UnifiedRequestModel" + "required": [ + "model", + "prompt" + ], + "title": "TokenizeCompletionRequest" }, "ValidationError": { "properties": { diff --git a/presets/inference/text-generation/inference_api.py b/presets/inference/text-generation/inference_api.py index b9381e220..fa04c5d0d 100644 --- a/presets/inference/text-generation/inference_api.py +++ b/presets/inference/text-generation/inference_api.py @@ -461,7 +461,7 @@ def get_metrics(): if torch.cuda.is_available(): gpus = GPUtil.getGPUs() gpu_info = [GPUInfo( - id=gpu.id, + id=str(gpu.id), name=gpu.name, load=f"{gpu.load * 100:.2f}%", temperature=f"{gpu.temperature} C", diff --git a/presets/inference/text-generation/inference_api_vllm.py b/presets/inference/text-generation/inference_api_vllm.py new file mode 100644 index 000000000..865bb82d7 --- /dev/null +++ b/presets/inference/text-generation/inference_api_vllm.py @@ -0,0 +1,98 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. +import logging +import os +import asyncio + +import uvloop +from pydantic import BaseModel, Field +from fastapi import Request, HTTPException + +from vllm.utils import FlexibleArgumentParser +import vllm.entrypoints.openai.api_server as api_server + +# Initialize logger +logger = logging.getLogger(__name__) +debug_mode = os.environ.get('DEBUG_MODE', 'false').lower() == 'true' +logging.basicConfig(level=logging.DEBUG if debug_mode else logging.INFO) + +class HealthStatus(BaseModel): + status: str = Field(..., example="Healthy") + +@api_server.router.get("/healthz", + response_model=HealthStatus, + summary="Health Check Endpoint", + responses={ + 200: { + "description": "Successful Response", + "content": { + "application/json": { + "example": { + "status": "Healthy" + } + } + } + }, + 500: { + "description": "Error Response", + "content": { + "application/json": { + "examples": { + "model_uninitialized": { + "summary": + "Model not initialized", + "value": { + "detail": + "Model not initialized" + } + } + } + } + } + } + }) +async def health_check(raw_request: Request) -> HealthStatus: + """Health check.""" + try: + await asyncio.wait_for(api_server.engine_client(raw_request).check_health(), timeout=1.0) + return {"status": "Healthy"} + except asyncio.TimeoutError: + raise HTTPException(status_code=500, detail="Model not initialized") + + +def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: + local_rank = int(os.environ.get("LOCAL_RANK", + 0)) # Default to 0 if not set + port = 5000 + local_rank # Adjust port based on local rank + + server_default_args = { + "disable-frontend-multiprocessing": False, + "port": port + } + parser.set_defaults(**server_default_args) + + # See https://docs.vllm.ai/en/latest/models/engine_args.html for more args + engine_default_args = { + "model": "/workspace/tfs/weights", + "dtype": "float16", + "cpu-offload-gb": 0, + "gpu-memory-utilization": 0.9, + "swap-space": 4, + "disable_log_stats": False, + } + parser.set_defaults(**engine_default_args) + + return parser + + +if __name__ == "__main__": + parser = FlexibleArgumentParser(description='vLLM serving server') + parser = api_server.make_arg_parser(parser) + parser = make_arg_parser(parser) + args = parser.parse_args() + + # Run the serving server + logger.info(f"Starting server on port {args.port}") + # See 
https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html for more + # details about serving server + uvloop.run(api_server.run_server(args)) diff --git a/presets/inference/text-generation/requirements.txt b/presets/inference/text-generation/requirements.txt index 2530fed6a..0e5f6e4d8 100644 --- a/presets/inference/text-generation/requirements.txt +++ b/presets/inference/text-generation/requirements.txt @@ -2,6 +2,7 @@ # Core Dependencies transformers==4.41.2 +vllm==0.6.3 torch==2.2.0 accelerate==0.30.1 fastapi>=0.111.0,<0.112.0 # Allow patch updates
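Example usage (not part of the diff): a minimal smoke test of the new OpenAI-compatible endpoints declared in api_spec.json. It assumes the server from inference_api_vllm.py is reachable on localhost:5000 (the default port for LOCAL_RANK=0) and that the `requests` package is installed; the served model id is discovered via /v1/models rather than hard-coded.

    # smoke_test.py -- illustrative sketch only
    import requests

    BASE_URL = "http://localhost:5000"

    # Custom /healthz route added in inference_api_vllm.py
    print(requests.get(f"{BASE_URL}/healthz").json())  # expected: {"status": "Healthy"}

    # Ask the server which model it serves; by default vLLM reports the
    # model path ("/workspace/tfs/weights") as the model id.
    model_id = requests.get(f"{BASE_URL}/v1/models").json()["data"][0]["id"]

    # ChatCompletionRequest requires "model" and "messages" (see api_spec.json)
    resp = requests.post(
        f"{BASE_URL}/v1/chat/completions",
        json={
            "model": model_id,
            "messages": [{"role": "user", "content": "Tell me a joke"}],
            "max_tokens": 64,
        },
    )
    print(resp.json()["choices"][0]["message"]["content"])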