diff --git a/output/schema/schema.json b/output/schema/schema.json index 685d65e04b..b35e34c095 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -8929,6 +8929,86 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.18.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform chat completion inference", + "docId": "inference-api-chat-completion", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/chat-completion-inference-api.html", + "name": "inference.chat_completion_unified", + "request": { + "name": "Request", + "namespace": "inference.chat_completion_unified" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.chat_completion_unified" + }, + "responseMediaType": [ + "text/event-stream" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/chat_completion/{inference_id}/_unified" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.11.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform completion inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", + "name": "inference.completion", + "request": { + "name": "Request", + "namespace": "inference.completion" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.completion" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/completion/{inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -9033,26 +9113,26 @@ "visibility": "public" } }, - "description": "Perform inference on the service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "docId": "inference-api-post", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", - "name": "inference.inference", + "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "docId": "inference-api-put", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html", + "name": "inference.put", "privileges": { "cluster": [ - "monitor_inference" + "manage_inference" ] }, "request": { "name": "Request", - "namespace": "inference.inference" + "namespace": "inference.put" }, - "requestBodyRequired": false, + "requestBodyRequired": true, "requestMediaType": [ "application/json" ], "response": { "name": "Response", - "namespace": "inference.inference" + "namespace": "inference.put" }, "responseMediaType": [ "application/json" @@ -9060,13 +9140,13 @@ "urls": [ { "methods": [ - "POST" + "PUT" ], "path": "/_inference/{inference_id}" }, { "methods": [ - "POST" + "PUT" ], "path": "/_inference/{task_type}/{inference_id}" } @@ -9084,26 +9164,26 @@ "visibility": "public" } }, - "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "docId": "inference-api-put", 
- "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html", - "name": "inference.put", + "description": "Perform rereanking inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", + "name": "inference.rerank", "privileges": { "cluster": [ - "manage_inference" + "monitor_inference" ] }, "request": { "name": "Request", - "namespace": "inference.put" + "namespace": "inference.rerank" }, - "requestBodyRequired": true, + "requestBodyRequired": false, "requestMediaType": [ "application/json" ], "response": { "name": "Response", - "namespace": "inference.put" + "namespace": "inference.rerank" }, "responseMediaType": [ "application/json" @@ -9111,15 +9191,49 @@ "urls": [ { "methods": [ - "PUT" + "POST" ], - "path": "/_inference/{inference_id}" + "path": "/_inference/rerank/{inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" }, + "stack": { + "since": "8.11.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform sparse embedding inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", + "name": "inference.sparse_embedding", + "request": { + "name": "Request", + "namespace": "inference.sparse_embedding" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.sparse_embedding" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ { "methods": [ - "PUT" + "POST" ], - "path": "/_inference/{task_type}/{inference_id}" + "path": "/_inference/sparse_embedding/{inference_id}" } ] }, @@ -9134,7 +9248,7 @@ "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). 
You must use a client that supports streaming.", "docId": "inference-api-stream", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html", - "name": "inference.stream_inference", + "name": "inference.stream_completion", "privileges": { "cluster": [ "monitor_inference" @@ -9142,7 +9256,7 @@ }, "request": { "name": "Request", - "namespace": "inference.stream_inference" + "namespace": "inference.stream_completion" }, "requestBodyRequired": false, "requestMediaType": [ @@ -9150,7 +9264,7 @@ ], "response": { "name": "Response", - "namespace": "inference.stream_inference" + "namespace": "inference.stream_completion" }, "responseMediaType": [ "text/event-stream" @@ -9160,13 +9274,7 @@ "methods": [ "POST" ], - "path": "/_inference/{inference_id}/_stream" - }, - { - "methods": [ - "POST" - ], - "path": "/_inference/{task_type}/{inference_id}/_stream" + "path": "/_inference/completion/{inference_id}/_stream" } ] }, @@ -9177,17 +9285,18 @@ "visibility": "public" }, "stack": { - "since": "8.18.0", + "since": "8.11.0", "stability": "stable", "visibility": "public" } }, - "description": "Perform inference on the service using the Unified Schema", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", - "name": "inference.unified_inference", + "description": "Perform text embedding inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", + "name": "inference.text_embedding", "request": { "name": "Request", - "namespace": "inference.unified_inference" + "namespace": "inference.text_embedding" }, "requestBodyRequired": false, "requestMediaType": [ @@ -9195,23 +9304,17 @@ ], "response": { "name": "Response", - "namespace": "inference.unified_inference" + "namespace": "inference.text_embedding" }, "responseMediaType": [ - "text/event-stream" + "application/json" ], "urls": [ { "methods": [ "POST" ], - "path": "/_inference/{inference_id}/_unified" - }, - { - "methods": [ - "POST" - ], - "path": "/_inference/{task_type}/{inference_id}/_unified" + "path": "/_inference/text_embedding/{inference_id}" } ] }, @@ -145534,6 +145637,31 @@ }, "specLocation": "indices/validate_query/IndicesValidateQueryResponse.ts#L23-L30" }, + { + "kind": "interface", + "description": "Defines the completion result.", + "name": { + "name": "CompletionInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "completion", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionResult", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L84-L89" + }, { "kind": "interface", "description": "The completion result object", @@ -145554,7 +145682,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L60-L65" + "specLocation": "inference/_types/Results.ts#L77-L82" }, { "kind": "interface", @@ -145585,7 +145713,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L91-L96" + "specLocation": "inference/_types/Results.ts#L110-L115" }, { "kind": "type_alias", @@ -145594,7 +145722,7 @@ "name": "DenseByteVector", "namespace": "inference._types" }, - "specLocation": "inference/_types/Results.ts#L40-L44", + "specLocation": "inference/_types/Results.ts#L48-L52", "type": { "kind": "array_of", "value": { @@ -145713,90 +145841,6 @@ ], "specLocation": "inference/_types/Services.ts#L41-L53" }, - { - 
"kind": "interface", - "description": "InferenceResult is an aggregation of mutually exclusive variants", - "name": { - "name": "InferenceResult", - "namespace": "inference._types" - }, - "properties": [ - { - "name": "text_embedding_bytes", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingByteResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "text_embedding", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "sparse_embedding", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "SparseEmbeddingResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "completion", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "CompletionResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "rerank", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "RankedDocument", - "namespace": "inference._types" - } - } - } - } - ], - "specLocation": "inference/_types/Results.ts#L79-L89", - "variants": { - "kind": "container" - } - }, { "kind": "interface", "description": "The rerank result object representing a single ranked document\nid: the original index of the document in the request\nscore: the score of the document relative to the query\ntext: Optional, the text of the document, if requested", @@ -145839,7 +145883,32 @@ } } ], - "specLocation": "inference/_types/Results.ts#L67-L77" + "specLocation": "inference/_types/Results.ts#L91-L101" + }, + { + "kind": "interface", + "description": "Defines the response for a rerank request.", + "name": { + "name": "RerankedInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "rerank", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "RankedDocument", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L103-L108" }, { "kind": "type_alias", @@ -145852,6 +145921,31 @@ "kind": "user_defined_value" } }, + { + "kind": "interface", + "description": "The response format for the sparse embedding request.", + "name": { + "name": "SparseEmbeddingInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "sparse_embedding", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "SparseEmbeddingResult", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L40-L46" + }, { "kind": "interface", "name": { @@ -145953,506 +146047,78 @@ } } ], - "specLocation": "inference/_types/Results.ts#L46-L51" + "specLocation": "inference/_types/Results.ts#L54-L59" }, { "kind": "interface", - "description": "The text embedding result object", + "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants", "name": { - "name": "TextEmbeddingResult", + "name": "TextEmbeddingInferenceResult", "namespace": "inference._types" }, "properties": [ { - "name": "embedding", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "DenseVector", - "namespace": "inference._types" - 
} - } - } - ], - "specLocation": "inference/_types/Results.ts#L53-L58" - }, - { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "no_body" - }, - "description": "Delete an inference endpoint", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.delete" - }, - "path": [ - { - "description": "The task type", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, - { - "description": "The inference identifier.", - "name": "inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [ - { - "description": "When true, the endpoint is not deleted and a list of ingest processors which reference this endpoint is returned.", - "name": "dry_run", - "required": false, - "serverDefault": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - }, - { - "description": "When true, the inference endpoint is forcefully deleted even if it is still being used by ingest processors or semantic text fields.", - "name": "force", - "required": false, - "serverDefault": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/delete/DeleteRequest.ts#L24-L66" - }, - { - "kind": "response", - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "DeleteInferenceEndpointResult", - "namespace": "inference._types" - } - } - }, - "name": { - "name": "Response", - "namespace": "inference.delete" - }, - "specLocation": "inference/delete/DeleteResponse.ts#L22-L24" - }, - { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "no_body" - }, - "description": "Get an inference endpoint", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.get" - }, - "path": [ - { - "description": "The task type", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, - { - "description": "The inference Id", - "name": "inference_id", + "name": "text_embedding_bytes", "required": false, "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/get/GetRequest.ts#L24-L56" - }, - { - "kind": "response", - "body": { - "kind": "properties", - "properties": [ - { - "name": "endpoints", - "required": true, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - } - } - ] - }, - "name": { - "name": "Response", - "namespace": "inference.get" - }, - "specLocation": "inference/get/GetResponse.ts#L22-L26" - }, - { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.", - "name": "query", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - 
} - }, - { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", - "name": "input", - "required": true, - "type": { - "kind": "union_of", - "items": [ - { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - }, - { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ] - } - }, - { - "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.", - "name": "task_settings", - "required": false, - "type": { + "kind": "array_of", + "value": { "kind": "instance_of", "type": { - "name": "TaskSettings", + "name": "TextEmbeddingByteResult", "namespace": "inference._types" } } } - ] - }, - "description": "Perform inference on the service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.inference" - }, - "path": [ - { - "description": "The type of inference task that the model performs.", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } }, { - "description": "The unique identifier for the inference endpoint.", - "name": "inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [ - { - "description": "The amount of time to wait for the inference request to complete.", - "name": "timeout", - "required": false, - "serverDefault": "30s", - "type": { - "kind": "instance_of", - "type": { - "name": "Duration", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/inference/InferenceRequest.ts#L26-L89" - }, - { - "kind": "response", - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceResult", - "namespace": "inference._types" - } - } - }, - "name": { - "name": "Response", - "namespace": "inference.inference" - }, - "specLocation": "inference/inference/InferenceResponse.ts#L22-L24" - }, - { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "value", - "codegenName": "inference_config", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpoint", - "namespace": "inference._types" - } - } - }, - "description": 
"Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.put" - }, - "path": [ - { - "description": "The task type", - "name": "task_type", + "name": "text_embedding", "required": false, "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, - { - "description": "The inference Id", - "name": "inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingResult", + "namespace": "inference._types" + } } } } ], - "query": [], - "specLocation": "inference/put/PutRequest.ts#L25-L65" - }, - { - "kind": "response", - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "name": { - "name": "Response", - "namespace": "inference.put" - }, - "specLocation": "inference/put/PutResponse.ts#L22-L24" + "specLocation": "inference/_types/Results.ts#L68-L75", + "variants": { + "kind": "container" + } }, { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", - "name": "input", - "required": true, - "type": { - "kind": "union_of", - "items": [ - { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - }, - { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ] - } - } - ] - }, - "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. 
For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, + "kind": "interface", + "description": "The text embedding result object", "name": { - "name": "Request", - "namespace": "inference.stream_inference" + "name": "TextEmbeddingResult", + "namespace": "inference._types" }, - "path": [ + "properties": [ { - "description": "The unique identifier for the inference endpoint.", - "name": "inference_id", + "name": "embedding", "required": true, "type": { "kind": "instance_of", "type": { - "name": "Id", - "namespace": "_types" - } - } - }, - { - "description": "The type of task that the model performs.", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", + "name": "DenseVector", "namespace": "inference._types" } } } ], - "query": [], - "specLocation": "inference/stream_inference/StreamInferenceRequest.ts#L24-L67" - }, - { - "kind": "response", - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "StreamResult", - "namespace": "_types" - } - } - }, - "name": { - "name": "Response", - "namespace": "inference.stream_inference" - }, - "specLocation": "inference/stream_inference/StreamInferenceResponse.ts#L22-L24" + "specLocation": "inference/_types/Results.ts#L61-L66" }, { "kind": "interface", "description": "A list of tools that the model can call.", "name": { "name": "CompletionTool", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -146475,19 +146141,19 @@ "kind": "instance_of", "type": { "name": "CompletionToolFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L223-L235" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L215-L227" }, { "kind": "interface", "description": "Controls which tool is called by the model.", "name": { "name": "CompletionToolChoice", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -146510,19 +146176,19 @@ "kind": "instance_of", "type": { "name": "CompletionToolChoiceFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L186-L198" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L178-L190" }, { "kind": "interface", "description": "The tool choice function.", "name": { "name": "CompletionToolChoiceFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -146538,14 +146204,14 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L175-L184" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L167-L176" }, { "kind": "interface", "description": "The completion tool function definition.", "name": { "name": 
"CompletionToolFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -146593,7 +146259,7 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L200-L221" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L192-L213" }, { "kind": "type_alias", @@ -146603,9 +146269,9 @@ ], "name": { "name": "CompletionToolType", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L97-L100", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L89-L92", "type": { "kind": "union_of", "items": [ @@ -146620,7 +146286,7 @@ "kind": "instance_of", "type": { "name": "CompletionToolChoice", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } ] @@ -146631,7 +146297,7 @@ "description": "An object style representation of a single portion of a conversation.", "name": { "name": "ContentObject", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -146659,14 +146325,14 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L102-L114" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L94-L106" }, { "kind": "interface", "description": "An object representing part of the conversation.", "name": { "name": "Message", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -146677,7 +146343,7 @@ "kind": "instance_of", "type": { "name": "MessageContent", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } }, @@ -146715,13 +146381,13 @@ "kind": "instance_of", "type": { "name": "ToolCall", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L153-L173" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L145-L165" }, { "kind": "type_alias", @@ -146731,9 +146397,9 @@ ], "name": { "name": "MessageContent", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L148-L151", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L140-L143", "type": { "kind": "union_of", "items": [ @@ -146750,7 +146416,7 @@ "kind": "instance_of", "type": { "name": "ContentObject", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } @@ -146775,7 +146441,7 @@ "kind": "instance_of", "type": { "name": "Message", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } @@ -146839,7 +146505,7 @@ "kind": "instance_of", "type": { "name": "CompletionToolType", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } }, @@ -146853,7 +146519,7 @@ "kind": "instance_of", "type": { "name": "CompletionTool", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } @@ -146872,7 +146538,7 @@ } ] }, - "description": "Perform inference on the service using the Unified Schema", + "description": "Perform chat completion inference", "inherits": { "type": { "name": "RequestBase", @@ -146881,21 +146547,9 @@ }, "name": { "name": 
"Request", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "path": [ - { - "description": "The task type", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, { "description": "The inference Id", "name": "inference_id", @@ -146924,7 +146578,7 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L27-L95" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L26-L87" }, { "kind": "response", @@ -146940,16 +146594,16 @@ }, "name": { "name": "Response", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/unified_inference/UnifiedResponse.ts#L22-L24" + "specLocation": "inference/chat_completion_unified/UnifiedResponse.ts#L22-L24" }, { "kind": "interface", "description": "A tool call generated by the model.", "name": { "name": "ToolCall", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -146972,7 +146626,7 @@ "kind": "instance_of", "type": { "name": "ToolCallFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } }, @@ -146989,14 +146643,14 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L130-L146" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L122-L138" }, { "kind": "interface", "description": "The function that the model called.", "name": { "name": "ToolCallFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -147024,7 +146678,792 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L116-L128" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L108-L120" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] + } + }, + { + "description": "Optional task settings", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Perform completion inference on the service", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.completion" + }, + "path": [ + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/completion/CompletionRequest.ts#L25-L63" + }, + { + "kind": "response", + 
"body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionInferenceResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.completion" + }, + "specLocation": "inference/completion/CompletionResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "no_body" + }, + "description": "Delete an inference endpoint", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.delete" + }, + "path": [ + { + "description": "The task type", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + }, + { + "description": "The inference identifier.", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "When true, the endpoint is not deleted and a list of ingest processors which reference this endpoint is returned.", + "name": "dry_run", + "required": false, + "serverDefault": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "When true, the inference endpoint is forcefully deleted even if it is still being used by ingest processors or semantic text fields.", + "name": "force", + "required": false, + "serverDefault": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/delete/DeleteRequest.ts#L24-L66" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "DeleteInferenceEndpointResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.delete" + }, + "specLocation": "inference/delete/DeleteResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "no_body" + }, + "description": "Get an inference endpoint", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.get" + }, + "path": [ + { + "description": "The task type", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + }, + { + "description": "The inference Id", + "name": "inference_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/get/GetRequest.ts#L24-L56" + }, + { + "kind": "response", + "body": { + "kind": "properties", + "properties": [ + { + "name": "endpoints", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + } + } + ] + }, + "name": { + "name": "Response", + "namespace": "inference.get" + }, + "specLocation": "inference/get/GetResponse.ts#L22-L26" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "value", + "codegenName": "inference_config", + "value": { + "kind": "instance_of", + 
"type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + } + }, + "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put" + }, + "path": [ + { + "description": "The task type", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + }, + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put/PutRequest.ts#L25-L65" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put" + }, + "specLocation": "inference/put/PutResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "Query input.", + "name": "query", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", + "name": "input", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] + } + }, + { + "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Perform rereanking inference on the 
service", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.rerank" + }, + "path": [ + { + "description": "The unique identifier for the inference endpoint.", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "The amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/rerank/RerankRequest.ts#L25-L72" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "RerankedInferenceResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.rerank" + }, + "specLocation": "inference/rerank/RerankResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] + } + }, + { + "description": "Optional task settings", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Perform sparse embedding inference on the service", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.sparse_embedding" + }, + "path": [ + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/sparse_embedding/SparseEmbeddingRequest.ts#L25-L63" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "SparseEmbeddingInferenceResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.sparse_embedding" + }, + "specLocation": "inference/sparse_embedding/SparseEmbeddingResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", + "name": "input", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": 
"string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] + } + }, + { + "description": "Optional task settings", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.stream_completion" + }, + "path": [ + { + "description": "The unique identifier for the inference endpoint.", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/stream_completion/StreamInferenceRequest.ts#L24-L63" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "StreamResult", + "namespace": "_types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.stream_completion" + }, + "specLocation": "inference/stream_completion/StreamInferenceResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] + } + }, + { + "description": "Optional task settings", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Perform text embedding inference on the service", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.text_embedding" + }, + "path": [ + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to 
complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/text_embedding/TextEmbeddingRequest.ts#L25-L63" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingInferenceResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.text_embedding" + }, + "specLocation": "inference/text_embedding/TextEmbeddingResponse.ts#L22-L24" }, { "kind": "request", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index c5789a62aa..ded98a90c0 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -12971,6 +12971,10 @@ export interface IndicesValidateQueryResponse { error?: string } +export interface InferenceCompletionInferenceResult { + completion: InferenceCompletionResult[] +} + export interface InferenceCompletionResult { result: string } @@ -12994,22 +12998,22 @@ export interface InferenceInferenceEndpointInfo extends InferenceInferenceEndpoi task_type: InferenceTaskType } -export interface InferenceInferenceResult { - text_embedding_bytes?: InferenceTextEmbeddingByteResult[] - text_embedding?: InferenceTextEmbeddingResult[] - sparse_embedding?: InferenceSparseEmbeddingResult[] - completion?: InferenceCompletionResult[] - rerank?: InferenceRankedDocument[] -} - export interface InferenceRankedDocument { index: integer score: float text?: string } +export interface InferenceRerankedInferenceResult { + rerank: InferenceRankedDocument[] +} + export type InferenceServiceSettings = any +export interface InferenceSparseEmbeddingInferenceResult { + sparse_embedding: InferenceSparseEmbeddingResult[] +} + export interface InferenceSparseEmbeddingResult { embedding: InferenceSparseVector } @@ -13024,125 +13028,161 @@ export interface InferenceTextEmbeddingByteResult { embedding: InferenceDenseByteVector } +export interface InferenceTextEmbeddingInferenceResult { + text_embedding_bytes?: InferenceTextEmbeddingByteResult[] + text_embedding?: InferenceTextEmbeddingResult[] +} + export interface InferenceTextEmbeddingResult { embedding: InferenceDenseVector } -export interface InferenceDeleteRequest extends RequestBase { - task_type?: InferenceTaskType - inference_id: Id - dry_run?: boolean - force?: boolean +export interface InferenceChatCompletionUnifiedCompletionTool { + type: string + function: InferenceChatCompletionUnifiedCompletionToolFunction } -export type InferenceDeleteResponse = InferenceDeleteInferenceEndpointResult +export interface InferenceChatCompletionUnifiedCompletionToolChoice { + type: string + function: InferenceChatCompletionUnifiedCompletionToolChoiceFunction +} -export interface InferenceGetRequest extends RequestBase { - task_type?: InferenceTaskType - inference_id?: Id +export interface InferenceChatCompletionUnifiedCompletionToolChoiceFunction { + name: string } -export interface InferenceGetResponse { - endpoints: InferenceInferenceEndpointInfo[] +export interface InferenceChatCompletionUnifiedCompletionToolFunction { + description?: string + name: string + parameters?: any + strict?: boolean } -export interface InferenceInferenceRequest extends RequestBase { - task_type?: InferenceTaskType +export type InferenceChatCompletionUnifiedCompletionToolType = string | InferenceChatCompletionUnifiedCompletionToolChoice + +export interface 
InferenceChatCompletionUnifiedContentObject { + text: string + type: string +} + +export interface InferenceChatCompletionUnifiedMessage { + content?: InferenceChatCompletionUnifiedMessageContent + role: string + tool_call_id?: Id + tool_calls?: InferenceChatCompletionUnifiedToolCall[] +} + +export type InferenceChatCompletionUnifiedMessageContent = string | InferenceChatCompletionUnifiedContentObject[] + +export interface InferenceChatCompletionUnifiedRequest extends RequestBase { inference_id: Id timeout?: Duration body?: { - query?: string - input: string | string[] - task_settings?: InferenceTaskSettings + messages: InferenceChatCompletionUnifiedMessage[] + model?: string + max_completion_tokens?: long + stop?: string[] + temperature?: float + tool_choice?: InferenceChatCompletionUnifiedCompletionToolType + tools?: InferenceChatCompletionUnifiedCompletionTool[] + top_p?: float } } -export type InferenceInferenceResponse = InferenceInferenceResult +export type InferenceChatCompletionUnifiedResponse = StreamResult -export interface InferencePutRequest extends RequestBase { - task_type?: InferenceTaskType - inference_id: Id - body?: InferenceInferenceEndpoint +export interface InferenceChatCompletionUnifiedToolCall { + id: Id + function: InferenceChatCompletionUnifiedToolCallFunction + type: string } -export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferenceChatCompletionUnifiedToolCallFunction { + arguments: string + name: string +} -export interface InferenceStreamInferenceRequest extends RequestBase { +export interface InferenceCompletionRequest extends RequestBase { inference_id: Id - task_type?: InferenceTaskType + timeout?: Duration body?: { input: string | string[] + task_settings?: InferenceTaskSettings } } -export type InferenceStreamInferenceResponse = StreamResult +export type InferenceCompletionResponse = InferenceCompletionInferenceResult -export interface InferenceUnifiedInferenceCompletionTool { - type: string - function: InferenceUnifiedInferenceCompletionToolFunction +export interface InferenceDeleteRequest extends RequestBase { + task_type?: InferenceTaskType + inference_id: Id + dry_run?: boolean + force?: boolean } -export interface InferenceUnifiedInferenceCompletionToolChoice { - type: string - function: InferenceUnifiedInferenceCompletionToolChoiceFunction -} +export type InferenceDeleteResponse = InferenceDeleteInferenceEndpointResult -export interface InferenceUnifiedInferenceCompletionToolChoiceFunction { - name: string +export interface InferenceGetRequest extends RequestBase { + task_type?: InferenceTaskType + inference_id?: Id } -export interface InferenceUnifiedInferenceCompletionToolFunction { - description?: string - name: string - parameters?: any - strict?: boolean +export interface InferenceGetResponse { + endpoints: InferenceInferenceEndpointInfo[] } -export type InferenceUnifiedInferenceCompletionToolType = string | InferenceUnifiedInferenceCompletionToolChoice - -export interface InferenceUnifiedInferenceContentObject { - text: string - type: string +export interface InferencePutRequest extends RequestBase { + task_type?: InferenceTaskType + inference_id: Id + body?: InferenceInferenceEndpoint } -export interface InferenceUnifiedInferenceMessage { - content?: InferenceUnifiedInferenceMessageContent - role: string - tool_call_id?: Id - tool_calls?: InferenceUnifiedInferenceToolCall[] +export type InferencePutResponse = InferenceInferenceEndpointInfo + +export interface InferenceRerankRequest extends RequestBase { + 
inference_id: Id + timeout?: Duration + body?: { + query: string + input: string | string[] + task_settings?: InferenceTaskSettings + } } -export type InferenceUnifiedInferenceMessageContent = string | InferenceUnifiedInferenceContentObject[] +export type InferenceRerankResponse = InferenceRerankedInferenceResult -export interface InferenceUnifiedInferenceRequest extends RequestBase { - task_type?: InferenceTaskType +export interface InferenceSparseEmbeddingRequest extends RequestBase { inference_id: Id timeout?: Duration body?: { - messages: InferenceUnifiedInferenceMessage[] - model?: string - max_completion_tokens?: long - stop?: string[] - temperature?: float - tool_choice?: InferenceUnifiedInferenceCompletionToolType - tools?: InferenceUnifiedInferenceCompletionTool[] - top_p?: float + input: string | string[] + task_settings?: InferenceTaskSettings } } -export type InferenceUnifiedInferenceResponse = StreamResult +export type InferenceSparseEmbeddingResponse = InferenceSparseEmbeddingInferenceResult -export interface InferenceUnifiedInferenceToolCall { - id: Id - function: InferenceUnifiedInferenceToolCallFunction - type: string +export interface InferenceStreamCompletionRequest extends RequestBase { + inference_id: Id + body?: { + input: string | string[] + task_settings?: InferenceTaskSettings + } } -export interface InferenceUnifiedInferenceToolCallFunction { - arguments: string - name: string +export type InferenceStreamCompletionResponse = StreamResult + +export interface InferenceTextEmbeddingRequest extends RequestBase { + inference_id: Id + timeout?: Duration + body?: { + input: string | string[] + task_settings?: InferenceTaskSettings + } } +export type InferenceTextEmbeddingResponse = InferenceTextEmbeddingInferenceResult + export interface InferenceUpdateRequest extends RequestBase { inference_id: Id task_type?: InferenceTaskType diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index d1e7acb909..eca3a30e82 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -311,6 +311,7 @@ inference-api-get,https://www.elastic.co/guide/en/elasticsearch/reference/{branc inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html inference-api-stream,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html +inference-api-chat-completion,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/chat-completion-inference-api.html inference-api-update,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/update-inference-api.html inference-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-processor.html info-api,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/info-api.html diff --git a/specification/_json_spec/inference.chat_completion_unified.json b/specification/_json_spec/inference.chat_completion_unified.json new file mode 100644 index 0000000000..52a50c72e3 --- /dev/null +++ b/specification/_json_spec/inference.chat_completion_unified.json @@ -0,0 +1,31 @@ +{ + "inference.chat_completion_unified": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html", + "description": "Perform chat completion inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": 
["text/event-stream"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/chat_completion/{inference_id}/_unified", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.inference.json b/specification/_json_spec/inference.completion.json similarity index 55% rename from specification/_json_spec/inference.inference.json rename to specification/_json_spec/inference.completion.json index bf1282dfaa..1a1ebdcb32 100644 --- a/specification/_json_spec/inference.inference.json +++ b/specification/_json_spec/inference.completion.json @@ -1,8 +1,8 @@ { - "inference.inference": { + "inference.completion": { "documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", - "description": "Perform inference" + "description": "Perform completion inference" }, "stability": "stable", "visibility": "public", @@ -13,7 +13,7 @@ "url": { "paths": [ { - "path": "/_inference/{inference_id}", + "path": "/_inference/completion/{inference_id}", "methods": ["POST"], "parts": { "inference_id": { @@ -21,20 +21,6 @@ "description": "The inference Id" } } - }, - { - "path": "/_inference/{task_type}/{inference_id}", - "methods": ["POST"], - "parts": { - "task_type": { - "type": "string", - "description": "The task type" - }, - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } } ] }, diff --git a/specification/_json_spec/inference.rerank.json b/specification/_json_spec/inference.rerank.json new file mode 100644 index 0000000000..ac9601852a --- /dev/null +++ b/specification/_json_spec/inference.rerank.json @@ -0,0 +1,31 @@ +{ + "inference.rerank": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform reranking inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/rerank/{inference_id}", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.sparse_embedding.json b/specification/_json_spec/inference.sparse_embedding.json new file mode 100644 index 0000000000..48e9748cf0 --- /dev/null +++ b/specification/_json_spec/inference.sparse_embedding.json @@ -0,0 +1,31 @@ +{ + "inference.sparse_embedding": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform sparse embedding inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/sparse_embedding/{inference_id}", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.stream_inference.json b/specification/_json_spec/inference.stream_completion.json similarity index 58% rename from 
specification/_json_spec/inference.stream_inference.json rename to specification/_json_spec/inference.stream_completion.json index 03fa95f2ce..370fc90b6f 100644 --- a/specification/_json_spec/inference.stream_inference.json +++ b/specification/_json_spec/inference.stream_completion.json @@ -1,5 +1,5 @@ { - "inference.stream_inference": { + "inference.stream_completion": { "documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html", "description": "Perform streaming inference" @@ -13,7 +13,7 @@ "url": { "paths": [ { - "path": "/_inference/{inference_id}/_stream", + "path": "/_inference/completion/{inference_id}/_stream", "methods": ["POST"], "parts": { "inference_id": { @@ -21,20 +21,6 @@ "description": "The inference Id" } } - }, - { - "path": "/_inference/{task_type}/{inference_id}/_stream", - "methods": ["POST"], - "parts": { - "task_type": { - "type": "string", - "description": "The task type" - }, - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } } ] }, diff --git a/specification/_json_spec/inference.text_embedding.json b/specification/_json_spec/inference.text_embedding.json new file mode 100644 index 0000000000..2f7c43c38a --- /dev/null +++ b/specification/_json_spec/inference.text_embedding.json @@ -0,0 +1,31 @@ +{ + "inference.text_embedding": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform text embedding inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/text_embedding/{inference_id}", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.unified_inference.json b/specification/_json_spec/inference.unified_inference.json deleted file mode 100644 index 84182d19f8..0000000000 --- a/specification/_json_spec/inference.unified_inference.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "inference.unified_inference": { - "documentation": { - "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", - "description": "Perform inference using the Unified Schema" - }, - "stability": "stable", - "visibility": "public", - "headers": { - "accept": ["text/event-stream"], - "content_type": ["application/json"] - }, - "url": { - "paths": [ - { - "path": "/_inference/{inference_id}/_unified", - "methods": ["POST"], - "parts": { - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - }, - { - "path": "/_inference/{task_type}/{inference_id}/_unified", - "methods": ["POST"], - "parts": { - "task_type": { - "type": "string", - "description": "The task type" - }, - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - } - ] - }, - "body": { - "description": "The inference payload" - } - } -} diff --git a/specification/inference/_types/Results.ts b/specification/inference/_types/Results.ts index 1a35289bab..25c53ded85 100644 --- a/specification/inference/_types/Results.ts +++ b/specification/inference/_types/Results.ts @@ -37,6 +37,14 @@ export class SparseEmbeddingResult { embedding: SparseVector } +/** + * The response format for the sparse embedding request. 
+ */
+export class SparseEmbeddingInferenceResult {
+  // TODO should we make this optional if we ever support multiple encoding types? So we can make it a variant
+  sparse_embedding: Array<SparseEmbeddingResult>
+}
+
 /**
  * Text Embedding results containing bytes are represented as Dense
  * Vectors of bytes.
@@ -57,6 +65,15 @@ export class TextEmbeddingResult {
   embedding: DenseVector
 }

+/**
+ * TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants
+ * @variants container
+ */
+export class TextEmbeddingInferenceResult {
+  text_embedding_bytes?: Array<TextEmbeddingByteResult>
+  text_embedding?: Array<TextEmbeddingResult>
+}
+
 /**
  * The completion result object
  */
@@ -64,6 +81,13 @@ export class CompletionResult {
   result: string
 }

+/**
+ * Defines the completion result.
+ */
+export class CompletionInferenceResult {
+  completion: Array<CompletionResult>
+}
+
 /**
  * The rerank result object representing a single ranked document
  * id: the original index of the document in the request
@@ -77,15 +101,10 @@ export class RankedDocument {
 }

 /**
- * InferenceResult is an aggregation of mutually exclusive variants
- * @variants container
+ * Defines the response for a rerank request.
  */
-export class InferenceResult {
-  text_embedding_bytes?: Array<TextEmbeddingByteResult>
-  text_embedding?: Array<TextEmbeddingResult>
-  sparse_embedding?: Array<SparseEmbeddingResult>
-  completion?: Array<CompletionResult>
-  rerank?: Array<RankedDocument>
+export class RerankedInferenceResult {
+  rerank: Array<RankedDocument>
 }

 /**
diff --git a/specification/inference/unified_inference/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts
similarity index 92%
rename from specification/inference/unified_inference/UnifiedRequest.ts
rename to specification/inference/chat_completion_unified/UnifiedRequest.ts
index 268b543ebc..84fba28208 100644
--- a/specification/inference/unified_inference/UnifiedRequest.ts
+++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts
@@ -17,7 +17,6 @@
  * under the License.
 */

-import { TaskType } from '@inference/_types/TaskType'
 import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
@@ -25,27 +24,20 @@ import { float, long } from '@_types/Numeric'
 import { Duration } from '@_types/Time'

 /**
- * Perform inference on the service using the Unified Schema
- * @rest_spec_name inference.unified_inference
+ * Perform chat completion inference
+ * @rest_spec_name inference.chat_completion_unified
  * @availability stack since=8.18.0 stability=stable visibility=public
  * @availability serverless stability=stable visibility=public
+ * @doc_id inference-api-chat-completion
  */
 export interface Request extends RequestBase {
   urls: [
     {
-      path: '/_inference/{inference_id}/_unified'
-      methods: ['POST']
-    },
-    {
-      path: '/_inference/{task_type}/{inference_id}/_unified'
+      path: '/_inference/chat_completion/{inference_id}/_unified'
       methods: ['POST']
     }
   ]
   path_parts: {
-    /**
-     * The task type
-     */
-    task_type?: TaskType
     /**
      * The inference Id
      */
diff --git a/specification/inference/unified_inference/UnifiedResponse.ts b/specification/inference/chat_completion_unified/UnifiedResponse.ts
similarity index 100%
rename from specification/inference/unified_inference/UnifiedResponse.ts
rename to specification/inference/chat_completion_unified/UnifiedResponse.ts
diff --git a/specification/inference/completion/CompletionRequest.ts b/specification/inference/completion/CompletionRequest.ts
new file mode 100644
index 0000000000..cbcf938a15
--- /dev/null
+++ b/specification/inference/completion/CompletionRequest.ts
@@ -0,0 +1,63 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { TaskSettings } from '@inference/_types/Services'
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
+
+/**
+ * Perform completion inference on the service
+ * @rest_spec_name inference.completion
+ * @availability stack since=8.11.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @doc_id inference-api-post
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/completion/{inference_id}'
+      methods: ['POST']
+    }
+  ]
+  path_parts: {
+    /**
+     * The inference Id
+     */
+    inference_id: Id
+  }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference request to complete.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
+  body: {
+    /**
+     * Inference input.
+     * Either a string or an array of strings.
+     */
+    input: string | Array<string>
+    /**
+     * Optional task settings
+     */
+    task_settings?: TaskSettings
+  }
+}
diff --git a/specification/inference/completion/CompletionResponse.ts b/specification/inference/completion/CompletionResponse.ts
new file mode 100644
index 0000000000..f852232d0f
--- /dev/null
+++ b/specification/inference/completion/CompletionResponse.ts
@@ -0,0 +1,24 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { CompletionInferenceResult } from '@inference/_types/Results'
+
+export class Response {
+  body: CompletionInferenceResult
+}
diff --git a/specification/inference/inference/InferenceRequest.ts b/specification/inference/rerank/RerankRequest.ts
similarity index 61%
rename from specification/inference/inference/InferenceRequest.ts
rename to specification/inference/rerank/RerankRequest.ts
index c646fd5356..666356025f 100644
--- a/specification/inference/inference/InferenceRequest.ts
+++ b/specification/inference/rerank/RerankRequest.ts
@@ -18,21 +18,13 @@
  */

 import { TaskSettings } from '@inference/_types/Services'
-import { TaskType } from '@inference/_types/TaskType'
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
 import { Duration } from '@_types/Time'

 /**
- * Perform inference on the service.
- *
- * This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.
- * It returns a response with the results of the tasks.
- * The inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.
- *
- * > info
- * > The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
- * @rest_spec_name inference.inference
+ * Perform reranking inference on the service
+ * @rest_spec_name inference.rerank
  * @availability stack since=8.11.0 stability=stable visibility=public
  * @availability serverless stability=stable visibility=public
  * @cluster_privileges monitor_inference
@@ -41,19 +33,11 @@ import { Duration } from '@_types/Time'
 export interface Request extends RequestBase {
   urls: [
     {
-      path: '/_inference/{inference_id}'
-      methods: ['POST']
-    },
-    {
-      path: '/_inference/{task_type}/{inference_id}'
+      path: '/_inference/rerank/{inference_id}'
       methods: ['POST']
     }
   ]
   path_parts: {
-    /**
-     * The type of inference task that the model performs.
-     */
-    task_type?: TaskType
     /**
      * The unique identifier for the inference endpoint.
      */
@@ -68,10 +52,9 @@
   }
   body: {
     /**
-     * The query input, which is required only for the `rerank` task.
-     * It is not required for other tasks.
+     * Query input.
      */
-    query?: string
+    query: string
     /**
      * The text on which you want to perform the inference task.
      * It can be a single string or an array.
diff --git a/specification/inference/inference/InferenceResponse.ts b/specification/inference/rerank/RerankResponse.ts
similarity index 89%
rename from specification/inference/inference/InferenceResponse.ts
rename to specification/inference/rerank/RerankResponse.ts
index 842d9a4f27..f0a4b48a67 100644
--- a/specification/inference/inference/InferenceResponse.ts
+++ b/specification/inference/rerank/RerankResponse.ts
@@ -17,8 +17,8 @@
  * under the License.
  */

-import { InferenceResult } from '@inference/_types/Results'
+import { RerankedInferenceResult } from '@inference/_types/Results'

 export class Response {
-  body: InferenceResult
+  body: RerankedInferenceResult
 }
diff --git a/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts b/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts
new file mode 100644
index 0000000000..90ce321816
--- /dev/null
+++ b/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts
@@ -0,0 +1,63 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { TaskSettings } from '@inference/_types/Services'
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
+
+/**
+ * Perform sparse embedding inference on the service
+ * @rest_spec_name inference.sparse_embedding
+ * @availability stack since=8.11.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @doc_id inference-api-post
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/sparse_embedding/{inference_id}'
+      methods: ['POST']
+    }
+  ]
+  path_parts: {
+    /**
+     * The inference Id
+     */
+    inference_id: Id
+  }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference request to complete.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
+  body: {
+    /**
+     * Inference input.
+     * Either a string or an array of strings.
+     */
+    input: string | Array<string>
+    /**
+     * Optional task settings
+     */
+    task_settings?: TaskSettings
+  }
+}
diff --git a/specification/inference/sparse_embedding/SparseEmbeddingResponse.ts b/specification/inference/sparse_embedding/SparseEmbeddingResponse.ts
new file mode 100644
index 0000000000..3a7e0de59a
--- /dev/null
+++ b/specification/inference/sparse_embedding/SparseEmbeddingResponse.ts
@@ -0,0 +1,24 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { SparseEmbeddingInferenceResult } from '@inference/_types/Results'
+
+export class Response {
+  body: SparseEmbeddingInferenceResult
+}
diff --git a/specification/inference/stream_inference/StreamInferenceRequest.ts b/specification/inference/stream_completion/StreamInferenceRequest.ts
similarity index 88%
rename from specification/inference/stream_inference/StreamInferenceRequest.ts
rename to specification/inference/stream_completion/StreamInferenceRequest.ts
index 0bb675c5fb..ae83157ffb 100644
--- a/specification/inference/stream_inference/StreamInferenceRequest.ts
+++ b/specification/inference/stream_completion/StreamInferenceRequest.ts
@@ -17,7 +17,7 @@
  * under the License.
  */

-import { TaskType } from '@inference/_types/TaskType'
+import { TaskSettings } from '@inference/_types/Services'
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'

@@ -29,7 +29,7 @@ import { Id } from '@_types/common'
 * IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
 *
 * This API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.
- * @rest_spec_name inference.stream_inference
+ * @rest_spec_name inference.stream_completion
 * @availability stack since=8.16.0 stability=stable visibility=public
 * @cluster_privileges monitor_inference
 * @doc_id inference-api-stream
@@ -37,11 +37,7 @@
 export interface Request extends RequestBase {
   urls: [
     {
-      path: '/_inference/{inference_id}/_stream'
-      methods: ['POST']
-    },
-    {
-      path: '/_inference/{task_type}/{inference_id}/_stream'
+      path: '/_inference/completion/{inference_id}/_stream'
       methods: ['POST']
     }
   ]
@@ -50,10 +46,6 @@
      * The unique identifier for the inference endpoint.
      */
     inference_id: Id
-    /**
-     * The type of task that the model performs.
-     */
-    task_type?: TaskType
   }
   body: {
     /**
@@ -63,5 +55,9 @@
      * NOTE: Inference endpoints for the completion task type currently only support a single string as input.
      */
     input: string | string[]
+    /**
+     * Optional task settings
+     */
+    task_settings?: TaskSettings
   }
 }
diff --git a/specification/inference/stream_inference/StreamInferenceResponse.ts b/specification/inference/stream_completion/StreamInferenceResponse.ts
similarity index 100%
rename from specification/inference/stream_inference/StreamInferenceResponse.ts
rename to specification/inference/stream_completion/StreamInferenceResponse.ts
diff --git a/specification/inference/stream_inference/examples/request/StreamInferenceRequestExample1.yaml b/specification/inference/stream_completion/examples/request/StreamInferenceRequestExample1.yaml
similarity index 100%
rename from specification/inference/stream_inference/examples/request/StreamInferenceRequestExample1.yaml
rename to specification/inference/stream_completion/examples/request/StreamInferenceRequestExample1.yaml
diff --git a/specification/inference/text_embedding/TextEmbeddingRequest.ts b/specification/inference/text_embedding/TextEmbeddingRequest.ts
new file mode 100644
index 0000000000..f707cb997e
--- /dev/null
+++ b/specification/inference/text_embedding/TextEmbeddingRequest.ts
@@ -0,0 +1,63 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { TaskSettings } from '@inference/_types/Services'
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
+
+/**
+ * Perform text embedding inference on the service
+ * @rest_spec_name inference.text_embedding
+ * @availability stack since=8.11.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @doc_id inference-api-post
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/text_embedding/{inference_id}'
+      methods: ['POST']
+    }
+  ]
+  path_parts: {
+    /**
+     * The inference Id
+     */
+    inference_id: Id
+  }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference request to complete.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
+  body: {
+    /**
+     * Inference input.
+     * Either a string or an array of strings.
+     */
+    input: string | Array<string>
+    /**
+     * Optional task settings
+     */
+    task_settings?: TaskSettings
+  }
+}
diff --git a/specification/inference/text_embedding/TextEmbeddingResponse.ts b/specification/inference/text_embedding/TextEmbeddingResponse.ts
new file mode 100644
index 0000000000..c5cb85bd66
--- /dev/null
+++ b/specification/inference/text_embedding/TextEmbeddingResponse.ts
@@ -0,0 +1,24 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { TextEmbeddingInferenceResult } from '@inference/_types/Results'
+
+export class Response {
+  body: TextEmbeddingInferenceResult
+}
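
End of diff. Taken together, these changes replace the generic POST /_inference/{task_type}/{inference_id} route with one route per task type. The sketch below is illustrative only and is not part of the change: it assumes a Node 18+ runtime with a global fetch, and ES_URL, the ApiKey value, and the inference endpoint IDs are hypothetical placeholders. Only the request paths and body shapes are taken from the specification files above.

// Minimal TypeScript sketch of clients for the new task-specific routes.
// Assumptions: Node 18+ (global fetch), placeholder host and credentials.

const ES_URL = 'https://localhost:9200' // hypothetical cluster address
const HEADERS = {
  'Content-Type': 'application/json',
  Authorization: 'ApiKey <your-api-key>', // hypothetical credentials
}

// POST /_inference/completion/{inference_id}
// Body matches CompletionRequest: `input` is a string or an array of strings.
async function completion(inferenceId: string, input: string | string[]) {
  const res = await fetch(`${ES_URL}/_inference/completion/${inferenceId}`, {
    method: 'POST',
    headers: HEADERS,
    body: JSON.stringify({ input }),
  })
  // CompletionInferenceResult: { completion: [{ result: string }, ...] }
  return (await res.json()) as { completion: Array<{ result: string }> }
}

// POST /_inference/rerank/{inference_id}
// Note that `query` is required on this route, unlike the old generic API.
async function rerank(inferenceId: string, query: string, input: string[]) {
  const res = await fetch(`${ES_URL}/_inference/rerank/${inferenceId}`, {
    method: 'POST',
    headers: HEADERS,
    body: JSON.stringify({ query, input }),
  })
  // RerankedInferenceResult: { rerank: [{ index, score, text? }, ...] }
  return (await res.json()) as {
    rerank: Array<{ index: number; score: number; text?: string }>
  }
}

// POST /_inference/chat_completion/{inference_id}/_unified
// The response is a text/event-stream, so the body is read incrementally.
async function chatCompletion(inferenceId: string, prompt: string) {
  const res = await fetch(
    `${ES_URL}/_inference/chat_completion/${inferenceId}/_unified`,
    {
      method: 'POST',
      headers: { ...HEADERS, Accept: 'text/event-stream' },
      body: JSON.stringify({ messages: [{ role: 'user', content: prompt }] }),
    }
  )
  const reader = res.body!.getReader()
  const decoder = new TextDecoder()
  for (;;) {
    const { done, value } = await reader.read()
    if (done) break
    process.stdout.write(decoder.decode(value, { stream: true })) // raw SSE chunks
  }
}

The same pattern extends to /_inference/sparse_embedding/{inference_id}, /_inference/text_embedding/{inference_id}, and /_inference/completion/{inference_id}/_stream; each returns the task-specific result container defined in Results.ts rather than the removed catch-all InferenceResult.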