From 687063f01ac7f276ba6d43708c80d00bd5c66425 Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Thu, 16 Jan 2025 15:49:12 -0500 Subject: [PATCH 1/5] Refactoring inference endpoints --- .../inference.chat_completion_inference.json | 31 +++++++++ .../_json_spec/inference.completion.json | 31 +++++++++ .../_json_spec/inference.rerank.json | 31 +++++++++ .../inference.sparse_embedding.json | 31 +++++++++ ....json => inference.stream_completion.json} | 16 +---- .../_json_spec/inference.text_embedding.json | 31 +++++++++ .../inference.unified_inference.json | 45 ------------- specification/inference/_types/Results.ts | 31 +++++++++ .../UnifiedRequest.ts | 11 +--- .../UnifiedResponse.ts | 0 .../inference/completion/CompletionRequest.ts | 62 +++++++++++++++++ .../completion/CompletionResponse.ts | 24 +++++++ .../inference/rerank/RerankRequest.ts | 66 +++++++++++++++++++ .../inference/rerank/RerankResponse.ts | 24 +++++++ .../SparseEmbeddingRequest.ts | 62 +++++++++++++++++ .../SparseEmbeddingResponse.ts | 24 +++++++ .../StreamInferenceRequest.ts | 16 ++--- .../StreamInferenceResponse.ts | 0 .../StreamInferenceRequestExample1.yaml | 0 .../text_embedding/TextEmbeddingRequest.ts | 62 +++++++++++++++++ .../text_embedding/TextEmbeddingResponse.ts | 24 +++++++ 21 files changed, 542 insertions(+), 80 deletions(-) create mode 100644 specification/_json_spec/inference.chat_completion_inference.json create mode 100644 specification/_json_spec/inference.completion.json create mode 100644 specification/_json_spec/inference.rerank.json create mode 100644 specification/_json_spec/inference.sparse_embedding.json rename specification/_json_spec/{inference.stream_inference.json => inference.stream_completion.json} (64%) create mode 100644 specification/_json_spec/inference.text_embedding.json delete mode 100644 specification/_json_spec/inference.unified_inference.json rename specification/inference/{unified_inference => chat_completion_unified}/UnifiedRequest.ts (94%) rename 
specification/inference/{unified_inference => chat_completion_unified}/UnifiedResponse.ts (100%) create mode 100644 specification/inference/completion/CompletionRequest.ts create mode 100644 specification/inference/completion/CompletionResponse.ts create mode 100644 specification/inference/rerank/RerankRequest.ts create mode 100644 specification/inference/rerank/RerankResponse.ts create mode 100644 specification/inference/sparse_embedding/SparseEmbeddingRequest.ts create mode 100644 specification/inference/sparse_embedding/SparseEmbeddingResponse.ts rename specification/inference/{stream_inference => stream_completion}/StreamInferenceRequest.ts (90%) rename specification/inference/{stream_inference => stream_completion}/StreamInferenceResponse.ts (100%) rename specification/inference/{stream_inference => stream_completion}/examples/request/StreamInferenceRequestExample1.yaml (100%) create mode 100644 specification/inference/text_embedding/TextEmbeddingRequest.ts create mode 100644 specification/inference/text_embedding/TextEmbeddingResponse.ts diff --git a/specification/_json_spec/inference.chat_completion_inference.json b/specification/_json_spec/inference.chat_completion_inference.json new file mode 100644 index 0000000000..52a50c72e3 --- /dev/null +++ b/specification/_json_spec/inference.chat_completion_inference.json @@ -0,0 +1,31 @@ +{ + "inference.chat_completion_unified": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html", + "description": "Perform chat completion inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["text/event-stream"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/chat_completion/{inference_id}/_unified", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The 
inference payload" + } + } +} diff --git a/specification/_json_spec/inference.completion.json b/specification/_json_spec/inference.completion.json new file mode 100644 index 0000000000..d3e9de4b5d --- /dev/null +++ b/specification/_json_spec/inference.completion.json @@ -0,0 +1,31 @@ +{ + "inference.inference": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform completion inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/completion/{inference_id}", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.rerank.json b/specification/_json_spec/inference.rerank.json new file mode 100644 index 0000000000..802dbd211e --- /dev/null +++ b/specification/_json_spec/inference.rerank.json @@ -0,0 +1,31 @@ +{ + "inference.inference": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform reranking inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/rerank/{inference_id}", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.sparse_embedding.json b/specification/_json_spec/inference.sparse_embedding.json new file mode 100644 index 0000000000..6f540fa625 --- /dev/null +++ 
b/specification/_json_spec/inference.sparse_embedding.json @@ -0,0 +1,31 @@ +{ + "inference.inference": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform sparse embedding inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/sparse_embedding/{inference_id}", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.stream_inference.json b/specification/_json_spec/inference.stream_completion.json similarity index 64% rename from specification/_json_spec/inference.stream_inference.json rename to specification/_json_spec/inference.stream_completion.json index 03fa95f2ce..ca813208ad 100644 --- a/specification/_json_spec/inference.stream_inference.json +++ b/specification/_json_spec/inference.stream_completion.json @@ -1,5 +1,5 @@ { - "inference.stream_inference": { + "inference.stream_completion": { "documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html", "description": "Perform streaming inference" @@ -12,24 +12,10 @@ }, "url": { "paths": [ - { - "path": "/_inference/{inference_id}/_stream", - "methods": ["POST"], - "parts": { - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - }, { "path": "/_inference/{task_type}/{inference_id}/_stream", "methods": ["POST"], "parts": { - "task_type": { - "type": "string", - "description": "The task type" - }, "inference_id": { "type": "string", "description": "The inference Id" diff --git a/specification/_json_spec/inference.text_embedding.json b/specification/_json_spec/inference.text_embedding.json 
new file mode 100644 index 0000000000..1721834057 --- /dev/null +++ b/specification/_json_spec/inference.text_embedding.json @@ -0,0 +1,31 @@ +{ + "inference.inference": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform text embedding inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/text_embedding/{inference_id}", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.unified_inference.json b/specification/_json_spec/inference.unified_inference.json deleted file mode 100644 index 84182d19f8..0000000000 --- a/specification/_json_spec/inference.unified_inference.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "inference.unified_inference": { - "documentation": { - "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", - "description": "Perform inference using the Unified Schema" - }, - "stability": "stable", - "visibility": "public", - "headers": { - "accept": ["text/event-stream"], - "content_type": ["application/json"] - }, - "url": { - "paths": [ - { - "path": "/_inference/{inference_id}/_unified", - "methods": ["POST"], - "parts": { - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - }, - { - "path": "/_inference/{task_type}/{inference_id}/_unified", - "methods": ["POST"], - "parts": { - "task_type": { - "type": "string", - "description": "The task type" - }, - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - } - ] - }, - "body": { - "description": "The inference payload" - } - } -} diff --git 
a/specification/inference/_types/Results.ts b/specification/inference/_types/Results.ts index 1a35289bab..fd13d71bb5 100644 --- a/specification/inference/_types/Results.ts +++ b/specification/inference/_types/Results.ts @@ -37,6 +37,14 @@ export class SparseEmbeddingResult { embedding: SparseVector } +/** + * The response format for the sparse embedding request. + */ +export class SparseEmbeddingInferenceResult { + // TODO should we make this optional if we ever support multiple encoding types? So we can make it a variant + sparse_embedding: Array +} + /** * Text Embedding results containing bytes are represented as Dense * Vectors of bytes. @@ -57,6 +65,15 @@ export class TextEmbeddingResult { embedding: DenseVector } +/** + * TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants + * @variants container + */ +export class TextEmbeddingInferenceResult { + text_embedding_bytes?: Array + text_embedding?: Array +} + /** * The completion result object */ @@ -64,6 +81,13 @@ export class CompletionResult { result: string } +/** + * Defines the completion result. + */ +export class CompletionInferenceResult { + completion: Array +} + /** * The rerank result object representing a single ranked document * id: the original index of the document in the request @@ -76,6 +100,13 @@ export class RankedDocument { text?: string } +/** + * Defines the response for a rerank request. 
+ */ +export class RerankedInferenceResult { + rerank: Array +} + /** * InferenceResult is an aggregation of mutually exclusive variants * @variants container diff --git a/specification/inference/unified_inference/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts similarity index 94% rename from specification/inference/unified_inference/UnifiedRequest.ts rename to specification/inference/chat_completion_unified/UnifiedRequest.ts index 268b543ebc..c0efa319db 100644 --- a/specification/inference/unified_inference/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -17,7 +17,6 @@ * under the License. */ -import { TaskType } from '@inference/_types/TaskType' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' @@ -33,19 +32,11 @@ import { Duration } from '@_types/Time' export interface Request extends RequestBase { urls: [ { - path: '/_inference/{inference_id}/_unified' - methods: ['POST'] - }, - { - path: '/_inference/{task_type}/{inference_id}/_unified' + path: '/_inference/chat_completion/{inference_id}/_unified' methods: ['POST'] } ] path_parts: { - /** - * The task type - */ - task_type?: TaskType /** * The inference Id */ diff --git a/specification/inference/unified_inference/UnifiedResponse.ts b/specification/inference/chat_completion_unified/UnifiedResponse.ts similarity index 100% rename from specification/inference/unified_inference/UnifiedResponse.ts rename to specification/inference/chat_completion_unified/UnifiedResponse.ts diff --git a/specification/inference/completion/CompletionRequest.ts b/specification/inference/completion/CompletionRequest.ts new file mode 100644 index 0000000000..a3f63c1edf --- /dev/null +++ b/specification/inference/completion/CompletionRequest.ts @@ -0,0 +1,62 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { TaskSettings } from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { Duration } from '@_types/Time' + +/** + * Perform inference on the service + * @rest_spec_name inference.inference + * @availability stack since=8.11.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/completion/{inference_id}' + methods: ['POST'] + } + ] + path_parts: { + /** + * The inference Id + */ + inference_id: Id + } + query_parameters: { + /** + * Specifies the amount of time to wait for the inference request to complete. + * @server_default 30s + */ + timeout?: Duration + } + body: { + /** + * Inference input. + * Either a string or an array of strings. + */ + input: string | Array + /** + * Optional task settings + */ + task_settings?: TaskSettings + } +} diff --git a/specification/inference/completion/CompletionResponse.ts b/specification/inference/completion/CompletionResponse.ts new file mode 100644 index 0000000000..f852232d0f --- /dev/null +++ b/specification/inference/completion/CompletionResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. 
under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { CompletionInferenceResult } from '@inference/_types/Results' + +export class Response { + body: CompletionInferenceResult +} diff --git a/specification/inference/rerank/RerankRequest.ts b/specification/inference/rerank/RerankRequest.ts new file mode 100644 index 0000000000..8fcc1bec62 --- /dev/null +++ b/specification/inference/rerank/RerankRequest.ts @@ -0,0 +1,66 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { TaskSettings } from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { Duration } from '@_types/Time' + +/** + * Perform inference on the service + * @rest_spec_name inference.inference + * @availability stack since=8.11.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/rerank/{inference_id}' + methods: ['POST'] + } + ] + path_parts: { + /** + * The inference Id + */ + inference_id: Id + } + query_parameters: { + /** + * Specifies the amount of time to wait for the inference request to complete. + * @server_default 30s + */ + timeout?: Duration + } + body: { + /** + * Query input. + */ + query: string + /** + * Inference input. + * Either a string or an array of strings. + */ + input: string | Array + /** + * Optional task settings + */ + task_settings?: TaskSettings + } +} diff --git a/specification/inference/rerank/RerankResponse.ts b/specification/inference/rerank/RerankResponse.ts new file mode 100644 index 0000000000..f0a4b48a67 --- /dev/null +++ b/specification/inference/rerank/RerankResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { RerankedInferenceResult } from '@inference/_types/Results' + +export class Response { + body: RerankedInferenceResult +} diff --git a/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts b/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts new file mode 100644 index 0000000000..613e672598 --- /dev/null +++ b/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts @@ -0,0 +1,62 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { TaskSettings } from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { Duration } from '@_types/Time' + +/** + * Perform inference on the service + * @rest_spec_name inference.inference + * @availability stack since=8.11.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/sparse_embedding/{inference_id}' + methods: ['POST'] + } + ] + path_parts: { + /** + * The inference Id + */ + inference_id: Id + } + query_parameters: { + /** + * Specifies the amount of time to wait for the inference request to complete. + * @server_default 30s + */ + timeout?: Duration + } + body: { + /** + * Inference input. + * Either a string or an array of strings. + */ + input: string | Array + /** + * Optional task settings + */ + task_settings?: TaskSettings + } +} diff --git a/specification/inference/sparse_embedding/SparseEmbeddingResponse.ts b/specification/inference/sparse_embedding/SparseEmbeddingResponse.ts new file mode 100644 index 0000000000..3a7e0de59a --- /dev/null +++ b/specification/inference/sparse_embedding/SparseEmbeddingResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { SparseEmbeddingInferenceResult } from '@inference/_types/Results' + +export class Response { + body: SparseEmbeddingInferenceResult +} diff --git a/specification/inference/stream_inference/StreamInferenceRequest.ts b/specification/inference/stream_completion/StreamInferenceRequest.ts similarity index 90% rename from specification/inference/stream_inference/StreamInferenceRequest.ts rename to specification/inference/stream_completion/StreamInferenceRequest.ts index 0bb675c5fb..72d4318590 100644 --- a/specification/inference/stream_inference/StreamInferenceRequest.ts +++ b/specification/inference/stream_completion/StreamInferenceRequest.ts @@ -17,7 +17,7 @@ * under the License. */ -import { TaskType } from '@inference/_types/TaskType' +import { TaskSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' @@ -37,11 +37,7 @@ import { Id } from '@_types/common' export interface Request extends RequestBase { urls: [ { - path: '/_inference/{inference_id}/_stream' - methods: ['POST'] - }, - { - path: '/_inference/{task_type}/{inference_id}/_stream' + path: '/_inference/completion/{inference_id}/_stream' methods: ['POST'] } ] @@ -50,10 +46,6 @@ export interface Request extends RequestBase { * The unique identifier for the inference endpoint. */ inference_id: Id - /** - * The type of task that the model performs. - */ - task_type?: TaskType } body: { /** @@ -63,5 +55,9 @@ export interface Request extends RequestBase { * NOTE: Inference endpoints for the completion task type currently only support a single string as input. 
*/ input: string | string[] + /** + * Optional task settings + */ + task_settings?: TaskSettings } } diff --git a/specification/inference/stream_inference/StreamInferenceResponse.ts b/specification/inference/stream_completion/StreamInferenceResponse.ts similarity index 100% rename from specification/inference/stream_inference/StreamInferenceResponse.ts rename to specification/inference/stream_completion/StreamInferenceResponse.ts diff --git a/specification/inference/stream_inference/examples/request/StreamInferenceRequestExample1.yaml b/specification/inference/stream_completion/examples/request/StreamInferenceRequestExample1.yaml similarity index 100% rename from specification/inference/stream_inference/examples/request/StreamInferenceRequestExample1.yaml rename to specification/inference/stream_completion/examples/request/StreamInferenceRequestExample1.yaml diff --git a/specification/inference/text_embedding/TextEmbeddingRequest.ts b/specification/inference/text_embedding/TextEmbeddingRequest.ts new file mode 100644 index 0000000000..a96e56a794 --- /dev/null +++ b/specification/inference/text_embedding/TextEmbeddingRequest.ts @@ -0,0 +1,62 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { TaskSettings } from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { Duration } from '@_types/Time' + +/** + * Perform inference on the service + * @rest_spec_name inference.inference + * @availability stack since=8.11.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/text_embedding/{inference_id}' + methods: ['POST'] + } + ] + path_parts: { + /** + * The inference Id + */ + inference_id: Id + } + query_parameters: { + /** + * Specifies the amount of time to wait for the inference request to complete. + * @server_default 30s + */ + timeout?: Duration + } + body: { + /** + * Inference input. + * Either a string or an array of strings. + */ + input: string | Array + /** + * Optional task settings + */ + task_settings?: TaskSettings + } +} diff --git a/specification/inference/text_embedding/TextEmbeddingResponse.ts b/specification/inference/text_embedding/TextEmbeddingResponse.ts new file mode 100644 index 0000000000..c5cb85bd66 --- /dev/null +++ b/specification/inference/text_embedding/TextEmbeddingResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { TextEmbeddingInferenceResult } from '@inference/_types/Results' + +export class Response { + body: TextEmbeddingInferenceResult +} From 0442e31a5e2271a176783b9817bd388c46acf434 Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Thu, 16 Jan 2025 16:06:47 -0500 Subject: [PATCH 2/5] Fixing stream completion url and removing the old url and class --- .../inference.stream_completion.json | 2 +- specification/inference/_types/Results.ts | 12 --- .../inference/inference/InferenceRequest.ts | 76 ------------------- .../inference/inference/InferenceResponse.ts | 24 ------ 4 files changed, 1 insertion(+), 113 deletions(-) delete mode 100644 specification/inference/inference/InferenceRequest.ts delete mode 100644 specification/inference/inference/InferenceResponse.ts diff --git a/specification/_json_spec/inference.stream_completion.json b/specification/_json_spec/inference.stream_completion.json index ca813208ad..370fc90b6f 100644 --- a/specification/_json_spec/inference.stream_completion.json +++ b/specification/_json_spec/inference.stream_completion.json @@ -13,7 +13,7 @@ "url": { "paths": [ { - "path": "/_inference/{task_type}/{inference_id}/_stream", + "path": "/_inference/completion/{inference_id}/_stream", "methods": ["POST"], "parts": { "inference_id": { diff --git a/specification/inference/_types/Results.ts b/specification/inference/_types/Results.ts index fd13d71bb5..25c53ded85 100644 --- a/specification/inference/_types/Results.ts +++ b/specification/inference/_types/Results.ts @@ -107,18 +107,6 @@ export class RerankedInferenceResult { rerank: Array } -/** - * InferenceResult is an aggregation of mutually exclusive variants - * @variants container - */ -export class InferenceResult { - text_embedding_bytes?: Array - text_embedding?: Array - sparse_embedding?: Array - completion?: Array - rerank?: Array -} - /** * Acknowledged response. 
For dry_run, contains the list of pipelines which reference the inference endpoint */ diff --git a/specification/inference/inference/InferenceRequest.ts b/specification/inference/inference/InferenceRequest.ts deleted file mode 100644 index 6bf28e1de2..0000000000 --- a/specification/inference/inference/InferenceRequest.ts +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to Elasticsearch B.V. under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch B.V. licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -import { TaskSettings } from '@inference/_types/Services' -import { TaskType } from '@inference/_types/TaskType' -import { RequestBase } from '@_types/Base' -import { Id } from '@_types/common' -import { Duration } from '@_types/Time' - -/** - * Perform inference on the service - * @rest_spec_name inference.inference - * @availability stack since=8.11.0 stability=stable visibility=public - * @availability serverless stability=stable visibility=public - */ -export interface Request extends RequestBase { - urls: [ - { - path: '/_inference/{inference_id}' - methods: ['POST'] - }, - { - path: '/_inference/{task_type}/{inference_id}' - methods: ['POST'] - } - ] - path_parts: { - /** - * The task type - */ - task_type?: TaskType - /** - * The inference Id - */ - inference_id: Id - } - query_parameters: { - /** - * Specifies the amount of time to wait for the inference request to complete. - * @server_default 30s - */ - timeout?: Duration - } - body: { - /** - * Query input, required for rerank task. - * Not required for other tasks. - */ - query?: string - /** - * Inference input. - * Either a string or an array of strings. - */ - input: string | Array - /** - * Optional task settings - */ - task_settings?: TaskSettings - } -} diff --git a/specification/inference/inference/InferenceResponse.ts b/specification/inference/inference/InferenceResponse.ts deleted file mode 100644 index 842d9a4f27..0000000000 --- a/specification/inference/inference/InferenceResponse.ts +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to Elasticsearch B.V. under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch B.V. licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -import { InferenceResult } from '@inference/_types/Results' - -export class Response { - body: InferenceResult -} From 05864d47e844bf714466143507c8d765a58a3207 Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Thu, 16 Jan 2025 16:14:50 -0500 Subject: [PATCH 3/5] generating spec --- output/schema/schema.json | 1709 +++++++++++------ output/typescript/types.ts | 192 +- .../_json_spec/inference.completion.json | 2 +- .../_json_spec/inference.inference.json | 45 - .../_json_spec/inference.rerank.json | 2 +- .../inference.sparse_embedding.json | 2 +- .../_json_spec/inference.text_embedding.json | 2 +- .../chat_completion_unified/UnifiedRequest.ts | 4 +- .../inference/completion/CompletionRequest.ts | 4 +- .../inference/rerank/RerankRequest.ts | 4 +- .../SparseEmbeddingRequest.ts | 4 +- .../StreamInferenceRequest.ts | 2 +- .../text_embedding/TextEmbeddingRequest.ts | 4 +- 13 files changed, 1203 insertions(+), 773 deletions(-) delete mode 100644 specification/_json_spec/inference.inference.json diff --git a/output/schema/schema.json b/output/schema/schema.json index 805c365ece..20ef481a94 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -8637,6 +8637,84 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.18.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform chat completion inference", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html", + 
"name": "inference.chat_completion_unified", + "request": { + "name": "Request", + "namespace": "inference.chat_completion_unified" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.chat_completion_unified" + }, + "responseMediaType": [ + "text/event-stream" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/chat_completion/{inference_id}/_unified" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.11.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform completion inference on the service", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "name": "inference.completion", + "request": { + "name": "Request", + "namespace": "inference.completion" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.completion" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/completion/{inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -8739,20 +8817,25 @@ "visibility": "public" } }, - "description": "Perform inference on the service", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", - "name": "inference.inference", + "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the 
`\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html", + "name": "inference.put", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, "request": { "name": "Request", - "namespace": "inference.inference" + "namespace": "inference.put" }, - "requestBodyRequired": false, + "requestBodyRequired": true, "requestMediaType": [ "application/json" ], "response": { "name": "Response", - "namespace": "inference.inference" + "namespace": "inference.put" }, "responseMediaType": [ "application/json" @@ -8760,13 +8843,13 @@ "urls": [ { "methods": [ - "POST" + "PUT" ], "path": "/_inference/{inference_id}" }, { "methods": [ - "POST" + "PUT" ], "path": "/_inference/{task_type}/{inference_id}" } @@ -8784,25 +8867,20 @@ "visibility": "public" } }, - "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` 
matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html", - "name": "inference.put", - "privileges": { - "cluster": [ - "manage_inference" - ] - }, + "description": "Perform rereanking inference on the service", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "name": "inference.rerank", "request": { "name": "Request", - "namespace": "inference.put" + "namespace": "inference.rerank" }, - "requestBodyRequired": true, + "requestBodyRequired": false, "requestMediaType": [ "application/json" ], "response": { "name": "Response", - "namespace": "inference.put" + "namespace": "inference.rerank" }, "responseMediaType": [ "application/json" @@ -8810,15 +8888,48 @@ "urls": [ { "methods": [ - "PUT" + "POST" ], - "path": "/_inference/{inference_id}" + "path": "/_inference/rerank/{inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" }, + "stack": { + "since": "8.11.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform sparse embedding inference on the service", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + 
"name": "inference.sparse_embedding", + "request": { + "name": "Request", + "namespace": "inference.sparse_embedding" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.sparse_embedding" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ { "methods": [ - "PUT" + "POST" ], - "path": "/_inference/{task_type}/{inference_id}" + "path": "/_inference/sparse_embedding/{inference_id}" } ] }, @@ -8833,7 +8944,7 @@ "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). 
You must use a client that supports streaming.", "docId": "inference-api-stream", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html", - "name": "inference.stream_inference", + "name": "inference.stream_completion", "privileges": { "cluster": [ "monitor_inference" @@ -8841,7 +8952,7 @@ }, "request": { "name": "Request", - "namespace": "inference.stream_inference" + "namespace": "inference.stream_completion" }, "requestBodyRequired": false, "requestMediaType": [ @@ -8849,7 +8960,7 @@ ], "response": { "name": "Response", - "namespace": "inference.stream_inference" + "namespace": "inference.stream_completion" }, "responseMediaType": [ "text/event-stream" @@ -8859,13 +8970,7 @@ "methods": [ "POST" ], - "path": "/_inference/{inference_id}/_stream" - }, - { - "methods": [ - "POST" - ], - "path": "/_inference/{task_type}/{inference_id}/_stream" + "path": "/_inference/completion/{inference_id}/_stream" } ] }, @@ -8876,17 +8981,17 @@ "visibility": "public" }, "stack": { - "since": "8.18.0", + "since": "8.11.0", "stability": "stable", "visibility": "public" } }, - "description": "Perform inference on the service using the Unified Schema", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", - "name": "inference.unified_inference", + "description": "Perform text embedding inference on the service", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "name": "inference.text_embedding", "request": { "name": "Request", - "namespace": "inference.unified_inference" + "namespace": "inference.text_embedding" }, "requestBodyRequired": false, "requestMediaType": [ @@ -8894,23 +8999,17 @@ ], "response": { "name": "Response", - "namespace": "inference.unified_inference" + "namespace": "inference.text_embedding" }, "responseMediaType": [ - "text/event-stream" + "application/json" ], "urls": [ { "methods": [ "POST" ], - "path": 
"/_inference/{inference_id}/_unified" - }, - { - "methods": [ - "POST" - ], - "path": "/_inference/{task_type}/{inference_id}/_unified" + "path": "/_inference/text_embedding/{inference_id}" } ] }, @@ -143647,6 +143746,31 @@ }, "specLocation": "indices/validate_query/IndicesValidateQueryResponse.ts#L23-L30" }, + { + "kind": "interface", + "description": "Defines the completion result.", + "name": { + "name": "CompletionInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "completion", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionResult", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L84-L89" + }, { "kind": "interface", "description": "The completion result object", @@ -143667,7 +143791,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L60-L65" + "specLocation": "inference/_types/Results.ts#L77-L82" }, { "kind": "interface", @@ -143698,7 +143822,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L91-L96" + "specLocation": "inference/_types/Results.ts#L110-L115" }, { "kind": "type_alias", @@ -143707,7 +143831,7 @@ "name": "DenseByteVector", "namespace": "inference._types" }, - "specLocation": "inference/_types/Results.ts#L40-L44", + "specLocation": "inference/_types/Results.ts#L48-L52", "type": { "kind": "array_of", "value": { @@ -143826,90 +143950,6 @@ ], "specLocation": "inference/_types/Services.ts#L41-L53" }, - { - "kind": "interface", - "description": "InferenceResult is an aggregation of mutually exclusive variants", - "name": { - "name": "InferenceResult", - "namespace": "inference._types" - }, - "properties": [ - { - "name": "text_embedding_bytes", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingByteResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "text_embedding", - 
"required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "sparse_embedding", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "SparseEmbeddingResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "completion", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "CompletionResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "rerank", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "RankedDocument", - "namespace": "inference._types" - } - } - } - } - ], - "specLocation": "inference/_types/Results.ts#L79-L89", - "variants": { - "kind": "container" - } - }, { "kind": "interface", "description": "The rerank result object representing a single ranked document\nid: the original index of the document in the request\nscore: the score of the document relative to the query\ntext: Optional, the text of the document, if requested", @@ -143952,7 +143992,32 @@ } } ], - "specLocation": "inference/_types/Results.ts#L67-L77" + "specLocation": "inference/_types/Results.ts#L91-L101" + }, + { + "kind": "interface", + "description": "Defines the response for a rerank request.", + "name": { + "name": "RerankedInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "rerank", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "RankedDocument", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L103-L108" }, { "kind": "type_alias", @@ -143965,6 +144030,31 @@ "kind": "user_defined_value" } }, + { + "kind": "interface", + "description": "The response format for the sparse 
embedding request.", + "name": { + "name": "SparseEmbeddingInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "sparse_embedding", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "SparseEmbeddingResult", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L40-L46" + }, { "kind": "interface", "name": { @@ -144066,506 +144156,78 @@ } } ], - "specLocation": "inference/_types/Results.ts#L46-L51" + "specLocation": "inference/_types/Results.ts#L54-L59" }, { "kind": "interface", - "description": "The text embedding result object", + "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants", "name": { - "name": "TextEmbeddingResult", + "name": "TextEmbeddingInferenceResult", "namespace": "inference._types" }, "properties": [ { - "name": "embedding", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "DenseVector", - "namespace": "inference._types" - } - } - } - ], - "specLocation": "inference/_types/Results.ts#L53-L58" - }, - { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "no_body" - }, - "description": "Delete an inference endpoint", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.delete" - }, - "path": [ - { - "description": "The task type", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, - { - "description": "The inference Id", - "name": "inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [ - { - "description": "When true, the endpoint is not deleted, and a list of ingest processors 
which reference this endpoint is returned", - "name": "dry_run", - "required": false, - "serverDefault": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - }, - { - "description": "When true, the inference endpoint is forcefully deleted even if it is still being used by ingest processors or semantic text fields", - "name": "force", - "required": false, - "serverDefault": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/delete/DeleteRequest.ts#L24-L65" - }, - { - "kind": "response", - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "DeleteInferenceEndpointResult", - "namespace": "inference._types" - } - } - }, - "name": { - "name": "Response", - "namespace": "inference.delete" - }, - "specLocation": "inference/delete/DeleteResponse.ts#L22-L24" - }, - { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "no_body" - }, - "description": "Get an inference endpoint", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.get" - }, - "path": [ - { - "description": "The task type", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, - { - "description": "The inference Id", - "name": "inference_id", + "name": "text_embedding_bytes", "required": false, "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/get/GetRequest.ts#L24-L55" - }, - { - "kind": "response", - "body": { - "kind": "properties", - "properties": [ - { - "name": "endpoints", - "required": true, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - 
"type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - } - } - ] - }, - "name": { - "name": "Response", - "namespace": "inference.get" - }, - "specLocation": "inference/get/GetResponse.ts#L22-L26" - }, - { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "Query input, required for rerank task.\nNot required for other tasks.", - "name": "query", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "Inference input.\nEither a string or an array of strings.", - "name": "input", - "required": true, - "type": { - "kind": "union_of", - "items": [ - { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - }, - { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ] - } - }, - { - "description": "Optional task settings", - "name": "task_settings", - "required": false, - "type": { + "kind": "array_of", + "value": { "kind": "instance_of", "type": { - "name": "TaskSettings", + "name": "TextEmbeddingByteResult", "namespace": "inference._types" } } } - ] - }, - "description": "Perform inference on the service", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.inference" - }, - "path": [ - { - "description": "The task type", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } }, { - "description": "The inference Id", - "name": "inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [ - { - "description": "Specifies the amount of time to wait 
for the inference request to complete.", - "name": "timeout", - "required": false, - "serverDefault": "30s", - "type": { - "kind": "instance_of", - "type": { - "name": "Duration", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/inference/InferenceRequest.ts#L26-L76" - }, - { - "kind": "response", - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceResult", - "namespace": "inference._types" - } - } - }, - "name": { - "name": "Response", - "namespace": "inference.inference" - }, - "specLocation": "inference/inference/InferenceResponse.ts#L22-L24" - }, - { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "value", - "codegenName": "inference_config", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpoint", - "namespace": "inference._types" - } - } - }, - "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if 
you want to use non-NLP models, use the machine learning trained model APIs.", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.put" - }, - "path": [ - { - "description": "The task type", - "name": "task_type", + "name": "text_embedding", "required": false, "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, - { - "description": "The inference Id", - "name": "inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingResult", + "namespace": "inference._types" + } } } } ], - "query": [], - "specLocation": "inference/put/PutRequest.ts#L25-L64" - }, - { - "kind": "response", - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "name": { - "name": "Response", - "namespace": "inference.put" - }, - "specLocation": "inference/put/PutResponse.ts#L22-L24" + "specLocation": "inference/_types/Results.ts#L68-L75", + "variants": { + "kind": "container" + } }, { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", - "name": "input", - "required": true, - "type": { - "kind": "union_of", - "items": [ - { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - }, - { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ] - } - } - ] - }, - 
"description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, + "kind": "interface", + "description": "The text embedding result object", "name": { - "name": "Request", - "namespace": "inference.stream_inference" + "name": "TextEmbeddingResult", + "namespace": "inference._types" }, - "path": [ + "properties": [ { - "description": "The unique identifier for the inference endpoint.", - "name": "inference_id", + "name": "embedding", "required": true, "type": { "kind": "instance_of", "type": { - "name": "Id", - "namespace": "_types" - } - } - }, - { - "description": "The type of task that the model performs.", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", + "name": "DenseVector", "namespace": "inference._types" } } } ], - "query": [], - "specLocation": "inference/stream_inference/StreamInferenceRequest.ts#L24-L67" - }, - { - "kind": "response", - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": 
"StreamResult", - "namespace": "_types" - } - } - }, - "name": { - "name": "Response", - "namespace": "inference.stream_inference" - }, - "specLocation": "inference/stream_inference/StreamInferenceResponse.ts#L22-L24" + "specLocation": "inference/_types/Results.ts#L61-L66" }, { "kind": "interface", "description": "A list of tools that the model can call.", "name": { "name": "CompletionTool", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -144588,19 +144250,19 @@ "kind": "instance_of", "type": { "name": "CompletionToolFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L223-L235" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L214-L226" }, { "kind": "interface", "description": "Controls which tool is called by the model.", "name": { "name": "CompletionToolChoice", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -144623,19 +144285,19 @@ "kind": "instance_of", "type": { "name": "CompletionToolChoiceFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L186-L198" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L177-L189" }, { "kind": "interface", "description": "The tool choice function.", "name": { "name": "CompletionToolChoiceFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -144651,14 +144313,14 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L175-L184" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L166-L175" }, { "kind": "interface", "description": "The completion tool function definition.", 
"name": { "name": "CompletionToolFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -144706,7 +144368,7 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L200-L221" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L191-L212" }, { "kind": "type_alias", @@ -144716,9 +144378,9 @@ ], "name": { "name": "CompletionToolType", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L97-L100", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L88-L91", "type": { "kind": "union_of", "items": [ @@ -144733,7 +144395,7 @@ "kind": "instance_of", "type": { "name": "CompletionToolChoice", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } ] @@ -144744,7 +144406,7 @@ "description": "An object style representation of a single portion of a conversation.", "name": { "name": "ContentObject", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -144772,14 +144434,14 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L102-L114" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L93-L105" }, { "kind": "interface", "description": "An object representing part of the conversation.", "name": { "name": "Message", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -144790,7 +144452,7 @@ "kind": "instance_of", "type": { "name": "MessageContent", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } }, @@ -144828,13 +144490,13 @@ "kind": "instance_of", "type": { "name": "ToolCall", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" 
} } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L153-L173" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L144-L164" }, { "kind": "type_alias", @@ -144844,9 +144506,9 @@ ], "name": { "name": "MessageContent", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L148-L151", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L139-L142", "type": { "kind": "union_of", "items": [ @@ -144863,7 +144525,7 @@ "kind": "instance_of", "type": { "name": "ContentObject", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } @@ -144888,7 +144550,7 @@ "kind": "instance_of", "type": { "name": "Message", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } @@ -144952,7 +144614,7 @@ "kind": "instance_of", "type": { "name": "CompletionToolType", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } }, @@ -144966,7 +144628,7 @@ "kind": "instance_of", "type": { "name": "CompletionTool", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } @@ -144985,7 +144647,7 @@ } ] }, - "description": "Perform inference on the service using the Unified Schema", + "description": "Perform chat completion inference", "inherits": { "type": { "name": "RequestBase", @@ -144994,21 +144656,9 @@ }, "name": { "name": "Request", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "path": [ - { - "description": "The task type", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, { "description": "The inference Id", "name": "inference_id", @@ -145037,7 +144687,7 @@ } } ], - "specLocation": 
"inference/unified_inference/UnifiedRequest.ts#L27-L95" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L26-L86" }, { "kind": "response", @@ -145053,16 +144703,16 @@ }, "name": { "name": "Response", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/unified_inference/UnifiedResponse.ts#L22-L24" + "specLocation": "inference/chat_completion_unified/UnifiedResponse.ts#L22-L24" }, { "kind": "interface", "description": "A tool call generated by the model.", "name": { "name": "ToolCall", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -145085,7 +144735,7 @@ "kind": "instance_of", "type": { "name": "ToolCallFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } }, @@ -145102,14 +144752,14 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L130-L146" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L121-L137" }, { "kind": "interface", "description": "The function that the model called.", "name": { "name": "ToolCallFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -145137,7 +144787,792 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L116-L128" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L107-L119" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + 
"name": "string", + "namespace": "_builtins" + } + } + } + ] + } + }, + { + "description": "Optional task settings", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Perform completion inference on the service", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.completion" + }, + "path": [ + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/completion/CompletionRequest.ts#L25-L62" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionInferenceResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.completion" + }, + "specLocation": "inference/completion/CompletionResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "no_body" + }, + "description": "Delete an inference endpoint", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.delete" + }, + "path": [ + { + "description": "The task type", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + }, + { + "description": 
"The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "When true, the endpoint is not deleted, and a list of ingest processors which reference this endpoint is returned", + "name": "dry_run", + "required": false, + "serverDefault": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "When true, the inference endpoint is forcefully deleted even if it is still being used by ingest processors or semantic text fields", + "name": "force", + "required": false, + "serverDefault": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/delete/DeleteRequest.ts#L24-L65" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "DeleteInferenceEndpointResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.delete" + }, + "specLocation": "inference/delete/DeleteResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "no_body" + }, + "description": "Get an inference endpoint", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.get" + }, + "path": [ + { + "description": "The task type", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + }, + { + "description": "The inference Id", + "name": "inference_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": 
"inference/get/GetRequest.ts#L24-L55" + }, + { + "kind": "response", + "body": { + "kind": "properties", + "properties": [ + { + "name": "endpoints", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + } + } + ] + }, + "name": { + "name": "Response", + "namespace": "inference.get" + }, + "specLocation": "inference/get/GetResponse.ts#L22-L26" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "value", + "codegenName": "inference_config", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + } + }, + "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": 
"_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put" + }, + "path": [ + { + "description": "The task type", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + }, + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put/PutRequest.ts#L25-L64" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put" + }, + "specLocation": "inference/put/PutResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "Query input.", + "name": "query", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] + } + }, + { + "description": "Optional task settings", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Perform reranking inference on the service", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + 
"name": { + "name": "Request", + "namespace": "inference.rerank" + }, + "path": [ + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/rerank/RerankRequest.ts#L25-L66" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "RerankedInferenceResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.rerank" + }, + "specLocation": "inference/rerank/RerankResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] + } + }, + { + "description": "Optional task settings", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Perform sparse embedding inference on the service", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.sparse_embedding" + }, + "path": [ + { + 
"description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/sparse_embedding/SparseEmbeddingRequest.ts#L25-L62" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "SparseEmbeddingInferenceResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.sparse_embedding" + }, + "specLocation": "inference/sparse_embedding/SparseEmbeddingResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", + "name": "input", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] + } + }, + { + "description": "Optional task settings", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing 
response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.stream_completion" + }, + "path": [ + { + "description": "The unique identifier for the inference endpoint.", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/stream_completion/StreamInferenceRequest.ts#L24-L63" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "StreamResult", + "namespace": "_types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.stream_completion" + }, + "specLocation": "inference/stream_completion/StreamInferenceResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": 
"instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] + } + }, + { + "description": "Optional task settings", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Perform text embedding inference on the service", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.text_embedding" + }, + "path": [ + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/text_embedding/TextEmbeddingRequest.ts#L25-L62" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingInferenceResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.text_embedding" + }, + "specLocation": "inference/text_embedding/TextEmbeddingResponse.ts#L22-L24" }, { "kind": "request", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 04b01383b7..0cd6bd655c 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -12843,6 +12843,10 @@ export interface IndicesValidateQueryResponse { error?: string } +export interface InferenceCompletionInferenceResult { + completion: 
InferenceCompletionResult[] +} + export interface InferenceCompletionResult { result: string } @@ -12866,22 +12870,22 @@ export interface InferenceInferenceEndpointInfo extends InferenceInferenceEndpoi task_type: InferenceTaskType } -export interface InferenceInferenceResult { - text_embedding_bytes?: InferenceTextEmbeddingByteResult[] - text_embedding?: InferenceTextEmbeddingResult[] - sparse_embedding?: InferenceSparseEmbeddingResult[] - completion?: InferenceCompletionResult[] - rerank?: InferenceRankedDocument[] -} - export interface InferenceRankedDocument { index: integer score: float text?: string } +export interface InferenceRerankedInferenceResult { + rerank: InferenceRankedDocument[] +} + export type InferenceServiceSettings = any +export interface InferenceSparseEmbeddingInferenceResult { + sparse_embedding: InferenceSparseEmbeddingResult[] +} + export interface InferenceSparseEmbeddingResult { embedding: InferenceSparseVector } @@ -12896,125 +12900,161 @@ export interface InferenceTextEmbeddingByteResult { embedding: InferenceDenseByteVector } +export interface InferenceTextEmbeddingInferenceResult { + text_embedding_bytes?: InferenceTextEmbeddingByteResult[] + text_embedding?: InferenceTextEmbeddingResult[] +} + export interface InferenceTextEmbeddingResult { embedding: InferenceDenseVector } -export interface InferenceDeleteRequest extends RequestBase { - task_type?: InferenceTaskType - inference_id: Id - dry_run?: boolean - force?: boolean +export interface InferenceChatCompletionUnifiedCompletionTool { + type: string + function: InferenceChatCompletionUnifiedCompletionToolFunction } -export type InferenceDeleteResponse = InferenceDeleteInferenceEndpointResult +export interface InferenceChatCompletionUnifiedCompletionToolChoice { + type: string + function: InferenceChatCompletionUnifiedCompletionToolChoiceFunction +} -export interface InferenceGetRequest extends RequestBase { - task_type?: InferenceTaskType - inference_id?: Id +export interface 
InferenceChatCompletionUnifiedCompletionToolChoiceFunction { + name: string } -export interface InferenceGetResponse { - endpoints: InferenceInferenceEndpointInfo[] +export interface InferenceChatCompletionUnifiedCompletionToolFunction { + description?: string + name: string + parameters?: any + strict?: boolean } -export interface InferenceInferenceRequest extends RequestBase { - task_type?: InferenceTaskType +export type InferenceChatCompletionUnifiedCompletionToolType = string | InferenceChatCompletionUnifiedCompletionToolChoice + +export interface InferenceChatCompletionUnifiedContentObject { + text: string + type: string +} + +export interface InferenceChatCompletionUnifiedMessage { + content?: InferenceChatCompletionUnifiedMessageContent + role: string + tool_call_id?: Id + tool_calls?: InferenceChatCompletionUnifiedToolCall[] +} + +export type InferenceChatCompletionUnifiedMessageContent = string | InferenceChatCompletionUnifiedContentObject[] + +export interface InferenceChatCompletionUnifiedRequest extends RequestBase { inference_id: Id timeout?: Duration body?: { - query?: string - input: string | string[] - task_settings?: InferenceTaskSettings + messages: InferenceChatCompletionUnifiedMessage[] + model?: string + max_completion_tokens?: long + stop?: string[] + temperature?: float + tool_choice?: InferenceChatCompletionUnifiedCompletionToolType + tools?: InferenceChatCompletionUnifiedCompletionTool[] + top_p?: float } } -export type InferenceInferenceResponse = InferenceInferenceResult +export type InferenceChatCompletionUnifiedResponse = StreamResult -export interface InferencePutRequest extends RequestBase { - task_type?: InferenceTaskType - inference_id: Id - body?: InferenceInferenceEndpoint +export interface InferenceChatCompletionUnifiedToolCall { + id: Id + function: InferenceChatCompletionUnifiedToolCallFunction + type: string } -export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface 
InferenceChatCompletionUnifiedToolCallFunction { + arguments: string + name: string +} -export interface InferenceStreamInferenceRequest extends RequestBase { +export interface InferenceCompletionRequest extends RequestBase { inference_id: Id - task_type?: InferenceTaskType + timeout?: Duration body?: { input: string | string[] + task_settings?: InferenceTaskSettings } } -export type InferenceStreamInferenceResponse = StreamResult +export type InferenceCompletionResponse = InferenceCompletionInferenceResult -export interface InferenceUnifiedInferenceCompletionTool { - type: string - function: InferenceUnifiedInferenceCompletionToolFunction +export interface InferenceDeleteRequest extends RequestBase { + task_type?: InferenceTaskType + inference_id: Id + dry_run?: boolean + force?: boolean } -export interface InferenceUnifiedInferenceCompletionToolChoice { - type: string - function: InferenceUnifiedInferenceCompletionToolChoiceFunction -} +export type InferenceDeleteResponse = InferenceDeleteInferenceEndpointResult -export interface InferenceUnifiedInferenceCompletionToolChoiceFunction { - name: string +export interface InferenceGetRequest extends RequestBase { + task_type?: InferenceTaskType + inference_id?: Id } -export interface InferenceUnifiedInferenceCompletionToolFunction { - description?: string - name: string - parameters?: any - strict?: boolean +export interface InferenceGetResponse { + endpoints: InferenceInferenceEndpointInfo[] } -export type InferenceUnifiedInferenceCompletionToolType = string | InferenceUnifiedInferenceCompletionToolChoice - -export interface InferenceUnifiedInferenceContentObject { - text: string - type: string +export interface InferencePutRequest extends RequestBase { + task_type?: InferenceTaskType + inference_id: Id + body?: InferenceInferenceEndpoint } -export interface InferenceUnifiedInferenceMessage { - content?: InferenceUnifiedInferenceMessageContent - role: string - tool_call_id?: Id - tool_calls?: 
InferenceUnifiedInferenceToolCall[] +export type InferencePutResponse = InferenceInferenceEndpointInfo + +export interface InferenceRerankRequest extends RequestBase { + inference_id: Id + timeout?: Duration + body?: { + query: string + input: string | string[] + task_settings?: InferenceTaskSettings + } } -export type InferenceUnifiedInferenceMessageContent = string | InferenceUnifiedInferenceContentObject[] +export type InferenceRerankResponse = InferenceRerankedInferenceResult -export interface InferenceUnifiedInferenceRequest extends RequestBase { - task_type?: InferenceTaskType +export interface InferenceSparseEmbeddingRequest extends RequestBase { inference_id: Id timeout?: Duration body?: { - messages: InferenceUnifiedInferenceMessage[] - model?: string - max_completion_tokens?: long - stop?: string[] - temperature?: float - tool_choice?: InferenceUnifiedInferenceCompletionToolType - tools?: InferenceUnifiedInferenceCompletionTool[] - top_p?: float + input: string | string[] + task_settings?: InferenceTaskSettings } } -export type InferenceUnifiedInferenceResponse = StreamResult +export type InferenceSparseEmbeddingResponse = InferenceSparseEmbeddingInferenceResult -export interface InferenceUnifiedInferenceToolCall { - id: Id - function: InferenceUnifiedInferenceToolCallFunction - type: string +export interface InferenceStreamCompletionRequest extends RequestBase { + inference_id: Id + body?: { + input: string | string[] + task_settings?: InferenceTaskSettings + } } -export interface InferenceUnifiedInferenceToolCallFunction { - arguments: string - name: string +export type InferenceStreamCompletionResponse = StreamResult + +export interface InferenceTextEmbeddingRequest extends RequestBase { + inference_id: Id + timeout?: Duration + body?: { + input: string | string[] + task_settings?: InferenceTaskSettings + } } +export type InferenceTextEmbeddingResponse = InferenceTextEmbeddingInferenceResult + export interface InferenceUpdateRequest extends RequestBase 
{ inference_id: Id task_type?: InferenceTaskType diff --git a/specification/_json_spec/inference.completion.json b/specification/_json_spec/inference.completion.json index d3e9de4b5d..1a1ebdcb32 100644 --- a/specification/_json_spec/inference.completion.json +++ b/specification/_json_spec/inference.completion.json @@ -1,5 +1,5 @@ { - "inference.inference": { + "inference.completion": { "documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", "description": "Perform completion inference" diff --git a/specification/_json_spec/inference.inference.json b/specification/_json_spec/inference.inference.json deleted file mode 100644 index bf1282dfaa..0000000000 --- a/specification/_json_spec/inference.inference.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "inference.inference": { - "documentation": { - "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", - "description": "Perform inference" - }, - "stability": "stable", - "visibility": "public", - "headers": { - "accept": ["application/json"], - "content_type": ["application/json"] - }, - "url": { - "paths": [ - { - "path": "/_inference/{inference_id}", - "methods": ["POST"], - "parts": { - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - }, - { - "path": "/_inference/{task_type}/{inference_id}", - "methods": ["POST"], - "parts": { - "task_type": { - "type": "string", - "description": "The task type" - }, - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - } - ] - }, - "body": { - "description": "The inference payload" - } - } -} diff --git a/specification/_json_spec/inference.rerank.json b/specification/_json_spec/inference.rerank.json index 802dbd211e..ac9601852a 100644 --- a/specification/_json_spec/inference.rerank.json +++ b/specification/_json_spec/inference.rerank.json @@ -1,5 +1,5 @@ { - "inference.inference": { + "inference.rerank": { 
"documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", "description": "Perform reranking inference" diff --git a/specification/_json_spec/inference.sparse_embedding.json b/specification/_json_spec/inference.sparse_embedding.json index 6f540fa625..48e9748cf0 100644 --- a/specification/_json_spec/inference.sparse_embedding.json +++ b/specification/_json_spec/inference.sparse_embedding.json @@ -1,5 +1,5 @@ { - "inference.inference": { + "inference.sparse_embedding": { "documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", "description": "Perform sparse embedding inference" diff --git a/specification/_json_spec/inference.text_embedding.json b/specification/_json_spec/inference.text_embedding.json index 1721834057..2f7c43c38a 100644 --- a/specification/_json_spec/inference.text_embedding.json +++ b/specification/_json_spec/inference.text_embedding.json @@ -1,5 +1,5 @@ { - "inference.inference": { + "inference.text_embedding": { "documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", "description": "Perform text embedding inference" diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts index c0efa319db..aa275c6903 100644 --- a/specification/inference/chat_completion_unified/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -24,8 +24,8 @@ import { float, long } from '@_types/Numeric' import { Duration } from '@_types/Time' /** - * Perform inference on the service using the Unified Schema - * @rest_spec_name inference.unified_inference + * Perform chat completion inference + * @rest_spec_name inference.chat_completion_unified * @availability stack since=8.18.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public */ 
diff --git a/specification/inference/completion/CompletionRequest.ts b/specification/inference/completion/CompletionRequest.ts index a3f63c1edf..f0b5e7f680 100644 --- a/specification/inference/completion/CompletionRequest.ts +++ b/specification/inference/completion/CompletionRequest.ts @@ -23,8 +23,8 @@ import { Id } from '@_types/common' import { Duration } from '@_types/Time' /** - * Perform inference on the service - * @rest_spec_name inference.inference + * Perform completion inference on the service + * @rest_spec_name inference.completion * @availability stack since=8.11.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public */ diff --git a/specification/inference/rerank/RerankRequest.ts b/specification/inference/rerank/RerankRequest.ts index 8fcc1bec62..a0037dcbea 100644 --- a/specification/inference/rerank/RerankRequest.ts +++ b/specification/inference/rerank/RerankRequest.ts @@ -23,8 +23,8 @@ import { Id } from '@_types/common' import { Duration } from '@_types/Time' /** - * Perform inference on the service - * @rest_spec_name inference.inference + * Perform reranking inference on the service + * @rest_spec_name inference.rerank * @availability stack since=8.11.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public */ diff --git a/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts b/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts index 613e672598..3986e02959 100644 --- a/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts +++ b/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts @@ -23,8 +23,8 @@ import { Id } from '@_types/common' import { Duration } from '@_types/Time' /** - * Perform inference on the service - * @rest_spec_name inference.inference + * Perform sparse embedding inference on the service + * @rest_spec_name inference.sparse_embedding * @availability stack since=8.11.0 stability=stable 
visibility=public * @availability serverless stability=stable visibility=public */ diff --git a/specification/inference/stream_completion/StreamInferenceRequest.ts b/specification/inference/stream_completion/StreamInferenceRequest.ts index 72d4318590..ae83157ffb 100644 --- a/specification/inference/stream_completion/StreamInferenceRequest.ts +++ b/specification/inference/stream_completion/StreamInferenceRequest.ts @@ -29,7 +29,7 @@ import { Id } from '@_types/common' * IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. * * This API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming. 
- * @rest_spec_name inference.stream_inference + * @rest_spec_name inference.stream_completion * @availability stack since=8.16.0 stability=stable visibility=public * @cluster_privileges monitor_inference * @doc_id inference-api-stream diff --git a/specification/inference/text_embedding/TextEmbeddingRequest.ts b/specification/inference/text_embedding/TextEmbeddingRequest.ts index a96e56a794..ded1c5da4f 100644 --- a/specification/inference/text_embedding/TextEmbeddingRequest.ts +++ b/specification/inference/text_embedding/TextEmbeddingRequest.ts @@ -23,8 +23,8 @@ import { Id } from '@_types/common' import { Duration } from '@_types/Time' /** - * Perform inference on the service - * @rest_spec_name inference.inference + * Perform text embedding inference on the service + * @rest_spec_name inference.text_embedding * @availability stack since=8.11.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public */ From 749c78cd951a8181b21c5481efad29b3f5062463 Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Tue, 28 Jan 2025 13:00:22 -0500 Subject: [PATCH 4/5] Adding doc id --- output/schema/schema.json | 248 +++--------------- specification/_doc_ids/table.csv | 1 + .../chat_completion_unified/UnifiedRequest.ts | 1 + .../inference/completion/CompletionRequest.ts | 1 + .../SparseEmbeddingRequest.ts | 1 + .../text_embedding/TextEmbeddingRequest.ts | 1 + 6 files changed, 39 insertions(+), 214 deletions(-) diff --git a/output/schema/schema.json b/output/schema/schema.json index 0df368d305..b35e34c095 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -8942,7 +8942,8 @@ } }, "description": "Perform chat completion inference", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html", + "docId": "inference-api-chat-completion", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/chat-completion-inference-api.html", "name": 
"inference.chat_completion_unified", "request": { "name": "Request", @@ -8981,7 +8982,8 @@ } }, "description": "Perform completion inference on the service", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", "name": "inference.completion", "request": { "name": "Request", @@ -9111,60 +9113,6 @@ "visibility": "public" } }, -<<<<<<< HEAD -======= - "description": "Perform inference on the service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "docId": "inference-api-post", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", - "name": "inference.inference", - "privileges": { - "cluster": [ - "monitor_inference" - ] - }, - "request": { - "name": "Request", - "namespace": "inference.inference" - }, - "requestBodyRequired": false, - "requestMediaType": [ - "application/json" - ], - "response": { - "name": "Response", - "namespace": "inference.inference" - }, - "responseMediaType": [ - "application/json" - ], - "urls": [ - { - "methods": [ - "POST" - ], - "path": "/_inference/{inference_id}" - }, - { - "methods": [ - "POST" - ], - "path": "/_inference/{task_type}/{inference_id}" - } - ] - }, - { - "availability": { - "serverless": { - "stability": "stable", - "visibility": "public" - }, - "stack": { - "since": "8.11.0", - "stability": "stable", - "visibility": "public" - } - }, ->>>>>>> abca59abff0981501ac8c217f0aa3745ce6030a2 "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor 
built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", "docId": "inference-api-put", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html", @@ -9217,8 +9165,14 @@ } }, "description": "Perform rereanking inference on the service", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", "name": "inference.rerank", + "privileges": { + "cluster": [ + "monitor_inference" + ] + }, "request": { "name": "Request", "namespace": "inference.rerank" @@ -9256,7 +9210,8 @@ } }, "description": "Perform sparse embedding inference on the service", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", "name": "inference.sparse_embedding", "request": { "name": "Request", @@ -9336,7 +9291,8 @@ } }, "description": "Perform text embedding inference on the service", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", "name": "inference.text_embedding", "request": { "name": "Request", @@ -146190,7 +146146,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L214-L226" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L215-L227" }, { "kind": "interface", @@ -146225,7 +146181,7 @@ } } ], - "specLocation": 
"inference/chat_completion_unified/UnifiedRequest.ts#L177-L189" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L178-L190" }, { "kind": "interface", @@ -146248,7 +146204,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L166-L175" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L167-L176" }, { "kind": "interface", @@ -146303,7 +146259,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L191-L212" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L192-L213" }, { "kind": "type_alias", @@ -146315,7 +146271,7 @@ "name": "CompletionToolType", "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L88-L91", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L89-L92", "type": { "kind": "union_of", "items": [ @@ -146369,7 +146325,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L93-L105" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L94-L106" }, { "kind": "interface", @@ -146431,7 +146387,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L144-L164" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L145-L165" }, { "kind": "type_alias", @@ -146443,7 +146399,7 @@ "name": "MessageContent", "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L139-L142", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L140-L143", "type": { "kind": "union_of", "items": [ @@ -146622,7 +146578,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L26-L86" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L26-L87" }, { "kind": "response", @@ -146687,7 +146643,7 @@ } } ], - "specLocation": 
"inference/chat_completion_unified/UnifiedRequest.ts#L121-L137" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L122-L138" }, { "kind": "interface", @@ -146722,7 +146678,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L107-L119" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L108-L120" }, { "kind": "request", @@ -146813,7 +146769,7 @@ } } ], - "specLocation": "inference/completion/CompletionRequest.ts#L25-L62" + "specLocation": "inference/completion/CompletionRequest.ts#L25-L63" }, { "kind": "response", @@ -147007,142 +146963,6 @@ "CommonQueryParameters" ], "body": { -<<<<<<< HEAD -======= - "kind": "properties", - "properties": [ - { - "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.", - "name": "query", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", - "name": "input", - "required": true, - "type": { - "kind": "union_of", - "items": [ - { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - }, - { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ] - } - }, - { - "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskSettings", - "namespace": "inference._types" - } - } - } - ] - }, - "description": "Perform inference on the 
service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.inference" - }, - "path": [ - { - "description": "The type of inference task that the model performs.", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, - { - "description": "The unique identifier for the inference endpoint.", - "name": "inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [ - { - "description": "The amount of time to wait for the inference request to complete.", - "name": "timeout", - "required": false, - "serverDefault": "30s", - "type": { - "kind": "instance_of", - "type": { - "name": "Duration", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/inference/InferenceRequest.ts#L26-L89" - }, - { - "kind": "response", - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceResult", 
- "namespace": "inference._types" - } - } - }, - "name": { - "name": "Response", - "namespace": "inference.inference" - }, - "specLocation": "inference/inference/InferenceResponse.ts#L22-L24" - }, - { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { ->>>>>>> abca59abff0981501ac8c217f0aa3745ce6030a2 "kind": "value", "codegenName": "inference_config", "value": { @@ -147232,7 +147052,7 @@ } }, { - "description": "Inference input.\nEither a string or an array of strings.", + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", "name": "input", "required": true, "type": { @@ -147259,7 +147079,7 @@ } }, { - "description": "Optional task settings", + "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.", "name": "task_settings", "required": false, "type": { @@ -147285,7 +147105,7 @@ }, "path": [ { - "description": "The inference Id", + "description": "The unique identifier for the inference endpoint.", "name": "inference_id", "required": true, "type": { @@ -147299,7 +147119,7 @@ ], "query": [ { - "description": "Specifies the amount of time to wait for the inference request to complete.", + "description": "The amount of time to wait for the inference request to complete.", "name": "timeout", "required": false, "serverDefault": "30s", @@ -147312,7 +147132,7 @@ } } ], - "specLocation": "inference/rerank/RerankRequest.ts#L25-L66" + "specLocation": "inference/rerank/RerankRequest.ts#L25-L72" }, { "kind": "response", @@ -147421,7 +147241,7 @@ } } ], - "specLocation": "inference/sparse_embedding/SparseEmbeddingRequest.ts#L25-L62" + "specLocation": "inference/sparse_embedding/SparseEmbeddingRequest.ts#L25-L63" }, { "kind": 
"response", @@ -147625,7 +147445,7 @@ } } ], - "specLocation": "inference/text_embedding/TextEmbeddingRequest.ts#L25-L62" + "specLocation": "inference/text_embedding/TextEmbeddingRequest.ts#L25-L63" }, { "kind": "response", diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index d1e7acb909..eca3a30e82 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -311,6 +311,7 @@ inference-api-get,https://www.elastic.co/guide/en/elasticsearch/reference/{branc inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html inference-api-stream,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html +inference-api-chat-completion,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/chat-completion-inference-api.html inference-api-update,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/update-inference-api.html inference-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-processor.html info-api,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/info-api.html diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts index aa275c6903..84fba28208 100644 --- a/specification/inference/chat_completion_unified/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -28,6 +28,7 @@ import { Duration } from '@_types/Time' * @rest_spec_name inference.chat_completion_unified * @availability stack since=8.18.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public + * @doc_id inference-api-chat-completion */ export interface Request extends RequestBase { urls: [ diff --git 
a/specification/inference/completion/CompletionRequest.ts b/specification/inference/completion/CompletionRequest.ts index f0b5e7f680..cbcf938a15 100644 --- a/specification/inference/completion/CompletionRequest.ts +++ b/specification/inference/completion/CompletionRequest.ts @@ -27,6 +27,7 @@ import { Duration } from '@_types/Time' * @rest_spec_name inference.completion * @availability stack since=8.11.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public + * @doc_id inference-api-post */ export interface Request extends RequestBase { urls: [ diff --git a/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts b/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts index 3986e02959..90ce321816 100644 --- a/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts +++ b/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts @@ -27,6 +27,7 @@ import { Duration } from '@_types/Time' * @rest_spec_name inference.sparse_embedding * @availability stack since=8.11.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public + * @doc_id inference-api-post */ export interface Request extends RequestBase { urls: [ diff --git a/specification/inference/text_embedding/TextEmbeddingRequest.ts b/specification/inference/text_embedding/TextEmbeddingRequest.ts index ded1c5da4f..f707cb997e 100644 --- a/specification/inference/text_embedding/TextEmbeddingRequest.ts +++ b/specification/inference/text_embedding/TextEmbeddingRequest.ts @@ -27,6 +27,7 @@ import { Duration } from '@_types/Time' * @rest_spec_name inference.text_embedding * @availability stack since=8.11.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public + * @doc_id inference-api-post */ export interface Request extends RequestBase { urls: [ From 02219ba5159b38f6b3a38320fb7c4d01091caae4 Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Tue, 28 Jan 
2025 14:29:46 -0500 Subject: [PATCH 5/5] Renaming to match filename --- ...tion_inference.json => inference.chat_completion_unified.json} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename specification/_json_spec/{inference.chat_completion_inference.json => inference.chat_completion_unified.json} (100%) diff --git a/specification/_json_spec/inference.chat_completion_inference.json b/specification/_json_spec/inference.chat_completion_unified.json similarity index 100% rename from specification/_json_spec/inference.chat_completion_inference.json rename to specification/_json_spec/inference.chat_completion_unified.json