From 64d7de01b853e9808365d486eaa6dbb3777bb841 Mon Sep 17 00:00:00 2001 From: lcawl Date: Thu, 16 Jan 2025 00:49:46 -0800 Subject: [PATCH] Add reindex examples --- output/openapi/elasticsearch-openapi.json | 68 +++--- .../elasticsearch-serverless-openapi.json | 62 +++-- output/schema/schema.json | 152 ++++++++----- specification/_doc_ids/table.csv | 2 +- .../_global/reindex/ReindexRequest.ts | 213 +++++++++++++++++- .../_global/reindex/ReindexResponse.ts | 47 ++++ .../request/ReindexRequestExample1.yaml | 16 ++ .../request/ReindexRequestExample10.yaml | 12 + .../request/ReindexRequestExample11.yaml | 11 + .../request/ReindexRequestExample12.yaml | 11 + .../request/ReindexRequestExample13.yaml | 11 + .../request/ReindexRequestExample2.yaml | 19 ++ .../request/ReindexRequestExample3.yaml | 9 + .../request/ReindexRequestExample4.yaml | 11 + .../request/ReindexRequestExample5.yaml | 7 + .../request/ReindexRequestExample6.yaml | 10 + .../request/ReindexRequestExample7.yaml | 9 + .../request/ReindexRequestExample8.yaml | 10 + .../request/ReindexRequestExample9.yaml | 10 + specification/_global/reindex/types.ts | 37 ++- .../ReindexRethrottleRequest.ts | 13 +- specification/_global/update/UpdateRequest.ts | 79 +++++-- .../request/UpdateRequestExample1.yaml | 8 + .../request/UpdateRequestExample10.yaml | 11 + .../request/UpdateRequestExample11.yaml | 9 + .../request/UpdateRequestExample2.yaml | 10 + .../request/UpdateRequestExample3.yaml | 12 + .../request/UpdateRequestExample4.yaml | 6 + .../request/UpdateRequestExample5.yaml | 6 + .../request/UpdateRequestExample6.yaml | 6 + .../request/UpdateRequestExample7.yaml | 10 + .../request/UpdateRequestExample8.yaml | 6 + .../request/UpdateRequestExample9.yaml | 10 + .../response/UpdateResponseExample1.yaml | 10 + specification/_types/Retries.ts | 6 + specification/_types/Scripting.ts | 2 +- 36 files changed, 778 insertions(+), 153 deletions(-) create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample1.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample10.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample11.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample12.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample13.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample2.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample3.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample4.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample5.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample6.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample7.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample8.yaml create mode 100644 specification/_global/reindex/examples/request/ReindexRequestExample9.yaml create mode 100644 specification/_global/update/examples/request/UpdateRequestExample1.yaml create mode 100644 specification/_global/update/examples/request/UpdateRequestExample10.yaml create mode 100644 specification/_global/update/examples/request/UpdateRequestExample11.yaml create mode 100644 specification/_global/update/examples/request/UpdateRequestExample2.yaml create mode 100644 
specification/_global/update/examples/request/UpdateRequestExample3.yaml create mode 100644 specification/_global/update/examples/request/UpdateRequestExample4.yaml create mode 100644 specification/_global/update/examples/request/UpdateRequestExample5.yaml create mode 100644 specification/_global/update/examples/request/UpdateRequestExample6.yaml create mode 100644 specification/_global/update/examples/request/UpdateRequestExample7.yaml create mode 100644 specification/_global/update/examples/request/UpdateRequestExample8.yaml create mode 100644 specification/_global/update/examples/request/UpdateRequestExample9.yaml create mode 100644 specification/_global/update/examples/response/UpdateResponseExample1.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 142e57dd19..c133843dd0 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -26085,7 +26085,7 @@ "document" ], "summary": "Reindex documents", - "description": "Copies documents from a source to a destination. The source can be any existing index, alias, or data stream. The destination must differ from the source. For example, you cannot reindex a data stream into itself.", + "description": "Copy documents from a source to a destination.\nYou can copy all documents to the destination index or reindex a subset of the documents.\nThe source can be any existing index, alias, or data stream.\nThe destination must differ from the source.\nFor example, you cannot reindex a data stream into itself.\n\nIMPORTANT: Reindex requires `_source` to be enabled for all documents in the source.\nThe destination should be configured as wanted before calling the reindex API.\nReindex does not copy the settings from the source or its associated template.\nMappings, shard counts, and replicas, for example, must be configured ahead of time.\n\nIf the Elasticsearch security features are enabled, you must have the following security privileges:\n\n* The `read` index privilege for the source data stream, index, or alias.\n* The `write` index privilege for the destination data stream, index, or index alias.\n* To automatically create a data stream or index with a reindex API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege for the destination data stream, index, or alias.\n* If reindexing from a remote cluster, the `source.remote.user` must have the `monitor` cluster privilege and the `read` index privilege for the source data stream, index, or alias.\n\nIf reindexing from a remote cluster, you must explicitly allow the remote host in the `reindex.remote.whitelist` setting.\nAutomatic data stream creation requires a matching index template with data stream enabled.\n\nThe `dest` element can be configured like the index API to control optimistic concurrency control.\nOmitting `version_type` or setting it to `internal` causes Elasticsearch to blindly dump documents into the destination, overwriting any that happen to have the same ID.\n\nSetting `version_type` to `external` causes Elasticsearch to preserve the `version` from the source, create any documents that are missing, and update any documents that have an older version in the destination than they do in the source.\n\nSetting `op_type` to `create` causes the reindex API to create only missing documents in the destination.\nAll existing documents will cause a version conflict.\n\nIMPORTANT: Because data streams are append-only, any reindex request to a 
destination data stream must have an `op_type` of `create`.\nA reindex can only add new documents to a destination data stream.\nIt cannot update existing documents in a destination data stream.\n\nBy default, version conflicts abort the reindex process.\nTo continue reindexing if there are conflicts, set the `conflicts` request body property to `proceed`.\nIn this case, the response includes a count of the version conflicts that were encountered.\nNote that the handling of other error types is unaffected by the `conflicts` property.\nAdditionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query.\n\nNOTE: The reindex API makes no effort to handle ID collisions.\nThe last document written will \"win\" but the order isn't usually predictable so it is not a good idea to rely on this behavior.\nInstead, make sure that IDs are unique by using a script.\n\n**Running reindex asynchronously**\n\nIf the request contains `wait_for_completion=false`, Elasticsearch performs some preflight checks, launches the request, and returns a task you can use to cancel or get the status of the task.\nElasticsearch creates a record of this task as a document at `_tasks/`.\n\n**Reindex from multiple sources**\n\nIf you have many sources to reindex, it is generally better to reindex them one at a time rather than using a glob pattern to pick up multiple sources.\nThat way you can resume the process if there are any errors by removing the partially completed source and starting over.\nIt also makes parallelizing the process fairly simple: split the list of sources to reindex and run each list in parallel.\n\nFor example, you can use a bash script like this:\n\n```\nfor index in i1 i2 i3 i4 i5; do\n curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{\n \"source\": {\n \"index\": \"'$index'\"\n },\n \"dest\": {\n \"index\": \"'$index'-reindexed\"\n }\n }'\ndone\n```\n\n**Throttling**\n\nSet `requests_per_second` to any positive decimal number (`1.4`, `6`, `1000`, for example) to throttle the rate at which reindex issues batches of index operations.\nRequests are throttled by padding each batch with a wait time.\nTo turn off throttling, set `requests_per_second` to `-1`.\n\nThe throttling is done by waiting between batches so that the scroll that reindex uses internally can be given a timeout that takes into account the padding.\nThe padding time is the difference between the batch size divided by the `requests_per_second` and the time spent writing.\nBy default, the batch size is `1000`, so if `requests_per_second` is set to `500`:\n\n```\ntarget_time = 1000 / 500 per second = 2 seconds\nwait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds\n```\n\nSince the batch is issued as a single bulk request, large batch sizes cause Elasticsearch to create many requests and then wait for a while before starting the next set.\nThis is \"bursty\" instead of \"smooth\".\n\n**Slicing**\n\nReindex supports sliced scroll to parallelize the reindexing process.\nThis parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.\n\nNOTE: Reindexing from remote clusters does not support manual or automatic slicing.\n\nYou can slice a reindex request manually by providing a slice ID and total number of slices to each request.\nYou can 
also let reindex automatically parallelize by using sliced scroll to slice on `_id`.\nThe `slices` parameter specifies the number of slices to use.\n\nAdding `slices` to the reindex request just automates the manual process, creating sub-requests which means it has some quirks:\n\n* You can see these requests in the tasks API. These sub-requests are \"child\" tasks of the task for the request with slices.\n* Fetching the status of the task for the request with `slices` only contains the status of completed slices.\n* These sub-requests are individually addressable for things like cancellation and rethrottling.\n* Rethrottling the request with `slices` will rethrottle the unfinished sub-request proportionally.\n* Canceling the request with `slices` will cancel each sub-request.\n* Due to the nature of `slices`, each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution.\n* Parameters like `requests_per_second` and `max_docs` on a request with `slices` are distributed proportionally to each sub-request. Combine that with the previous point about distribution being uneven and you should conclude that using `max_docs` with `slices` might not result in exactly `max_docs` documents being reindexed.\n* Each sub-request gets a slightly different snapshot of the source, though these are all taken at approximately the same time.\n\nIf slicing automatically, setting `slices` to `auto` will choose a reasonable number for most indices.\nIf slicing manually or otherwise tuning automatic slicing, use the following guidelines.\n\nQuery performance is most efficient when the number of slices is equal to the number of shards in the index.\nIf that number is large (for example, `500`), choose a lower number as too many slices will hurt performance.\nSetting slices higher than the number of shards generally does not improve efficiency and adds overhead.\n\nIndexing performance scales linearly across available resources with the number of slices.\n\nWhether query or indexing performance dominates the runtime depends on the documents being reindexed and cluster resources.\n\n**Modify documents during reindexing**\n\nLike `_update_by_query`, reindex operations support a script that modifies the document.\nUnlike `_update_by_query`, the script is allowed to modify the document's metadata.\n\nJust as in `_update_by_query`, you can set `ctx.op` to change the operation that is run on the destination.\nFor example, set `ctx.op` to `noop` if your script decides that the document doesn’t have to be indexed in the destination. This \"no operation\" will be reported in the `noop` counter in the response body.\nSet `ctx.op` to `delete` if your script decides that the document must be deleted from the destination.\nThe deletion will be reported in the `deleted` counter in the response body.\nSetting `ctx.op` to anything else will return an error, as will setting any other field in `ctx`.\n\nThink of the possibilities! 
Just be careful; you are able to change:\n\n* `_id`\n* `_index`\n* `_version`\n* `_routing`\n\nSetting `_version` to `null` or clearing it from the `ctx` map is just like not sending the version in an indexing request.\nIt will cause the document to be overwritten in the destination regardless of the version on the target or the version type you use in the reindex API.\n\n**Reindex from remote**\n\nReindex supports reindexing from a remote Elasticsearch cluster.\nThe `host` parameter must contain a scheme, host, port, and optional path.\nThe `username` and `password` parameters are optional and, when they are present, the reindex operation will connect to the remote Elasticsearch node using basic authentication.\nBe sure to use HTTPS when using basic authentication, or the password will be sent in plain text.\nThere is a range of settings available to configure the behavior of the HTTPS connection.\n\nWhen using Elastic Cloud, it is also possible to authenticate against the remote cluster through the use of a valid API key.\nRemote hosts must be explicitly allowed with the `reindex.remote.whitelist` setting.\nIt can be set to a comma-delimited list of allowed remote host and port combinations.\nScheme is ignored; only the host and port are used.\nFor example:\n\n```\nreindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*]\n```\n\nThe list of allowed hosts must be configured on any nodes that will coordinate the reindex.\nThis feature should work with remote clusters of any version of Elasticsearch.\nThis should enable you to upgrade from any version of Elasticsearch to the current version by reindexing from a cluster of the old version.\n\nWARNING: Elasticsearch does not support forward compatibility across major versions.\nFor example, you cannot reindex from a 7.x cluster into a 6.x cluster.\n\nTo enable queries sent to older versions of Elasticsearch, the `query` parameter is sent directly to the remote host without validation or modification.\n\nNOTE: Reindexing from remote clusters does not support manual or automatic slicing.\n\nReindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100 MB.\nIf the remote index includes very large documents, you'll need to use a smaller batch size.\nIt is also possible to set the socket read timeout on the remote connection with the `socket_timeout` field and the connection timeout with the `connect_timeout` field.\nBoth default to 30 seconds.\n\n**Configuring SSL parameters**\n\nReindex from remote supports configurable SSL settings.\nThese must be specified in the `elasticsearch.yml` file, with the exception of the secure settings, which you add in the Elasticsearch keystore.\nIt is not possible to configure SSL in the body of the reindex request.", "operationId": "reindex", "parameters": [ { @@ -26101,7 +26101,7 @@ { "in": "query", "name": "requests_per_second", - "description": "The throttle for this request in sub-requests per second.\nDefaults to no throttle.", + "description": "The throttle for this request in sub-requests per second.\nBy default, there is no throttle.", "deprecated": false, "schema": { "type": "number" @@ -26111,7 +26111,7 @@ { "in": "query", "name": "scroll", - "description": "Specifies how long a consistent view of the index should be maintained for scrolled search.", + "description": "The period of time that a consistent view of the index should be maintained for scrolled search.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Duration" 
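For orientation while reviewing these long descriptions, here is a minimal request sketch that combines several of the options they document (`conflicts`, `max_docs`, and `dest.op_type`); the index names are hypothetical, and the curl style follows the bash example embedded in the description above:

```
# Hedged sketch: copy at most 1000 docs into a hypothetical destination index,
# creating only documents that are missing and counting version conflicts
# instead of aborting on them.
curl -HContent-Type:application/json -XPOST 'localhost:9200/_reindex?pretty' -d'{
  "conflicts": "proceed",
  "max_docs": 1000,
  "source": {
    "index": "my-index-000001"
  },
  "dest": {
    "index": "my-new-index-000001",
    "op_type": "create"
  }
}'
```

With `op_type` set to `create`, only documents missing from the destination are created; combined with `conflicts` set to `proceed`, version conflicts on existing documents are counted in the response rather than ending the run.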
@@ -26121,7 +26121,7 @@ { "in": "query", "name": "slices", - "description": "The number of slices this task should be divided into.\nDefaults to 1 slice, meaning the task isn’t sliced into subtasks.", + "description": "The number of slices this task should be divided into.\nIt defaults to one slice, which means the task isn't sliced into subtasks.\n\nReindex supports sliced scroll to parallelize the reindexing process.\nThis parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.\n\nNOTE: Reindexing from remote clusters does not support manual or automatic slicing.\n\nIf set to `auto`, Elasticsearch chooses the number of slices to use.\nThis setting will use one slice per shard, up to a certain limit.\nIf there are multiple sources, it will choose the number of slices based on the index or backing index with the smallest number of shards.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Slices" @@ -26131,7 +26131,7 @@ { "in": "query", "name": "timeout", - "description": "Period each indexing waits for automatic index creation, dynamic mapping updates, and waiting for active shards.", + "description": "The period that each indexing operation waits for automatic index creation, dynamic mapping updates, and active shards.\nBy default, Elasticsearch waits for at least one minute before failing.\nThe actual wait time could be longer, particularly when multiple waits occur.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Duration" @@ -26141,7 +26141,7 @@ { "in": "query", "name": "wait_for_active_shards", - "description": "The number of shard copies that must be active before proceeding with the operation.\nSet to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`).", + "description": "The number of shard copies that must be active before proceeding with the operation.\nSet it to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`).\nThe default value is one, which means it waits for each primary shard to be active.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:WaitForActiveShards" @@ -26182,7 +26182,7 @@ "$ref": "#/components/schemas/_global.reindex:Destination" }, "max_docs": { - "description": "The maximum number of documents to reindex.", + "description": "The maximum number of documents to reindex.\nBy default, all documents are reindexed.\nIf it is a value less than or equal to `scroll_size`, a scroll will not be used to retrieve the results for the operation.\n\nIf `conflicts` is set to `proceed`, the reindex operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query.", "type": "number" }, "script": { @@ -26213,27 +26213,33 @@ "type": "object", "properties": { "batches": { + "description": "The number of scroll responses that were pulled back by the reindex.", "type": "number" }, "created": { + "description": "The number of documents that were successfully created.", "type": "number" }, "deleted": { + "description": "The number of documents that were successfully deleted.", "type": "number" }, "failures": { + "description": "If there were any unrecoverable errors during the process, it is an array of those failures.\nIf this array is not empty, the request ended because of those failures.\nReindex is implemented using 
batches and any failure causes the entire process to end but all failures in the current batch are collected into the array.\nYou can use the `conflicts` option to prevent the reindex from ending on version conflicts.", "type": "array", "items": { "$ref": "#/components/schemas/_types:BulkIndexByScrollFailure" } }, "noops": { + "description": "The number of documents that were ignored because the script used for the reindex returned a `noop` value for `ctx.op`.", "type": "number" }, "retries": { "$ref": "#/components/schemas/_types:Retries" }, "requests_per_second": { + "description": "The number of requests per second effectively run during the reindex.", "type": "number" }, "slice_id": { @@ -26249,18 +26255,22 @@ "$ref": "#/components/schemas/_types:EpochTimeUnitMillis" }, "timed_out": { + "description": "If any of the requests that ran during the reindex timed out, it is `true`.", "type": "boolean" }, "took": { "$ref": "#/components/schemas/_types:DurationValueUnitMillis" }, "total": { + "description": "The number of documents that were successfully processed.", "type": "number" }, "updated": { + "description": "The number of documents that were successfully updated.\nThat is to say, a document with the same ID already existed before the reindex updated it.", "type": "number" }, "version_conflicts": { + "description": "The number of version conflicts that occurred.", "type": "number" } } @@ -26278,13 +26288,13 @@ "document" ], "summary": "Throttle a reindex operation", - "description": "Change the number of requests per second for a particular reindex operation.", + "description": "Change the number of requests per second for a particular reindex operation.\nFor example:\n\n```\nPOST _reindex/r1A2WoRbTwKZ516z6NEs5A:36619/_rethrottle?requests_per_second=-1\n```\n\nRethrottling that speeds up the query takes effect immediately.\nRethrottling that slows down the query will take effect after completing the current batch.\nThis behavior prevents scroll timeouts.", "operationId": "reindex-rethrottle", "parameters": [ { "in": "path", "name": "task_id", - "description": "Identifier for the task.", + "description": "The task identifier, which can be found by using the tasks API.", "required": true, "deprecated": false, "schema": { @@ -26295,7 +26305,7 @@ { "in": "query", "name": "requests_per_second", - "description": "The throttle for this request in sub-requests per second.", + "description": "The throttle for this request in sub-requests per second.\nIt can be either `-1` to turn off throttling or any decimal number like `1.7` or `12` to throttle to that level.", "deprecated": false, "schema": { "type": "number" @@ -37807,13 +37817,13 @@ "document" ], "summary": "Update a document", - "description": "Updates a document by running a script or passing a partial document.", + "description": "Update a document by running a script or passing a partial document.\n\nIf the Elasticsearch security features are enabled, you must have the `index` or `write` index privilege for the target index or index alias.\n\nThe script can update, delete, or skip modifying the document.\nThe API also supports passing a partial document, which is merged into the existing document.\nTo fully replace an existing document, use the index API.\nThis operation:\n\n* Gets the document (collocated with the shard) from the index.\n* Runs the specified script.\n* Indexes the result.\n\nThe document must still be reindexed, but using this API removes some network roundtrips and reduces chances of version conflicts between the 
GET and the index operation.\n\nThe `_source` field must be enabled to use this API.\nIn addition to `_source`, you can access the following variables through the `ctx` map: `_index`, `_type`, `_id`, `_version`, `_routing`, and `_now` (the current timestamp).", "operationId": "update", "parameters": [ { "in": "path", "name": "index", - "description": "The name of the index", + "description": "The name of the target index.\nBy default, the index is created automatically if it doesn't exist.", "required": true, "deprecated": false, "schema": { @@ -37824,7 +37834,7 @@ { "in": "path", "name": "id", - "description": "Document ID", + "description": "A unique identifier for the document to be updated.", "required": true, "deprecated": false, "schema": { @@ -37865,7 +37875,7 @@ { "in": "query", "name": "refresh", - "description": "If 'true', Elasticsearch refreshes the affected shards to make this operation\nvisible to search, if 'wait_for' then wait for a refresh to make this operation\nvisible to search, if 'false' do nothing with refreshes.", + "description": "If 'true', Elasticsearch refreshes the affected shards to make this operation visible to search.\nIf 'wait_for', it waits for a refresh to make this operation visible to search.\nIf 'false', it does nothing with refreshes.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Refresh" @@ -37875,7 +37885,7 @@ { "in": "query", "name": "require_alias", - "description": "If true, the destination must be an index alias.", + "description": "If `true`, the destination must be an index alias.", "deprecated": false, "schema": { "type": "boolean" @@ -37885,7 +37895,7 @@ { "in": "query", "name": "retry_on_conflict", - "description": "Specify how many times should the operation be retried when a conflict occurs.", + "description": "The number of times the operation should be retried when a conflict occurs.", "deprecated": false, "schema": { "type": "number" @@ -37895,7 +37905,7 @@ { "in": "query", "name": "routing", - "description": "Custom value used to route operations to a specific shard.", + "description": "A custom value used to route operations to a specific shard.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Routing" @@ -37905,7 +37915,7 @@ { "in": "query", "name": "timeout", - "description": "Period to wait for dynamic mapping updates and active shards.\nThis guarantees Elasticsearch waits for at least the timeout before failing.\nThe actual wait time could be longer, particularly when multiple waits occur.", + "description": "The period to wait for the following operations: dynamic mapping updates and waiting for active shards.\nElasticsearch waits for at least the timeout period before failing.\nThe actual wait time could be longer, particularly when multiple waits occur.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Duration" @@ -37915,7 +37925,7 @@ { "in": "query", "name": "wait_for_active_shards", - "description": "The number of shard copies that must be active before proceeding with the operations.\nSet to 'all' or any positive integer up to the total number of shards in the index\n(number_of_replicas+1). 
Defaults to 1 meaning the primary shard.", + "description": "The number of copies of each shard that must be active before proceeding with the operation.\nSet to 'all' or any positive integer up to the total number of shards in the index (`number_of_replicas`+1).\nThe default value of `1` means it waits for each primary shard to be active.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:WaitForActiveShards" @@ -37925,7 +37935,7 @@ { "in": "query", "name": "_source", - "description": "Set to false to disable source retrieval. You can also specify a comma-separated\nlist of the fields you want to retrieve.", + "description": "If `false`, source retrieval is turned off.\nYou can also specify a comma-separated list of the fields you want to retrieve.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_global.search._types:SourceConfigParam" @@ -37935,7 +37945,7 @@ { "in": "query", "name": "_source_excludes", - "description": "Specify the source fields you want to exclude.", + "description": "The source fields you want to exclude.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Fields" @@ -37945,7 +37955,7 @@ { "in": "query", "name": "_source_includes", - "description": "Specify the source fields you want to retrieve.", + "description": "The source fields you want to retrieve.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Fields" @@ -37960,29 +37970,29 @@ "type": "object", "properties": { "detect_noop": { - "description": "Set to false to disable setting 'result' in the response\nto 'noop' if no change to the document occurred.", + "description": "If `true`, the `result` in the response is set to `noop` (no operation) when there are no changes to the document.", "type": "boolean" }, "doc": { - "description": "A partial update to an existing document.", + "description": "A partial update to an existing document.\nIf both `doc` and `script` are specified, `doc` is ignored.", "type": "object" }, "doc_as_upsert": { - "description": "Set to true to use the contents of 'doc' as the value of 'upsert'", + "description": "If `true`, use the contents of 'doc' as the value of 'upsert'.\nNOTE: Using ingest pipelines with `doc_as_upsert` is not supported.", "type": "boolean" }, "script": { "$ref": "#/components/schemas/_types:Script" }, "scripted_upsert": { - "description": "Set to true to execute the script whether or not the document exists.", + "description": "If `true`, run the script whether or not the document exists.", "type": "boolean" }, "_source": { "$ref": "#/components/schemas/_global.search._types:SourceConfig" }, "upsert": { - "description": "If the document does not already exist, the contents of 'upsert' are inserted as a\nnew document. 
If the document exists, the 'script' is executed.", + "description": "If the document does not already exist, the contents of 'upsert' are inserted as a new document.\nIf the document exists, the 'script' is run.", "type": "object" } } @@ -68001,9 +68011,11 @@ "type": "object", "properties": { "bulk": { + "description": "The number of bulk actions retried.", "type": "number" }, "search": { + "description": "The number of search actions retried.", "type": "number" } }, @@ -84517,7 +84529,7 @@ "$ref": "#/components/schemas/_global.reindex:RemoteSource" }, "size": { - "description": "The number of documents to index per batch.\nUse when indexing from remote to ensure that the batches fit within the on-heap buffer, which defaults to a maximum size of 100 MB.", + "description": "The number of documents to index per batch.\nUse it when you are indexing from remote to ensure that the batches fit within the on-heap buffer, which defaults to a maximum size of 100 MB.", "type": "number" }, "slice": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 22ad0f5e13..862a05583f 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -15400,7 +15400,7 @@ "document" ], "summary": "Reindex documents", - "description": "Copies documents from a source to a destination. The source can be any existing index, alias, or data stream. The destination must differ from the source. For example, you cannot reindex a data stream into itself.", + "description": "Copy documents from a source to a destination.\nYou can copy all documents to the destination index or reindex a subset of the documents.\nThe source can be any existing index, alias, or data stream.\nThe destination must differ from the source.\nFor example, you cannot reindex a data stream into itself.\n\nIMPORTANT: Reindex requires `_source` to be enabled for all documents in the source.\nThe destination should be configured as wanted before calling the reindex API.\nReindex does not copy the settings from the source or its associated template.\nMappings, shard counts, and replicas, for example, must be configured ahead of time.\n\nIf the Elasticsearch security features are enabled, you must have the following security privileges:\n\n* The `read` index privilege for the source data stream, index, or alias.\n* The `write` index privilege for the destination data stream, index, or index alias.\n* To automatically create a data stream or index with a reindex API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege for the destination data stream, index, or alias.\n* If reindexing from a remote cluster, the `source.remote.user` must have the `monitor` cluster privilege and the `read` index privilege for the source data stream, index, or alias.\n\nIf reindexing from a remote cluster, you must explicitly allow the remote host in the `reindex.remote.whitelist` setting.\nAutomatic data stream creation requires a matching index template with data stream enabled.\n\nThe `dest` element can be configured like the index API to control optimistic concurrency control.\nOmitting `version_type` or setting it to `internal` causes Elasticsearch to blindly dump documents into the destination, overwriting any that happen to have the same ID.\n\nSetting `version_type` to `external` causes Elasticsearch to preserve the `version` from the source, create any documents that are missing, and update any 
documents that have an older version in the destination than they do in the source.\n\nSetting `op_type` to `create` causes the reindex API to create only missing documents in the destination.\nAll existing documents will cause a version conflict.\n\nIMPORTANT: Because data streams are append-only, any reindex request to a destination data stream must have an `op_type` of `create`.\nA reindex can only add new documents to a destination data stream.\nIt cannot update existing documents in a destination data stream.\n\nBy default, version conflicts abort the reindex process.\nTo continue reindexing if there are conflicts, set the `conflicts` request body property to `proceed`.\nIn this case, the response includes a count of the version conflicts that were encountered.\nNote that the handling of other error types is unaffected by the `conflicts` property.\nAdditionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query.\n\nNOTE: The reindex API makes no effort to handle ID collisions.\nThe last document written will \"win\" but the order isn't usually predictable so it is not a good idea to rely on this behavior.\nInstead, make sure that IDs are unique by using a script.\n\n**Running reindex asynchronously**\n\nIf the request contains `wait_for_completion=false`, Elasticsearch performs some preflight checks, launches the request, and returns a task you can use to cancel or get the status of the task.\nElasticsearch creates a record of this task as a document at `_tasks/`.\n\n**Reindex from multiple sources**\n\nIf you have many sources to reindex, it is generally better to reindex them one at a time rather than using a glob pattern to pick up multiple sources.\nThat way you can resume the process if there are any errors by removing the partially completed source and starting over.\nIt also makes parallelizing the process fairly simple: split the list of sources to reindex and run each list in parallel.\n\nFor example, you can use a bash script like this:\n\n```\nfor index in i1 i2 i3 i4 i5; do\n curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{\n \"source\": {\n \"index\": \"'$index'\"\n },\n \"dest\": {\n \"index\": \"'$index'-reindexed\"\n }\n }'\ndone\n```\n\n**Throttling**\n\nSet `requests_per_second` to any positive decimal number (`1.4`, `6`, `1000`, for example) to throttle the rate at which reindex issues batches of index operations.\nRequests are throttled by padding each batch with a wait time.\nTo turn off throttling, set `requests_per_second` to `-1`.\n\nThe throttling is done by waiting between batches so that the scroll that reindex uses internally can be given a timeout that takes into account the padding.\nThe padding time is the difference between the batch size divided by the `requests_per_second` and the time spent writing.\nBy default, the batch size is `1000`, so if `requests_per_second` is set to `500`:\n\n```\ntarget_time = 1000 / 500 per second = 2 seconds\nwait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds\n```\n\nSince the batch is issued as a single bulk request, large batch sizes cause Elasticsearch to create many requests and then wait for a while before starting the next set.\nThis is \"bursty\" instead of \"smooth\".\n\n**Slicing**\n\nReindex supports sliced scroll to parallelize the reindexing process.\nThis 
parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.\n\nNOTE: Reindexing from remote clusters does not support manual or automatic slicing.\n\nYou can slice a reindex request manually by providing a slice ID and total number of slices to each request.\nYou can also let reindex automatically parallelize by using sliced scroll to slice on `_id`.\nThe `slices` parameter specifies the number of slices to use.\n\nAdding `slices` to the reindex request just automates the manual process, creating sub-requests which means it has some quirks:\n\n* You can see these requests in the tasks API. These sub-requests are \"child\" tasks of the task for the request with slices.\n* Fetching the status of the task for the request with `slices` only contains the status of completed slices.\n* These sub-requests are individually addressable for things like cancellation and rethrottling.\n* Rethrottling the request with `slices` will rethrottle the unfinished sub-request proportionally.\n* Canceling the request with `slices` will cancel each sub-request.\n* Due to the nature of `slices`, each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution.\n* Parameters like `requests_per_second` and `max_docs` on a request with `slices` are distributed proportionally to each sub-request. Combine that with the previous point about distribution being uneven and you should conclude that using `max_docs` with `slices` might not result in exactly `max_docs` documents being reindexed.\n* Each sub-request gets a slightly different snapshot of the source, though these are all taken at approximately the same time.\n\nIf slicing automatically, setting `slices` to `auto` will choose a reasonable number for most indices.\nIf slicing manually or otherwise tuning automatic slicing, use the following guidelines.\n\nQuery performance is most efficient when the number of slices is equal to the number of shards in the index.\nIf that number is large (for example, `500`), choose a lower number as too many slices will hurt performance.\nSetting slices higher than the number of shards generally does not improve efficiency and adds overhead.\n\nIndexing performance scales linearly across available resources with the number of slices.\n\nWhether query or indexing performance dominates the runtime depends on the documents being reindexed and cluster resources.\n\n**Modify documents during reindexing**\n\nLike `_update_by_query`, reindex operations support a script that modifies the document.\nUnlike `_update_by_query`, the script is allowed to modify the document's metadata.\n\nJust as in `_update_by_query`, you can set `ctx.op` to change the operation that is run on the destination.\nFor example, set `ctx.op` to `noop` if your script decides that the document doesn’t have to be indexed in the destination. This \"no operation\" will be reported in the `noop` counter in the response body.\nSet `ctx.op` to `delete` if your script decides that the document must be deleted from the destination.\nThe deletion will be reported in the `deleted` counter in the response body.\nSetting `ctx.op` to anything else will return an error, as will setting any other field in `ctx`.\n\nThink of the possibilities! 
Just be careful; you are able to change:\n\n* `_id`\n* `_index`\n* `_version`\n* `_routing`\n\nSetting `_version` to `null` or clearing it from the `ctx` map is just like not sending the version in an indexing request.\nIt will cause the document to be overwritten in the destination regardless of the version on the target or the version type you use in the reindex API.\n\n**Reindex from remote**\n\nReindex supports reindexing from a remote Elasticsearch cluster.\nThe `host` parameter must contain a scheme, host, port, and optional path.\nThe `username` and `password` parameters are optional and, when they are present, the reindex operation will connect to the remote Elasticsearch node using basic authentication.\nBe sure to use HTTPS when using basic authentication, or the password will be sent in plain text.\nThere is a range of settings available to configure the behavior of the HTTPS connection.\n\nWhen using Elastic Cloud, it is also possible to authenticate against the remote cluster through the use of a valid API key.\nRemote hosts must be explicitly allowed with the `reindex.remote.whitelist` setting.\nIt can be set to a comma-delimited list of allowed remote host and port combinations.\nScheme is ignored; only the host and port are used.\nFor example:\n\n```\nreindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*]\n```\n\nThe list of allowed hosts must be configured on any nodes that will coordinate the reindex.\nThis feature should work with remote clusters of any version of Elasticsearch.\nThis should enable you to upgrade from any version of Elasticsearch to the current version by reindexing from a cluster of the old version.\n\nWARNING: Elasticsearch does not support forward compatibility across major versions.\nFor example, you cannot reindex from a 7.x cluster into a 6.x cluster.\n\nTo enable queries sent to older versions of Elasticsearch, the `query` parameter is sent directly to the remote host without validation or modification.\n\nNOTE: Reindexing from remote clusters does not support manual or automatic slicing.\n\nReindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100 MB.\nIf the remote index includes very large documents, you'll need to use a smaller batch size.\nIt is also possible to set the socket read timeout on the remote connection with the `socket_timeout` field and the connection timeout with the `connect_timeout` field.\nBoth default to 30 seconds.\n\n**Configuring SSL parameters**\n\nReindex from remote supports configurable SSL settings.\nThese must be specified in the `elasticsearch.yml` file, with the exception of the secure settings, which you add in the Elasticsearch keystore.\nIt is not possible to configure SSL in the body of the reindex request.", "operationId": "reindex", "parameters": [ { @@ -15416,7 +15416,7 @@ { "in": "query", "name": "requests_per_second", - "description": "The throttle for this request in sub-requests per second.\nDefaults to no throttle.", + "description": "The throttle for this request in sub-requests per second.\nBy default, there is no throttle.", "deprecated": false, "schema": { "type": "number" @@ -15426,7 +15426,7 @@ { "in": "query", "name": "scroll", - "description": "Specifies how long a consistent view of the index should be maintained for scrolled search.", + "description": "The period of time that a consistent view of the index should be maintained for scrolled search.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Duration" 
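As a companion to the reindex-from-remote description above, here is a minimal sketch of how the remote settings compose; the host, credentials, and timeout values are hypothetical (both timeouts default to 30 seconds per the description), and `size` keeps each batch small enough to fit the remote on-heap buffer:

```
# Hedged sketch: pull documents from a remote cluster in small batches.
# The remote host must also be listed in reindex.remote.whitelist on every
# node that can coordinate the reindex, as described above.
curl -HContent-Type:application/json -XPOST 'localhost:9200/_reindex?pretty' -d'{
  "source": {
    "remote": {
      "host": "http://otherhost:9200",
      "username": "user",
      "password": "pass",
      "socket_timeout": "1m",
      "connect_timeout": "10s"
    },
    "index": "my-index-000001",
    "size": 10
  },
  "dest": {
    "index": "my-new-index-000001"
  }
}'
```

Note that the `query` in such a request is sent to the remote host without validation or modification, which is what allows reindexing from older Elasticsearch versions.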
@@ -15436,7 +15436,7 @@ { "in": "query", "name": "slices", - "description": "The number of slices this task should be divided into.\nDefaults to 1 slice, meaning the task isn’t sliced into subtasks.", + "description": "The number of slices this task should be divided into.\nIt defaults to one slice, which means the task isn't sliced into subtasks.\n\nReindex supports sliced scroll to parallelize the reindexing process.\nThis parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.\n\nNOTE: Reindexing from remote clusters does not support manual or automatic slicing.\n\nIf set to `auto`, Elasticsearch chooses the number of slices to use.\nThis setting will use one slice per shard, up to a certain limit.\nIf there are multiple sources, it will choose the number of slices based on the index or backing index with the smallest number of shards.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Slices" @@ -15446,7 +15446,7 @@ { "in": "query", "name": "timeout", - "description": "Period each indexing waits for automatic index creation, dynamic mapping updates, and waiting for active shards.", + "description": "The period that each indexing operation waits for automatic index creation, dynamic mapping updates, and active shards.\nBy default, Elasticsearch waits for at least one minute before failing.\nThe actual wait time could be longer, particularly when multiple waits occur.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Duration" @@ -15456,7 +15456,7 @@ { "in": "query", "name": "wait_for_active_shards", - "description": "The number of shard copies that must be active before proceeding with the operation.\nSet to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`).", + "description": "The number of shard copies that must be active before proceeding with the operation.\nSet it to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`).\nThe default value is one, which means it waits for each primary shard to be active.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:WaitForActiveShards" @@ -15497,7 +15497,7 @@ "$ref": "#/components/schemas/_global.reindex:Destination" }, "max_docs": { - "description": "The maximum number of documents to reindex.", + "description": "The maximum number of documents to reindex.\nBy default, all documents are reindexed.\nIf it is a value less than or equal to `scroll_size`, a scroll will not be used to retrieve the results for the operation.\n\nIf `conflicts` is set to `proceed`, the reindex operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query.", "type": "number" }, "script": { @@ -15528,27 +15528,33 @@ "type": "object", "properties": { "batches": { + "description": "The number of scroll responses that were pulled back by the reindex.", "type": "number" }, "created": { + "description": "The number of documents that were successfully created.", "type": "number" }, "deleted": { + "description": "The number of documents that were successfully deleted.", "type": "number" }, "failures": { + "description": "If there were any unrecoverable errors during the process, it is an array of those failures.\nIf this array is not empty, the request ended because of those failures.\nReindex is implemented using 
batches and any failure causes the entire process to end but all failures in the current batch are collected into the array.\nYou can use the `conflicts` option to prevent the reindex from ending on version conflicts.", "type": "array", "items": { "$ref": "#/components/schemas/_types:BulkIndexByScrollFailure" } }, "noops": { + "description": "The number of documents that were ignored because the script used for the reindex returned a `noop` value for `ctx.op`.", "type": "number" }, "retries": { "$ref": "#/components/schemas/_types:Retries" }, "requests_per_second": { + "description": "The number of requests per second effectively run during the reindex.", "type": "number" }, "slice_id": { @@ -15564,18 +15570,22 @@ "$ref": "#/components/schemas/_types:EpochTimeUnitMillis" }, "timed_out": { + "description": "If any of the requests that ran during the reindex timed out, it is `true`.", "type": "boolean" }, "took": { "$ref": "#/components/schemas/_types:DurationValueUnitMillis" }, "total": { + "description": "The number of documents that were successfully processed.", "type": "number" }, "updated": { + "description": "The number of documents that were successfully updated.\nThat is to say, a document with the same ID already existed before the reindex updated it.", "type": "number" }, "version_conflicts": { + "description": "The number of version conflicts that occurred.", "type": "number" } } @@ -19699,13 +19709,13 @@ "document" ], "summary": "Update a document", - "description": "Updates a document by running a script or passing a partial document.", + "description": "Update a document by running a script or passing a partial document.\n\nIf the Elasticsearch security features are enabled, you must have the `index` or `write` index privilege for the target index or index alias.\n\nThe script can update, delete, or skip modifying the document.\nThe API also supports passing a partial document, which is merged into the existing document.\nTo fully replace an existing document, use the index API.\nThis operation:\n\n* Gets the document (collocated with the shard) from the index.\n* Runs the specified script.\n* Indexes the result.\n\nThe document must still be reindexed, but using this API removes some network roundtrips and reduces chances of version conflicts between the GET and the index operation.\n\nThe `_source` field must be enabled to use this API.\nIn addition to `_source`, you can access the following variables through the `ctx` map: `_index`, `_type`, `_id`, `_version`, `_routing`, and `_now` (the current timestamp).", "operationId": "update", "parameters": [ { "in": "path", "name": "index", - "description": "The name of the index", + "description": "The name of the target index.\nBy default, the index is created automatically if it doesn't exist.", "required": true, "deprecated": false, "schema": { @@ -19716,7 +19726,7 @@ { "in": "path", "name": "id", - "description": "Document ID", + "description": "A unique identifier for the document to be updated.", "required": true, "deprecated": false, "schema": { @@ -19757,7 +19767,7 @@ { "in": "query", "name": "refresh", - "description": "If 'true', Elasticsearch refreshes the affected shards to make this operation\nvisible to search, if 'wait_for' then wait for a refresh to make this operation\nvisible to search, if 'false' do nothing with refreshes.", + "description": "If 'true', Elasticsearch refreshes the affected shards to make this operation visible to search.\nIf 'wait_for', it waits for a refresh to make this operation visible to 
search.\nIf 'false', it does nothing with refreshes.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Refresh" @@ -19767,7 +19777,7 @@ { "in": "query", "name": "require_alias", - "description": "If true, the destination must be an index alias.", + "description": "If `true`, the destination must be an index alias.", "deprecated": false, "schema": { "type": "boolean" @@ -19777,7 +19787,7 @@ { "in": "query", "name": "retry_on_conflict", - "description": "Specify how many times should the operation be retried when a conflict occurs.", + "description": "The number of times the operation should be retried when a conflict occurs.", "deprecated": false, "schema": { "type": "number" @@ -19787,7 +19797,7 @@ { "in": "query", "name": "routing", - "description": "Custom value used to route operations to a specific shard.", + "description": "A custom value used to route operations to a specific shard.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Routing" @@ -19797,7 +19807,7 @@ { "in": "query", "name": "timeout", - "description": "Period to wait for dynamic mapping updates and active shards.\nThis guarantees Elasticsearch waits for at least the timeout before failing.\nThe actual wait time could be longer, particularly when multiple waits occur.", + "description": "The period to wait for the following operations: dynamic mapping updates and waiting for active shards.\nElasticsearch waits for at least the timeout period before failing.\nThe actual wait time could be longer, particularly when multiple waits occur.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Duration" @@ -19807,7 +19817,7 @@ { "in": "query", "name": "wait_for_active_shards", - "description": "The number of shard copies that must be active before proceeding with the operations.\nSet to 'all' or any positive integer up to the total number of shards in the index\n(number_of_replicas+1). Defaults to 1 meaning the primary shard.", + "description": "The number of copies of each shard that must be active before proceeding with the operation.\nSet to 'all' or any positive integer up to the total number of shards in the index (`number_of_replicas`+1).\nThe default value of `1` means it waits for each primary shard to be active.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:WaitForActiveShards" @@ -19817,7 +19827,7 @@ { "in": "query", "name": "_source", - "description": "Set to false to disable source retrieval. 
You can also specify a comma-separated\nlist of the fields you want to retrieve.", + "description": "If `false`, source retrieval is turned off.\nYou can also specify a comma-separated list of the fields you want to retrieve.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_global.search._types:SourceConfigParam" @@ -19827,7 +19837,7 @@ { "in": "query", "name": "_source_excludes", - "description": "Specify the source fields you want to exclude.", + "description": "The source fields you want to exclude.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Fields" @@ -19837,7 +19847,7 @@ { "in": "query", "name": "_source_includes", - "description": "Specify the source fields you want to retrieve.", + "description": "The source fields you want to retrieve.", "deprecated": false, "schema": { "$ref": "#/components/schemas/_types:Fields" @@ -19852,29 +19862,29 @@ "type": "object", "properties": { "detect_noop": { - "description": "Set to false to disable setting 'result' in the response\nto 'noop' if no change to the document occurred.", + "description": "If `true`, the `result` in the response is set to `noop` (no operation) when there are no changes to the document.", "type": "boolean" }, "doc": { - "description": "A partial update to an existing document.", + "description": "A partial update to an existing document.\nIf both `doc` and `script` are specified, `doc` is ignored.", "type": "object" }, "doc_as_upsert": { - "description": "Set to true to use the contents of 'doc' as the value of 'upsert'", + "description": "If `true`, use the contents of 'doc' as the value of 'upsert'.\nNOTE: Using ingest pipelines with `doc_as_upsert` is not supported.", "type": "boolean" }, "script": { "$ref": "#/components/schemas/_types:Script" }, "scripted_upsert": { - "description": "Set to true to execute the script whether or not the document exists.", + "description": "If `true`, run the script whether or not the document exists.", "type": "boolean" }, "_source": { "$ref": "#/components/schemas/_global.search._types:SourceConfig" }, "upsert": { - "description": "If the document does not already exist, the contents of 'upsert' are inserted as a\nnew document. If the document exists, the 'script' is executed.", + "description": "If the document does not already exist, the contents of 'upsert' are inserted as a new document.\nIf the document exists, the 'script' is run.", "type": "object" } } @@ -44451,9 +44461,11 @@ "type": "object", "properties": { "bulk": { + "description": "The number of bulk actions retried.", "type": "number" }, "search": { + "description": "The number of search actions retried.", "type": "number" } }, @@ -54008,7 +54020,7 @@ "$ref": "#/components/schemas/_global.reindex:RemoteSource" }, "size": { - "description": "The number of documents to index per batch.\nUse when indexing from remote to ensure that the batches fit within the on-heap buffer, which defaults to a maximum size of 100 MB.", + "description": "The number of documents to index per batch.\nUse it when you are indexing from remote to ensure that the batches fit within the on-heap buffer, which defaults to a maximum size of 100 MB.", "type": "number" }, "slice": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 4dfbc1cd4b..be8f254c7b 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -14624,10 +14624,17 @@ "stability": "stable" } }, - "description": "Reindex documents.\nCopies documents from a source to a destination. 
The source can be any existing index, alias, or data stream. The destination must differ from the source. For example, you cannot reindex a data stream into itself.", + "description": "Reindex documents.\n\nCopy documents from a source to a destination.\nYou can copy all documents to the destination index or reindex a subset of the documents.\nThe source can be any existing index, alias, or data stream.\nThe destination must differ from the source.\nFor example, you cannot reindex a data stream into itself.\n\nIMPORTANT: Reindex requires `_source` to be enabled for all documents in the source.\nThe destination should be configured as wanted before calling the reindex API.\nReindex does not copy the settings from the source or its associated template.\nMappings, shard counts, and replicas, for example, must be configured ahead of time.\n\nIf the Elasticsearch security features are enabled, you must have the following security privileges:\n\n* The `read` index privilege for the source data stream, index, or alias.\n* The `write` index privilege for the destination data stream, index, or index alias.\n* To automatically create a data stream or index with a reindex API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege for the destination data stream, index, or alias.\n* If reindexing from a remote cluster, the `source.remote.user` must have the `monitor` cluster privilege and the `read` index privilege for the source data stream, index, or alias.\n\nIf reindexing from a remote cluster, you must explicitly allow the remote host in the `reindex.remote.whitelist` setting.\nAutomatic data stream creation requires a matching index template with data stream enabled.\n\nThe `dest` element can be configured like the index API to control optimistic concurrency control.\nOmitting `version_type` or setting it to `internal` causes Elasticsearch to blindly dump documents into the destination, overwriting any that happen to have the same ID.\n\nSetting `version_type` to `external` causes Elasticsearch to preserve the `version` from the source, create any documents that are missing, and update any documents that have an older version in the destination than they do in the source.\n\nSetting `op_type` to `create` causes the reindex API to create only missing documents in the destination.\nAll existing documents will cause a version conflict.\n\nIMPORTANT: Because data streams are append-only, any reindex request to a destination data stream must have an `op_type` of `create`.\nA reindex can only add new documents to a destination data stream.\nIt cannot update existing documents in a destination data stream.\n\nBy default, version conflicts abort the reindex process.\nTo continue reindexing if there are conflicts, set the `conflicts` request body property to `proceed`.\nIn this case, the response includes a count of the version conflicts that were encountered.\nNote that the handling of other error types is unaffected by the `conflicts` property.\nAdditionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query.\n\nNOTE: The reindex API makes no effort to handle ID collisions.\nThe last document written will \"win\" but the order isn't usually predictable so it is not a good idea to rely on this behavior.\nInstead, make sure that IDs are unique by using a 
script.\n\n**Running reindex asynchronously**\n\nIf the request contains `wait_for_completion=false`, Elasticsearch performs some preflight checks, launches the request, and returns a task you can use to cancel or get the status of the task.\nElasticsearch creates a record of this task as a document at `_tasks/`.\n\n**Reindex from multiple sources**\n\nIf you have many sources to reindex it is generally better to reindex them one at a time rather than using a glob pattern to pick up multiple sources.\nThat way you can resume the process if there are any errors by removing the partially completed source and starting over.\nIt also makes parallelizing the process fairly simple: split the list of sources to reindex and run each list in parallel.\n\nFor example, you can use a bash script like this:\n\n```\nfor index in i1 i2 i3 i4 i5; do\n curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{\n \"source\": {\n \"index\": \"'$index'\"\n },\n \"dest\": {\n \"index\": \"'$index'-reindexed\"\n }\n }'\ndone\n```\n\n**Throttling**\n\nSet `requests_per_second` to any positive decimal number (`1.4`, `6`, `1000`, for example) to throttle the rate at which reindex issues batches of index operations.\nRequests are throttled by padding each batch with a wait time.\nTo turn off throttling, set `requests_per_second` to `-1`.\n\nThe throttling is done by waiting between batches so that the scroll that reindex uses internally can be given a timeout that takes into account the padding.\nThe padding time is the difference between the batch size divided by the `requests_per_second` and the time spent writing.\nBy default the batch size is `1000`, so if `requests_per_second` is set to `500`:\n\n```\ntarget_time = 1000 / 500 per second = 2 seconds\nwait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds\n```\n\nSince the batch is issued as a single bulk request, large batch sizes cause Elasticsearch to create many requests and then wait for a while before starting the next set.\nThis is \"bursty\" instead of \"smooth\".\n\n**Slicing**\n\nReindex supports sliced scroll to parallelize the reindexing process.\nThis parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.\n\nNOTE: Reindexing from remote clusters does not support manual or automatic slicing.\n\nYou can slice a reindex request manually by providing a slice ID and total number of slices to each request.\nYou can also let reindex automatically parallelize by using sliced scroll to slice on `_id`.\nThe `slices` parameter specifies the number of slices to use.\n\nAdding `slices` to the reindex request just automates the manual process, creating sub-requests which means it has some quirks:\n\n* You can see these requests in the tasks API. These sub-requests are \"child\" tasks of the task for the request with slices.\n* Fetching the status of the task for the request with `slices` only contains the status of completed slices.\n* These sub-requests are individually addressable for things like cancellation and rethrottling.\n* Rethrottling the request with `slices` will rethrottle the unfinished sub-request proportionally.\n* Canceling the request with `slices` will cancel each sub-request.\n* Due to the nature of `slices`, each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. 
Expect larger slices to have a more even distribution.\n* Parameters like `requests_per_second` and `max_docs` on a request with `slices` are distributed proportionally to each sub-request. Combine that with the previous point about distribution being uneven and you should conclude that using `max_docs` with `slices` might not result in exactly `max_docs` documents being reindexed.\n* Each sub-request gets a slightly different snapshot of the source, though these are all taken at approximately the same time.\n\nIf slicing automatically, setting `slices` to `auto` will choose a reasonable number for most indices.\nIf slicing manually or otherwise tuning automatic slicing, use the following guidelines.\n\nQuery performance is most efficient when the number of slices is equal to the number of shards in the index.\nIf that number is large (for example, `500`), choose a lower number as too many slices will hurt performance.\nSetting slices higher than the number of shards generally does not improve efficiency and adds overhead.\n\nIndexing performance scales linearly across available resources with the number of slices.\n\nWhether query or indexing performance dominates the runtime depends on the documents being reindexed and cluster resources.\n\n**Modify documents during reindexing**\n\nLike `_update_by_query`, reindex operations support a script that modifies the document.\nUnlike `_update_by_query`, the script is allowed to modify the document's metadata.\n\nJust as in `_update_by_query`, you can set `ctx.op` to change the operation that is run on the destination.\nFor example, set `ctx.op` to `noop` if your script decides that the document doesn’t have to be indexed in the destination. This \"no operation\" will be reported in the `noop` counter in the response body.\nSet `ctx.op` to `delete` if your script decides that the document must be deleted from the destination.\nThe deletion will be reported in the `deleted` counter in the response body.\nSetting `ctx.op` to anything else will return an error, as will setting any other field in `ctx`.\n\nThink of the possibilities! 
Just be careful; you are able to change:\n\n* `_id`\n* `_index`\n* `_version`\n* `_routing`\n\nSetting `_version` to `null` or clearing it from the `ctx` map is just like not sending the version in an indexing request.\nIt will cause the document to be overwritten in the destination regardless of the version on the target or the version type you use in the reindex API.\n\n**Reindex from remote**\n\nReindex supports reindexing from a remote Elasticsearch cluster.\nThe `host` parameter must contain a scheme, host, port, and optional path.\nThe `username` and `password` parameters are optional and when they are present the reindex operation will connect to the remote Elasticsearch node using basic authentication.\nBe sure to use HTTPS when using basic authentication or the password will be sent in plain text.\nThere are a range of settings available to configure the behavior of the HTTPS connection.\n\nWhen using Elastic Cloud, it is also possible to authenticate against the remote cluster through the use of a valid API key.\nRemote hosts must be explicitly allowed with the `reindex.remote.whitelist` setting.\nIt can be set to a comma-delimited list of allowed remote host and port combinations.\nScheme is ignored; only the host and port are used.\nFor example:\n\n```\nreindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*]\n```\n\nThe list of allowed hosts must be configured on any nodes that will coordinate the reindex.\nThis feature should work with remote clusters of any version of Elasticsearch.\nThis should enable you to upgrade from any version of Elasticsearch to the current version by reindexing from a cluster of the old version.\n\nWARNING: Elasticsearch does not support forward compatibility across major versions.\nFor example, you cannot reindex from a 7.x cluster into a 6.x cluster.\n\nTo enable queries sent to older versions of Elasticsearch, the `query` parameter is sent directly to the remote host without validation or modification.\n\nNOTE: Reindexing from remote clusters does not support manual or automatic slicing.\n\nReindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100 MB.\nIf the remote index includes very large documents you'll need to use a smaller batch size.\nIt is also possible to set the socket read timeout on the remote connection with the `socket_timeout` field and the connection timeout with the `connect_timeout` field.\nBoth default to 30 seconds.\n\n**Configuring SSL parameters**\n\nReindex from remote supports configurable SSL settings.\nThese must be specified in the `elasticsearch.yml` file, with the exception of the secure settings, which you add in the Elasticsearch keystore.\nIt is not possible to configure SSL in the body of the reindex request.", + "docId": "docs-reindex", "docTag": "document", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/docs-reindex.html", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/docs-reindex.html", "name": "reindex", + "privileges": { + "index": [ + "read", + "write" + ] + }, "request": { "name": "Request", "namespace": "_global.reindex" @@ -14663,9 +14670,10 @@ "stability": "stable" } }, - "description": "Throttle a reindex operation.\n\nChange the number of requests per second for a particular reindex operation.", + "description": "Throttle a reindex operation.\n\nChange the number of requests per second for a particular reindex operation.\nFor example:\n\n```\nPOST 
_reindex/r1A2WoRbTwKZ516z6NEs5A:36619/_rethrottle?requests_per_second=-1\n```\n\nRethrottling that speeds up the query takes effect immediately.\nRethrottling that slows down the query will take effect after completing the current batch.\nThis behavior prevents scroll timeouts.", + "docId": "docs-reindex", "docTag": "document", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/docs-reindex.html", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/docs-reindex.html", "name": "reindex_rethrottle", "request": { "name": "Request", @@ -21228,10 +21236,16 @@ "stability": "stable" } }, - "description": "Update a document.\nUpdates a document by running a script or passing a partial document.", + "description": "Update a document.\n\nUpdate a document by running a script or passing a partial document.\n\nIf the Elasticsearch security features are enabled, you must have the `index` or `write` index privilege for the target index or index alias.\n\nThe script can update, delete, or skip modifying the document.\nThe API also supports passing a partial document, which is merged into the existing document.\nTo fully replace an existing document, use the index API.\nThis operation:\n\n* Gets the document (collocated with the shard) from the index.\n* Runs the specified script.\n* Indexes the result.\n\nThe document must still be reindexed, but using this API removes some network roundtrips and reduces chances of version conflicts between the GET and the index operation.\n\nThe `_source` field must be enabled to use this API.\nIn addition to `_source`, you can access the following variables through the `ctx` map: `_index`, `_type`, `_id`, `_version`, `_routing`, and `_now` (the current timestamp).", + "docId": "docs-update", "docTag": "document", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/docs-update.html", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/docs-update.html", "name": "update", + "privileges": { + "index": [ + "write" + ] + }, "request": { "name": "Request", "namespace": "_global.update" @@ -32168,7 +32182,7 @@ } }, { - "description": "Set to `create` to only index documents that do not already exist.\nImportant: To reindex to a data stream destination, this argument must be `create`.", + "description": "If it is `create`, the operation will only index documents that do not already exist (also known as \"put if absent\").\n\nIMPORTANT: To reindex to a data stream destination, this argument must be `create`.", "name": "op_type", "required": false, "serverDefault": "index", @@ -32193,7 +32207,7 @@ } }, { - "description": "By default, a document's routing is preserved unless it’s changed by the script.\nSet to `discard` to set routing to `null`, or `=value` to route using the specified `value`.", + "description": "By default, a document's routing is preserved unless it's changed by the script.\nIf it is `keep`, the routing on the bulk request sent for each match is set to the routing on the match.\nIf it is `discard`, the routing on the bulk request sent for each match is set to `null`.\nIf it is `=value`, the routing on the bulk request sent for each match is set to the value specified after the equals sign (`=`).", "name": "routing", "required": false, "serverDefault": "keep", @@ -32218,7 +32232,7 @@ } } ], - "specLocation": "_global/reindex/types.ts#L39-L64" + "specLocation": "_global/reindex/types.ts#L39-L67" }, { "kind": "interface", "name": { @@ -32228,9 +32242,10 @@ }, "properties": 
[ { - "description": "The remote connection timeout.\nDefaults to 30 seconds.", + "description": "The remote connection timeout.", "name": "connect_timeout", "required": false, + "serverDefault": "30s", "type": { "kind": "instance_of", "type": { @@ -32263,7 +32278,7 @@ } }, { - "description": "The URL for the remote instance of Elasticsearch that you want to index from.", + "description": "The URL for the remote instance of Elasticsearch that you want to index from.\nThis information is required when you're indexing from remote.", "name": "host", "required": true, "type": { @@ -32299,9 +32314,10 @@ } }, { - "description": "The remote socket read timeout. Defaults to 30 seconds.", + "description": "The remote socket read timeout.", "name": "socket_timeout", "required": false, + "serverDefault": "30s", "type": { "kind": "instance_of", "type": { @@ -32311,7 +32327,7 @@ } } ], - "specLocation": "_global/reindex/types.ts#L99-L125" + "specLocation": "_global/reindex/types.ts#L112-L140" }, { "kind": "request", @@ -32322,7 +32338,7 @@ "kind": "properties", "properties": [ { - "description": "Set to proceed to continue reindexing even if there are conflicts.", + "description": "Indicates whether to continue reindexing even when there are conflicts.", "name": "conflicts", "required": false, "serverDefault": "abort", "type": { @@ -32347,7 +32363,7 @@ } }, { - "description": "The maximum number of documents to reindex.", + "description": "The maximum number of documents to reindex.\nBy default, all documents are reindexed.\nIf it is a value less than or equal to `scroll_size`, a scroll will not be used to retrieve the results for the operation.\n\nIf `conflicts` is set to `proceed`, the reindex operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query.", "name": "max_docs", "required": false, "type": { @@ -32395,7 +32411,7 @@ } ] }, - "description": "Reindex documents.\nCopies documents from a source to a destination. The source can be any existing index, alias, or data stream. The destination must differ from the source. 
For example, you cannot reindex a data stream into itself.", + "description": "Reindex documents.\n\nCopy documents from a source to a destination.\nYou can copy all documents to the destination index or reindex a subset of the documents.\nThe source can be any existing index, alias, or data stream.\nThe destination must differ from the source.\nFor example, you cannot reindex a data stream into itself.\n\nIMPORTANT: Reindex requires `_source` to be enabled for all documents in the source.\nThe destination should be configured as wanted before calling the reindex API.\nReindex does not copy the settings from the source or its associated template.\nMappings, shard counts, and replicas, for example, must be configured ahead of time.\n\nIf the Elasticsearch security features are enabled, you must have the following security privileges:\n\n* The `read` index privilege for the source data stream, index, or alias.\n* The `write` index privilege for the destination data stream, index, or index alias.\n* To automatically create a data stream or index with a reindex API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege for the destination data stream, index, or alias.\n* If reindexing from a remote cluster, the `source.remote.user` must have the `monitor` cluster privilege and the `read` index privilege for the source data stream, index, or alias.\n\nIf reindexing from a remote cluster, you must explicitly allow the remote host in the `reindex.remote.whitelist` setting.\nAutomatic data stream creation requires a matching index template with data stream enabled.\n\nThe `dest` element can be configured like the index API to control optimistic concurrency control.\nOmitting `version_type` or setting it to `internal` causes Elasticsearch to blindly dump documents into the destination, overwriting any that happen to have the same ID.\n\nSetting `version_type` to `external` causes Elasticsearch to preserve the `version` from the source, create any documents that are missing, and update any documents that have an older version in the destination than they do in the source.\n\nSetting `op_type` to `create` causes the reindex API to create only missing documents in the destination.\nAll existing documents will cause a version conflict.\n\nIMPORTANT: Because data streams are append-only, any reindex request to a destination data stream must have an `op_type` of `create`.\nA reindex can only add new documents to a destination data stream.\nIt cannot update existing documents in a destination data stream.\n\nBy default, version conflicts abort the reindex process.\nTo continue reindexing if there are conflicts, set the `conflicts` request body property to `proceed`.\nIn this case, the response includes a count of the version conflicts that were encountered.\nNote that the handling of other error types is unaffected by the `conflicts` property.\nAdditionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query.\n\nNOTE: The reindex API makes no effort to handle ID collisions.\nThe last document written will \"win\" but the order isn't usually predictable so it is not a good idea to rely on this behavior.\nInstead, make sure that IDs are unique by using a script.\n\n**Running reindex asynchronously**\n\nIf the request contains `wait_for_completion=false`, Elasticsearch 
performs some preflight checks, launches the request, and returns a task you can use to cancel or get the status of the task.\nElasticsearch creates a record of this task as a document at `_tasks/`.\n\n**Reindex from multiple sources**\n\nIf you have many sources to reindex it is generally better to reindex them one at a time rather than using a glob pattern to pick up multiple sources.\nThat way you can resume the process if there are any errors by removing the partially completed source and starting over.\nIt also makes parallelizing the process fairly simple: split the list of sources to reindex and run each list in parallel.\n\nFor example, you can use a bash script like this:\n\n```\nfor index in i1 i2 i3 i4 i5; do\n curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{\n \"source\": {\n \"index\": \"'$index'\"\n },\n \"dest\": {\n \"index\": \"'$index'-reindexed\"\n }\n }'\ndone\n```\n\n**Throttling**\n\nSet `requests_per_second` to any positive decimal number (`1.4`, `6`, `1000`, for example) to throttle the rate at which reindex issues batches of index operations.\nRequests are throttled by padding each batch with a wait time.\nTo turn off throttling, set `requests_per_second` to `-1`.\n\nThe throttling is done by waiting between batches so that the scroll that reindex uses internally can be given a timeout that takes into account the padding.\nThe padding time is the difference between the batch size divided by the `requests_per_second` and the time spent writing.\nBy default the batch size is `1000`, so if `requests_per_second` is set to `500`:\n\n```\ntarget_time = 1000 / 500 per second = 2 seconds\nwait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds\n```\n\nSince the batch is issued as a single bulk request, large batch sizes cause Elasticsearch to create many requests and then wait for a while before starting the next set.\nThis is \"bursty\" instead of \"smooth\".\n\n**Slicing**\n\nReindex supports sliced scroll to parallelize the reindexing process.\nThis parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.\n\nNOTE: Reindexing from remote clusters does not support manual or automatic slicing.\n\nYou can slice a reindex request manually by providing a slice ID and total number of slices to each request.\nYou can also let reindex automatically parallelize by using sliced scroll to slice on `_id`.\nThe `slices` parameter specifies the number of slices to use.\n\nAdding `slices` to the reindex request just automates the manual process, creating sub-requests which means it has some quirks:\n\n* You can see these requests in the tasks API. These sub-requests are \"child\" tasks of the task for the request with slices.\n* Fetching the status of the task for the request with `slices` only contains the status of completed slices.\n* These sub-requests are individually addressable for things like cancellation and rethrottling.\n* Rethrottling the request with `slices` will rethrottle the unfinished sub-request proportionally.\n* Canceling the request with `slices` will cancel each sub-request.\n* Due to the nature of `slices`, each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution.\n* Parameters like `requests_per_second` and `max_docs` on a request with `slices` are distributed proportionally to each sub-request. 
Combine that with the previous point about distribution being uneven and you should conclude that using `max_docs` with `slices` might not result in exactly `max_docs` documents being reindexed.\n* Each sub-request gets a slightly different snapshot of the source, though these are all taken at approximately the same time.\n\nIf slicing automatically, setting `slices` to `auto` will choose a reasonable number for most indices.\nIf slicing manually or otherwise tuning automatic slicing, use the following guidelines.\n\nQuery performance is most efficient when the number of slices is equal to the number of shards in the index.\nIf that number is large (for example, `500`), choose a lower number as too many slices will hurt performance.\nSetting slices higher than the number of shards generally does not improve efficiency and adds overhead.\n\nIndexing performance scales linearly across available resources with the number of slices.\n\nWhether query or indexing performance dominates the runtime depends on the documents being reindexed and cluster resources.\n\n**Modify documents during reindexing**\n\nLike `_update_by_query`, reindex operations support a script that modifies the document.\nUnlike `_update_by_query`, the script is allowed to modify the document's metadata.\n\nJust as in `_update_by_query`, you can set `ctx.op` to change the operation that is run on the destination.\nFor example, set `ctx.op` to `noop` if your script decides that the document doesn’t have to be indexed in the destination. This \"no operation\" will be reported in the `noop` counter in the response body.\nSet `ctx.op` to `delete` if your script decides that the document must be deleted from the destination.\nThe deletion will be reported in the `deleted` counter in the response body.\nSetting `ctx.op` to anything else will return an error, as will setting any other field in `ctx`.\n\nThink of the possibilities! 
Just be careful; you are able to change:\n\n* `_id`\n* `_index`\n* `_version`\n* `_routing`\n\nSetting `_version` to `null` or clearing it from the `ctx` map is just like not sending the version in an indexing request.\nIt will cause the document to be overwritten in the destination regardless of the version on the target or the version type you use in the reindex API.\n\n**Reindex from remote**\n\nReindex supports reindexing from a remote Elasticsearch cluster.\nThe `host` parameter must contain a scheme, host, port, and optional path.\nThe `username` and `password` parameters are optional and when they are present the reindex operation will connect to the remote Elasticsearch node using basic authentication.\nBe sure to use HTTPS when using basic authentication or the password will be sent in plain text.\nThere are a range of settings available to configure the behavior of the HTTPS connection.\n\nWhen using Elastic Cloud, it is also possible to authenticate against the remote cluster through the use of a valid API key.\nRemote hosts must be explicitly allowed with the `reindex.remote.whitelist` setting.\nIt can be set to a comma-delimited list of allowed remote host and port combinations.\nScheme is ignored; only the host and port are used.\nFor example:\n\n```\nreindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*]\n```\n\nThe list of allowed hosts must be configured on any nodes that will coordinate the reindex.\nThis feature should work with remote clusters of any version of Elasticsearch.\nThis should enable you to upgrade from any version of Elasticsearch to the current version by reindexing from a cluster of the old version.\n\nWARNING: Elasticsearch does not support forward compatibility across major versions.\nFor example, you cannot reindex from a 7.x cluster into a 6.x cluster.\n\nTo enable queries sent to older versions of Elasticsearch, the `query` parameter is sent directly to the remote host without validation or modification.\n\nNOTE: Reindexing from remote clusters does not support manual or automatic slicing.\n\nReindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100 MB.\nIf the remote index includes very large documents you'll need to use a smaller batch size.\nIt is also possible to set the socket read timeout on the remote connection with the `socket_timeout` field and the connection timeout with the `connect_timeout` field.\nBoth default to 30 seconds.\n\n**Configuring SSL parameters**\n\nReindex from remote supports configurable SSL settings.\nThese must be specified in the `elasticsearch.yml` file, with the exception of the secure settings, which you add in the Elasticsearch keystore.\nIt is not possible to configure SSL in the body of the reindex request.", "inherits": { "type": { "name": "RequestBase", @@ -32422,7 +32438,7 @@ } }, { - "description": "The throttle for this request in sub-requests per second.\nDefaults to no throttle.", + "description": "The throttle for this request in sub-requests per second.\nBy default, there is no throttle.", "name": "requests_per_second", "required": false, "serverDefault": -1, "type": { @@ -32435,7 +32451,7 @@ } }, { - "description": "Specifies how long a consistent view of the index should be maintained for scrolled search.", + "description": "The period of time that a consistent view of the index should be maintained for scrolled search.", "name": "scroll", "required": false, "type": { @@ -32447,7 +32463,9 @@ } }, { - "description": "The number of slices this 
task should be divided into.\nDefaults to 1 slice, meaning the task isn’t sliced into subtasks.", + "description": "The number of slices this task should be divided into.\nIt defaults to one slice, which means the task isn't sliced into subtasks.\n\nReindex supports sliced scroll to parallelize the reindexing process.\nThis parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.\n\nNOTE: Reindexing from remote clusters does not support manual or automatic slicing.\n\nIf set to `auto`, Elasticsearch chooses the number of slices to use.\nThis setting will use one slice per shard, up to a certain limit.\nIf there are multiple sources, it will choose the number of slices based on the index or backing index with the smallest number of shards.", + "extDocId": "slice-scroll", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/paginate-search-results.html#slice-scroll", "name": "slices", "required": false, "serverDefault": "1", "type": { @@ -32460,7 +32478,7 @@ } }, { - "description": "Period each indexing waits for automatic index creation, dynamic mapping updates, and waiting for active shards.", + "description": "The period each indexing waits for automatic index creation, dynamic mapping updates, and waiting for active shards.\nBy default, Elasticsearch waits for at least one minute before failing.\nThe actual wait time could be longer, particularly when multiple waits occur.", "name": "timeout", "required": false, "serverDefault": "1m", "type": { @@ -32473,7 +32491,7 @@ } }, { - "description": "The number of shard copies that must be active before proceeding with the operation.\nSet to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`).", + "description": "The number of shard copies that must be active before proceeding with the operation.\nSet it to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`).\nThe default value is one, which means it waits for each primary shard to be active.", "name": "wait_for_active_shards", "required": false, "serverDefault": "1", "type": { @@ -32512,7 +32530,7 @@ } } ], - "specLocation": "_global/reindex/ReindexRequest.ts#L27-L110" + "specLocation": "_global/reindex/ReindexRequest.ts#L27-L309" }, { "kind": "response", "body": { "kind": "properties", "properties": [ { + "description": "The number of scroll responses that were pulled back by the reindex.", "name": "batches", "required": false, "type": { @@ -32531,6 +32550,7 @@ } }, { + "description": "The number of documents that were successfully created.", "name": "created", "required": false, "type": { @@ -32542,6 +32562,7 @@ } }, { + "description": "The number of documents that were successfully deleted.", "name": "deleted", "required": false, "type": { @@ -32553,6 +32574,7 @@ } }, { + "description": "If there were any unrecoverable errors during the process, it is an array of those failures.\nIf this array is not empty, the request ended because of those failures.\nReindex is implemented using batches and any failure causes the entire process to end but all failures in the current batch are collected into the array.\nYou can use the `conflicts` option to prevent the reindex from ending on version conflicts.", "name": "failures", "required": false, "type": { @@ -32567,6 +32589,7 @@ } }, { + "description": "The number of documents that were ignored because the script used for the reindex returned a `noop` value for `ctx.op`.", "name": "noops", "required": false, "type": { @@ -32578,6 +32601,7 @@ } }, { + "description": "The number of retries attempted by reindex.", "name": "retries", "required": false, "type": { @@ -32589,6 +32613,7 @@ } }, { + "description": "The number of requests per second effectively run during the reindex.", "name": "requests_per_second", "required": false, "type": { @@ -32622,6 +32647,7 @@ } }, { + "description": "The number of milliseconds the request slept to conform to `requests_per_second`.", "name": "throttled_millis", "required": false, "type": { @@ -32642,6 +32668,7 @@ } }, { + "description": "This field should always be equal to zero in a reindex response.\nIt has meaning only when using the task API, where it indicates the next time (in milliseconds since epoch) that a throttled request will be run again in order to conform to `requests_per_second`.", "name": "throttled_until_millis", "required": false, "type": { @@ -32662,6 +32689,7 @@ } }, { + "description": "If any of the requests that ran during the reindex timed out, it is `true`.", "name": "timed_out", "required": false, "type": { @@ -32673,6 +32701,7 @@ } }, { + "description": "The total milliseconds the entire operation took.", "name": "took", "required": false, "type": { @@ -32693,6 +32722,7 @@ } }, { + "description": "The number of documents that were successfully processed.", "name": "total", "required": false, "type": { @@ -32704,6 +32734,7 @@ } }, { + "description": "The number of documents that were successfully updated.\nThat is to say, a document with the same ID already existed before the reindex updated it.", "name": "updated", "required": false, "type": { @@ -32715,6 +32746,7 @@ } }, { + "description": "The number of version conflicts that occurred.", "name": "version_conflicts", "required": false, "type": { @@ -32731,7 +32763,7 @@ "name": "Response", "namespace": "_global.reindex" }, - "specLocation": "_global/reindex/ReindexResponse.ts#L26-L45" + "specLocation": "_global/reindex/ReindexResponse.ts#L26-L92" }, { "kind": "interface", "name": { @@ -32741,7 +32773,7 @@ }, "properties": [ { - "description": "The name of the data stream, index, or alias you are copying from.\nAccepts a comma-separated list to reindex from multiple sources.", + "description": "The name of the data stream, index, or alias you are copying from.\nIt accepts a comma-separated list to reindex from multiple sources.", "name": "index", "required": true, "type": { @@ -32753,7 +32785,7 @@ } }, { - "description": "Specifies the documents to reindex using the Query DSL.", + "description": "The documents to reindex, which are defined with Query DSL.", "name": "query", "required": false, "type": { @@ -32777,9 +32809,10 @@ } }, { - "description": "The number of documents to index per batch.\nUse when indexing from remote to ensure that the batches fit within the on-heap buffer, which defaults to a maximum size of 100 MB.", + "description": "The number of documents to index per batch.\nUse it when you are indexing from remote to ensure that the batches fit within the on-heap buffer, which defaults to a maximum size of 100 MB.", "name": "size", "required": false, + "serverDefault": 1000, "type": { "kind": "instance_of", "type": { @@ -32801,6 +32834,11 @@ } }, { + "deprecation": { + "description": "", + "version": "7.6.0" + }, + "description": "A comma-separated list of `<field>:<direction>` pairs to sort by before indexing.\nUse it in conjunction with `max_docs` to control what documents are reindexed.\n\nWARNING: Sort in reindex is deprecated.\nSorting in reindex was never guaranteed 
to index documents in order and prevents further development of reindex such as resilience and performance improvements.\nIf used in combination with `max_docs`, consider using a query filter instead.", "name": "sort", "required": false, "type": { @@ -32813,7 +32851,7 @@ }, { "codegenName": "source_fields", - "description": "If `true` reindexes all source fields.\nSet to a list to reindex select fields.", + "description": "If `true`, reindex all source fields.\nSet it to a list to reindex select fields.", "name": "_source", "required": false, "serverDefault": "true", @@ -32837,7 +32875,7 @@ } } ], - "specLocation": "_global/reindex/types.ts#L66-L97" + "specLocation": "_global/reindex/types.ts#L69-L110" }, { "kind": "interface", @@ -33205,7 +33243,7 @@ "body": { "kind": "no_body" }, - "description": "Throttle a reindex operation.\n\nChange the number of requests per second for a particular reindex operation.", + "description": "Throttle a reindex operation.\n\nChange the number of requests per second for a particular reindex operation.\nFor example:\n\n```\nPOST _reindex/r1A2WoRbTwKZ516z6NEs5A:36619/_rethrottle?requests_per_second=-1\n```\n\nRethrottling that speeds up the query takes effect immediately.\nRethrottling that slows down the query will take effect after completing the current batch.\nThis behavior prevents scroll timeouts.", "inherits": { "type": { "name": "RequestBase", @@ -33218,7 +33256,7 @@ }, "path": [ { - "description": "Identifier for the task.", + "description": "The task identifier, which can be found by using the tasks API.", "name": "task_id", "required": true, "type": { @@ -33232,7 +33270,7 @@ ], "query": [ { - "description": "The throttle for this request in sub-requests per second.", + "description": "The throttle for this request in sub-requests per second.\nIt can be either `-1` to turn off throttling or any decimal number like `1.7` or `12` to throttle to that level.", "name": "requests_per_second", "required": false, "type": { @@ -33244,7 +33282,7 @@ } } ], - "specLocation": "_global/reindex_rethrottle/ReindexRethrottleRequest.ts#L24-L52" + "specLocation": "_global/reindex_rethrottle/ReindexRethrottleRequest.ts#L24-L63" }, { "kind": "response", @@ -42816,7 +42854,7 @@ "kind": "properties", "properties": [ { - "description": "Set to false to disable setting 'result' in the response\nto 'noop' if no change to the document occurred.", + "description": "If `true`, the `result` in the response is set to `noop` (no operation) when there are no changes to the document.", "name": "detect_noop", "required": false, "serverDefault": true, @@ -42829,7 +42867,7 @@ } }, { - "description": "A partial update to an existing document.", + "description": "A partial update to an existing document.\nIf both `doc` and `script` are specified, `doc` is ignored.", "name": "doc", "required": false, "type": { @@ -42841,7 +42879,7 @@ } }, { - "description": "Set to true to use the contents of 'doc' as the value of 'upsert'", + "description": "If `true`, use the contents of 'doc' as the value of 'upsert'.\nNOTE: Using ingest pipelines with `doc_as_upsert` is not supported.", "name": "doc_as_upsert", "required": false, "serverDefault": false, @@ -42854,7 +42892,7 @@ } }, { - "description": "Script to execute to update the document.", + "description": "The script to run to update the document.", "name": "script", "required": false, "type": { @@ -42866,7 +42904,7 @@ } }, { - "description": "Set to true to execute the script whether or not the document exists.", + "description": "If `true`, 
run the script whether or not the document exists.", "name": "scripted_upsert", "required": false, "serverDefault": false, @@ -42879,7 +42917,7 @@ } }, { - "description": "Set to false to disable source retrieval. You can also specify a comma-separated\nlist of the fields you want to retrieve.", + "description": "If `false`, turn off source retrieval.\nYou can also specify a comma-separated list of the fields you want to retrieve.", "name": "_source", "required": false, "serverDefault": "true", @@ -42892,7 +42930,7 @@ } }, { - "description": "If the document does not already exist, the contents of 'upsert' are inserted as a\nnew document. If the document exists, the 'script' is executed.", + "description": "If the document does not already exist, the contents of 'upsert' are inserted as a new document.\nIf the document exists, the 'script' is run.", "name": "upsert", "required": false, "type": { @@ -42905,7 +42943,7 @@ } ] }, - "description": "Update a document.\nUpdates a document by running a script or passing a partial document.", + "description": "Update a document.\n\nUpdate a document by running a script or passing a partial document.\n\nIf the Elasticsearch security features are enabled, you must have the `index` or `write` index privilege for the target index or index alias.\n\nThe script can update, delete, or skip modifying the document.\nThe API also supports passing a partial document, which is merged into the existing document.\nTo fully replace an existing document, use the index API.\nThis operation:\n\n* Gets the document (collocated with the shard) from the index.\n* Runs the specified script.\n* Indexes the result.\n\nThe document must still be reindexed, but using this API removes some network roundtrips and reduces chances of version conflicts between the GET and the index operation.\n\nThe `_source` field must be enabled to use this API.\nIn addition to `_source`, you can access the following variables through the `ctx` map: `_index`, `_type`, `_id`, `_version`, `_routing`, and `_now` (the current timestamp).", "generics": [ { "name": "TDocument", @@ -42928,7 +42966,7 @@ }, "path": [ { - "description": "Document ID", + "description": "A unique identifier for the document to be updated.", "name": "id", "required": true, "type": { @@ -42940,7 +42978,7 @@ } }, { - "description": "The name of the index", + "description": "The name of the target index.\nBy default, the index is created automatically if it doesn't exist.", "name": "index", "required": true, "type": { @@ -42955,6 +42993,8 @@ "query": [ { "description": "Only perform the operation if the document has this primary term.", + "extDocId": "optimistic-concurrency", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/optimistic-concurrency-control.html", "name": "if_primary_term", "required": false, "type": { @@ -42967,6 +43007,8 @@ }, { "description": "Only perform the operation if the document has this sequence number.", + "extDocId": "optimistic-concurrency", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/optimistic-concurrency-control.html", "name": "if_seq_no", "required": false, "type": { @@ -42991,7 +43033,7 @@ } }, { - "description": "If 'true', Elasticsearch refreshes the affected shards to make this operation\nvisible to search, if 'wait_for' then wait for a refresh to make this operation\nvisible to search, if 'false' do nothing with refreshes.", + "description": "If 'true', Elasticsearch refreshes the affected shards to make this operation 
visible to search.\nIf 'wait_for', it waits for a refresh to make this operation visible to search.\nIf 'false', it does nothing with refreshes.", "name": "refresh", "required": false, "serverDefault": "false", @@ -43004,7 +43046,7 @@ } }, { - "description": "If true, the destination must be an index alias.", + "description": "If `true`, the destination must be an index alias.", "name": "require_alias", "required": false, "serverDefault": false, @@ -43017,7 +43059,7 @@ } }, { - "description": "Specify how many times should the operation be retried when a conflict occurs.", + "description": "The number of times the operation should be retried when a conflict occurs.", "name": "retry_on_conflict", "required": false, "serverDefault": 0, @@ -43030,7 +43072,7 @@ } }, { - "description": "Custom value used to route operations to a specific shard.", + "description": "A custom value used to route operations to a specific shard.", "name": "routing", "required": false, "type": { @@ -43042,7 +43084,7 @@ } }, { - "description": "Period to wait for dynamic mapping updates and active shards.\nThis guarantees Elasticsearch waits for at least the timeout before failing.\nThe actual wait time could be longer, particularly when multiple waits occur.", + "description": "The period to wait for the following operations: dynamic mapping updates and waiting for active shards.\nElasticsearch waits for at least the timeout period before failing.\nThe actual wait time could be longer, particularly when multiple waits occur.", "name": "timeout", "required": false, "serverDefault": "1m", @@ -43055,7 +43097,7 @@ } }, { - "description": "The number of shard copies that must be active before proceeding with the operations.\nSet to 'all' or any positive integer up to the total number of shards in the index\n(number_of_replicas+1). Defaults to 1 meaning the primary shard.", + "description": "The number of copies of each shard that must be active before proceeding with the operation.\nSet to 'all' or any positive integer up to the total number of shards in the index (`number_of_replicas`+1).\nThe default value of `1` means it waits for each primary shard to be active.", "name": "wait_for_active_shards", "required": false, "serverDefault": "1", @@ -43068,7 +43110,7 @@ } }, { - "description": "Set to false to disable source retrieval. 
You can also specify a comma-separated\nlist of the fields you want to retrieve.", + "description": "If `false`, source retrieval is turned off.\nYou can also specify a comma-separated list of the fields you want to retrieve.", "name": "_source", "required": false, "serverDefault": "true", @@ -43081,7 +43123,7 @@ } }, { - "description": "Specify the source fields you want to exclude.", + "description": "The source fields you want to exclude.", "name": "_source_excludes", "required": false, "type": { @@ -43093,7 +43135,7 @@ } }, { - "description": "Specify the source fields you want to retrieve.", + "description": "The source fields you want to retrieve.", "name": "_source_includes", "required": false, "type": { @@ -43105,7 +43147,7 @@ } } ], - "specLocation": "_global/update/UpdateRequest.ts#L38-L160" + "specLocation": "_global/update/UpdateRequest.ts#L38-L189" }, { "kind": "response", @@ -49525,6 +49567,7 @@ }, "properties": [ { + "description": "The number of bulk actions retried.", "name": "bulk", "required": true, "type": { @@ -49536,6 +49579,7 @@ } }, { + "description": "The number of search actions retried.", "name": "search", "required": true, "type": { @@ -49547,7 +49591,7 @@ } } ], - "specLocation": "_types/Retries.ts#L22-L25" + "specLocation": "_types/Retries.ts#L22-L31" }, { "kind": "interface", @@ -51471,7 +51515,7 @@ }, "properties": [ { - "description": "Specifies the language the script is written in.", + "description": "The language the script is written in.", "name": "lang", "required": true, "type": { diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index ef7f79f530..edca6d356a 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -148,7 +148,6 @@ docs-multi-termvectors,https://www.elastic.co/guide/en/elasticsearch/reference/{ docs-reindex,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/docs-reindex.html docs-termvectors,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/docs-termvectors.html docs-update-by-query,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/docs-update-by-query.html -docs-update-by-query,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/docs-update-by-query.html docs-update,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/docs-update.html document-input-parameters,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/query-dsl-mlt-query.html#_document_input_parameters dot-expand-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/dot-expand-processor.html @@ -680,6 +679,7 @@ set-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/s shape,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/shape.html simulate-ingest-api,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/simulate-ingest-api.html simulate-pipeline-api,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/simulate-pipeline-api.html +slice-scroll,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/paginate-search-results.html#slice-scroll slm-api-delete-policy,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/slm-api-delete-policy.html slm-api-execute-lifecycle,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/slm-api-execute-lifecycle.html slm-api-execute-retention,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/slm-api-execute-retention.html diff --git 
a/specification/_global/reindex/ReindexRequest.ts b/specification/_global/reindex/ReindexRequest.ts index 945fd2ddf3..feccc910c8 100644 --- a/specification/_global/reindex/ReindexRequest.ts +++ b/specification/_global/reindex/ReindexRequest.ts @@ -26,11 +26,193 @@ import { Destination, Source } from './types' /** * Reindex documents. - * Copies documents from a source to a destination. The source can be any existing index, alias, or data stream. The destination must differ from the source. For example, you cannot reindex a data stream into itself. + * + * Copy documents from a source to a destination. + * You can copy all documents to the destination index or reindex a subset of the documents. + * The source can be any existing index, alias, or data stream. + * The destination must differ from the source. + * For example, you cannot reindex a data stream into itself. + * + * IMPORTANT: Reindex requires `_source` to be enabled for all documents in the source. + * The destination should be configured as wanted before calling the reindex API. + * Reindex does not copy the settings from the source or its associated template. + * Mappings, shard counts, and replicas, for example, must be configured ahead of time. + * + * If the Elasticsearch security features are enabled, you must have the following security privileges: + * + * * The `read` index privilege for the source data stream, index, or alias. + * * The `write` index privilege for the destination data stream, index, or index alias. + * * To automatically create a data stream or index with a reindex API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege for the destination data stream, index, or alias. + * * If reindexing from a remote cluster, the `source.remote.user` must have the `monitor` cluster privilege and the `read` index privilege for the source data stream, index, or alias. + * + * If reindexing from a remote cluster, you must explicitly allow the remote host in the `reindex.remote.whitelist` setting. + * Automatic data stream creation requires a matching index template with data stream enabled. + * + * The `dest` element can be configured like the index API to control optimistic concurrency control. + * Omitting `version_type` or setting it to `internal` causes Elasticsearch to blindly dump documents into the destination, overwriting any that happen to have the same ID. + * + * Setting `version_type` to `external` causes Elasticsearch to preserve the `version` from the source, create any documents that are missing, and update any documents that have an older version in the destination than they do in the source. + * + * Setting `op_type` to `create` causes the reindex API to create only missing documents in the destination. + * All existing documents will cause a version conflict. + * + * IMPORTANT: Because data streams are append-only, any reindex request to a destination data stream must have an `op_type` of `create`. + * A reindex can only add new documents to a destination data stream. + * It cannot update existing documents in a destination data stream. + * + * By default, version conflicts abort the reindex process. + * To continue reindexing if there are conflicts, set the `conflicts` request body property to `proceed`. + * In this case, the response includes a count of the version conflicts that were encountered. + * Note that the handling of other error types is unaffected by the `conflicts` property. 
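+ * For example, a request of this shape (the index names are illustrative) counts version conflicts instead of aborting and creates only missing documents in the destination:
+ *
+ * ```
+ * POST _reindex
+ * {
+ *   "conflicts": "proceed",
+ *   "source": {
+ *     "index": "my-index-000001"
+ *   },
+ *   "dest": {
+ *     "index": "my-new-index-000001",
+ *     "op_type": "create"
+ *   }
+ * }
+ * ```
+ *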
+ * Additionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query. + * + * NOTE: The reindex API makes no effort to handle ID collisions. + * The last document written will "win" but the order isn't usually predictable so it is not a good idea to rely on this behavior. + * Instead, make sure that IDs are unique by using a script. + * + * **Running reindex asynchronously** + * + * If the request contains `wait_for_completion=false`, Elasticsearch performs some preflight checks, launches the request, and returns a task you can use to cancel or get the status of the task. + * Elasticsearch creates a record of this task as a document at `_tasks/`. + * + * **Reindex from multiple sources** + * + * If you have many sources to reindex it is generally better to reindex them one at a time rather than using a glob pattern to pick up multiple sources. + * That way you can resume the process if there are any errors by removing the partially completed source and starting over. + * It also makes parallelizing the process fairly simple: split the list of sources to reindex and run each list in parallel. + * + * For example, you can use a bash script like this: + * + * ``` + * for index in i1 i2 i3 i4 i5; do + * curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{ + * "source": { + * "index": "'$index'" + * }, + * "dest": { + * "index": "'$index'-reindexed" + * } + * }' + * done + * ``` + * + * **Throttling** + * + * Set `requests_per_second` to any positive decimal number (`1.4`, `6`, `1000`, for example) to throttle the rate at which reindex issues batches of index operations. + * Requests are throttled by padding each batch with a wait time. + * To turn off throttling, set `requests_per_second` to `-1`. + * + * The throttling is done by waiting between batches so that the scroll that reindex uses internally can be given a timeout that takes into account the padding. + * The padding time is the difference between the batch size divided by the `requests_per_second` and the time spent writing. + * By default the batch size is `1000`, so if `requests_per_second` is set to `500`: + * + * ``` + * target_time = 1000 / 500 per second = 2 seconds + * wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds + * ``` + * + * Since the batch is issued as a single bulk request, large batch sizes cause Elasticsearch to create many requests and then wait for a while before starting the next set. + * This is "bursty" instead of "smooth". + * + * **Slicing** + * + * Reindex supports sliced scroll to parallelize the reindexing process. + * This parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts. + * + * NOTE: Reindexing from remote clusters does not support manual or automatic slicing. + * + * You can slice a reindex request manually by providing a slice ID and total number of slices to each request. + * You can also let reindex automatically parallelize by using sliced scroll to slice on `_id`. + * The `slices` parameter specifies the number of slices to use. + * + * Adding `slices` to the reindex request just automates the manual process, creating sub-requests which means it has some quirks: + * + * * You can see these requests in the tasks API. 
These sub-requests are "child" tasks of the task for the request with slices. + * * When you fetch the status of the task for the request with `slices`, it contains only the status of completed slices. + * * These sub-requests are individually addressable for things like cancellation and rethrottling. + * * Rethrottling the request with `slices` will rethrottle the unfinished sub-requests proportionally. + * * Canceling the request with `slices` will cancel each sub-request. + * * Due to the nature of `slices`, each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution. + * * Parameters like `requests_per_second` and `max_docs` on a request with `slices` are distributed proportionally to each sub-request. Combine that with the previous point about distribution being uneven, and you should conclude that using `max_docs` with `slices` might not result in exactly `max_docs` documents being reindexed. + * * Each sub-request gets a slightly different snapshot of the source, though these are all taken at approximately the same time. + * + * If slicing automatically, setting `slices` to `auto` will choose a reasonable number for most indices. + * If slicing manually or otherwise tuning automatic slicing, use the following guidelines. + * + * Query performance is most efficient when the number of slices is equal to the number of shards in the index. + * If that number is large (for example, `500`), choose a lower number, as too many slices will hurt performance. + * Setting `slices` higher than the number of shards generally does not improve efficiency and adds overhead. + * + * Indexing performance scales linearly across available resources with the number of slices. + * + * Whether query or indexing performance dominates the runtime depends on the documents being reindexed and cluster resources. + * + * **Modify documents during reindexing** + * + * Like `_update_by_query`, reindex operations support a script that modifies the document. + * Unlike `_update_by_query`, the script is allowed to modify the document's metadata. + * + * Just as in `_update_by_query`, you can set `ctx.op` to change the operation that is run on the destination. + * For example, set `ctx.op` to `noop` if your script decides that the document doesn't have to be indexed in the destination. This "no operation" will be reported in the `noop` counter in the response body. + * Set `ctx.op` to `delete` if your script decides that the document must be deleted from the destination. + * The deletion will be reported in the `deleted` counter in the response body. + * Setting `ctx.op` to anything else will return an error, as will setting any other field in `ctx`. + * + * Think of the possibilities! Just be careful; you are able to change: + * + * * `_id` + * * `_index` + * * `_version` + * * `_routing` + * + * Setting `_version` to `null` or clearing it from the `ctx` map is just like not sending the version in an indexing request. + * It will cause the document to be overwritten in the destination regardless of the version on the target or the version type you use in the reindex API. + * + * **Reindex from remote** + * + * Reindex supports reindexing from a remote Elasticsearch cluster. + * The `host` parameter must contain a scheme, host, port, and optional path.
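+ * For example, this is a sketch of a `source` definition for a remote reindex (the host and index name are illustrative):
+ *
+ * ```
+ * "source": {
+ *   "remote": {
+ *     "host": "http://otherhost:9200"
+ *   },
+ *   "index": "my-index-000001"
+ * }
+ * ```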
+ * The `username` and `password` parameters are optional and, when they are present, the reindex operation will connect to the remote Elasticsearch node using basic authentication. + * Be sure to use HTTPS when using basic authentication, or the password will be sent in plain text. + * There is a range of settings available to configure the behavior of the HTTPS connection. + * + * When using Elastic Cloud, it is also possible to authenticate against the remote cluster through the use of a valid API key. + * Remote hosts must be explicitly allowed with the `reindex.remote.whitelist` setting. + * It can be set to a comma-delimited list of allowed remote host and port combinations. + * Scheme is ignored; only the host and port are used. + * For example: + * + * ``` + * reindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*] + * ``` + * + * The list of allowed hosts must be configured on any nodes that will coordinate the reindex. + * This feature should work with remote clusters of any version of Elasticsearch. + * This should enable you to upgrade from any version of Elasticsearch to the current version by reindexing from a cluster of the old version. + * + * WARNING: Elasticsearch does not support forward compatibility across major versions. + * For example, you cannot reindex from a 7.x cluster into a 6.x cluster. + * + * To enable queries sent to older versions of Elasticsearch, the `query` parameter is sent directly to the remote host without validation or modification. + * + * NOTE: Reindexing from remote clusters does not support manual or automatic slicing. + * + * Reindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100 MB. + * If the remote index includes very large documents, you'll need to use a smaller batch size. + * It is also possible to set the socket read timeout on the remote connection with the `socket_timeout` field and the connection timeout with the `connect_timeout` field. + * Both default to 30 seconds. + * + * **Configuring SSL parameters** + * + * Reindex from remote supports configurable SSL settings. + * These must be specified in the `elasticsearch.yml` file, with the exception of the secure settings, which you add in the Elasticsearch keystore. + * It is not possible to configure SSL in the body of the reindex request. * @rest_spec_name reindex * @availability stack since=2.3.0 stability=stable * @availability serverless stability=stable visibility=public + * @index_privileges read, write * @doc_tag document + * @doc_id docs-reindex */ export interface Request extends RequestBase { urls: [ @@ -47,28 +229,41 @@ export interface Request extends RequestBase { refresh?: boolean /** * The throttle for this request in sub-requests per second. - * Defaults to no throttle. + * By default, there is no throttle. * @server_default -1 */ requests_per_second?: float /** - * Specifies how long a consistent view of the index should be maintained for scrolled search. + * The period of time that a consistent view of the index should be maintained for scrolled search. */ scroll?: Duration /** * The number of slices this task should be divided into. - * Defaults to 1 slice, meaning the task isn’t sliced into subtasks. + * It defaults to one slice, which means the task isn't sliced into subtasks. + * + * Reindex supports sliced scroll to parallelize the reindexing process. + * This parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.
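+ * For example, this is a sketch of a request that lets Elasticsearch pick the number of slices (the index names are illustrative):
+ *
+ * ```
+ * POST _reindex?slices=auto&refresh
+ * {
+ *   "source": {
+ *     "index": "my-index-000001"
+ *   },
+ *   "dest": {
+ *     "index": "my-new-index-000001"
+ *   }
+ * }
+ * ```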
+ * + * NOTE: Reindexing from remote clusters does not support manual or automatic slicing. + * + * If set to `auto`, Elasticsearch chooses the number of slices to use. + * This setting will use one slice per shard, up to a certain limit. + * If there are multiple sources, it will choose the number of slices based on the index or backing index with the smallest number of shards. * @server_default 1 + * @ext_doc_id slice-scroll */ slices?: Slices /** - * Period each indexing waits for automatic index creation, dynamic mapping updates, and waiting for active shards. + * The period each indexing operation waits for automatic index creation, dynamic mapping updates, and active shards. + * By default, Elasticsearch waits for at least one minute before failing. + * The actual wait time could be longer, particularly when multiple waits occur. * @server_default 1m */ timeout?: Duration /** * The number of shard copies that must be active before proceeding with the operation. - * Set to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`). + * Set it to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`). + * The default value is one, which means it waits for each primary shard to be active. * @server_default 1 */ wait_for_active_shards?: WaitForActiveShards @@ -85,7 +280,7 @@ export interface Request extends RequestBase { } body: { /** - * Set to proceed to continue reindexing even if there are conflicts. + * Indicates whether to continue reindexing even when there are conflicts. * @server_default abort */ conflicts?: Conflicts @@ -95,6 +290,10 @@ export interface Request extends RequestBase { dest: Destination /** * The maximum number of documents to reindex. + * By default, all documents are reindexed. + * If it is a value less than or equal to `scroll_size`, a scroll will not be used to retrieve the results for the operation. + * + * If `conflicts` is set to `proceed`, the reindex operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query. */ max_docs?: long /** diff --git a/specification/_global/reindex/ReindexResponse.ts b/specification/_global/reindex/ReindexResponse.ts index 3c8e50452f..d48e68c6e2 100644 --- a/specification/_global/reindex/ReindexResponse.ts +++ b/specification/_global/reindex/ReindexResponse.ts @@ -25,21 +25,68 @@ import { DurationValue, EpochTime, UnitMillis } from '@_types/Time' export class Response { body: { + /** + * The number of scroll responses that were pulled back by the reindex. + */ batches?: long + /** + * The number of documents that were successfully created. + */ created?: long + /** + * The number of documents that were successfully deleted. + */ deleted?: long + /** + * If there were any unrecoverable errors during the process, this array contains those failures. + * If this array is not empty, the request ended because of those failures. + * Reindex is implemented using batches, and any failure causes the entire process to end, but all failures in the current batch are collected into the array. + * You can use the `conflicts` option to prevent the reindex from ending on version conflicts. + */ failures?: BulkIndexByScrollFailure[] + /** + * The number of documents that were ignored because the script used for the reindex returned a `noop` value for `ctx.op`.
+ */ noops?: long + /** + * The number of retries attempted by reindex. + */ retries?: Retries + /** + * The number of requests per second effectively run during the reindex. + */ requests_per_second?: float slice_id?: integer task?: TaskId + /** + * The number of milliseconds the request slept to conform to `requests_per_second`. + */ throttled_millis?: EpochTime<UnitMillis> + /** + * This field should always be equal to zero in a reindex response. + * It has meaning only when using the task API, where it indicates the next time (in milliseconds since epoch) that a throttled request will be run again in order to conform to `requests_per_second`. + */ throttled_until_millis?: EpochTime<UnitMillis> + /** + * If any of the requests that ran during the reindex timed out, it is `true`. + */ timed_out?: boolean + /** + * The total milliseconds the entire operation took. + */ took?: DurationValue<UnitMillis> + /** + * The number of documents that were successfully processed. + */ total?: long + /** + * The number of documents that were successfully updated. + * That is to say, a document with the same ID already existed before the reindex updated it. + */ updated?: long + /** + * The number of version conflicts that occurred. + */ version_conflicts?: long } } diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample1.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample1.yaml new file mode 100644 index 0000000000..b30e00d9a6 --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample1.yaml @@ -0,0 +1,16 @@ +summary: Reindex multiple sources +# method_request: POST _reindex +description: > + Run `POST _reindex` to reindex from multiple sources. + The `index` attribute in source can be a list, which enables you to copy from lots of sources in one request. + This example copies documents from the `my-index-000001` and `my-index-000002` indices. +# type: request +value: |- + { + "source": { + "index": ["my-index-000001", "my-index-000002"] + }, + "dest": { + "index": "my-new-index-000002" + } + } diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample10.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample10.yaml new file mode 100644 index 0000000000..3b5f2cc9a5 --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample10.yaml @@ -0,0 +1,12 @@ +summary: Reindex with Painless +# method_request: POST _reindex +description: > + You can use Painless to reindex daily indices to apply a new template to the existing documents. + The script extracts the date from the index name and creates a new index with `-1` appended. + For example, all data from `metricbeat-2016.05.31` will be reindexed into `metricbeat-2016.05.31-1`.
+# type: request +value: + "{\n \"source\": {\n \"index\": \"metricbeat-*\"\n },\n \"dest\": {\n\ + \ \"index\": \"metricbeat\"\n },\n \"script\": {\n \"lang\": \"painless\"\ + ,\n \"source\": \"ctx._index = 'metricbeat-' + (ctx._index.substring('metricbeat-'.length(),\ + \ ctx._index.length())) + '-1'\"\n }\n}" diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample11.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample11.yaml new file mode 100644 index 0000000000..352e94818d --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample11.yaml @@ -0,0 +1,11 @@ +summary: Reindex a random subset +# method_request: POST _reindex +description: > + Run `POST _reindex` to extract a random subset of the source for testing. + You might need to adjust the `min_score` value depending on the relative amount of data extracted from source. +# type: request +value: + "{\n \"max_docs\": 10,\n \"source\": {\n \"index\": \"my-index-000001\"\ + ,\n \"query\": {\n \"function_score\" : {\n \"random_score\" : {},\n\ + \ \"min_score\" : 0.9\n }\n }\n },\n \"dest\": {\n \"index\"\ + : \"my-new-index-000001\"\n }\n}" diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample12.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample12.yaml new file mode 100644 index 0000000000..0bc53d0337 --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample12.yaml @@ -0,0 +1,11 @@ +summary: Reindex modified documents +# method_request: POST _reindex +description: > + Run `POST _reindex` to modify documents during reindexing. + This example bumps the version of the source document. +# type: request +value: + "{\n \"source\": {\n \"index\": \"my-index-000001\"\n },\n \"dest\":\ + \ {\n \"index\": \"my-new-index-000001\",\n \"version_type\": \"external\"\ + \n },\n \"script\": {\n \"source\": \"if (ctx._source.foo == 'bar') {ctx._version++;\ + \ ctx._source.remove('foo')}\",\n \"lang\": \"painless\"\n }\n}" diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample13.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample13.yaml new file mode 100644 index 0000000000..16718e5733 --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample13.yaml @@ -0,0 +1,11 @@ +summary: Reindex from remote on Elastic Cloud +# method_request: POST _reindex +description: > + When using Elastic Cloud, you can run `POST _reindex` and authenticate against a remote cluster with an API key. +# type: request +value: + "{\n \"source\": {\n \"remote\": {\n \"host\": \"http://otherhost:9200\"\ + ,\n \"username\": \"user\",\n \"password\": \"pass\"\n },\n \"index\"\ + : \"my-index-000001\",\n \"query\": {\n \"match\": {\n \"test\":\ + \ \"data\"\n }\n }\n },\n \"dest\": {\n \"index\": \"my-new-index-000001\"\ + \n }\n}" diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample2.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample2.yaml new file mode 100644 index 0000000000..dd7c59be91 --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample2.yaml @@ -0,0 +1,19 @@ +summary: Manual slicing +# method_request: POST _reindex +description: > + Run `POST _reindex` to slice a reindex request manually. + Provide a slice ID and total number of slices to each request. 
+# type: request +value: |- + { + "source": { + "index": "my-index-000001", + "slice": { + "id": 0, + "max": 2 + } + }, + "dest": { + "index": "my-new-index-000001" + } + } diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample3.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample3.yaml new file mode 100644 index 0000000000..6739f7efcd --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample3.yaml @@ -0,0 +1,9 @@ +summary: Automatic slicing +# method_request: POST _reindex?slices=5&refresh +description: > + Run `POST _reindex?slices=5&refresh` to automatically parallelize using sliced scroll to slice on `_id`. + The `slices` parameter specifies the number of slices to use. +# type: request +value: "{\n \"source\": {\n \"index\": \"my-index-000001\"\n },\n \"dest\":\ \ {\n \"index\": \"my-new-index-000001\"\n }\n}" diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample4.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample4.yaml new file mode 100644 index 0000000000..62ef40fca3 --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample4.yaml @@ -0,0 +1,11 @@ +summary: Routing +# method_request: POST _reindex +description: > + By default, if reindex sees a document with routing, then the routing is preserved unless it's changed by the script. + You can set `routing` on the `dest` request to change this behavior. + In this example, run `POST _reindex` to copy all documents from the `source` with the company name `cat` into the `dest` with routing set to `cat`. +# type: request +value: "{\n \"source\": {\n \"index\": \"source\",\n \"query\": {\n \"\ match\": {\n \"company\": \"cat\"\n }\n }\n },\n \"dest\": {\n\ \ \"index\": \"dest\",\n \"routing\": \"=cat\"\n }\n}" diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample5.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample5.yaml new file mode 100644 index 0000000000..9cc24461c2 --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample5.yaml @@ -0,0 +1,7 @@ +summary: Ingest pipelines +# method_request: POST _reindex +description: Run `POST _reindex` and use the ingest pipelines feature. +# type: request +value: "{\n \"source\": {\n \"index\": \"source\"\n },\n \"dest\": {\n \"\ index\": \"dest\",\n \"pipeline\": \"some_ingest_pipeline\"\n }\n}" diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample6.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample6.yaml new file mode 100644 index 0000000000..c87ff330e1 --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample6.yaml @@ -0,0 +1,10 @@ +summary: Reindex with a query +# method_request: POST _reindex +description: > + Run `POST _reindex` and add a query to the `source` to limit the documents to reindex. + For example, this request copies documents into `my-new-index-000001` only if they have a `user.id` of `kimchy`.
+# type: request +value: "{\n \"source\": {\n \"index\": \"my-index-000001\",\n \"query\": {\n\ \ \"term\": {\n \"user.id\": \"kimchy\"\n }\n }\n },\n \"\ dest\": {\n \"index\": \"my-new-index-000001\"\n }\n}" diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample7.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample7.yaml new file mode 100644 index 0000000000..ed868d28d5 --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample7.yaml @@ -0,0 +1,9 @@ +summary: Reindex with max_docs +# method_request: POST _reindex +description: > + You can limit the number of processed documents by setting `max_docs`. + For example, run `POST _reindex` to copy a single document from `my-index-000001` to `my-new-index-000001`. +# type: request +value: "{\n \"max_docs\": 1,\n \"source\": {\n \"index\": \"my-index-000001\"\ \n },\n \"dest\": {\n \"index\": \"my-new-index-000001\"\n }\n}" diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample8.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample8.yaml new file mode 100644 index 0000000000..b15f45ed5c --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample8.yaml @@ -0,0 +1,10 @@ +summary: Reindex selected fields +# method_request: POST _reindex +description: > + You can use source filtering to reindex a subset of the fields in the original documents. + For example, run `POST _reindex` to reindex only the `user.id` and `_doc` fields of each document. +# type: request +value: "{\n \"source\": {\n \"index\": \"my-index-000001\",\n \"_source\":\ \ [\"user.id\", \"_doc\"]\n },\n \"dest\": {\n \"index\": \"my-new-index-000001\"\ \n }\n}" diff --git a/specification/_global/reindex/examples/request/ReindexRequestExample9.yaml b/specification/_global/reindex/examples/request/ReindexRequestExample9.yaml new file mode 100644 index 0000000000..4db7e1ad72 --- /dev/null +++ b/specification/_global/reindex/examples/request/ReindexRequestExample9.yaml @@ -0,0 +1,10 @@ +summary: Reindex new field names +# method_request: POST _reindex +description: > + A reindex operation can build a copy of an index with renamed fields. + If your index has documents with `text` and `flag` fields, you can change the latter field name to `tag` during the reindex. +# type: request +value: "{\n \"source\": {\n \"index\": \"my-index-000001\"\n },\n \"dest\":\ \ {\n \"index\": \"my-new-index-000001\"\n },\n \"script\": {\n \"source\"\ : \"ctx._source.tag = ctx._source.remove(\\\"flag\\\")\"\n }\n}" diff --git a/specification/_global/reindex/types.ts b/specification/_global/reindex/types.ts index 678c40e338..395da4278c 100644 --- a/specification/_global/reindex/types.ts +++ b/specification/_global/reindex/types.ts @@ -42,8 +42,9 @@ export class Destination { */ index: IndexName /** - * Set to `create` to only index documents that do not already exist. - * Important: To reindex to a data stream destination, this argument must be `create`. + * If it is `create`, the operation will only index documents that do not already exist (also known as "put if absent"). + * + * IMPORTANT: To reindex to a data stream destination, this argument must be `create`. * @server_default index */ op_type?: OpType /** */ pipeline?: string /** - * By default, a document's routing is preserved unless it’s changed by the script.
- * Set to `discard` to set routing to `null`, or `=value` to route using the specified `value`. + * By default, a document's routing is preserved unless it's changed by the script. + * If it is `keep`, the routing on the bulk request sent for each match is set to the routing on the match. + * If it is `discard`, the routing on the bulk request sent for each match is set to `null`. + * If it is `=value`, the routing on the bulk request sent for each match is set to the value specified after the equals sign (`=`). * @server_default keep */ routing?: Routing @@ -66,11 +69,11 @@ export class Source { /** * The name of the data stream, index, or alias you are copying from. - * Accepts a comma-separated list to reindex from multiple sources. + * It accepts a comma-separated list to reindex from multiple sources. */ index: Indices /** - * Specifies the documents to reindex using the Query DSL. + * The documents to reindex, which are specified with Query DSL. */ query?: QueryContainer /** */ remote?: RemoteSource /** * The number of documents to index per batch. - * Use when indexing from remote to ensure that the batches fit within the on-heap buffer, which defaults to a maximum size of 100 MB. + * Use it when you are indexing from remote to ensure that the batches fit within the on-heap buffer, which defaults to a maximum size of 100 MB. + * @server_default 1000 */ size?: integer /** * Slice the reindex request manually using the provided slice ID and total number of slices. */ slice?: SlicedScroll + /** + * A comma-separated list of `<field>:<direction>` pairs to sort by before indexing. + * Use it in conjunction with `max_docs` to control what documents are reindexed. + * + * WARNING: Sort in reindex is deprecated. + * Sorting in reindex was never guaranteed to index documents in order and it prevents further development of reindex, such as resilience and performance improvements. + * If used in combination with `max_docs`, consider using a query filter instead. + * @deprecated 7.6.0 + */ sort?: Sort /** - * If `true` reindexes all source fields. - * Set to a list to reindex select fields. + * If `true`, reindex all source fields. + * Set it to a list to reindex select fields. * @server_default true * @codegen_name source_fields */ _source?: Fields } @@ -99,7 +112,7 @@ export class RemoteSource { /** * The remote connection timeout. - * Defaults to 30 seconds. + * @server_default 30s */ connect_timeout?: Duration /** */ headers?: Dictionary<string, string> /** * The URL for the remote instance of Elasticsearch that you want to index from. + * This information is required when you're indexing from remote. */ host: Host /** */ password?: Password /** - * The remote socket read timeout. Defaults to 30 seconds. + * The remote socket read timeout. + * @server_default 30s */ socket_timeout?: Duration } diff --git a/specification/_global/reindex_rethrottle/ReindexRethrottleRequest.ts b/specification/_global/reindex_rethrottle/ReindexRethrottleRequest.ts index 4f99aa145e..027aaa5e17 100644 --- a/specification/_global/reindex_rethrottle/ReindexRethrottleRequest.ts +++ b/specification/_global/reindex_rethrottle/ReindexRethrottleRequest.ts @@ -25,10 +25,20 @@ import { float } from '@_types/Numeric' * Throttle a reindex operation. * * Change the number of requests per second for a particular reindex operation.
+ * For example: + * + * ``` + * POST _reindex/r1A2WoRbTwKZ516z6NEs5A:36619/_rethrottle?requests_per_second=-1 + * ``` + * + * Rethrottling that speeds up the query takes effect immediately. + * Rethrottling that slows down the query will take effect after completing the current batch. + * This behavior prevents scroll timeouts. * @rest_spec_name reindex_rethrottle * @availability stack since=2.4.0 stability=stable * @availability serverless stability=stable visibility=private * @doc_tag document + * @doc_id docs-reindex */ export interface Request extends RequestBase { urls: [ @@ -39,13 +49,14 @@ export interface Request extends RequestBase { ] path_parts: { /** - * Identifier for the task. + * The task identifier, which can be found by using the tasks API. */ task_id: Id } query_parameters: { /** * The throttle for this request in sub-requests per second. + * It can be either `-1` to turn off throttling or any decimal number like `1.7` or `12` to throttle to that level. */ requests_per_second?: float } diff --git a/specification/_global/update/UpdateRequest.ts b/specification/_global/update/UpdateRequest.ts index 09fda97395..d61706c18f 100644 --- a/specification/_global/update/UpdateRequest.ts +++ b/specification/_global/update/UpdateRequest.ts @@ -37,11 +37,30 @@ import { Duration } from '@_types/Time' /** * Update a document. - * Updates a document by running a script or passing a partial document. + * + * Update a document by running a script or passing a partial document. + * + * If the Elasticsearch security features are enabled, you must have the `index` or `write` index privilege for the target index or index alias. + * + * The script can update, delete, or skip modifying the document. + * The API also supports passing a partial document, which is merged into the existing document. + * To fully replace an existing document, use the index API. + * This operation: + * + * * Gets the document (collocated with the shard) from the index. + * * Runs the specified script. + * * Indexes the result. + * + * The document must still be reindexed, but using this API removes some network roundtrips and reduces chances of version conflicts between the GET and the index operation. + * + * The `_source` field must be enabled to use this API. + * In addition to `_source`, you can access the following variables through the `ctx` map: `_index`, `_type`, `_id`, `_version`, `_routing`, and `_now` (the current timestamp). * @rest_spec_name update * @availability stack stability=stable * @availability serverless stability=stable visibility=public + * @index_privileges write * @doc_tag document + * @doc_id docs-update */ export interface Request extends RequestBase { urls: [ @@ -51,16 +70,25 @@ export interface Request extends RequestBase { } ] path_parts: { + /** + * A unique identifier for the document to be updated. + */ id: Id + /** + * The name of the target index. + * By default, the index is created automatically if it doesn't exist. + */ index: IndexName } query_parameters: { /** * Only perform the operation if the document has this primary term. + * @ext_doc_id optimistic-concurrency */ if_primary_term?: long /** * Only perform the operation if the document has this sequence number. 
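+ * For example, this is a sketch of an update that is applied only if the document still has a known sequence number and primary term (the index name and values are illustrative):
+ *
+ * ```
+ * POST my-index-000001/_update/1?if_seq_no=3&if_primary_term=1
+ * {
+ *   "doc": {
+ *     "name": "new_name"
+ *   }
+ * }
+ * ```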
+ * @ext_doc_id optimistic-concurrency */ if_seq_no?: SequenceNumber /** @@ -69,90 +97,91 @@ */ lang?: string /** - * If 'true', Elasticsearch refreshes the affected shards to make this operation - * visible to search, if 'wait_for' then wait for a refresh to make this operation - * visible to search, if 'false' do nothing with refreshes. + * If `true`, Elasticsearch refreshes the affected shards to make this operation visible to search. + * If `wait_for`, it waits for a refresh to make this operation visible to search. + * If `false`, it does nothing with refreshes. * @server_default false */ refresh?: Refresh /** - * If true, the destination must be an index alias. + * If `true`, the destination must be an index alias. * @server_default false */ require_alias?: boolean /** - * Specify how many times should the operation be retried when a conflict occurs. + * The number of times the operation should be retried when a conflict occurs. * @server_default 0 */ retry_on_conflict?: integer /** - * Custom value used to route operations to a specific shard. + * A custom value used to route operations to a specific shard. */ routing?: Routing /** - * Period to wait for dynamic mapping updates and active shards. - * This guarantees Elasticsearch waits for at least the timeout before failing. + * The period to wait for the following operations: dynamic mapping updates and active shards. + * Elasticsearch waits for at least the timeout period before failing. * The actual wait time could be longer, particularly when multiple waits occur. * @server_default 1m */ timeout?: Duration /** - * The number of shard copies that must be active before proceeding with the operations. - * Set to 'all' or any positive integer up to the total number of shards in the index - * (number_of_replicas+1). Defaults to 1 meaning the primary shard. + * The number of copies of each shard that must be active before proceeding with the operation. + * Set to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas`+1). + * The default value of `1` means it waits for each primary shard to be active. * @server_default 1 */ wait_for_active_shards?: WaitForActiveShards /** - * Set to false to disable source retrieval. You can also specify a comma-separated - * list of the fields you want to retrieve. + * If `false`, source retrieval is turned off. + * You can also specify a comma-separated list of the fields you want to retrieve. * @server_default true */ _source?: SourceConfigParam /** - * Specify the source fields you want to exclude. + * The source fields you want to exclude. */ _source_excludes?: Fields /** - * Specify the source fields you want to retrieve. + * The source fields you want to retrieve. */ _source_includes?: Fields } body: { /** - * Set to false to disable setting 'result' in the response - * to 'noop' if no change to the document occurred. + * If `true`, the `result` in the response is set to `noop` (no operation) when there are no changes to the document. * @server_default true */ detect_noop?: boolean /** * A partial update to an existing document. + * If both `doc` and `script` are specified, `doc` is ignored. * @prop_serializer SourceFormatter`1 */ doc?: TPartialDocument /** - * Set to true to use the contents of 'doc' as the value of 'upsert' + * If `true`, use the contents of `doc` as the value of `upsert`. + * NOTE: Using ingest pipelines with `doc_as_upsert` is not supported.
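+ * For example, this is a sketch of a request that reuses the partial document as the upsert value (the index and field names are illustrative):
+ *
+ * ```
+ * POST test/_update/1
+ * {
+ *   "doc": {
+ *     "name": "new_name"
+ *   },
+ *   "doc_as_upsert": true
+ * }
+ * ```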
* @server_default false */ doc_as_upsert?: boolean /** - * Script to execute to update the document. + * The script to run to update the document. */ script?: Script /** - * Set to true to execute the script whether or not the document exists. + * If `true`, run the script whether or not the document exists. * @server_default false */ scripted_upsert?: boolean /** - * Set to false to disable source retrieval. You can also specify a comma-separated - * list of the fields you want to retrieve. + * If `false`, turn off source retrieval. + * You can also specify a comma-separated list of the fields you want to retrieve. * @server_default true */ _source?: SourceConfig /** - * If the document does not already exist, the contents of 'upsert' are inserted as a - * new document. If the document exists, the 'script' is executed. + * If the document does not already exist, the contents of `upsert` are inserted as a new document. + * If the document exists, the `script` is run. * @prop_serializer SourceFormatter`1 */ upsert?: TDocument diff --git a/specification/_global/update/examples/request/UpdateRequestExample1.yaml b/specification/_global/update/examples/request/UpdateRequestExample1.yaml new file mode 100644 index 0000000000..f0268d534b --- /dev/null +++ b/specification/_global/update/examples/request/UpdateRequestExample1.yaml @@ -0,0 +1,8 @@ +summary: Update a counter with a script +# method_request: POST test/_update/1 +description: Run `POST test/_update/1` to increment a counter by using a script. +# type: request +value: "{\n \"script\" : {\n \"source\": \"ctx._source.counter += params.count\"\ ,\n \"lang\": \"painless\",\n \"params\" : {\n \"count\" : 4\n }\n\ \ }\n}" diff --git a/specification/_global/update/examples/request/UpdateRequestExample10.yaml b/specification/_global/update/examples/request/UpdateRequestExample10.yaml new file mode 100644 index 0000000000..3a90656d9e --- /dev/null +++ b/specification/_global/update/examples/request/UpdateRequestExample10.yaml @@ -0,0 +1,11 @@ +summary: Scripted upsert +# method_request: POST test/_update/1 +description: > + Run `POST test/_update/1` to perform a scripted upsert. + When `scripted_upsert` is `true`, the script runs whether or not the document exists. +# type: request +value: "{\n \"scripted_upsert\": true,\n \"script\": {\n \"source\": \"\"\"\n\ \ if ( ctx.op == 'create' ) {\n ctx._source.counter = params.count\n\ \ } else {\n ctx._source.counter += params.count\n }\n \"\"\"\ ,\n \"params\": {\n \"count\": 4\n }\n },\n \"upsert\": {}\n}" diff --git a/specification/_global/update/examples/request/UpdateRequestExample11.yaml b/specification/_global/update/examples/request/UpdateRequestExample11.yaml new file mode 100644 index 0000000000..130e9f8b76 --- /dev/null +++ b/specification/_global/update/examples/request/UpdateRequestExample11.yaml @@ -0,0 +1,9 @@ +summary: Doc as upsert +# method_request: POST test/_update/1 +description: > + Run `POST test/_update/1` to perform a doc as upsert. + Instead of sending a partial `doc` plus an `upsert` doc, you can set `doc_as_upsert` to `true` to use the contents of `doc` as the `upsert` value.
+# type: request +value: "{\n \"doc\": {\n \"name\": \"new_name\"\n },\n \"doc_as_upsert\": true\n\ }" diff --git a/specification/_global/update/examples/request/UpdateRequestExample2.yaml b/specification/_global/update/examples/request/UpdateRequestExample2.yaml new file mode 100644 index 0000000000..18d537b6f9 --- /dev/null +++ b/specification/_global/update/examples/request/UpdateRequestExample2.yaml @@ -0,0 +1,10 @@ +summary: Add a tag with a script +# method_request: POST test/_update/1 +description: > + Run `POST test/_update/1` to use a script to add a tag to a list of tags. + In this example, it is just a list, so the tag is added even if it already exists. +# type: request +value: "{\n \"script\": {\n \"source\": \"ctx._source.tags.add(params.tag)\",\n\ \ \"lang\": \"painless\",\n \"params\": {\n \"tag\": \"blue\"\n }\n\ \ }\n}" diff --git a/specification/_global/update/examples/request/UpdateRequestExample3.yaml b/specification/_global/update/examples/request/UpdateRequestExample3.yaml new file mode 100644 index 0000000000..e7d10056fb --- /dev/null +++ b/specification/_global/update/examples/request/UpdateRequestExample3.yaml @@ -0,0 +1,12 @@ +summary: Remove a tag with a script +# method_request: POST test/_update/1 +description: > + Run `POST test/_update/1` to use a script to remove a tag from a list of tags. + The Painless function to remove a tag takes the array index of the element you want to remove. + To avoid a possible runtime error, you first need to make sure the tag exists. + If the list contains duplicates of the tag, this script just removes one occurrence. +# type: request +value: "{\n \"script\": {\n \"source\": \"if (ctx._source.tags.contains(params.tag))\ \ { ctx._source.tags.remove(ctx._source.tags.indexOf(params.tag)) }\",\n \"lang\"\ : \"painless\",\n \"params\": {\n \"tag\": \"blue\"\n }\n }\n}" diff --git a/specification/_global/update/examples/request/UpdateRequestExample4.yaml b/specification/_global/update/examples/request/UpdateRequestExample4.yaml new file mode 100644 index 0000000000..006d9f536c --- /dev/null +++ b/specification/_global/update/examples/request/UpdateRequestExample4.yaml @@ -0,0 +1,6 @@ +summary: Add fields with a script +# method_request: POST test/_update/1 +description: > + Run `POST test/_update/1` to use a script to add a field `new_field` to the document. +# type: request +value: "{\n \"script\" : \"ctx._source.new_field = 'value_of_new_field'\"\n}" diff --git a/specification/_global/update/examples/request/UpdateRequestExample5.yaml b/specification/_global/update/examples/request/UpdateRequestExample5.yaml new file mode 100644 index 0000000000..5f0a334f7e --- /dev/null +++ b/specification/_global/update/examples/request/UpdateRequestExample5.yaml @@ -0,0 +1,6 @@ +summary: Remove fields with a script +# method_request: POST test/_update/1 +description: > + Run `POST test/_update/1` to use a script to remove a field `new_field` from the document. +# type: request +value: "{\n \"script\" : \"ctx._source.remove('new_field')\"\n}" diff --git a/specification/_global/update/examples/request/UpdateRequestExample6.yaml b/specification/_global/update/examples/request/UpdateRequestExample6.yaml new file mode 100644 index 0000000000..074785f143 --- /dev/null +++ b/specification/_global/update/examples/request/UpdateRequestExample6.yaml @@ -0,0 +1,6 @@ +summary: Remove subfields with a script +# method_request: POST test/_update/1 +description: > + Run `POST test/_update/1` to use a script to remove a subfield from an object field.
+# type: request +value: "{\n \"script\": \"ctx._source['my-object'].remove('my-subfield')\"\n}" diff --git a/specification/_global/update/examples/request/UpdateRequestExample7.yaml b/specification/_global/update/examples/request/UpdateRequestExample7.yaml new file mode 100644 index 0000000000..b0c7438984 --- /dev/null +++ b/specification/_global/update/examples/request/UpdateRequestExample7.yaml @@ -0,0 +1,10 @@ +summary: Change the operation with a script +# method_request: POST test/_update/1 +description: > + Run `POST test/_update/1` to change the operation that runs from within the script. + For example, this request deletes the document if the `tags` field contains `green`; otherwise, it does nothing (`noop`). +# type: request +value: "{\n \"script\": {\n \"source\": \"if (ctx._source.tags.contains(params.tag))\ \ { ctx.op = 'delete' } else { ctx.op = 'noop' }\",\n \"lang\": \"painless\"\ ,\n \"params\": {\n \"tag\": \"green\"\n }\n }\n}" diff --git a/specification/_global/update/examples/request/UpdateRequestExample8.yaml b/specification/_global/update/examples/request/UpdateRequestExample8.yaml new file mode 100644 index 0000000000..d46fe73164 --- /dev/null +++ b/specification/_global/update/examples/request/UpdateRequestExample8.yaml @@ -0,0 +1,6 @@ +summary: Update part of a document +# method_request: POST test/_update/1 +description: > + Run `POST test/_update/1` to do a partial update that adds a new field to the existing document. +# type: request +value: "{\n \"doc\": {\n \"name\": \"new_name\"\n }\n}" diff --git a/specification/_global/update/examples/request/UpdateRequestExample9.yaml b/specification/_global/update/examples/request/UpdateRequestExample9.yaml new file mode 100644 index 0000000000..bf2e4662d1 --- /dev/null +++ b/specification/_global/update/examples/request/UpdateRequestExample9.yaml @@ -0,0 +1,10 @@ +summary: Upsert +# method_request: POST test/_update/1 +description: > + Run `POST test/_update/1` to perform an upsert. + If the document does not already exist, the contents of the upsert element are inserted as a new document. If the document exists, the script is run. +# type: request +value: "{\n \"script\": {\n \"source\": \"ctx._source.counter += params.count\"\ ,\n \"lang\": \"painless\",\n \"params\": {\n \"count\": 4\n }\n \ \ },\n \"upsert\": {\n \"counter\": 1\n }\n}" diff --git a/specification/_global/update/examples/response/UpdateResponseExample1.yaml b/specification/_global/update/examples/response/UpdateResponseExample1.yaml new file mode 100644 index 0000000000..b8ed42a503 --- /dev/null +++ b/specification/_global/update/examples/response/UpdateResponseExample1.yaml @@ -0,0 +1,10 @@ +summary: Detect noop updates +description: > + By default, updates that don't change anything are detected and return `"result": "noop"`. +# type: response +# response_code: '' +value: "{\n \"_shards\": {\n \"total\": 0,\n \"successful\": 0,\n\ \ \"failed\": 0\n },\n \"_index\": \"test\",\n \"_id\": \"1\",\n \ \ \"_version\": 2,\n \"_primary_term\": 1,\n \"_seq_no\": 1,\n \"result\"\ : \"noop\"\n}" diff --git a/specification/_types/Retries.ts b/specification/_types/Retries.ts index 0a744f85ae..03c767e67b 100644 --- a/specification/_types/Retries.ts +++ b/specification/_types/Retries.ts @@ -20,6 +20,12 @@ import { long } from './Numeric' export class Retries { + /** + * The number of bulk actions retried. + */ bulk: long + /** + * The number of search actions retried.
+ */ search: long } diff --git a/specification/_types/Scripting.ts b/specification/_types/Scripting.ts index 8532f867f4..685d4ee789 100644 --- a/specification/_types/Scripting.ts +++ b/specification/_types/Scripting.ts @@ -46,7 +46,7 @@ export enum ScriptLanguage { export class StoredScript { /** - * Specifies the language the script is written in. + * The language the script is written in. */ lang: ScriptLanguage options?: Dictionary