From 6e300663d8d5b7a4cb18322533924ac18c64a7cb Mon Sep 17 00:00:00 2001
From: David Olaru <dolaru@elastic.co>
Date: Mon, 30 Oct 2023 17:32:30 +0000
Subject: [PATCH 1/2] Add missing param `scores` on
 `ml.put_trained_model_vocabulary`

This adds the missing `scores` parameter that was introduced in v8.9.0.

[Docs](https://www.elastic.co/guide/en/elasticsearch/reference/8.9/put-trained-model-vocabulary.html#ml-put-trained-model-vocabulary-request-body):
> (Optional, array) Vocabulary value scores used by sentence-piece tokenization. Must have the same length as vocabulary. Required for unigram sentence-piece tokenized models like XLMRoberta and T5.
---
 elasticsearch/_async/client/ml.py | 6 ++++++
 elasticsearch/_sync/client/ml.py  | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/elasticsearch/_async/client/ml.py b/elasticsearch/_async/client/ml.py
index 9c1339aa1..285059d74 100644
--- a/elasticsearch/_async/client/ml.py
+++ b/elasticsearch/_async/client/ml.py
@@ -3302,6 +3302,7 @@ async def put_trained_model_vocabulary(
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         merges: t.Optional[t.Sequence[str]] = None,
+        scores: t.Optional[t.Sequence[float]] = None,
         pretty: t.Optional[bool] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -3312,11 +3313,14 @@ async def put_trained_model_vocabulary(
         :param model_id: The unique identifier of the trained model.
         :param vocabulary: The model vocabulary, which must not be empty.
         :param merges: The optional model merges if required by the tokenizer.
+        :param scores: The optional model scores if required by the tokenizer.
         """
         if model_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'model_id'")
         if vocabulary is None:
             raise ValueError("Empty value passed for parameter 'vocabulary'")
+        if scores is not None and len(scores) != len(vocabulary):
+            raise ValueError("The value length for the 'scores' and 'vocabulary' parameters is not the same")
         __path = f"/_ml/trained_models/{_quote(model_id)}/vocabulary"
         __body: t.Dict[str, t.Any] = {}
         __query: t.Dict[str, t.Any] = {}
@@ -3330,6 +3334,8 @@ async def put_trained_model_vocabulary(
             __query["human"] = human
         if merges is not None:
             __body["merges"] = merges
+        if scores is not None:
+            __body["scores"] = scores
         if pretty is not None:
             __query["pretty"] = pretty
         __headers = {"accept": "application/json", "content-type": "application/json"}
diff --git a/elasticsearch/_sync/client/ml.py b/elasticsearch/_sync/client/ml.py
index ce85e587c..7752aea8b 100644
--- a/elasticsearch/_sync/client/ml.py
+++ b/elasticsearch/_sync/client/ml.py
@@ -3302,6 +3302,7 @@ def put_trained_model_vocabulary(
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         merges: t.Optional[t.Sequence[str]] = None,
+        scores: t.Optional[t.Sequence[float]] = None,
         pretty: t.Optional[bool] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -3312,11 +3313,14 @@ def put_trained_model_vocabulary(
         :param model_id: The unique identifier of the trained model.
         :param vocabulary: The model vocabulary, which must not be empty.
         :param merges: The optional model merges if required by the tokenizer.
+        :param scores: The optional vocabulary value scores if required by the tokenizer.
         """
         if model_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'model_id'")
         if vocabulary is None:
             raise ValueError("Empty value passed for parameter 'vocabulary'")
+        if scores is not None and len(scores) != len(vocabulary):
+            raise ValueError("The value length for the 'scores' and 'vocabulary' parameters is not the same")
         __path = f"/_ml/trained_models/{_quote(model_id)}/vocabulary"
         __body: t.Dict[str, t.Any] = {}
         __query: t.Dict[str, t.Any] = {}
@@ -3330,6 +3334,8 @@ def put_trained_model_vocabulary(
             __query["human"] = human
         if merges is not None:
             __body["merges"] = merges
+        if scores is not None:
+            __body["scores"] = scores
         if pretty is not None:
             __query["pretty"] = pretty
         __headers = {"accept": "application/json", "content-type": "application/json"}

From 5f8a0002fda4ca7db2c0c1eb7c3304231c161601 Mon Sep 17 00:00:00 2001
From: David Olaru <dolaru@elastic.co>
Date: Mon, 30 Oct 2023 17:41:28 +0000
Subject: [PATCH 2/2] Fix formatting & async docstring

---
 elasticsearch/_async/client/ml.py | 6 ++++--
 elasticsearch/_sync/client/ml.py  | 4 +++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/elasticsearch/_async/client/ml.py b/elasticsearch/_async/client/ml.py
index 285059d74..bba4a2f6f 100644
--- a/elasticsearch/_async/client/ml.py
+++ b/elasticsearch/_async/client/ml.py
@@ -3313,14 +3313,16 @@ async def put_trained_model_vocabulary(
         :param model_id: The unique identifier of the trained model.
         :param vocabulary: The model vocabulary, which must not be empty.
         :param merges: The optional model merges if required by the tokenizer.
-        :param scores: The optional model scores if required by the tokenizer.
+        :param scores: The optional vocabulary value scores if required by the tokenizer.
         """
         if model_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'model_id'")
         if vocabulary is None:
             raise ValueError("Empty value passed for parameter 'vocabulary'")
         if scores is not None and len(scores) != len(vocabulary):
-            raise ValueError("The value length for the 'scores' and 'vocabulary' parameters is not the same")
+            raise ValueError(
+                "The value length for the 'scores' and 'vocabulary' parameters is not the same"
+            )
         __path = f"/_ml/trained_models/{_quote(model_id)}/vocabulary"
         __body: t.Dict[str, t.Any] = {}
         __query: t.Dict[str, t.Any] = {}
diff --git a/elasticsearch/_sync/client/ml.py b/elasticsearch/_sync/client/ml.py
index 7752aea8b..c57405984 100644
--- a/elasticsearch/_sync/client/ml.py
+++ b/elasticsearch/_sync/client/ml.py
@@ -3320,7 +3320,9 @@ def put_trained_model_vocabulary(
         if vocabulary is None:
             raise ValueError("Empty value passed for parameter 'vocabulary'")
         if scores is not None and len(scores) != len(vocabulary):
-            raise ValueError("The value length for the 'scores' and 'vocabulary' parameters is not the same")
+            raise ValueError(
+                "The value length for the 'scores' and 'vocabulary' parameters is not the same"
+            )
         __path = f"/_ml/trained_models/{_quote(model_id)}/vocabulary"
         __body: t.Dict[str, t.Any] = {}
         __query: t.Dict[str, t.Any] = {}