From 6e300663d8d5b7a4cb18322533924ac18c64a7cb Mon Sep 17 00:00:00 2001 From: David Olaru Date: Mon, 30 Oct 2023 17:32:30 +0000 Subject: [PATCH 1/2] Add missing param `scores` on `ml.put_trained_model_vocabulary` This adds the missing `scores` parameter that was introduced in v8.9.0. [Docs](https://www.elastic.co/guide/en/elasticsearch/reference/8.9/put-trained-model-vocabulary.html#ml-put-trained-model-vocabulary-request-body): > (Optional, array) Vocabulary value scores used by sentence-piece tokenization. Must have the same length as vocabulary. Required for unigram sentence-piece tokenized models like XLMRoberta and T5. --- elasticsearch/_async/client/ml.py | 6 ++++++ elasticsearch/_sync/client/ml.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/elasticsearch/_async/client/ml.py b/elasticsearch/_async/client/ml.py index 9c1339aa1..285059d74 100644 --- a/elasticsearch/_async/client/ml.py +++ b/elasticsearch/_async/client/ml.py @@ -3302,6 +3302,7 @@ async def put_trained_model_vocabulary( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, merges: t.Optional[t.Sequence[str]] = None, + scores: t.Optional[t.Sequence[str]] = None, pretty: t.Optional[bool] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -3312,11 +3313,14 @@ async def put_trained_model_vocabulary( :param model_id: The unique identifier of the trained model. :param vocabulary: The model vocabulary, which must not be empty. :param merges: The optional model merges if required by the tokenizer. + :param scores: The optional model scores if required by the tokenizer. """ if model_id in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'model_id'") if vocabulary is None: raise ValueError("Empty value passed for parameter 'vocabulary'") + if scores is not None and len(scores) != len(vocabulary): + raise ValueError("The value length for the 'scores' and 'vocabulary' parameters is not the same") __path = f"/_ml/trained_models/{_quote(model_id)}/vocabulary" __body: t.Dict[str, t.Any] = {} __query: t.Dict[str, t.Any] = {} @@ -3330,6 +3334,8 @@ async def put_trained_model_vocabulary( __query["human"] = human if merges is not None: __body["merges"] = merges + if scores is not None: + __body["scores"] = scores if pretty is not None: __query["pretty"] = pretty __headers = {"accept": "application/json", "content-type": "application/json"} diff --git a/elasticsearch/_sync/client/ml.py b/elasticsearch/_sync/client/ml.py index ce85e587c..7752aea8b 100644 --- a/elasticsearch/_sync/client/ml.py +++ b/elasticsearch/_sync/client/ml.py @@ -3302,6 +3302,7 @@ def put_trained_model_vocabulary( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, merges: t.Optional[t.Sequence[str]] = None, + scores: t.Optional[t.Sequence[str]] = None, pretty: t.Optional[bool] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -3312,11 +3313,14 @@ def put_trained_model_vocabulary( :param model_id: The unique identifier of the trained model. :param vocabulary: The model vocabulary, which must not be empty. :param merges: The optional model merges if required by the tokenizer. + :param scores: The optional vocabulary value scores if required by the tokenizer. """ if model_id in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'model_id'") if vocabulary is None: raise ValueError("Empty value passed for parameter 'vocabulary'") + if scores is not None and len(scores) != len(vocabulary): + raise ValueError("The value length for the 'scores' and 'vocabulary' parameters is not the same") __path = f"/_ml/trained_models/{_quote(model_id)}/vocabulary" __body: t.Dict[str, t.Any] = {} __query: t.Dict[str, t.Any] = {} @@ -3330,6 +3334,8 @@ def put_trained_model_vocabulary( __query["human"] = human if merges is not None: __body["merges"] = merges + if scores is not None: + __body["scores"] = scores if pretty is not None: __query["pretty"] = pretty __headers = {"accept": "application/json", "content-type": "application/json"} From 5f8a0002fda4ca7db2c0c1eb7c3304231c161601 Mon Sep 17 00:00:00 2001 From: David Olaru Date: Mon, 30 Oct 2023 17:41:28 +0000 Subject: [PATCH 2/2] Fix formatting & async docstring --- elasticsearch/_async/client/ml.py | 6 ++++-- elasticsearch/_sync/client/ml.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/elasticsearch/_async/client/ml.py b/elasticsearch/_async/client/ml.py index 285059d74..bba4a2f6f 100644 --- a/elasticsearch/_async/client/ml.py +++ b/elasticsearch/_async/client/ml.py @@ -3313,14 +3313,16 @@ async def put_trained_model_vocabulary( :param model_id: The unique identifier of the trained model. :param vocabulary: The model vocabulary, which must not be empty. :param merges: The optional model merges if required by the tokenizer. - :param scores: The optional model scores if required by the tokenizer. + :param scores: The optional vocabulary value scores if required by the tokenizer. """ if model_id in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'model_id'") if vocabulary is None: raise ValueError("Empty value passed for parameter 'vocabulary'") if scores is not None and len(scores) != len(vocabulary): - raise ValueError("The value length for the 'scores' and 'vocabulary' parameters is not the same") + raise ValueError( + "The value length for the 'scores' and 'vocabulary' parameters is not the same" + ) __path = f"/_ml/trained_models/{_quote(model_id)}/vocabulary" __body: t.Dict[str, t.Any] = {} __query: t.Dict[str, t.Any] = {} diff --git a/elasticsearch/_sync/client/ml.py b/elasticsearch/_sync/client/ml.py index 7752aea8b..c57405984 100644 --- a/elasticsearch/_sync/client/ml.py +++ b/elasticsearch/_sync/client/ml.py @@ -3320,7 +3320,9 @@ def put_trained_model_vocabulary( if vocabulary is None: raise ValueError("Empty value passed for parameter 'vocabulary'") if scores is not None and len(scores) != len(vocabulary): - raise ValueError("The value length for the 'scores' and 'vocabulary' parameters is not the same") + raise ValueError( + "The value length for the 'scores' and 'vocabulary' parameters is not the same" + ) __path = f"/_ml/trained_models/{_quote(model_id)}/vocabulary" __body: t.Dict[str, t.Any] = {} __query: t.Dict[str, t.Any] = {}