diff --git a/elasticsearch/dsl/__init__.py b/elasticsearch/dsl/__init__.py new file mode 100644 index 000000000..a91e84424 --- /dev/null +++ b/elasticsearch/dsl/__init__.py @@ -0,0 +1,206 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from . import async_connections, connections +from .aggs import A, Agg +from .analysis import analyzer, char_filter, normalizer, token_filter, tokenizer +from .document import AsyncDocument, Document +from .document_base import InnerDoc, M, MetaField, mapped_field +from .exceptions import ( + ElasticsearchDslException, + IllegalOperation, + UnknownDslObject, + ValidationException, +) +from .faceted_search import ( + AsyncFacetedSearch, + DateHistogramFacet, + Facet, + FacetedResponse, + FacetedSearch, + HistogramFacet, + NestedFacet, + RangeFacet, + TermsFacet, +) +from .field import ( + Binary, + Boolean, + Byte, + Completion, + ConstantKeyword, + CustomField, + Date, + DateRange, + DenseVector, + Double, + DoubleRange, + Field, + Float, + FloatRange, + GeoPoint, + GeoShape, + HalfFloat, + Integer, + IntegerRange, + Ip, + IpRange, + Join, + Keyword, + Long, + LongRange, + Murmur3, + Nested, + Object, + Percolator, + Point, + RangeField, + RankFeature, + RankFeatures, + ScaledFloat, + SearchAsYouType, 
+ Shape, + Short, + SparseVector, + Text, + TokenCount, + construct_field, +) +from .function import SF +from .index import ( + AsyncComposableIndexTemplate, + AsyncIndex, + AsyncIndexTemplate, + ComposableIndexTemplate, + Index, + IndexTemplate, +) +from .mapping import AsyncMapping, Mapping +from .query import Q, Query +from .response import AggResponse, Response, UpdateByQueryResponse +from .search import ( + AsyncEmptySearch, + AsyncMultiSearch, + AsyncSearch, + EmptySearch, + MultiSearch, + Search, +) +from .update_by_query import AsyncUpdateByQuery, UpdateByQuery +from .utils import AttrDict, AttrList, DslBase +from .wrappers import Range + +VERSION = (8, 17, 1) +__version__ = VERSION +__versionstr__ = ".".join(map(str, VERSION)) +__all__ = [ + "A", + "Agg", + "AggResponse", + "AsyncComposableIndexTemplate", + "AsyncDocument", + "AsyncEmptySearch", + "AsyncFacetedSearch", + "AsyncIndex", + "AsyncIndexTemplate", + "AsyncMapping", + "AsyncMultiSearch", + "AsyncSearch", + "AsyncUpdateByQuery", + "AttrDict", + "AttrList", + "Binary", + "Boolean", + "Byte", + "Completion", + "ComposableIndexTemplate", + "ConstantKeyword", + "CustomField", + "Date", + "DateHistogramFacet", + "DateRange", + "DenseVector", + "Document", + "Double", + "DoubleRange", + "DslBase", + "ElasticsearchDslException", + "EmptySearch", + "Facet", + "FacetedResponse", + "FacetedSearch", + "Field", + "Float", + "FloatRange", + "GeoPoint", + "GeoShape", + "HalfFloat", + "HistogramFacet", + "IllegalOperation", + "Index", + "IndexTemplate", + "InnerDoc", + "Integer", + "IntegerRange", + "Ip", + "IpRange", + "Join", + "Keyword", + "Long", + "LongRange", + "M", + "Mapping", + "MetaField", + "MultiSearch", + "Murmur3", + "Nested", + "NestedFacet", + "Object", + "Percolator", + "Point", + "Q", + "Query", + "Range", + "RangeFacet", + "RangeField", + "RankFeature", + "RankFeatures", + "Response", + "SF", + "ScaledFloat", + "Search", + "SearchAsYouType", + "Shape", + "Short", + "SparseVector", + 
"TermsFacet", + "Text", + "TokenCount", + "UnknownDslObject", + "UpdateByQuery", + "UpdateByQueryResponse", + "ValidationException", + "analyzer", + "async_connections", + "char_filter", + "connections", + "construct_field", + "mapped_field", + "normalizer", + "token_filter", + "tokenizer", +] diff --git a/elasticsearch/dsl/_async/__init__.py b/elasticsearch/dsl/_async/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/elasticsearch/dsl/_async/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/elasticsearch/dsl/_async/document.py b/elasticsearch/dsl/_async/document.py new file mode 100644 index 000000000..3f5d69f11 --- /dev/null +++ b/elasticsearch/dsl/_async/document.py @@ -0,0 +1,521 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
class AsyncIndexMeta(DocumentMeta):
    """
    Metaclass for :class:`AsyncDocument` that attaches an :class:`AsyncIndex`
    to the document classes it creates.

    The first class created through this metaclass only flips the
    ``_document_initialized`` flag; every class created afterwards receives an
    ``_index`` attribute and is registered on that index with
    ``index.document()``.
    """

    _index: AsyncIndex

    # global flag to guard us from associating an Index with the base Document
    # class, only user defined subclasses should have an _index attr
    _document_initialized = False

    def __new__(
        cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]
    ) -> "AsyncIndexMeta":
        new_cls = super().__new__(cls, name, bases, attrs)
        if cls._document_initialized:
            # the ``Index`` entry of the class namespace (if any) carries the
            # index options read by construct_index()
            index_opts = attrs.pop("Index", None)
            index = cls.construct_index(index_opts, bases)
            new_cls._index = index
            index.document(new_cls)
        cls._document_initialized = True
        return cast(AsyncIndexMeta, new_cls)

    @classmethod
    def construct_index(
        cls, opts: Dict[str, Any], bases: Tuple[type, ...]
    ) -> AsyncIndex:
        """
        Build the :class:`AsyncIndex` for a document class.

        :arg opts: the object stored under ``Index`` in the class namespace,
            or ``None``. Its ``name``, ``using``, ``settings``, ``aliases``
            and ``analyzers`` attributes are read with ``getattr``, each one
            being optional.
        :arg bases: base classes of the document class; when ``opts`` is
            ``None`` the ``_index`` of the first base that has one is reused.

        :return: the index to associate with the document class
        """
        if opts is None:
            for b in bases:
                if hasattr(b, "_index"):
                    return b._index

            # Set None as Index name so it will set _all while making the query
            return AsyncIndex(name=None)

        i = AsyncIndex(
            getattr(opts, "name", "*"), using=getattr(opts, "using", "default")
        )
        i.settings(**getattr(opts, "settings", {}))
        i.aliases(**getattr(opts, "aliases", {}))
        for a in getattr(opts, "analyzers", ()):
            i.analyzer(a)
        return i


@dataclass_transform(field_specifiers=(mapped_field,))
class AsyncDocument(DocumentBase, metaclass=AsyncIndexMeta):
    """
    Model-like class for persisting documents in elasticsearch.
    """

    if TYPE_CHECKING:
        _index: AsyncIndex

    @classmethod
    def _get_using(cls, using: Optional[AsyncUsingType] = None) -> AsyncUsingType:
        """
        Return ``using`` if given, otherwise the connection configured on the
        index associated with this document class.
        """
        return cast(AsyncUsingType, using or cls._index._using)

    @classmethod
    def _get_connection(
        cls, using: Optional[AsyncUsingType] = None
    ) -> "AsyncElasticsearch":
        """
        Resolve ``using`` (see :meth:`_get_using`) through ``get_connection``.
        """
        return get_connection(cls._get_using(using))

    @classmethod
    async def init(
        cls, index: Optional[str] = None, using: Optional[AsyncUsingType] = None
    ) -> None:
        """
        Create the index and populate the mappings in elasticsearch.

        :arg index: optional index name; when given, a clone of the document's
            index with that name is saved instead of the index itself
        :arg using: connection alias to use, passed to the index ``save()``
        """
        i = cls._index
        if index:
            i = i.clone(name=index)
        await i.save(using=using)

    @classmethod
    def search(
        cls, using: Optional[AsyncUsingType] = None, index: Optional[str] = None
    ) -> AsyncSearch[Self]:
        """
        Create an :class:`~elasticsearch.dsl.AsyncSearch` instance that will
        search over this ``Document``.

        :arg using: connection alias to use, defaults to the connection of
            the index associated with the ``Document``
        :arg index: elasticsearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        """
        return AsyncSearch(
            using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls]
        )

    @classmethod
    async def get(
        cls,
        id: str,
        using: Optional[AsyncUsingType] = None,
        index: Optional[str] = None,
        **kwargs: Any,
    ) -> Optional[Self]:
        """
        Retrieve a single document from elasticsearch using its ``id``.

        :arg id: ``id`` of the document to be retrieved
        :arg index: elasticsearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to the connection of
            the index associated with the ``Document``

        Any additional keyword arguments will be passed to
        ``Elasticsearch.get`` unchanged.

        :return: the document instance, or ``None`` when the response does
            not report the document as found
        """
        es = cls._get_connection(using)
        doc = await es.get(index=cls._default_index(index), id=id, **kwargs)
        if not doc.get("found", False):
            return None
        return cls.from_es(doc)

    @classmethod
    async def exists(
        cls,
        id: str,
        using: Optional[AsyncUsingType] = None,
        index: Optional[str] = None,
        **kwargs: Any,
    ) -> bool:
        """
        Check whether a document with the given ``id`` exists in
        elasticsearch.

        :arg id: ``id`` of the document to check
        :arg index: elasticsearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to the connection of
            the index associated with the ``Document``

        Any additional keyword arguments will be passed to
        ``Elasticsearch.exists`` unchanged.
        """
        es = cls._get_connection(using)
        return bool(await es.exists(index=cls._default_index(index), id=id, **kwargs))

    @classmethod
    async def mget(
        cls,
        docs: List[Dict[str, Any]],
        using: Optional[AsyncUsingType] = None,
        index: Optional[str] = None,
        raise_on_error: bool = True,
        missing: str = "none",
        **kwargs: Any,
    ) -> List[Optional[Self]]:
        r"""
        Retrieve multiple documents by their ``id``\s. Returns a list of
        instances in the same order as requested.

        :arg docs: list of ``id``\s of the documents to be retrieved or a list
            of document specifications as per
            https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html
            (entries that are not mappings are sent as ``{"_id": entry}``)
        :arg index: elasticsearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to the connection of
            the index associated with the ``Document``
        :arg raise_on_error: if ``True`` (default) a ``RequestError`` is
            raised when any entry of the response carries an error
        :arg missing: what to do when one of the documents requested is not
            found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise
            ``NotFoundError``) or ``'skip'`` (ignore the missing document).

        Any additional keyword arguments will be passed to
        ``Elasticsearch.mget`` unchanged.

        :raises ValueError: if ``missing`` is not one of the valid options
        """
        if missing not in ("raise", "skip", "none"):
            raise ValueError("'missing' must be 'raise', 'skip', or 'none'.")
        es = cls._get_connection(using)
        body = {
            "docs": [
                doc if isinstance(doc, collections.abc.Mapping) else {"_id": doc}
                for doc in docs
            ]
        }
        results = await es.mget(index=cls._default_index(index), body=body, **kwargs)

        objs: List[Optional[Self]] = []
        # NOTE: the two lists below collect the raw entries of the mget
        # response (they are indexed with "_id" further down), not instances.
        error_docs: List[Self] = []
        missing_docs: List[Self] = []
        for doc in results["docs"]:
            if doc.get("found"):
                if error_docs or missing_docs:
                    # We're going to raise an exception anyway, so avoid an
                    # expensive call to cls.from_es().
                    continue

                objs.append(cls.from_es(doc))

            elif doc.get("error"):
                if raise_on_error:
                    error_docs.append(doc)
                if missing == "none":
                    objs.append(None)

            # The doc didn't cause an error, but the doc also wasn't found.
            elif missing == "raise":
                missing_docs.append(doc)
            elif missing == "none":
                objs.append(None)

        if error_docs:
            error_ids = [doc["_id"] for doc in error_docs]
            message = "Required routing not provided for documents %s."
            message %= ", ".join(error_ids)
            raise RequestError(400, message, error_docs)  # type: ignore
        if missing_docs:
            missing_ids = [doc["_id"] for doc in missing_docs]
            message = f"Documents {', '.join(missing_ids)} not found."
            raise NotFoundError(404, message, {"docs": missing_docs})  # type: ignore
        return objs

    async def delete(
        self,
        using: Optional[AsyncUsingType] = None,
        index: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """
        Delete the instance in elasticsearch.

        :arg index: elasticsearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to the connection of
            the index associated with the ``Document``

        Any additional keyword arguments will be passed to
        ``Elasticsearch.delete`` unchanged; they take precedence over values
        extracted from the document's ``meta``.
        """
        es = self._get_connection(using)
        # extract routing etc from meta
        doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta}

        # Optimistic concurrency control
        if "seq_no" in self.meta and "primary_term" in self.meta:
            doc_meta["if_seq_no"] = self.meta["seq_no"]
            doc_meta["if_primary_term"] = self.meta["primary_term"]

        doc_meta.update(kwargs)
        i = self._get_index(index)
        assert i is not None

        await es.delete(index=i, **doc_meta)

    async def update(
        self,
        using: Optional[AsyncUsingType] = None,
        index: Optional[str] = None,
        detect_noop: bool = True,
        doc_as_upsert: bool = False,
        refresh: bool = False,
        retry_on_conflict: Optional[int] = None,
        script: Optional[Union[str, Dict[str, Any]]] = None,
        script_id: Optional[str] = None,
        scripted_upsert: bool = False,
        upsert: Optional[Dict[str, Any]] = None,
        return_doc_meta: bool = False,
        **fields: Any,
    ) -> Any:
        """
        Partial update of the document, specify fields you wish to update and
        both the instance and the document in elasticsearch will be updated::

            doc = MyDocument(title='Document Title!')
            await doc.save()
            await doc.update(title='New Document Title!')

        :arg index: elasticsearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to the connection of
            the index associated with the ``Document``
        :arg detect_noop: Set to ``False`` to disable noop detection.
        :arg refresh: Control when the changes made by this request are visible
            to search. Set to ``True`` for immediate effect.
        :arg retry_on_conflict: In between the get and indexing phases of the
            update, it is possible that another process might have already
            updated the same document. By default, the update will fail with a
            version conflict exception. The retry_on_conflict parameter
            controls how many times to retry the update before finally throwing
            an exception.
        :arg doc_as_upsert: Instead of sending a partial doc plus an upsert
            doc, setting doc_as_upsert to true will use the contents of doc as
            the upsert value
        :arg script: the source code of the script as a string, or a dictionary
            with script attributes to update. The dictionary is not modified.
        :arg script_id: id of the script to run; only used when ``script`` is
            not given
        :arg scripted_upsert: value of ``scripted_upsert`` sent with a
            scripted update
        :arg upsert: ``upsert`` document sent with a scripted update
        :arg return_doc_meta: set to ``True`` to return all metadata from the
            update API call instead of only the operation result
        :arg fields: for a scripted update these are merged into the script
            ``params``; otherwise they are merged into the instance and sent
            as the partial document

        :raises IllegalOperation: if neither a script nor any field is given

        :return: operation result noop/updated
        """
        body: Dict[str, Any] = {
            "doc_as_upsert": doc_as_upsert,
            "detect_noop": detect_noop,
        }

        # scripted update
        if script or script_id:
            if upsert is not None:
                body["upsert"] = upsert

            if script:
                if isinstance(script, str):
                    script_body: Dict[str, Any] = {"source": script}
                else:
                    # work on a copy so the caller's dictionary is left alone
                    script_body = dict(script)
            else:
                script_body = {"id": script_id}

            # build a fresh params mapping instead of updating the one that
            # may have been passed in by the caller
            script_body["params"] = {**script_body.get("params", {}), **fields}

            body["script"] = script_body
            body["scripted_upsert"] = scripted_upsert

        # partial document update
        else:
            if not fields:
                raise IllegalOperation(
                    "You cannot call update() without updating individual fields or a script. "
                    "If you wish to update the entire object use save()."
                )

            # update given fields locally
            merge(self, fields)

            # prepare data for ES
            values = self.to_dict(skip_empty=False)

            # if fields were given: partial update
            body["doc"] = {k: values.get(k) for k in fields.keys()}

        # extract routing etc from meta
        doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta}

        if retry_on_conflict is not None:
            doc_meta["retry_on_conflict"] = retry_on_conflict

        # Optimistic concurrency control, only applied when no retries on
        # conflict were requested
        if (
            retry_on_conflict in (None, 0)
            and "seq_no" in self.meta
            and "primary_term" in self.meta
        ):
            doc_meta["if_seq_no"] = self.meta["seq_no"]
            doc_meta["if_primary_term"] = self.meta["primary_term"]

        i = self._get_index(index)
        assert i is not None

        meta = await self._get_connection(using).update(
            index=i, body=body, refresh=refresh, **doc_meta
        )

        # update meta information from ES
        for k in META_FIELDS:
            if "_" + k in meta:
                setattr(self.meta, k, meta["_" + k])

        return meta if return_doc_meta else meta["result"]

    async def save(
        self,
        using: Optional[AsyncUsingType] = None,
        index: Optional[str] = None,
        validate: bool = True,
        skip_empty: bool = True,
        return_doc_meta: bool = False,
        **kwargs: Any,
    ) -> Any:
        """
        Save the document into elasticsearch. If the document doesn't exist it
        is created, it is overwritten otherwise.

        :arg index: elasticsearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to the connection of
            the index associated with the ``Document``
        :arg validate: set to ``False`` to skip validating the document
        :arg skip_empty: if set to ``False`` will cause empty values (``None``,
            ``[]``, ``{}``) to be left on the document. Those values will be
            stripped out otherwise as they make no difference in elasticsearch.
        :arg return_doc_meta: set to ``True`` to return all metadata from the
            index API call instead of only the operation result

        Any additional keyword arguments will be passed to
        ``Elasticsearch.index`` unchanged; they take precedence over values
        extracted from the document's ``meta``.

        :return: operation result created/updated
        """
        if validate:
            self.full_clean()

        es = self._get_connection(using)
        # extract routing etc from meta
        doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta}

        # Optimistic concurrency control
        if "seq_no" in self.meta and "primary_term" in self.meta:
            doc_meta["if_seq_no"] = self.meta["seq_no"]
            doc_meta["if_primary_term"] = self.meta["primary_term"]

        doc_meta.update(kwargs)
        i = self._get_index(index)
        assert i is not None

        meta = await es.index(
            index=i,
            body=self.to_dict(skip_empty=skip_empty),
            **doc_meta,
        )
        # update meta information from ES
        for k in META_FIELDS:
            if "_" + k in meta:
                setattr(self.meta, k, meta["_" + k])

        return meta if return_doc_meta else meta["result"]

    @classmethod
    async def bulk(
        cls,
        actions: AsyncIterable[Union[Self, Dict[str, Any]]],
        using: Optional[AsyncUsingType] = None,
        index: Optional[str] = None,
        validate: bool = True,
        skip_empty: bool = True,
        **kwargs: Any,
    ) -> Tuple[int, Union[int, List[Any]]]:
        """
        Allows to perform multiple indexing operations in a single request.

        :arg actions: an async iterable that yields document instances to be
            indexed or bulk operation dictionaries. A dictionary whose
            ``_source`` is a document instance has it replaced by the
            serialized document.
        :arg using: connection alias to use, defaults to the connection of
            the index associated with the ``Document``
        :arg index: Elasticsearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg validate: set to ``False`` to skip validating the documents
        :arg skip_empty: if set to ``False`` will cause empty values (``None``,
            ``[]``, ``{}``) to be left on the document. Those values will be
            stripped out otherwise as they make no difference in Elasticsearch.

        Any additional keyword arguments will be passed to
        ``async_bulk`` unchanged.

        :return: bulk operation results
        """
        es = cls._get_connection(using)

        i = cls._default_index(index)
        assert i is not None

        class Generate:
            # Async iterator adapter turning each yielded item into a bulk
            # action dictionary.
            def __init__(
                self,
                doc_iterator: AsyncIterable[Union[AsyncDocument, Dict[str, Any]]],
            ):
                self.doc_iterator = doc_iterator.__aiter__()

            def __aiter__(self) -> Self:
                return self

            async def __anext__(self) -> Dict[str, Any]:
                doc: Optional[Union[AsyncDocument, Dict[str, Any]]] = (
                    await self.doc_iterator.__anext__()
                )

                if isinstance(doc, dict):
                    action = doc
                    doc = None
                    if "_source" in action and isinstance(
                        action["_source"], AsyncDocument
                    ):
                        doc = action["_source"]
                        if validate:  # pragma: no cover
                            doc.full_clean()
                        action["_source"] = doc.to_dict(
                            include_meta=False, skip_empty=skip_empty
                        )
                elif doc is not None:
                    if validate:  # pragma: no cover
                        doc.full_clean()
                    action = doc.to_dict(include_meta=True, skip_empty=skip_empty)
                # fall back to the default index when the action names none
                if "_index" not in action:
                    action["_index"] = i
                return action

        return await async_bulk(es, Generate(actions), **kwargs)
class AsyncFacetedSearch(FacetedSearchBase[_R]):
    """
    Faceted search whose underlying search object is an
    :class:`AsyncSearch`, so counting and executing are coroutines.
    """

    _s: AsyncSearch[_R]

    async def count(self) -> int:
        """
        Return the result of ``count()`` on the underlying search object.
        """
        total = await self._s.count()
        return total

    def search(self) -> AsyncSearch[_R]:
        """
        Returns the base Search object to which the facets are added.

        Override this method and return a modified search object to customize
        the query. The default implementation searches ``self.index`` for
        ``self.doc_types`` over ``self.using`` and uses ``FacetedResponse`` as
        the response class.
        """
        return AsyncSearch[_R](
            doc_type=self.doc_types, index=self.index, using=self.using
        ).response_class(FacetedResponse)

    async def execute(self) -> "Response[_R]":
        """
        Execute the search and return the response, after linking the
        response back to this faceted search through ``_faceted_search``.
        """
        response = await self._s.execute()
        response._faceted_search = self
        return response
class AsyncIndexTemplate:
    """
    Index template built from an index definition and saved with
    ``indices.put_template``.

    Attributes that are not found on the template itself are looked up on the
    wrapped index.
    """

    def __init__(
        self,
        name: str,
        template: str,
        index: Optional["AsyncIndex"] = None,
        order: Optional[int] = None,
        **kwargs: Any,
    ):
        """
        :arg name: name of the template
        :arg template: index pattern, stored as the name of the wrapped index
        :arg index: optional index to base the template on; a clone of it is
            used, the instance passed in is not modified
        :arg order: optional ``order`` value of the template
        :arg kwargs: options for the index created when ``index`` is omitted

        :raises ValueError: if both ``index`` and extra options are given
        """
        if index is None:
            self._index = AsyncIndex(template, **kwargs)
        else:
            if kwargs:
                raise ValueError(
                    "You cannot specify options for Index when"
                    " passing an Index instance."
                )
            self._index = index.clone()
            self._index._name = template
        self._template_name = name
        self.order = order

    def __getattr__(self, attr_name: str) -> Any:
        # __getattr__ only runs when the normal lookup failed.  If the wrapped
        # index is not set yet (``copy``/``pickle`` probe a freshly allocated
        # instance, or __init__ raised early), reading ``self._index`` here
        # would call __getattr__ again and recurse until RecursionError, so
        # report a plain AttributeError instead.
        try:
            index = self.__dict__["_index"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr_name!r}"
            ) from None
        return getattr(index, attr_name)

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the serialized index with ``index_patterns`` added, plus
        ``order`` when one was set.
        """
        d = self._index.to_dict()
        d["index_patterns"] = [self._index._name]
        if self.order is not None:
            d["order"] = self.order
        return d

    async def save(
        self, using: "Optional[AsyncUsingType]" = None
    ) -> "ObjectApiResponse[Any]":
        """
        Send the template to elasticsearch with ``indices.put_template``.

        :arg using: connection alias to use, defaults to the connection of
            the wrapped index
        """
        es = get_connection(using or self._index._using)
        return await es.indices.put_template(
            name=self._template_name, body=self.to_dict()
        )


class AsyncComposableIndexTemplate:
    """
    Index template built from an index definition and saved with
    ``indices.put_index_template``.

    Attributes that are not found on the template itself are looked up on the
    wrapped index.
    """

    def __init__(
        self,
        name: str,
        template: str,
        index: Optional["AsyncIndex"] = None,
        priority: Optional[int] = None,
        **kwargs: Any,
    ):
        """
        :arg name: name of the template
        :arg template: index pattern, stored as the name of the wrapped index
        :arg index: optional index to base the template on; a clone of it is
            used, the instance passed in is not modified
        :arg priority: optional ``priority`` value of the template
        :arg kwargs: options for the index created when ``index`` is omitted

        :raises ValueError: if both ``index`` and extra options are given
        """
        if index is None:
            self._index = AsyncIndex(template, **kwargs)
        else:
            if kwargs:
                raise ValueError(
                    "You cannot specify options for Index when"
                    " passing an Index instance."
                )
            self._index = index.clone()
            self._index._name = template
        self._template_name = name
        self.priority = priority

    def __getattr__(self, attr_name: str) -> Any:
        # __getattr__ only runs when the normal lookup failed.  If the wrapped
        # index is not set yet (``copy``/``pickle`` probe a freshly allocated
        # instance, or __init__ raised early), reading ``self._index`` here
        # would call __getattr__ again and recurse until RecursionError, so
        # report a plain AttributeError instead.
        try:
            index = self.__dict__["_index"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr_name!r}"
            ) from None
        return getattr(index, attr_name)

    def to_dict(self) -> Dict[str, Any]:
        """
        Return a dictionary holding the serialized index under ``template``
        and the ``index_patterns``, plus ``priority`` when one was set.
        """
        d: Dict[str, Any] = {"template": self._index.to_dict()}
        d["index_patterns"] = [self._index._name]
        if self.priority is not None:
            d["priority"] = self.priority
        return d

    async def save(
        self, using: "Optional[AsyncUsingType]" = None
    ) -> "ObjectApiResponse[Any]":
        """
        Send the template to elasticsearch with
        ``indices.put_index_template``; the entries of :meth:`to_dict` are
        passed as keyword arguments.

        :arg using: connection alias to use, defaults to the connection of
            the wrapped index
        """
        es = get_connection(using or self._index._using)
        return await es.indices.put_index_template(
            name=self._template_name, **self.to_dict()
        )
+ + def __init__(self, name: str, using: AsyncUsingType = "default"): + """ + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + super().__init__(name, AsyncMapping, using=using) + + def _get_connection( + self, using: Optional[AsyncUsingType] = None + ) -> "AsyncElasticsearch": + if self._name is None: + raise ValueError("You cannot perform API calls on the default index.") + return get_connection(using or self._using) + + connection = property(_get_connection) + + def as_template( + self, + template_name: str, + pattern: Optional[str] = None, + order: Optional[int] = None, + ) -> AsyncIndexTemplate: + return AsyncIndexTemplate( + template_name, pattern or self._name, index=self, order=order + ) + + def as_composable_template( + self, + template_name: str, + pattern: Optional[str] = None, + priority: Optional[int] = None, + ) -> AsyncComposableIndexTemplate: + return AsyncComposableIndexTemplate( + template_name, pattern or self._name, index=self, priority=priority + ) + + async def load_mappings(self, using: Optional[AsyncUsingType] = None) -> None: + await self.get_or_create_mapping().update_from_es( + self._name, using=using or self._using + ) + + def clone( + self, name: Optional[str] = None, using: Optional[AsyncUsingType] = None + ) -> Self: + """ + Create a copy of the instance with another name or connection alias. 
+ Useful for creating multiple indices with shared configuration:: + + i = Index('base-index') + i.settings(number_of_shards=1) + i.create() + + i2 = i.clone('other-index') + i2.create() + + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + i = self.__class__(name or self._name, using=using or self._using) + i._settings = self._settings.copy() + i._aliases = self._aliases.copy() + i._analysis = self._analysis.copy() + i._doc_types = self._doc_types[:] + if self._mapping is not None: + i._mapping = self._mapping._clone() + return i + + def search(self, using: Optional[AsyncUsingType] = None) -> AsyncSearch: + """ + Return a :class:`~elasticsearch.dsl.Search` object searching over the + index (or all the indices belonging to this template) and its + ``Document``\\s. + """ + return AsyncSearch( + using=using or self._using, index=self._name, doc_type=self._doc_types + ) + + def updateByQuery( + self, using: Optional[AsyncUsingType] = None + ) -> AsyncUpdateByQuery: + """ + Return a :class:`~elasticsearch.dsl.UpdateByQuery` object searching over the index + (or all the indices belonging to this template) and updating Documents that match + the search criteria. + + For more information, see here: + https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html + """ + return AsyncUpdateByQuery( + using=using or self._using, + index=self._name, + ) + + async def create( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Creates the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.create`` unchanged. 
+ """ + return await self._get_connection(using).indices.create( + index=self._name, body=self.to_dict(), **kwargs + ) + + async def is_closed(self, using: Optional[AsyncUsingType] = None) -> bool: + state = await self._get_connection(using).cluster.state( + index=self._name, metric="metadata" + ) + return bool(state["metadata"]["indices"][self._name]["state"] == "close") + + async def save( + self, using: Optional[AsyncUsingType] = None + ) -> "Optional[ObjectApiResponse[Any]]": + """ + Sync the index definition with elasticsearch, creating the index if it + doesn't exist and updating its settings and mappings if it does. + + Note some settings and mapping changes cannot be done on an open + index (or at all on an existing index) and for those this method will + fail with the underlying exception. + """ + if not await self.exists(using=using): + return await self.create(using=using) + + body = self.to_dict() + settings = body.pop("settings", {}) + analysis = settings.pop("analysis", None) + current_settings = (await self.get_settings(using=using))[self._name][ + "settings" + ]["index"] + if analysis: + if await self.is_closed(using=using): + # closed index, update away + settings["analysis"] = analysis + else: + # compare analysis definition, if all analysis objects are + # already defined as requested, skip analysis update and + # proceed, otherwise raise IllegalOperation + existing_analysis = current_settings.get("analysis", {}) + if any( + existing_analysis.get(section, {}).get(k, None) + != analysis[section][k] + for section in analysis + for k in analysis[section] + ): + raise IllegalOperation( + "You cannot update analysis configuration on an open index, " + "you need to close index %s first." 
% self._name + ) + + # try and update the settings + if settings: + settings = settings.copy() + for k, v in list(settings.items()): + if k in current_settings and current_settings[k] == str(v): + del settings[k] + + if settings: + await self.put_settings(using=using, body=settings) + + # update the mappings, any conflict in the mappings will result in an + # exception + mappings = body.pop("mappings", {}) + if mappings: + return await self.put_mapping(using=using, body=mappings) + + return None + + async def analyze( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Perform the analysis process on a text and return the tokens breakdown + of the text. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.analyze`` unchanged. + """ + return await self._get_connection(using).indices.analyze( + index=self._name, **kwargs + ) + + async def refresh( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Performs a refresh operation on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.refresh`` unchanged. + """ + return await self._get_connection(using).indices.refresh( + index=self._name, **kwargs + ) + + async def flush( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Performs a flush operation on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.flush`` unchanged. + """ + return await self._get_connection(using).indices.flush( + index=self._name, **kwargs + ) + + async def get( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The get index API allows to retrieve information about the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get`` unchanged. 
+ """ + return await self._get_connection(using).indices.get(index=self._name, **kwargs) + + async def open( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Opens the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.open`` unchanged. + """ + return await self._get_connection(using).indices.open( + index=self._name, **kwargs + ) + + async def close( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Closes the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.close`` unchanged. + """ + return await self._get_connection(using).indices.close( + index=self._name, **kwargs + ) + + async def delete( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Deletes the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.delete`` unchanged. + """ + return await self._get_connection(using).indices.delete( + index=self._name, **kwargs + ) + + async def exists( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> bool: + """ + Returns ``True`` if the index already exists in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists`` unchanged. + """ + return bool( + await self._get_connection(using).indices.exists(index=self._name, **kwargs) + ) + + async def put_mapping( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Register specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_mapping`` unchanged. 
+ """ + return await self._get_connection(using).indices.put_mapping( + index=self._name, **kwargs + ) + + async def get_mapping( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_mapping`` unchanged. + """ + return await self._get_connection(using).indices.get_mapping( + index=self._name, **kwargs + ) + + async def get_field_mapping( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve mapping definition of a specific field. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_field_mapping`` unchanged. + """ + return await self._get_connection(using).indices.get_field_mapping( + index=self._name, **kwargs + ) + + async def put_alias( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Create an alias for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_alias`` unchanged. + """ + return await self._get_connection(using).indices.put_alias( + index=self._name, **kwargs + ) + + async def exists_alias( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> bool: + """ + Return a boolean indicating whether given alias exists for this index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists_alias`` unchanged. + """ + return bool( + await self._get_connection(using).indices.exists_alias( + index=self._name, **kwargs + ) + ) + + async def get_alias( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve a specified alias. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_alias`` unchanged. 
+ """ + return await self._get_connection(using).indices.get_alias( + index=self._name, **kwargs + ) + + async def delete_alias( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Delete specific alias. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.delete_alias`` unchanged. + """ + return await self._get_connection(using).indices.delete_alias( + index=self._name, **kwargs + ) + + async def get_settings( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve settings for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_settings`` unchanged. + """ + return await self._get_connection(using).indices.get_settings( + index=self._name, **kwargs + ) + + async def put_settings( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Change specific index level settings in real time. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_settings`` unchanged. + """ + return await self._get_connection(using).indices.put_settings( + index=self._name, **kwargs + ) + + async def stats( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve statistics on different operations happening on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.stats`` unchanged. + """ + return await self._get_connection(using).indices.stats( + index=self._name, **kwargs + ) + + async def segments( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Provide low level segments information that a Lucene index (shard + level) is built with. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.segments`` unchanged. 
+ """ + return await self._get_connection(using).indices.segments( + index=self._name, **kwargs + ) + + async def validate_query( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Validate a potentially expensive query without executing it. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.validate_query`` unchanged. + """ + return await self._get_connection(using).indices.validate_query( + index=self._name, **kwargs + ) + + async def clear_cache( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Clear all caches or specific cached associated with the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.clear_cache`` unchanged. + """ + return await self._get_connection(using).indices.clear_cache( + index=self._name, **kwargs + ) + + async def recovery( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The indices recovery API provides insight into on-going shard + recoveries for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.recovery`` unchanged. + """ + return await self._get_connection(using).indices.recovery( + index=self._name, **kwargs + ) + + async def shard_stores( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Provides store information for shard copies of the index. Store + information reports on which nodes shard copies exist, the shard copy + version, indicating how recent they are, and any exceptions encountered + while opening the shard index or from earlier engine failure. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.shard_stores`` unchanged. 
+ """ + return await self._get_connection(using).indices.shard_stores( + index=self._name, **kwargs + ) + + async def forcemerge( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The force merge API allows to force merging of the index through an + API. The merge relates to the number of segments a Lucene index holds + within each shard. The force merge operation allows to reduce the + number of segments by merging them. + + This call will block until the merge is complete. If the http + connection is lost, the request will continue in the background, and + any new requests will block until the previous force merge is complete. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.forcemerge`` unchanged. + """ + return await self._get_connection(using).indices.forcemerge( + index=self._name, **kwargs + ) + + async def shrink( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The shrink index API allows you to shrink an existing index into a new + index with fewer primary shards. The number of primary shards in the + target index must be a factor of the shards in the source index. For + example an index with 8 primary shards can be shrunk into 4, 2 or 1 + primary shards or an index with 15 primary shards can be shrunk into 5, + 3 or 1. If the number of shards in the index is a prime number it can + only be shrunk into a single primary shard. Before shrinking, a + (primary or replica) copy of every shard in the index must be present + on the same node. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.shrink`` unchanged. 
+ """ + return await self._get_connection(using).indices.shrink( + index=self._name, **kwargs + ) diff --git a/elasticsearch/dsl/_async/mapping.py b/elasticsearch/dsl/_async/mapping.py new file mode 100644 index 000000000..7ef9c6dac --- /dev/null +++ b/elasticsearch/dsl/_async/mapping.py @@ -0,0 +1,49 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from typing import List, Optional, Union

from typing_extensions import Self

from ..async_connections import get_connection
from ..mapping_base import MappingBase
from ..utils import AsyncUsingType


class AsyncMapping(MappingBase):
    """
    Mapping whose Elasticsearch round-trips are coroutines.
    """

    @classmethod
    async def from_es(
        cls, index: Optional[Union[str, List[str]]], using: AsyncUsingType = "default"
    ) -> Self:
        """
        Create a fresh instance and fill it via :meth:`update_from_es`.

        :arg index: index name (or list of names) forwarded to
            ``indices.get_mapping``
        :arg using: connection alias or client to use, defaults to ``'default'``
        """
        mapping = cls()
        await mapping.update_from_es(index, using)
        return mapping

    async def update_from_es(
        self, index: Optional[Union[str, List[str]]], using: AsyncUsingType = "default"
    ) -> None:
        """
        Fetch the mapping for ``index`` and feed its ``"mappings"`` section to
        ``_update_from_dict``.

        :arg index: index name (or list of names) forwarded to
            ``indices.get_mapping``
        :arg using: connection alias or client to use, defaults to ``'default'``
        """
        client = get_connection(using)
        response = await client.indices.get_mapping(index=index)
        # Only one entry of the response is consumed; its key (the index name)
        # is not needed.
        _index_name, definition = response.popitem()
        self._update_from_dict(definition["mappings"])

    async def save(self, index: str, using: AsyncUsingType = "default") -> None:
        """
        Attach this mapping to an ``AsyncIndex`` named ``index`` and save that
        index.

        :arg index: name of the index to save the mapping into
        :arg using: connection alias or client to use, defaults to ``'default'``
        """
        # Imported here rather than at module level -- presumably to avoid a
        # circular import with ``.index``; TODO confirm.
        from .index import AsyncIndex

        target = AsyncIndex(index, using=using)
        target.mapping(self)
        await target.save()


# Patch metadata and licence banner of the next file in this dump (text unchanged).
# diff --git a/elasticsearch/dsl/_async/search.py b/elasticsearch/dsl/_async/search.py
# new file mode 100644
# index 000000000..ea6288622
# --- /dev/null
# +++ b/elasticsearch/dsl/_async/search.py
# @@ -0,0 +1,232 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import contextlib
from typing import (
    TYPE_CHECKING,
    Any,
    AsyncIterator,
    Dict,
    Iterator,
    List,
    Optional,
    cast,
)

from elasticsearch.exceptions import ApiError
from elasticsearch.helpers import async_scan
from typing_extensions import Self

from ..async_connections import get_connection
from ..response import Response
from ..search_base import MultiSearchBase, SearchBase
from ..utils import _R, AsyncUsingType, AttrDict


class AsyncSearch(SearchBase[_R]):
    """
    Search request whose network operations are coroutines / async iterators.
    """

    _using: AsyncUsingType

    def __aiter__(self) -> AsyncIterator[_R]:
        """
        Iterate over the hits.
        """

        class ResultsIterator(AsyncIterator[_R]):
            """
            Runs the search on the first ``__anext__`` call, then steps through
            the returned response with a regular iterator.
            """

            def __init__(self, search: AsyncSearch[_R]):
                self.search = search
                self.iterator: Optional[Iterator[_R]] = None

            async def __anext__(self) -> _R:
                if self.iterator is None:
                    self.iterator = iter(await self.search.execute())
                try:
                    return next(self.iterator)
                except StopIteration:
                    # translate the sync end-of-iteration signal to the async one
                    raise StopAsyncIteration()

        return ResultsIterator(self)

    async def count(self) -> int:
        """
        Return the number of hits matching the query and filters. Note that
        only the actual number is returned.

        If a response is already cached on this object and its total is exact
        (``relation == "eq"``), that value is returned without a new request.
        """
        if hasattr(self, "_response") and self._response.hits.total.relation == "eq":  # type: ignore[attr-defined]
            return cast(int, self._response.hits.total.value)  # type: ignore[attr-defined]

        es = get_connection(self._using)

        d = self.to_dict(count=True)
        # TODO: failed shards detection
        resp = await es.count(
            index=self._index,
            query=cast(Optional[Dict[str, Any]], d.get("query", None)),
            **self._params,
        )

        return cast(int, resp["count"])

    async def execute(self, ignore_cache: bool = False) -> Response[_R]:
        """
        Execute the search and return an instance of ``Response`` wrapping all
        the data.

        :arg ignore_cache: if set to ``True``, consecutive calls will hit
            ES, while cached result will be ignored. Defaults to `False`
        """
        if ignore_cache or not hasattr(self, "_response"):
            es = get_connection(self._using)

            self._response = self._response_class(
                self,
                (
                    await es.search(
                        index=self._index, body=self.to_dict(), **self._params
                    )
                ).body,
            )
        return self._response

    async def scan(self) -> AsyncIterator[_R]:
        """
        Turn the search into a scan search and return a generator that will
        iterate over all the documents matching the query.

        Use ``params`` method to specify any additional arguments you wish to
        pass to the underlying ``scan`` helper from ``elasticsearch-py`` -
        https://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.scan

        The ``iterate()`` method should be preferred, as it provides similar
        functionality using an Elasticsearch point in time.
        """
        es = get_connection(self._using)

        async for hit in async_scan(
            es, query=self.to_dict(), index=self._index, **self._params
        ):
            yield self._get_result(cast(AttrDict[Any], hit))

    async def delete(self) -> AttrDict[Any]:
        """
        delete() executes the query by delegating to delete_by_query()
        """

        es = get_connection(self._using)
        assert self._index is not None

        return AttrDict(
            cast(
                Dict[str, Any],
                await es.delete_by_query(
                    index=self._index, body=self.to_dict(), **self._params
                ),
            )
        )

    @contextlib.asynccontextmanager
    async def point_in_time(self, keep_alive: str = "1m") -> AsyncIterator[Self]:
        """
        Open a point in time (pit) that can be used across several searches.

        This method implements a context manager that returns a search object
        configured to operate within the created pit. The pit is closed when
        the ``async with`` block is left, including when it is left because of
        an exception.

        :arg keep_alive: the time to live for the point in time, renewed with each search request
        """
        es = get_connection(self._using)

        pit = await es.open_point_in_time(
            index=self._index or "*", keep_alive=keep_alive
        )
        # An exception raised inside the ``async with`` body is re-raised at
        # the ``yield``; the ``finally`` guarantees the pit is released on the
        # server instead of lingering until ``keep_alive`` expires.
        try:
            search = self.index().extra(pit={"id": pit["id"], "keep_alive": keep_alive})
            if not search._sort:
                search = search.sort("_shard_doc")
            yield search
        finally:
            await es.close_point_in_time(id=pit["id"])

    async def iterate(self, keep_alive: str = "1m") -> AsyncIterator[_R]:
        """
        Return a generator that iterates over all the documents matching the query.

        This method uses a point in time to provide consistent results even when
        the index is changing. It should be preferred over ``scan()``.

        :arg keep_alive: the time to live for the point in time, renewed with each new search request
        """
        async with self.point_in_time(keep_alive=keep_alive) as s:
            while True:
                r = await s.execute()
                for hit in r:
                    yield hit
                # an empty page means the result set is exhausted
                if len(r.hits) == 0:
                    break
                s = s.search_after()


class AsyncMultiSearch(MultiSearchBase[_R]):
    """
    Combine multiple :class:`~elasticsearch.dsl.Search` objects into a single
    request.
    """

    _using: AsyncUsingType

    if TYPE_CHECKING:

        def add(self, search: AsyncSearch[_R]) -> Self: ...  # type: ignore[override]

    async def execute(
        self, ignore_cache: bool = False, raise_on_error: bool = True
    ) -> List[Response[_R]]:
        """
        Execute the multi search request and return a list of search results.

        :arg ignore_cache: if ``True``, send the request even when a previous
            result is cached on this object
        :arg raise_on_error: if ``True`` (default), raise ``ApiError`` for the
            first sub-response that reports an error; if ``False``, ``None`` is
            placed in the returned list at that position instead
        """
        if ignore_cache or not hasattr(self, "_response"):
            es = get_connection(self._using)

            responses = await es.msearch(
                index=self._index, body=self.to_dict(), **self._params
            )

            out: List[Response[_R]] = []
            for s, r in zip(self._searches, responses["responses"]):
                if r.get("error", False):
                    if raise_on_error:
                        raise ApiError("N/A", meta=responses.meta, body=r)
                    r = None
                else:
                    r = Response(s, r)
                out.append(r)

            self._response = out

        return self._response


class AsyncEmptySearch(AsyncSearch[_R]):
    """
    Search variant that never contacts Elasticsearch: every operation returns
    an empty result.
    """

    async def count(self) -> int:
        """Return ``0``."""
        return 0

    async def execute(self, ignore_cache: bool = False) -> Response[_R]:
        """Return a response built from an empty hits payload."""
        return self._response_class(self, {"hits": {"total": 0, "hits": []}})

    async def scan(self) -> AsyncIterator[_R]:
        """Yield nothing."""
        return
        yield  # a bit strange, but this forces an empty generator function

    async def delete(self) -> AttrDict[Any]:
        """Return an empty ``AttrDict``."""
        return AttrDict[Any]({})


# Patch metadata and licence banner of the next file in this dump (text unchanged).
# diff --git a/elasticsearch/dsl/_async/update_by_query.py b/elasticsearch/dsl/_async/update_by_query.py
# new file mode 100644
# index 000000000..bff3aa947
# --- /dev/null
# +++ b/elasticsearch/dsl/_async/update_by_query.py
# @@ -0,0 +1,47 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import TYPE_CHECKING

from ..async_connections import get_connection
from ..update_by_query_base import UpdateByQueryBase
from ..utils import _R, AsyncUsingType

if TYPE_CHECKING:
    from ..response import UpdateByQueryResponse


class AsyncUpdateByQuery(UpdateByQueryBase[_R]):
    """
    Update-by-query request whose ``execute`` is a coroutine.
    """

    _using: AsyncUsingType

    async def execute(self) -> "UpdateByQueryResponse[_R]":
        """
        Send the update-by-query request and return the wrapped result.

        The request is built from ``to_dict()`` plus the stored params; the
        body of the reply is wrapped in the configured response class, stored
        on ``self._response`` and returned. An index must be set.
        """
        client = get_connection(self._using)
        assert self._index is not None

        # Resolve the wrapper before awaiting, matching the original
        # evaluation order (callee first, then arguments).
        wrap = self._response_class
        reply = await client.update_by_query(
            index=self._index, **self.to_dict(), **self._params
        )
        self._response = wrap(self, reply.body)
        return self._response


# Patch metadata and licence banners of the next files in this dump (text unchanged).
# diff --git a/elasticsearch/dsl/_sync/__init__.py b/elasticsearch/dsl/_sync/__init__.py
# new file mode 100644
# index 000000000..2a87d183f
# --- /dev/null
# +++ b/elasticsearch/dsl/_sync/__init__.py
# @@ -0,0 +1,16 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# diff --git a/elasticsearch/dsl/_sync/document.py b/elasticsearch/dsl/_sync/document.py
# new file mode 100644
# index 000000000..c8143412f
# --- /dev/null
# +++ b/elasticsearch/dsl/_sync/document.py
# @@ -0,0 +1,513 @@
# Licensed to Elasticsearch B.V.
under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import collections.abc +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Optional, + Tuple, + Union, + cast, +) + +from elasticsearch.exceptions import NotFoundError, RequestError +from elasticsearch.helpers import bulk +from typing_extensions import Self, dataclass_transform + +from .._sync.index import Index +from ..connections import get_connection +from ..document_base import DocumentBase, DocumentMeta, mapped_field +from ..exceptions import IllegalOperation +from ..utils import DOC_META_FIELDS, META_FIELDS, UsingType, merge +from .search import Search + +if TYPE_CHECKING: + from elasticsearch import Elasticsearch + + +class IndexMeta(DocumentMeta): + _index: Index + + # global flag to guard us from associating an Index with the base Document + # class, only user defined subclasses should have an _index attr + _document_initialized = False + + def __new__( + cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any] + ) -> "IndexMeta": + new_cls = super().__new__(cls, name, bases, attrs) + if cls._document_initialized: + index_opts = attrs.pop("Index", None) + index = cls.construct_index(index_opts, bases) + new_cls._index = index + index.document(new_cls) + cls._document_initialized = 
True + return cast(IndexMeta, new_cls) + + @classmethod + def construct_index(cls, opts: Dict[str, Any], bases: Tuple[type, ...]) -> Index: + if opts is None: + for b in bases: + if hasattr(b, "_index"): + return b._index + + # Set None as Index name so it will set _all while making the query + return Index(name=None) + + i = Index(getattr(opts, "name", "*"), using=getattr(opts, "using", "default")) + i.settings(**getattr(opts, "settings", {})) + i.aliases(**getattr(opts, "aliases", {})) + for a in getattr(opts, "analyzers", ()): + i.analyzer(a) + return i + + +@dataclass_transform(field_specifiers=(mapped_field,)) +class Document(DocumentBase, metaclass=IndexMeta): + """ + Model-like class for persisting documents in elasticsearch. + """ + + if TYPE_CHECKING: + _index: Index + + @classmethod + def _get_using(cls, using: Optional[UsingType] = None) -> UsingType: + return cast(UsingType, using or cls._index._using) + + @classmethod + def _get_connection(cls, using: Optional[UsingType] = None) -> "Elasticsearch": + return get_connection(cls._get_using(using)) + + @classmethod + def init( + cls, index: Optional[str] = None, using: Optional[UsingType] = None + ) -> None: + """ + Create the index and populate the mappings in elasticsearch. + """ + i = cls._index + if index: + i = i.clone(name=index) + i.save(using=using) + + @classmethod + def search( + cls, using: Optional[UsingType] = None, index: Optional[str] = None + ) -> Search[Self]: + """ + Create an :class:`~elasticsearch.dsl.Search` instance that will search + over this ``Document``. + """ + return Search( + using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls] + ) + + @classmethod + def get( + cls, + id: str, + using: Optional[UsingType] = None, + index: Optional[str] = None, + **kwargs: Any, + ) -> Optional[Self]: + """ + Retrieve a single document from elasticsearch using its ``id``. 
+ + :arg id: ``id`` of the document to be retrieved + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.get`` unchanged. + """ + es = cls._get_connection(using) + doc = es.get(index=cls._default_index(index), id=id, **kwargs) + if not doc.get("found", False): + return None + return cls.from_es(doc) + + @classmethod + def exists( + cls, + id: str, + using: Optional[UsingType] = None, + index: Optional[str] = None, + **kwargs: Any, + ) -> bool: + """ + check if exists a single document from elasticsearch using its ``id``. + + :arg id: ``id`` of the document to check if exists + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.exists`` unchanged. + """ + es = cls._get_connection(using) + return bool(es.exists(index=cls._default_index(index), id=id, **kwargs)) + + @classmethod + def mget( + cls, + docs: List[Dict[str, Any]], + using: Optional[UsingType] = None, + index: Optional[str] = None, + raise_on_error: bool = True, + missing: str = "none", + **kwargs: Any, + ) -> List[Optional[Self]]: + r""" + Retrieve multiple document by their ``id``\s. Returns a list of instances + in the same order as requested. + + :arg docs: list of ``id``\s of the documents to be retrieved or a list + of document specifications as per + https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg missing: what to do when one of the documents requested is not + found. 
Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise + ``NotFoundError``) or ``'skip'`` (ignore the missing document). + + Any additional keyword arguments will be passed to + ``Elasticsearch.mget`` unchanged. + """ + if missing not in ("raise", "skip", "none"): + raise ValueError("'missing' must be 'raise', 'skip', or 'none'.") + es = cls._get_connection(using) + body = { + "docs": [ + doc if isinstance(doc, collections.abc.Mapping) else {"_id": doc} + for doc in docs + ] + } + results = es.mget(index=cls._default_index(index), body=body, **kwargs) + + objs: List[Optional[Self]] = [] + error_docs: List[Self] = [] + missing_docs: List[Self] = [] + for doc in results["docs"]: + if doc.get("found"): + if error_docs or missing_docs: + # We're going to raise an exception anyway, so avoid an + # expensive call to cls.from_es(). + continue + + objs.append(cls.from_es(doc)) + + elif doc.get("error"): + if raise_on_error: + error_docs.append(doc) + if missing == "none": + objs.append(None) + + # The doc didn't cause an error, but the doc also wasn't found. + elif missing == "raise": + missing_docs.append(doc) + elif missing == "none": + objs.append(None) + + if error_docs: + error_ids = [doc["_id"] for doc in error_docs] + message = "Required routing not provided for documents %s." + message %= ", ".join(error_ids) + raise RequestError(400, message, error_docs) # type: ignore + if missing_docs: + missing_ids = [doc["_id"] for doc in missing_docs] + message = f"Documents {', '.join(missing_ids)} not found." + raise NotFoundError(404, message, {"docs": missing_docs}) # type: ignore + return objs + + def delete( + self, + using: Optional[UsingType] = None, + index: Optional[str] = None, + **kwargs: Any, + ) -> None: + """ + Delete the instance in elasticsearch. + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. 
+ :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.delete`` unchanged. + """ + es = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + i = self._get_index(index) + assert i is not None + + es.delete(index=i, **doc_meta) + + def update( + self, + using: Optional[UsingType] = None, + index: Optional[str] = None, + detect_noop: bool = True, + doc_as_upsert: bool = False, + refresh: bool = False, + retry_on_conflict: Optional[int] = None, + script: Optional[Union[str, Dict[str, Any]]] = None, + script_id: Optional[str] = None, + scripted_upsert: bool = False, + upsert: Optional[Dict[str, Any]] = None, + return_doc_meta: bool = False, + **fields: Any, + ) -> Any: + """ + Partial update of the document, specify fields you wish to update and + both the instance and the document in elasticsearch will be updated:: + + doc = MyDocument(title='Document Title!') + doc.save() + doc.update(title='New Document Title!') + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg detect_noop: Set to ``False`` to disable noop detection. + :arg refresh: Control when the changes made by this request are visible + to search. Set to ``True`` for immediate effect. + :arg retry_on_conflict: In between the get and indexing phases of the + update, it is possible that another process might have already + updated the same document. By default, the update will fail with a + version conflict exception. 
The retry_on_conflict parameter + controls how many times to retry the update before finally throwing + an exception. + :arg doc_as_upsert: Instead of sending a partial doc plus an upsert + doc, setting doc_as_upsert to true will use the contents of doc as + the upsert value + :arg script: the source code of the script as a string, or a dictionary + with script attributes to update. + :arg return_doc_meta: set to ``True`` to return all metadata from the + index API call instead of only the operation result + + :return: operation result noop/updated + """ + body: Dict[str, Any] = { + "doc_as_upsert": doc_as_upsert, + "detect_noop": detect_noop, + } + + # scripted update + if script or script_id: + if upsert is not None: + body["upsert"] = upsert + + if script: + if isinstance(script, str): + script = {"source": script} + else: + script = {"id": script_id} + + if "params" not in script: + script["params"] = fields + else: + script["params"].update(fields) + + body["script"] = script + body["scripted_upsert"] = scripted_upsert + + # partial document update + else: + if not fields: + raise IllegalOperation( + "You cannot call update() without updating individual fields or a script. " + "If you wish to update the entire object use save()." 
+ ) + + # update given fields locally + merge(self, fields) + + # prepare data for ES + values = self.to_dict(skip_empty=False) + + # if fields were given: partial update + body["doc"] = {k: values.get(k) for k in fields.keys()} + + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + if retry_on_conflict is not None: + doc_meta["retry_on_conflict"] = retry_on_conflict + + # Optimistic concurrency control + if ( + retry_on_conflict in (None, 0) + and "seq_no" in self.meta + and "primary_term" in self.meta + ): + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + i = self._get_index(index) + assert i is not None + + meta = self._get_connection(using).update( + index=i, body=body, refresh=refresh, **doc_meta + ) + + # update meta information from ES + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] + + def save( + self, + using: Optional[UsingType] = None, + index: Optional[str] = None, + validate: bool = True, + skip_empty: bool = True, + return_doc_meta: bool = False, + **kwargs: Any, + ) -> Any: + """ + Save the document into elasticsearch. If the document doesn't exist it + is created, it is overwritten otherwise. Returns ``True`` if this + operations resulted in new document being created. + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg validate: set to ``False`` to skip validating the document + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in elasticsearch. 
+ :arg return_doc_meta: set to ``True`` to return all metadata from the + update API call instead of only the operation result + + Any additional keyword arguments will be passed to + ``Elasticsearch.index`` unchanged. + + :return: operation result created/updated + """ + if validate: + self.full_clean() + + es = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + i = self._get_index(index) + assert i is not None + + meta = es.index( + index=i, + body=self.to_dict(skip_empty=skip_empty), + **doc_meta, + ) + # update meta information from ES + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] + + @classmethod + def bulk( + cls, + actions: Iterable[Union[Self, Dict[str, Any]]], + using: Optional[UsingType] = None, + index: Optional[str] = None, + validate: bool = True, + skip_empty: bool = True, + **kwargs: Any, + ) -> Tuple[int, Union[int, List[Any]]]: + """ + Allows to perform multiple indexing operations in a single request. + + :arg actions: a generator that returns document instances to be indexed, + bulk operation dictionaries. + :arg using: connection alias to use, defaults to ``'default'`` + :arg index: Elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg validate: set to ``False`` to skip validating the documents + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in Elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.bulk`` unchanged. 
+ + :return: bulk operation results + """ + es = cls._get_connection(using) + + i = cls._default_index(index) + assert i is not None + + class Generate: + def __init__( + self, + doc_iterator: Iterable[Union[Document, Dict[str, Any]]], + ): + self.doc_iterator = doc_iterator.__iter__() + + def __iter__(self) -> Self: + return self + + def __next__(self) -> Dict[str, Any]: + doc: Optional[Union[Document, Dict[str, Any]]] = ( + self.doc_iterator.__next__() + ) + + if isinstance(doc, dict): + action = doc + doc = None + if "_source" in action and isinstance(action["_source"], Document): + doc = action["_source"] + if validate: # pragma: no cover + doc.full_clean() + action["_source"] = doc.to_dict( + include_meta=False, skip_empty=skip_empty + ) + elif doc is not None: + if validate: # pragma: no cover + doc.full_clean() + action = doc.to_dict(include_meta=True, skip_empty=skip_empty) + if "_index" not in action: + action["_index"] = i + return action + + return bulk(es, Generate(actions), **kwargs) diff --git a/elasticsearch/dsl/_sync/faceted_search.py b/elasticsearch/dsl/_sync/faceted_search.py new file mode 100644 index 000000000..115492c7a --- /dev/null +++ b/elasticsearch/dsl/_sync/faceted_search.py @@ -0,0 +1,51 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
class FacetedSearch(FacetedSearchBase[_R]):
    """Synchronous faceted search built on top of :class:`Search`."""

    _s: Search[_R]

    def count(self) -> int:
        """Return the hit count reported by the underlying search object."""
        total = self._s.count()
        return total

    def search(self) -> Search[_R]:
        """
        Build the base ``Search`` object that the facets get attached to.

        Override this method and return a modified search object to
        customize the query.
        """
        base = Search[_R](doc_type=self.doc_types, index=self.index, using=self.using)
        return base.response_class(FacetedResponse)

    def execute(self) -> "Response[_R]":
        """
        Run the underlying search and return its response, tagged with this
        faceted search instance.
        """
        response = self._s.execute()
        response._faceted_search = self
        return response
class IndexTemplate:
    """
    Legacy index template backed by a private :class:`Index` copy.

    Attributes not found on the template itself are looked up on the wrapped
    index, so the template can be used much like the index it wraps.
    """

    def __init__(
        self,
        name: str,
        template: str,
        index: Optional["Index"] = None,
        order: Optional[int] = None,
        **kwargs: Any,
    ):
        """
        :arg name: name under which the template is stored
        :arg template: index pattern the template applies to
        :arg index: existing ``Index`` to copy the definition from; when
            omitted a new ``Index`` is created from ``template`` and ``kwargs``
        :arg order: optional ``order`` value emitted by :meth:`to_dict`

        :raises ValueError: if both ``index`` and extra ``kwargs`` are given.
        """
        if index is None:
            self._index = Index(template, **kwargs)
        else:
            if kwargs:
                raise ValueError(
                    "You cannot specify options for Index when"
                    " passing an Index instance."
                )
            # Clone so that renaming below does not affect the caller's index.
            self._index = index.clone()
            self._index._name = template
        self._template_name = name
        self.order = order

    def __getattr__(self, attr_name: str) -> Any:
        # ``__getattr__`` only runs after normal lookup failed. If ``_index``
        # itself is missing (instance created without ``__init__``, e.g. by
        # ``copy`` or ``pickle``), delegating would look up ``self._index``
        # again and recurse until ``RecursionError``.
        if attr_name == "_index":
            raise AttributeError(attr_name)
        return getattr(self._index, attr_name)

    def to_dict(self) -> Dict[str, Any]:
        """Return the index definition plus ``index_patterns`` and ``order``."""
        d = self._index.to_dict()
        d["index_patterns"] = [self._index._name]
        if self.order is not None:
            d["order"] = self.order
        return d

    def save(self, using: "Optional[UsingType]" = None) -> "ObjectApiResponse[Any]":
        """
        Store the template via ``indices.put_template``.

        :arg using: connection alias to use, defaults to the index's alias
        """
        es = get_connection(using or self._index._using)
        return es.indices.put_template(name=self._template_name, body=self.to_dict())


class ComposableIndexTemplate:
    """
    Composable index template backed by a private :class:`Index` copy.

    Attributes not found on the template itself are looked up on the wrapped
    index.
    """

    def __init__(
        self,
        name: str,
        template: str,
        index: Optional["Index"] = None,
        priority: Optional[int] = None,
        **kwargs: Any,
    ):
        """
        :arg name: name under which the template is stored
        :arg template: index pattern the template applies to
        :arg index: existing ``Index`` to copy the definition from; when
            omitted a new ``Index`` is created from ``template`` and ``kwargs``
        :arg priority: optional ``priority`` value emitted by :meth:`to_dict`

        :raises ValueError: if both ``index`` and extra ``kwargs`` are given.
        """
        if index is None:
            self._index = Index(template, **kwargs)
        else:
            if kwargs:
                raise ValueError(
                    "You cannot specify options for Index when"
                    " passing an Index instance."
                )
            # Clone so that renaming below does not affect the caller's index.
            self._index = index.clone()
            self._index._name = template
        self._template_name = name
        self.priority = priority

    def __getattr__(self, attr_name: str) -> Any:
        # See IndexTemplate.__getattr__: without this guard a missing
        # ``_index`` attribute leads to infinite recursion.
        if attr_name == "_index":
            raise AttributeError(attr_name)
        return getattr(self._index, attr_name)

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the index definition nested under ``template`` plus
        ``index_patterns`` and ``priority``.
        """
        d: Dict[str, Any] = {"template": self._index.to_dict()}
        d["index_patterns"] = [self._index._name]
        if self.priority is not None:
            d["priority"] = self.priority
        return d

    def save(self, using: "Optional[UsingType]" = None) -> "ObjectApiResponse[Any]":
        """
        Store the template via ``indices.put_index_template``.

        :arg using: connection alias to use, defaults to the index's alias
        """
        es = get_connection(using or self._index._using)
        return es.indices.put_index_template(name=self._template_name, **self.to_dict())
class Index(IndexBase):
    """
    Synchronous handle on one Elasticsearch index.

    Most methods forward to the matching ``Elasticsearch.indices`` API with
    ``index`` set to this index's name.
    """

    _using: UsingType

    if TYPE_CHECKING:

        def get_or_create_mapping(self) -> Mapping: ...

    def __init__(self, name: str, using: UsingType = "default"):
        """
        :arg name: name of the index
        :arg using: connection alias to use, defaults to ``'default'``
        """
        super().__init__(name, Mapping, using=using)

    def _get_connection(self, using: Optional[UsingType] = None) -> "Elasticsearch":
        """
        Resolve the client for ``using``, falling back to the index's alias.

        :raises ValueError: if the index has no name.
        """
        if self._name is not None:
            return get_connection(using or self._using)
        raise ValueError("You cannot perform API calls on the default index.")

    connection = property(_get_connection)

    def as_template(
        self,
        template_name: str,
        pattern: Optional[str] = None,
        order: Optional[int] = None,
    ) -> IndexTemplate:
        """
        Wrap this index in an ``IndexTemplate``; the pattern defaults to the
        index name.
        """
        index_pattern = pattern or self._name
        return IndexTemplate(template_name, index_pattern, index=self, order=order)

    def as_composable_template(
        self,
        template_name: str,
        pattern: Optional[str] = None,
        priority: Optional[int] = None,
    ) -> ComposableIndexTemplate:
        """
        Wrap this index in a ``ComposableIndexTemplate``; the pattern defaults
        to the index name.
        """
        index_pattern = pattern or self._name
        return ComposableIndexTemplate(
            template_name, index_pattern, index=self, priority=priority
        )

    def load_mappings(self, using: Optional[UsingType] = None) -> None:
        """Refresh the index's mapping object from elasticsearch."""
        mapping = self.get_or_create_mapping()
        mapping.update_from_es(self._name, using=using or self._using)

    def clone(
        self, name: Optional[str] = None, using: Optional[UsingType] = None
    ) -> Self:
        """
        Return a copy of this index, optionally under another name or
        connection alias. Handy for several indices sharing configuration::

            i = Index('base-index')
            i.settings(number_of_shards=1)
            i.create()

            i2 = i.clone('other-index')
            i2.create()

        :arg name: name of the new index, defaults to this index's name
        :arg using: connection alias to use, defaults to this index's alias
        """
        duplicate = self.__class__(name or self._name, using=using or self._using)
        duplicate._settings = self._settings.copy()
        duplicate._aliases = self._aliases.copy()
        duplicate._analysis = self._analysis.copy()
        duplicate._doc_types = self._doc_types[:]
        if self._mapping is not None:
            duplicate._mapping = self._mapping._clone()
        return duplicate

    def search(self, using: Optional[UsingType] = None) -> Search:
        """
        Return a :class:`~elasticsearch.dsl.Search` object bound to this index
        (or all the indices belonging to this template) and its registered
        ``Document`` classes.
        """
        alias = using or self._using
        return Search(using=alias, index=self._name, doc_type=self._doc_types)

    def updateByQuery(self, using: Optional[UsingType] = None) -> UpdateByQuery:
        """
        Return a :class:`~elasticsearch.dsl.UpdateByQuery` object bound to
        this index (or all the indices belonging to this template), used to
        update the documents matching its search criteria.

        For more information, see here:
        https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html
        """
        alias = using or self._using
        return UpdateByQuery(using=alias, index=self._name)

    def create(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Create the index in elasticsearch from its current definition.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.create``.
        """
        indices = self._get_connection(using).indices
        return indices.create(index=self._name, body=self.to_dict(), **kwargs)

    def is_closed(self, using: Optional[UsingType] = None) -> bool:
        """Return ``True`` if the cluster metadata reports the index as closed."""
        cluster = self._get_connection(using).cluster
        state = cluster.state(index=self._name, metric="metadata")
        index_state = state["metadata"]["indices"][self._name]["state"]
        return bool(index_state == "close")

    def save(
        self, using: Optional[UsingType] = None
    ) -> "Optional[ObjectApiResponse[Any]]":
        """
        Synchronize the index definition with elasticsearch: create the index
        when it is missing, otherwise update its settings and mappings.

        Some settings and mapping changes are not possible on an open index
        (or on an existing index at all); in those cases the underlying
        exception is raised.

        :raises IllegalOperation: if the analysis configuration differs from
            the existing one while the index is open.
        """
        if not self.exists(using=using):
            return self.create(using=using)

        definition = self.to_dict()
        wanted = definition.pop("settings", {})
        wanted_analysis = wanted.pop("analysis", None)
        live = self.get_settings(using=using)[self._name]["settings"]["index"]

        if wanted_analysis:
            if self.is_closed(using=using):
                # A closed index accepts analysis changes, so send them along.
                wanted["analysis"] = wanted_analysis
            else:
                # On an open index every requested analysis object must
                # already exist exactly as requested; anything else would
                # need an update that cannot be applied.
                live_analysis = live.get("analysis", {})
                for section in wanted_analysis:
                    for key in wanted_analysis[section]:
                        current = live_analysis.get(section, {}).get(key, None)
                        if current != wanted_analysis[section][key]:
                            raise IllegalOperation(
                                "You cannot update analysis configuration on an open index, "
                                "you need to close index %s first." % self._name
                            )

        # Only send the settings whose stringified value differs from what
        # elasticsearch currently reports.
        if wanted:
            changed = {
                key: value
                for key, value in wanted.items()
                if key not in live or live[key] != str(value)
            }
            if changed:
                self.put_settings(using=using, body=changed)

        # Any conflict in the mappings surfaces as an exception from here.
        mappings = definition.pop("mappings", {})
        if not mappings:
            return None
        return self.put_mapping(using=using, body=mappings)

    def analyze(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Run the analysis process on a text and return the token breakdown.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.analyze``.
        """
        indices = self._get_connection(using).indices
        return indices.analyze(index=self._name, **kwargs)

    def refresh(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Perform a refresh operation on the index.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.refresh``.
        """
        indices = self._get_connection(using).indices
        return indices.refresh(index=self._name, **kwargs)

    def flush(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Perform a flush operation on the index.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.flush``.
        """
        indices = self._get_connection(using).indices
        return indices.flush(index=self._name, **kwargs)

    def get(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Retrieve information about the index through the get index API.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.get``.
        """
        indices = self._get_connection(using).indices
        return indices.get(index=self._name, **kwargs)

    def open(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Open the index in elasticsearch.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.open``.
        """
        indices = self._get_connection(using).indices
        return indices.open(index=self._name, **kwargs)

    def close(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Close the index in elasticsearch.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.close``.
        """
        indices = self._get_connection(using).indices
        return indices.close(index=self._name, **kwargs)

    def delete(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Delete the index in elasticsearch.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.delete``.
        """
        indices = self._get_connection(using).indices
        return indices.delete(index=self._name, **kwargs)

    def exists(self, using: Optional[UsingType] = None, **kwargs: Any) -> bool:
        """
        Return ``True`` if the index already exists in elasticsearch.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.exists``.
        """
        indices = self._get_connection(using).indices
        return bool(indices.exists(index=self._name, **kwargs))

    def put_mapping(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Register a specific mapping definition for a specific type.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.put_mapping``.
        """
        indices = self._get_connection(using).indices
        return indices.put_mapping(index=self._name, **kwargs)

    def get_mapping(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Retrieve a specific mapping definition for a specific type.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.get_mapping``.
        """
        indices = self._get_connection(using).indices
        return indices.get_mapping(index=self._name, **kwargs)

    def get_field_mapping(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Retrieve the mapping definition of a specific field.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.get_field_mapping``.
        """
        indices = self._get_connection(using).indices
        return indices.get_field_mapping(index=self._name, **kwargs)

    def put_alias(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Create an alias for the index.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.put_alias``.
        """
        indices = self._get_connection(using).indices
        return indices.put_alias(index=self._name, **kwargs)

    def exists_alias(self, using: Optional[UsingType] = None, **kwargs: Any) -> bool:
        """
        Return a boolean telling whether the given alias exists for this index.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.exists_alias``.
        """
        indices = self._get_connection(using).indices
        return bool(indices.exists_alias(index=self._name, **kwargs))

    def get_alias(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Retrieve a specified alias.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.get_alias``.
        """
        indices = self._get_connection(using).indices
        return indices.get_alias(index=self._name, **kwargs)

    def delete_alias(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Delete a specific alias.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.delete_alias``.
        """
        indices = self._get_connection(using).indices
        return indices.delete_alias(index=self._name, **kwargs)

    def get_settings(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Retrieve the settings of the index.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.get_settings``.
        """
        indices = self._get_connection(using).indices
        return indices.get_settings(index=self._name, **kwargs)

    def put_settings(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Change specific index level settings in real time.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.put_settings``.
        """
        indices = self._get_connection(using).indices
        return indices.put_settings(index=self._name, **kwargs)

    def stats(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Retrieve statistics on the different operations happening on the index.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.stats``.
        """
        indices = self._get_connection(using).indices
        return indices.stats(index=self._name, **kwargs)

    def segments(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Provide low level information about the segments that a Lucene index
        (shard level) is built with.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.segments``.
        """
        indices = self._get_connection(using).indices
        return indices.segments(index=self._name, **kwargs)

    def validate_query(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Validate a potentially expensive query without executing it.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.validate_query``.
        """
        indices = self._get_connection(using).indices
        return indices.validate_query(index=self._name, **kwargs)

    def clear_cache(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Clear all caches, or specific caches, associated with the index.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.clear_cache``.
        """
        indices = self._get_connection(using).indices
        return indices.clear_cache(index=self._name, **kwargs)

    def recovery(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Give insight into on-going shard recoveries for the index through the
        indices recovery API.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.recovery``.
        """
        indices = self._get_connection(using).indices
        return indices.recovery(index=self._name, **kwargs)

    def shard_stores(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Provide store information for shard copies of the index: on which
        nodes shard copies exist, the shard copy version (indicating how
        recent they are), and any exceptions encountered while opening the
        shard index or from earlier engine failure.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.shard_stores``.
        """
        indices = self._get_connection(using).indices
        return indices.shard_stores(index=self._name, **kwargs)

    def forcemerge(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Force a merge of the index through the force merge API. The merge
        relates to the number of segments a Lucene index holds within each
        shard; force merging reduces that number by merging segments.

        This call blocks until the merge is complete. If the http connection
        is lost, the request continues in the background, and any new
        requests block until the previous force merge is complete.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.forcemerge``.
        """
        indices = self._get_connection(using).indices
        return indices.forcemerge(index=self._name, **kwargs)

    def shrink(
        self, using: Optional[UsingType] = None, **kwargs: Any
    ) -> "ObjectApiResponse[Any]":
        """
        Shrink the existing index into a new index with fewer primary shards
        through the shrink index API. The number of primary shards in the
        target index must be a factor of the shards in the source index: an
        index with 8 primary shards can be shrunk into 4, 2 or 1, one with 15
        into 5, 3 or 1, and one with a prime number of shards only into a
        single primary shard. Before shrinking, a (primary or replica) copy of
        every shard in the index must be present on the same node.

        Extra keyword arguments are forwarded unchanged to
        ``Elasticsearch.indices.shrink``.
        """
        indices = self._get_connection(using).indices
        return indices.shrink(index=self._name, **kwargs)
class Mapping(MappingBase):
    """Synchronous mapping that can be loaded from and saved to elasticsearch."""

    @classmethod
    def from_es(
        cls, index: Optional[Union[str, List[str]]], using: UsingType = "default"
    ) -> Self:
        """
        Build a new mapping and populate it from the mapping stored in
        elasticsearch for ``index``.

        :arg index: index (or indices) to read the mapping from
        :arg using: connection alias to use, defaults to ``'default'``
        """
        mapping = cls()
        mapping.update_from_es(index, using)
        return mapping

    def update_from_es(
        self, index: Optional[Union[str, List[str]]], using: UsingType = "default"
    ) -> None:
        """
        Update this mapping from the result of ``indices.get_mapping``.

        :arg index: index (or indices) to read the mapping from
        :arg using: connection alias to use, defaults to ``'default'``
        """
        response = get_connection(using).indices.get_mapping(index=index)
        # Only one entry of the response is consumed, whichever ``popitem``
        # returns.
        _, index_info = response.popitem()
        self._update_from_dict(index_info["mappings"])

    def save(self, index: str, using: UsingType = "default") -> None:
        """
        Attach this mapping to the ``Index`` named ``index`` and save that
        index.

        :arg index: name of the index to save the mapping to
        :arg using: connection alias to use, defaults to ``'default'``
        """
        # Imported here rather than at module level; the index module imports
        # this one.
        from .index import Index

        target = Index(index, using=using)
        target.mapping(self)
        target.save()
class Search(SearchBase[_R]):
    """Synchronous search request bound to a connection alias."""

    _using: UsingType

    def __iter__(self) -> Iterator[_R]:
        """
        Iterate over the hits.

        The search is executed lazily, when the first hit is requested.
        """

        class ResultsIterator(Iterator[_R]):
            def __init__(self, search: Search[_R]):
                self.search = search
                self.iterator: Optional[Iterator[_R]] = None

            def __next__(self) -> _R:
                if self.iterator is None:
                    self.iterator = iter(self.search.execute())
                # StopIteration from the underlying iterator propagates as is.
                return next(self.iterator)

        return ResultsIterator(self)

    def count(self) -> int:
        """
        Return the number of hits matching the query and filters. Note that
        only the actual number is returned.

        A cached response is reused when its total is exact (relation
        ``eq``); otherwise a count request is sent.
        """
        if hasattr(self, "_response") and self._response.hits.total.relation == "eq":  # type: ignore[attr-defined]
            return cast(int, self._response.hits.total.value)  # type: ignore[attr-defined]

        es = get_connection(self._using)

        d = self.to_dict(count=True)
        # TODO: failed shards detection
        resp = es.count(
            index=self._index,
            query=cast(Optional[Dict[str, Any]], d.get("query", None)),
            **self._params,
        )

        return cast(int, resp["count"])

    def execute(self, ignore_cache: bool = False) -> Response[_R]:
        """
        Execute the search and return an instance of ``Response`` wrapping all
        the data.

        :arg ignore_cache: if set to ``True``, consecutive calls will hit
            ES, while cached result will be ignored. Defaults to `False`
        """
        if ignore_cache or not hasattr(self, "_response"):
            es = get_connection(self._using)

            self._response = self._response_class(
                self,
                (
                    es.search(index=self._index, body=self.to_dict(), **self._params)
                ).body,
            )
        return self._response

    def scan(self) -> Iterator[_R]:
        """
        Turn the search into a scan search and return a generator that will
        iterate over all the documents matching the query.

        Use ``params`` method to specify any additional arguments you wish to
        pass to the underlying ``scan`` helper from ``elasticsearch-py`` -
        https://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.scan

        The ``iterate()`` method should be preferred, as it provides similar
        functionality using an Elasticsearch point in time.
        """
        es = get_connection(self._using)

        for hit in scan(es, query=self.to_dict(), index=self._index, **self._params):
            yield self._get_result(cast(AttrDict[Any], hit))

    def delete(self) -> AttrDict[Any]:
        """
        delete() executes the query by delegating to delete_by_query()
        """

        es = get_connection(self._using)
        assert self._index is not None

        return AttrDict(
            cast(
                Dict[str, Any],
                es.delete_by_query(
                    index=self._index, body=self.to_dict(), **self._params
                ),
            )
        )

    @contextlib.contextmanager
    def point_in_time(self, keep_alive: str = "1m") -> Iterator[Self]:
        """
        Open a point in time (pit) that can be used across several searches.

        This method implements a context manager that returns a search object
        configured to operate within the created pit. The pit is closed when
        the ``with`` block exits, including when it exits with an exception.

        :arg keep_alive: the time to live for the point in time, renewed with each search request
        """
        es = get_connection(self._using)

        pit = es.open_point_in_time(index=self._index or "*", keep_alive=keep_alive)
        try:
            # The index is dropped from the search itself; the pit id sent in
            # the body identifies what is searched.
            search = self.index().extra(pit={"id": pit["id"], "keep_alive": keep_alive})
            if not search._sort:
                search = search.sort("_shard_doc")
            yield search
        finally:
            # Always release the pit, otherwise it stays open on the cluster
            # until ``keep_alive`` expires.
            es.close_point_in_time(id=pit["id"])

    def iterate(self, keep_alive: str = "1m") -> Iterator[_R]:
        """
        Return a generator that iterates over all the documents matching the query.

        This method uses a point in time to provide consistent results even when
        the index is changing. It should be preferred over ``scan()``.

        :arg keep_alive: the time to live for the point in time, renewed with each new search request
        """
        with self.point_in_time(keep_alive=keep_alive) as s:
            while True:
                r = s.execute()
                yield from r
                # An empty page means everything has been consumed.
                if len(r.hits) == 0:
                    break
                s = s.search_after()


class MultiSearch(MultiSearchBase[_R]):
    """
    Combine multiple :class:`~elasticsearch.dsl.Search` objects into a single
    request.
    """

    _using: UsingType

    if TYPE_CHECKING:

        def add(self, search: Search[_R]) -> Self: ...  # type: ignore[override]

    def execute(
        self, ignore_cache: bool = False, raise_on_error: bool = True
    ) -> List[Response[_R]]:
        """
        Execute the multi search request and return a list of search results.

        :arg ignore_cache: if set to ``True`` the request is sent again even
            when a cached result exists
        :arg raise_on_error: if ``True`` (default) the first failed search
            raises ``ApiError``; otherwise its slot in the result is ``None``
        """
        if ignore_cache or not hasattr(self, "_response"):
            es = get_connection(self._using)

            responses = es.msearch(
                index=self._index, body=self.to_dict(), **self._params
            )

            out: List[Response[_R]] = []
            for s, r in zip(self._searches, responses["responses"]):
                if r.get("error", False):
                    if raise_on_error:
                        raise ApiError("N/A", meta=responses.meta, body=r)
                    r = None
                else:
                    r = Response(s, r)
                out.append(r)

            self._response = out

        return self._response


class EmptySearch(Search[_R]):
    """Search that never contacts elasticsearch and always yields no hits."""

    def count(self) -> int:
        """Return ``0``."""
        return 0

    def execute(self, ignore_cache: bool = False) -> Response[_R]:
        """Return a response built from an empty hits payload."""
        return self._response_class(self, {"hits": {"total": 0, "hits": []}})

    def scan(self) -> Iterator[_R]:
        """Return an empty generator."""
        return
        yield  # a bit strange, but this forces an empty generator function

    def delete(self) -> AttrDict[Any]:
        """Return an empty ``AttrDict`` without deleting anything."""
        return AttrDict[Any]({})
from typing import TYPE_CHECKING

from ..connections import get_connection
from ..update_by_query_base import UpdateByQueryBase
from ..utils import _R, UsingType

if TYPE_CHECKING:
    from ..response import UpdateByQueryResponse


class UpdateByQuery(UpdateByQueryBase[_R]):
    _using: UsingType

    def execute(self) -> "UpdateByQueryResponse[_R]":
        """
        Run the update-by-query request and return the wrapped response.

        The request is sent through the connection resolved from
        ``self._using``; the body of the reply is wrapped in
        ``self._response_class`` and also stored on ``self._response``.
        An index must have been set beforehand.
        """
        client = get_connection(self._using)
        assert self._index is not None

        # request fields come from to_dict(), extra arguments from _params
        reply = client.update_by_query(
            index=self._index, **self.to_dict(), **self._params
        )
        wrapped = self._response_class(self, reply.body)
        self._response = wrapped
        return wrapped
See the License for the +# specific language governing permissions and limitations +# under the License. + +import collections.abc +from copy import deepcopy +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Dict, + Generic, + Iterable, + Literal, + Mapping, + MutableMapping, + Optional, + Sequence, + Union, + cast, +) + +from elastic_transport.client_utils import DEFAULT + +from .query import Query +from .response.aggs import AggResponse, BucketData, FieldBucketData, TopHitsData +from .utils import _R, AttrDict, DslBase + +if TYPE_CHECKING: + from elastic_transport.client_utils import DefaultType + + from . import types + + from .document_base import InstrumentedField + from .search_base import SearchBase + + +def A( + name_or_agg: Union[MutableMapping[str, Any], "Agg[_R]", str], + filter: Optional[Union[str, "Query"]] = None, + **params: Any, +) -> "Agg[_R]": + if filter is not None: + if name_or_agg != "filter": + raise ValueError( + "Aggregation %r doesn't accept positional argument 'filter'." + % name_or_agg + ) + params["filter"] = filter + + # {"terms": {"field": "tags"}, "aggs": {...}} + if isinstance(name_or_agg, collections.abc.MutableMapping): + if params: + raise ValueError("A() cannot accept parameters when passing in a dict.") + # copy to avoid modifying in-place + agg = deepcopy(name_or_agg) + # pop out nested aggs + aggs = agg.pop("aggs", None) + # pop out meta data + meta = agg.pop("meta", None) + # should be {"terms": {"field": "tags"}} + if len(agg) != 1: + raise ValueError( + 'A() can only accept dict with an aggregation ({"terms": {...}}). ' + "Instead it got (%r)" % name_or_agg + ) + agg_type, params = agg.popitem() + if aggs: + params = params.copy() + params["aggs"] = aggs + if meta: + params = params.copy() + params["meta"] = meta + return Agg[_R].get_dsl_class(agg_type)(_expand__to_dot=False, **params) + + # Terms(...) 
just return the nested agg + elif isinstance(name_or_agg, Agg): + if params: + raise ValueError( + "A() cannot accept parameters when passing in an Agg object." + ) + return name_or_agg + + # "terms", field="tags" + return Agg[_R].get_dsl_class(name_or_agg)(**params) + + +class Agg(DslBase, Generic[_R]): + _type_name = "agg" + _type_shortcut = staticmethod(A) + name = "" + + def __contains__(self, key: str) -> bool: + return False + + def to_dict(self) -> Dict[str, Any]: + d = super().to_dict() + if isinstance(d[self.name], dict): + n = cast(Dict[str, Any], d[self.name]) + if "meta" in n: + d["meta"] = n.pop("meta") + return d + + def result(self, search: "SearchBase[_R]", data: Dict[str, Any]) -> AttrDict[Any]: + return AggResponse[_R](self, search, data) + + +class AggBase(Generic[_R]): + aggs: Dict[str, Agg[_R]] + _base: Agg[_R] + _params: Dict[str, Any] + _param_defs: ClassVar[Dict[str, Any]] = { + "aggs": {"type": "agg", "hash": True}, + } + + def __contains__(self, key: str) -> bool: + return key in self._params.get("aggs", {}) + + def __getitem__(self, agg_name: str) -> Agg[_R]: + agg = cast( + Agg[_R], self._params.setdefault("aggs", {})[agg_name] + ) # propagate KeyError + + # make sure we're not mutating a shared state - whenever accessing a + # bucket, return a shallow copy of it to be safe + if isinstance(agg, Bucket): + agg = A(agg.name, **agg._params) + # be sure to store the copy so any modifications to it will affect us + self._params["aggs"][agg_name] = agg + + return agg + + def __setitem__(self, agg_name: str, agg: Agg[_R]) -> None: + self.aggs[agg_name] = A(agg) + + def __iter__(self) -> Iterable[str]: + return iter(self.aggs) + + def _agg( + self, + bucket: bool, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> Agg[_R]: + agg = self[name] = A(agg_type, *args, **params) + + # For chaining - when creating new buckets return them... 
+ if bucket: + return agg + # otherwise return self._base so we can keep chaining + else: + return self._base + + def metric( + self, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> Agg[_R]: + return self._agg(False, name, agg_type, *args, **params) + + def bucket( + self, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> "Bucket[_R]": + return cast("Bucket[_R]", self._agg(True, name, agg_type, *args, **params)) + + def pipeline( + self, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> "Pipeline[_R]": + return cast("Pipeline[_R]", self._agg(False, name, agg_type, *args, **params)) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return BucketData(self, search, data) # type: ignore + + +class Bucket(AggBase[_R], Agg[_R]): + def __init__(self, **params: Any): + super().__init__(**params) + # remember self for chaining + self._base = self + + def to_dict(self) -> Dict[str, Any]: + d = super(AggBase, self).to_dict() + if isinstance(d[self.name], dict): + n = cast(AttrDict[Any], d[self.name]) + if "aggs" in n: + d["aggs"] = n.pop("aggs") + return d + + +class Pipeline(Agg[_R]): + pass + + +class AdjacencyMatrix(Bucket[_R]): + """ + A bucket aggregation returning a form of adjacency matrix. The request + provides a collection of named filter expressions, similar to the + `filters` aggregation. Each bucket in the response represents a non- + empty cell in the matrix of intersecting filters. + + :arg filters: Filters used to create buckets. At least one filter is + required. + :arg separator: Separator used to concatenate filter names. Defaults + to &. 
+ """ + + name = "adjacency_matrix" + _param_defs = { + "filters": {"type": "query", "hash": True}, + } + + def __init__( + self, + *, + filters: Union[Mapping[str, Query], "DefaultType"] = DEFAULT, + separator: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(filters=filters, separator=separator, **kwargs) + + +class AutoDateHistogram(Bucket[_R]): + """ + A multi-bucket aggregation similar to the date histogram, except + instead of providing an interval to use as the width of each bucket, a + target number of buckets is provided. + + :arg buckets: The target number of buckets. Defaults to `10` if + omitted. + :arg field: The field on which to run the aggregation. + :arg format: The date format used to format `key_as_string` in the + response. If no `format` is specified, the first date format + specified in the field mapping is used. + :arg minimum_interval: The minimum rounding interval. This can make + the collection process more efficient, as the aggregation will not + attempt to round at any interval lower than `minimum_interval`. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg offset: Time zone specified as a ISO 8601 UTC offset. + :arg params: + :arg script: + :arg time_zone: Time zone ID. 
+ """ + + name = "auto_date_histogram" + + def __init__( + self, + *, + buckets: Union[int, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + minimum_interval: Union[ + Literal["second", "minute", "hour", "day", "month", "year"], "DefaultType" + ] = DEFAULT, + missing: Any = DEFAULT, + offset: Union[str, "DefaultType"] = DEFAULT, + params: Union[Mapping[str, Any], "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + time_zone: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + buckets=buckets, + field=field, + format=format, + minimum_interval=minimum_interval, + missing=missing, + offset=offset, + params=params, + script=script, + time_zone=time_zone, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return FieldBucketData(self, search, data) + + +class Avg(Agg[_R]): + """ + A single-value metrics aggregation that computes the average of + numeric values that are extracted from the aggregated documents. + + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "avg" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, field=field, missing=missing, script=script, **kwargs + ) + + +class AvgBucket(Pipeline[_R]): + """ + A sibling pipeline aggregation which calculates the mean value of a + specified metric in a sibling aggregation. 
The specified metric must + be numeric and the sibling aggregation must be a multi-bucket + aggregation. + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "avg_bucket" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class Boxplot(Agg[_R]): + """ + A metrics aggregation that computes a box plot of numeric values + extracted from the aggregated documents. + + :arg compression: Limits the maximum number of nodes used by the + underlying TDigest algorithm to `20 * compression`, enabling + control of memory usage and approximation error. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. 
+ :arg script: + """ + + name = "boxplot" + + def __init__( + self, + *, + compression: Union[float, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + compression=compression, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class BucketScript(Pipeline[_R]): + """ + A parent pipeline aggregation which runs a script which can perform + per bucket computations on metrics in the parent multi-bucket + aggregation. + + :arg script: The script to run for this aggregation. + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "bucket_script" + + def __init__( + self, + *, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + script=script, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class BucketSelector(Pipeline[_R]): + """ + A parent pipeline aggregation which runs a script to determine whether + the current bucket will be retained in the parent multi-bucket + aggregation. + + :arg script: The script to run for this aggregation. + :arg format: `DecimalFormat` pattern for the output value. 
If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "bucket_selector" + + def __init__( + self, + *, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + script=script, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class BucketSort(Bucket[_R]): + """ + A parent pipeline aggregation which sorts the buckets of its parent + multi-bucket aggregation. + + :arg from: Buckets in positions prior to `from` will be truncated. + :arg gap_policy: The policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg size: The number of buckets to return. Defaults to all buckets of + the parent aggregation. + :arg sort: The list of fields to sort on. 
+ """ + + name = "bucket_sort" + + def __init__( + self, + *, + from_: Union[int, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + sort: Union[ + Union[Union[str, "InstrumentedField"], "types.SortOptions"], + Sequence[Union[Union[str, "InstrumentedField"], "types.SortOptions"]], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + from_=from_, gap_policy=gap_policy, size=size, sort=sort, **kwargs + ) + + +class BucketCountKsTest(Pipeline[_R]): + """ + A sibling pipeline aggregation which runs a two sample + Kolmogorov–Smirnov test ("K-S test") against a provided distribution + and the distribution implied by the documents counts in the configured + sibling aggregation. + + :arg alternative: A list of string values indicating which K-S test + alternative to calculate. The valid values are: "greater", "less", + "two_sided". This parameter is key for determining the K-S + statistic used when calculating the K-S test. Default value is all + possible alternative hypotheses. + :arg fractions: A list of doubles indicating the distribution of the + samples with which to compare to the `buckets_path` results. In + typical usage this is the overall proportion of documents in each + bucket, which is compared with the actual document proportions in + each bucket from the sibling aggregation counts. The default is to + assume that overall documents are uniformly distributed on these + buckets, which they would be if one used equal percentiles of a + metric to define the bucket end points. + :arg sampling_method: Indicates the sampling methodology when + calculating the K-S test. Note, this is sampling of the returned + values. This determines the cumulative distribution function (CDF) + points used comparing the two samples. Default is `upper_tail`, + which emphasizes the upper end of the CDF points. 
Valid options + are: `upper_tail`, `uniform`, and `lower_tail`. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "bucket_count_ks_test" + + def __init__( + self, + *, + alternative: Union[Sequence[str], "DefaultType"] = DEFAULT, + fractions: Union[Sequence[float], "DefaultType"] = DEFAULT, + sampling_method: Union[str, "DefaultType"] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + alternative=alternative, + fractions=fractions, + sampling_method=sampling_method, + buckets_path=buckets_path, + **kwargs, + ) + + +class BucketCorrelation(Pipeline[_R]): + """ + A sibling pipeline aggregation which runs a correlation function on + the configured sibling multi-bucket aggregation. + + :arg function: (required) The correlation function to execute. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "bucket_correlation" + + def __init__( + self, + *, + function: Union[ + "types.BucketCorrelationFunction", Dict[str, Any], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__(function=function, buckets_path=buckets_path, **kwargs) + + +class Cardinality(Agg[_R]): + """ + A single-value metrics aggregation that calculates an approximate + count of distinct values. + + :arg precision_threshold: A unique count below which counts are + expected to be close to accurate. This allows to trade memory for + accuracy. Defaults to `3000` if omitted. + :arg rehash: + :arg execution_hint: Mechanism by which cardinality aggregations is + run. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. 
+ :arg script: + """ + + name = "cardinality" + + def __init__( + self, + *, + precision_threshold: Union[int, "DefaultType"] = DEFAULT, + rehash: Union[bool, "DefaultType"] = DEFAULT, + execution_hint: Union[ + Literal[ + "global_ordinals", + "segment_ordinals", + "direct", + "save_memory_heuristic", + "save_time_heuristic", + ], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + precision_threshold=precision_threshold, + rehash=rehash, + execution_hint=execution_hint, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class CategorizeText(Bucket[_R]): + """ + A multi-bucket aggregation that groups semi-structured text into + buckets. + + :arg field: (required) The semi-structured text field to categorize. + :arg max_unique_tokens: The maximum number of unique tokens at any + position up to max_matched_tokens. Must be larger than 1. Smaller + values use less memory and create fewer categories. Larger values + will use more memory and create narrower categories. Max allowed + value is 100. Defaults to `50` if omitted. + :arg max_matched_tokens: The maximum number of token positions to + match on before attempting to merge categories. Larger values will + use more memory and create narrower categories. Max allowed value + is 100. Defaults to `5` if omitted. + :arg similarity_threshold: The minimum percentage of tokens that must + match for text to be added to the category bucket. Must be between + 1 and 100. The larger the value the narrower the categories. + Larger values will increase memory usage and create narrower + categories. Defaults to `50` if omitted. + :arg categorization_filters: This property expects an array of regular + expressions. 
The expressions are used to filter out matching + sequences from the categorization field values. You can use this + functionality to fine tune the categorization by excluding + sequences from consideration when categories are defined. For + example, you can exclude SQL statements that appear in your log + files. This property cannot be used at the same time as + categorization_analyzer. If you only want to define simple regular + expression filters that are applied prior to tokenization, setting + this property is the easiest method. If you also want to customize + the tokenizer or post-tokenization filtering, use the + categorization_analyzer property instead and include the filters + as pattern_replace character filters. + :arg categorization_analyzer: The categorization analyzer specifies + how the text is analyzed and tokenized before being categorized. + The syntax is very similar to that used to define the analyzer in + the [Analyze endpoint](https://www.elastic.co/guide/en/elasticsear + ch/reference/8.0/indices-analyze.html). This property cannot be + used at the same time as categorization_filters. + :arg shard_size: The number of categorization buckets to return from + each shard before merging all the results. + :arg size: The number of buckets to return. Defaults to `10` if + omitted. + :arg min_doc_count: The minimum number of documents in a bucket to be + returned to the results. + :arg shard_min_doc_count: The minimum number of documents in a bucket + to be returned from the shard before merging. 
+ """ + + name = "categorize_text" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + max_unique_tokens: Union[int, "DefaultType"] = DEFAULT, + max_matched_tokens: Union[int, "DefaultType"] = DEFAULT, + similarity_threshold: Union[int, "DefaultType"] = DEFAULT, + categorization_filters: Union[Sequence[str], "DefaultType"] = DEFAULT, + categorization_analyzer: Union[ + str, "types.CustomCategorizeTextAnalyzer", Dict[str, Any], "DefaultType" + ] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + max_unique_tokens=max_unique_tokens, + max_matched_tokens=max_matched_tokens, + similarity_threshold=similarity_threshold, + categorization_filters=categorization_filters, + categorization_analyzer=categorization_analyzer, + shard_size=shard_size, + size=size, + min_doc_count=min_doc_count, + shard_min_doc_count=shard_min_doc_count, + **kwargs, + ) + + +class Children(Bucket[_R]): + """ + A single bucket aggregation that selects child documents that have the + specified type, as defined in a `join` field. + + :arg type: The child type that should be selected. + """ + + name = "children" + + def __init__(self, type: Union[str, "DefaultType"] = DEFAULT, **kwargs: Any): + super().__init__(type=type, **kwargs) + + +class Composite(Bucket[_R]): + """ + A multi-bucket aggregation that creates composite buckets from + different sources. Unlike the other multi-bucket aggregations, you can + use the `composite` aggregation to paginate *all* buckets from a + multi-level aggregation efficiently. + + :arg after: When paginating, use the `after_key` value returned in the + previous response to retrieve the next page. + :arg size: The number of composite buckets that should be returned. + Defaults to `10` if omitted. 
+ :arg sources: The value sources used to build composite buckets. Keys + are returned in the order of the `sources` definition. + """ + + name = "composite" + + def __init__( + self, + *, + after: Union[ + Mapping[ + Union[str, "InstrumentedField"], Union[int, float, str, bool, None, Any] + ], + "DefaultType", + ] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + sources: Union[Sequence[Mapping[str, Agg[_R]]], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(after=after, size=size, sources=sources, **kwargs) + + +class CumulativeCardinality(Pipeline[_R]): + """ + A parent pipeline aggregation which calculates the cumulative + cardinality in a parent `histogram` or `date_histogram` aggregation. + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "cumulative_cardinality" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class CumulativeSum(Pipeline[_R]): + """ + A parent pipeline aggregation which calculates the cumulative sum of a + specified metric in a parent `histogram` or `date_histogram` + aggregation. + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. 
+ :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "cumulative_sum" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class DateHistogram(Bucket[_R]): + """ + A multi-bucket values source based aggregation that can be applied on + date values or date range values extracted from the documents. It + dynamically builds fixed size (interval) buckets over the values. + + :arg calendar_interval: Calendar-aware interval. Can be specified + using the unit name, such as `month`, or as a single unit + quantity, such as `1M`. + :arg extended_bounds: Enables extending the bounds of the histogram + beyond the data itself. + :arg hard_bounds: Limits the histogram to specified bounds. + :arg field: The date field whose values are use to build a histogram. + :arg fixed_interval: Fixed intervals: a fixed number of SI units and + never deviate, regardless of where they fall on the calendar. + :arg format: The date format used to format `key_as_string` in the + response. If no `format` is specified, the first date format + specified in the field mapping is used. + :arg interval: + :arg min_doc_count: Only returns buckets that have `min_doc_count` + number of documents. By default, all buckets between the first + bucket that matches documents and the last one are returned. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg offset: Changes the start value of each bucket by the specified + positive (`+`) or negative offset (`-`) duration. + :arg order: The sort order of the returned buckets. 
+ :arg params: + :arg script: + :arg time_zone: Time zone used for bucketing and rounding. Defaults to + Coordinated Universal Time (UTC). + :arg keyed: Set to `true` to associate a unique string key with each + bucket and return the ranges as a hash rather than an array. + """ + + name = "date_histogram" + + def __init__( + self, + *, + calendar_interval: Union[ + Literal[ + "second", "minute", "hour", "day", "week", "month", "quarter", "year" + ], + "DefaultType", + ] = DEFAULT, + extended_bounds: Union[ + "types.ExtendedBounds", Dict[str, Any], "DefaultType" + ] = DEFAULT, + hard_bounds: Union[ + "types.ExtendedBounds", Dict[str, Any], "DefaultType" + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + fixed_interval: Any = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + interval: Any = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + missing: Any = DEFAULT, + offset: Any = DEFAULT, + order: Union[ + Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]], + Sequence[Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]]], + "DefaultType", + ] = DEFAULT, + params: Union[Mapping[str, Any], "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + time_zone: Union[str, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + calendar_interval=calendar_interval, + extended_bounds=extended_bounds, + hard_bounds=hard_bounds, + field=field, + fixed_interval=fixed_interval, + format=format, + interval=interval, + min_doc_count=min_doc_count, + missing=missing, + offset=offset, + order=order, + params=params, + script=script, + time_zone=time_zone, + keyed=keyed, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return FieldBucketData(self, search, data) + + +class DateRange(Bucket[_R]): + """ + A multi-bucket value source based aggregation that 
enables the user to + define a set of date ranges - each representing a bucket. + + :arg field: The date field whose values are used to build ranges. + :arg format: The date format used to format `from` and `to` in the + response. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg ranges: Array of date ranges. + :arg time_zone: Time zone used to convert dates from another time zone + to UTC. + :arg keyed: Set to `true` to associate a unique string key with each + bucket and return the ranges as a hash rather than an array. + """ + + name = "date_range" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + ranges: Union[ + Sequence["types.DateRangeExpression"], + Sequence[Dict[str, Any]], + "DefaultType", + ] = DEFAULT, + time_zone: Union[str, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + format=format, + missing=missing, + ranges=ranges, + time_zone=time_zone, + keyed=keyed, + **kwargs, + ) + + +class Derivative(Pipeline[_R]): + """ + A parent pipeline aggregation which calculates the derivative of a + specified metric in a parent `histogram` or `date_histogram` + aggregation. + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. 
+ """ + + name = "derivative" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class DiversifiedSampler(Bucket[_R]): + """ + A filtering aggregation used to limit any sub aggregations' processing + to a sample of the top-scoring documents. Similar to the `sampler` + aggregation, but adds the ability to limit the number of matches that + share a common value. + + :arg execution_hint: The type of value used for de-duplication. + Defaults to `global_ordinals` if omitted. + :arg max_docs_per_value: Limits how many documents are permitted per + choice of de-duplicating value. Defaults to `1` if omitted. + :arg script: + :arg shard_size: Limits how many top-scoring documents are collected + in the sample processed on each shard. Defaults to `100` if + omitted. + :arg field: The field used to provide values used for de-duplication. + """ + + name = "diversified_sampler" + + def __init__( + self, + *, + execution_hint: Union[ + Literal["map", "global_ordinals", "bytes_hash"], "DefaultType" + ] = DEFAULT, + max_docs_per_value: Union[int, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + execution_hint=execution_hint, + max_docs_per_value=max_docs_per_value, + script=script, + shard_size=shard_size, + field=field, + **kwargs, + ) + + +class ExtendedStats(Agg[_R]): + """ + A multi-value metrics aggregation that computes stats over numeric + values extracted from the aggregated documents. 
+ + :arg sigma: The number of standard deviations above/below the mean to + display. + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "extended_stats" + + def __init__( + self, + *, + sigma: Union[float, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + sigma=sigma, + format=format, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class ExtendedStatsBucket(Pipeline[_R]): + """ + A sibling pipeline aggregation which calculates a variety of stats + across all buckets of a specified metric in a sibling aggregation. + + :arg sigma: The number of standard deviations above/below the mean to + display. + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. 
+ """ + + name = "extended_stats_bucket" + + def __init__( + self, + *, + sigma: Union[float, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + sigma=sigma, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class FrequentItemSets(Agg[_R]): + """ + A bucket aggregation which finds frequent item sets, a form of + association rules mining that identifies items that often occur + together. + + :arg fields: (required) Fields to analyze. + :arg minimum_set_size: The minimum size of one item set. Defaults to + `1` if omitted. + :arg minimum_support: The minimum support of one item set. Defaults to + `0.1` if omitted. + :arg size: The number of top item sets to return. Defaults to `10` if + omitted. + :arg filter: Query that filters documents from analysis. + """ + + name = "frequent_item_sets" + _param_defs = { + "filter": {"type": "query"}, + } + + def __init__( + self, + *, + fields: Union[ + Sequence["types.FrequentItemSetsField"], + Sequence[Dict[str, Any]], + "DefaultType", + ] = DEFAULT, + minimum_set_size: Union[int, "DefaultType"] = DEFAULT, + minimum_support: Union[float, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + filter: Union[Query, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + fields=fields, + minimum_set_size=minimum_set_size, + minimum_support=minimum_support, + size=size, + filter=filter, + **kwargs, + ) + + +class Filter(Bucket[_R]): + """ + A single bucket aggregation that narrows the set of documents to those + that match a query. + + :arg filter: A single bucket aggregation that narrows the set of + documents to those that match a query. 
+ """ + + name = "filter" + _param_defs = { + "filter": {"type": "query"}, + "aggs": {"type": "agg", "hash": True}, + } + + def __init__(self, filter: Union[Query, "DefaultType"] = DEFAULT, **kwargs: Any): + super().__init__(filter=filter, **kwargs) + + def to_dict(self) -> Dict[str, Any]: + d = super().to_dict() + if isinstance(d[self.name], dict): + n = cast(AttrDict[Any], d[self.name]) + n.update(n.pop("filter", {})) + return d + + +class Filters(Bucket[_R]): + """ + A multi-bucket aggregation where each bucket contains the documents + that match a query. + + :arg filters: Collection of queries from which to build buckets. + :arg other_bucket: Set to `true` to add a bucket to the response which + will contain all documents that do not match any of the given + filters. + :arg other_bucket_key: The key with which the other bucket is + returned. Defaults to `_other_` if omitted. + :arg keyed: By default, the named filters aggregation returns the + buckets as an object. Set to `false` to return the buckets as an + array of objects. Defaults to `True` if omitted. + """ + + name = "filters" + _param_defs = { + "filters": {"type": "query", "hash": True}, + "aggs": {"type": "agg", "hash": True}, + } + + def __init__( + self, + *, + filters: Union[Dict[str, Query], "DefaultType"] = DEFAULT, + other_bucket: Union[bool, "DefaultType"] = DEFAULT, + other_bucket_key: Union[str, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + filters=filters, + other_bucket=other_bucket, + other_bucket_key=other_bucket_key, + keyed=keyed, + **kwargs, + ) + + +class GeoBounds(Agg[_R]): + """ + A metric aggregation that computes the geographic bounding box + containing all values for a Geopoint or Geoshape field. + + :arg wrap_longitude: Specifies whether the bounding box should be + allowed to overlap the international date line. Defaults to `True` + if omitted. + :arg field: The field on which to run the aggregation. 
+ :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "geo_bounds" + + def __init__( + self, + *, + wrap_longitude: Union[bool, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + wrap_longitude=wrap_longitude, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class GeoCentroid(Agg[_R]): + """ + A metric aggregation that computes the weighted centroid from all + coordinate values for geo fields. + + :arg count: + :arg location: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "geo_centroid" + + def __init__( + self, + *, + count: Union[int, "DefaultType"] = DEFAULT, + location: Union[ + "types.LatLonGeoLocation", + "types.GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + count=count, + location=location, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class GeoDistance(Bucket[_R]): + """ + A multi-bucket aggregation that works on `geo_point` fields. Evaluates + the distance of each document value from an origin point and + determines the buckets it belongs to, based on ranges defined in the + request. + + :arg distance_type: The distance calculation type. Defaults to `arc` + if omitted. 
+ :arg field: A field of type `geo_point` used to evaluate the distance. + :arg origin: The origin used to evaluate the distance. + :arg ranges: An array of ranges used to bucket documents. + :arg unit: The distance unit. Defaults to `m` if omitted. + """ + + name = "geo_distance" + + def __init__( + self, + *, + distance_type: Union[Literal["arc", "plane"], "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + origin: Union[ + "types.LatLonGeoLocation", + "types.GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + ranges: Union[ + Sequence["types.AggregationRange"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + unit: Union[ + Literal["in", "ft", "yd", "mi", "nmi", "km", "m", "cm", "mm"], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + distance_type=distance_type, + field=field, + origin=origin, + ranges=ranges, + unit=unit, + **kwargs, + ) + + +class GeohashGrid(Bucket[_R]): + """ + A multi-bucket aggregation that groups `geo_point` and `geo_shape` + values into buckets that represent a grid. Each cell is labeled using + a geohash which is of user-definable precision. + + :arg bounds: The bounding box to filter the points in each bucket. + :arg field: Field containing indexed `geo_point` or `geo_shape` + values. If the field contains an array, `geohash_grid` aggregates + all array values. + :arg precision: The string length of the geohashes used to define + cells/buckets in the results. Defaults to `5` if omitted. + :arg shard_size: Allows for more accurate counting of the top cells + returned in the final result of the aggregation. Defaults to + returning `max(10,(size x number-of-shards))` buckets from each + shard. + :arg size: The maximum number of geohash buckets to return. Defaults + to `10000` if omitted. 
+ """ + + name = "geohash_grid" + + def __init__( + self, + *, + bounds: Union[ + "types.CoordsGeoBounds", + "types.TopLeftBottomRightGeoBounds", + "types.TopRightBottomLeftGeoBounds", + "types.WktGeoBounds", + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + precision: Union[float, str, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + bounds=bounds, + field=field, + precision=precision, + shard_size=shard_size, + size=size, + **kwargs, + ) + + +class GeoLine(Agg[_R]): + """ + Aggregates all `geo_point` values within a bucket into a `LineString` + ordered by the chosen sort field. + + :arg point: (required) The name of the geo_point field. + :arg sort: (required) The name of the numeric field to use as the sort + key for ordering the points. When the `geo_line` aggregation is + nested inside a `time_series` aggregation, this field defaults to + `@timestamp`, and any other value will result in error. + :arg include_sort: When `true`, returns an additional array of the + sort values in the feature properties. + :arg sort_order: The order in which the line is sorted (ascending or + descending). Defaults to `asc` if omitted. + :arg size: The maximum length of the line represented in the + aggregation. Valid sizes are between 1 and 10000. Defaults to + `10000` if omitted. 
+ """ + + name = "geo_line" + + def __init__( + self, + *, + point: Union["types.GeoLinePoint", Dict[str, Any], "DefaultType"] = DEFAULT, + sort: Union["types.GeoLineSort", Dict[str, Any], "DefaultType"] = DEFAULT, + include_sort: Union[bool, "DefaultType"] = DEFAULT, + sort_order: Union[Literal["asc", "desc"], "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + point=point, + sort=sort, + include_sort=include_sort, + sort_order=sort_order, + size=size, + **kwargs, + ) + + +class GeotileGrid(Bucket[_R]): + """ + A multi-bucket aggregation that groups `geo_point` and `geo_shape` + values into buckets that represent a grid. Each cell corresponds to a + map tile as used by many online map sites. + + :arg field: Field containing indexed `geo_point` or `geo_shape` + values. If the field contains an array, `geotile_grid` aggregates + all array values. + :arg precision: Integer zoom of the key used to define cells/buckets + in the results. Values outside of the range [0,29] will be + rejected. Defaults to `7` if omitted. + :arg shard_size: Allows for more accurate counting of the top cells + returned in the final result of the aggregation. Defaults to + returning `max(10,(size x number-of-shards))` buckets from each + shard. + :arg size: The maximum number of buckets to return. Defaults to + `10000` if omitted. + :arg bounds: A bounding box to filter the geo-points or geo-shapes in + each bucket. 
+ """ + + name = "geotile_grid" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + precision: Union[float, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + bounds: Union[ + "types.CoordsGeoBounds", + "types.TopLeftBottomRightGeoBounds", + "types.TopRightBottomLeftGeoBounds", + "types.WktGeoBounds", + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + precision=precision, + shard_size=shard_size, + size=size, + bounds=bounds, + **kwargs, + ) + + +class GeohexGrid(Bucket[_R]): + """ + A multi-bucket aggregation that groups `geo_point` and `geo_shape` + values into buckets that represent a grid. Each cell corresponds to a + H3 cell index and is labeled using the H3Index representation. + + :arg field: (required) Field containing indexed `geo_point` or + `geo_shape` values. If the field contains an array, `geohex_grid` + aggregates all array values. + :arg precision: Integer zoom of the key used to define cells or + buckets in the results. Value should be between 0-15. Defaults to + `6` if omitted. + :arg bounds: Bounding box used to filter the geo-points in each + bucket. + :arg size: Maximum number of buckets to return. Defaults to `10000` if + omitted. + :arg shard_size: Number of buckets returned from each shard. 
+ """ + + name = "geohex_grid" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + precision: Union[int, "DefaultType"] = DEFAULT, + bounds: Union[ + "types.CoordsGeoBounds", + "types.TopLeftBottomRightGeoBounds", + "types.TopRightBottomLeftGeoBounds", + "types.WktGeoBounds", + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + precision=precision, + bounds=bounds, + size=size, + shard_size=shard_size, + **kwargs, + ) + + +class Global(Bucket[_R]): + """ + Defines a single bucket of all the documents within the search + execution context. This context is defined by the indices and the + document types you’re searching on, but is not influenced by the + search query itself. + """ + + name = "global" + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + + +class Histogram(Bucket[_R]): + """ + A multi-bucket values source based aggregation that can be applied on + numeric values or numeric range values extracted from the documents. + It dynamically builds fixed size (interval) buckets over the values. + + :arg extended_bounds: Enables extending the bounds of the histogram + beyond the data itself. + :arg hard_bounds: Limits the range of buckets in the histogram. It is + particularly useful in the case of open data ranges that can + result in a very large number of buckets. + :arg field: The name of the field to aggregate on. + :arg interval: The interval for the buckets. Must be a positive + decimal. + :arg min_doc_count: Only returns buckets that have `min_doc_count` + number of documents. By default, the response will fill gaps in + the histogram with empty buckets. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. 
+ :arg offset: By default, the bucket keys start with 0 and then + continue in evenly spaced steps of `interval`. The bucket boundaries + can be shifted by using the `offset` option. + :arg order: The sort order of the returned buckets. By default, the + returned buckets are sorted by their key ascending. + :arg script: + :arg format: + :arg keyed: If `true`, returns buckets as a hash instead of an array, + keyed by the bucket keys. + """ + + name = "histogram" + + def __init__( + self, + *, + extended_bounds: Union[ + "types.ExtendedBounds", Dict[str, Any], "DefaultType" + ] = DEFAULT, + hard_bounds: Union[ + "types.ExtendedBounds", Dict[str, Any], "DefaultType" + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + interval: Union[float, "DefaultType"] = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + missing: Union[float, "DefaultType"] = DEFAULT, + offset: Union[float, "DefaultType"] = DEFAULT, + order: Union[ + Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]], + Sequence[Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]]], + "DefaultType", + ] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + extended_bounds=extended_bounds, + hard_bounds=hard_bounds, + field=field, + interval=interval, + min_doc_count=min_doc_count, + missing=missing, + offset=offset, + order=order, + script=script, + format=format, + keyed=keyed, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return FieldBucketData(self, search, data) + + +class IPRange(Bucket[_R]): + """ + A multi-bucket value source based aggregation that enables the user to + define a set of IP ranges - each representing a bucket. + + :arg field: The IP field whose values are used to build ranges. + :arg ranges: Array of IP ranges. 
+ """ + + name = "ip_range" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + ranges: Union[ + Sequence["types.IpRangeAggregationRange"], + Sequence[Dict[str, Any]], + "DefaultType", + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__(field=field, ranges=ranges, **kwargs) + + +class IPPrefix(Bucket[_R]): + """ + A bucket aggregation that groups documents based on the network or + sub-network of an IP address. + + :arg field: (required) The IP address field to aggregate on. The + field mapping type must be `ip`. + :arg prefix_length: (required) Length of the network prefix. For IPv4 + addresses the accepted range is [0, 32]. For IPv6 addresses the + accepted range is [0, 128]. + :arg is_ipv6: Defines whether the prefix applies to IPv6 addresses. + :arg append_prefix_length: Defines whether the prefix length is + appended to IP address keys in the response. + :arg keyed: Defines whether buckets are returned as a hash rather than + an array in the response. + :arg min_doc_count: Minimum number of documents in a bucket for it to + be included in the response. Defaults to `1` if omitted. + """ + + name = "ip_prefix" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + prefix_length: Union[int, "DefaultType"] = DEFAULT, + is_ipv6: Union[bool, "DefaultType"] = DEFAULT, + append_prefix_length: Union[bool, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + prefix_length=prefix_length, + is_ipv6=is_ipv6, + append_prefix_length=append_prefix_length, + keyed=keyed, + min_doc_count=min_doc_count, + **kwargs, + ) + + +class Inference(Pipeline[_R]): + """ + A parent pipeline aggregation which loads a pre-trained model and + performs inference on the collated result fields from the parent + bucket aggregation. 
+ + :arg model_id: (required) The ID or alias for the trained model. + :arg inference_config: Contains the inference type and its options. + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "inference" + + def __init__( + self, + *, + model_id: Union[str, "DefaultType"] = DEFAULT, + inference_config: Union[ + "types.InferenceConfigContainer", Dict[str, Any], "DefaultType" + ] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + model_id=model_id, + inference_config=inference_config, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class Line(Agg[_R]): + """ + :arg point: (required) The name of the geo_point field. + :arg sort: (required) The name of the numeric field to use as the sort + key for ordering the points. When the `geo_line` aggregation is + nested inside a `time_series` aggregation, this field defaults to + `@timestamp`, and any other value will result in error. + :arg include_sort: When `true`, returns an additional array of the + sort values in the feature properties. + :arg sort_order: The order in which the line is sorted (ascending or + descending). Defaults to `asc` if omitted. + :arg size: The maximum length of the line represented in the + aggregation. Valid sizes are between 1 and 10000. Defaults to + `10000` if omitted. 
+ """ + + name = "line" + + def __init__( + self, + *, + point: Union["types.GeoLinePoint", Dict[str, Any], "DefaultType"] = DEFAULT, + sort: Union["types.GeoLineSort", Dict[str, Any], "DefaultType"] = DEFAULT, + include_sort: Union[bool, "DefaultType"] = DEFAULT, + sort_order: Union[Literal["asc", "desc"], "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + point=point, + sort=sort, + include_sort=include_sort, + sort_order=sort_order, + size=size, + **kwargs, + ) + + +class MatrixStats(Agg[_R]): + """ + A numeric aggregation that computes the following statistics over a + set of document fields: `count`, `mean`, `variance`, `skewness`, + `kurtosis`, `covariance`, and `correlation`. + + :arg mode: Array value the aggregation will use for array or multi- + valued fields. Defaults to `avg` if omitted. + :arg fields: An array of fields for computing the statistics. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + """ + + name = "matrix_stats" + + def __init__( + self, + *, + mode: Union[ + Literal["min", "max", "sum", "avg", "median"], "DefaultType" + ] = DEFAULT, + fields: Union[ + Union[str, "InstrumentedField"], + Sequence[Union[str, "InstrumentedField"]], + "DefaultType", + ] = DEFAULT, + missing: Union[ + Mapping[Union[str, "InstrumentedField"], float], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__(mode=mode, fields=fields, missing=missing, **kwargs) + + +class Max(Agg[_R]): + """ + A single-value metrics aggregation that returns the maximum value + among the numeric values extracted from the aggregated documents. + + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. 
class MaxBucket(Pipeline[_R]):
    """
    Sibling pipeline aggregation that locates the bucket(s) holding the
    maximum value of a given metric in a sibling aggregation, and reports
    both that value and the key(s) of the bucket(s).

    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    name = "max_bucket"

    def __init__(
        self,
        *,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        # Named options are forwarded first, in declaration order, and any
        # extra keyword arguments follow them.
        options = {
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class MedianAbsoluteDeviation(Agg[_R]):
    """
    Single-value aggregation approximating the median absolute deviation
    of its search results.

    :arg compression: Caps the number of nodes used by the underlying
        TDigest algorithm at `20 * compression`, which allows memory usage
        and approximation error to be controlled. Defaults to `1000` if
        omitted.
    :arg format:
    :arg field: The field on which the aggregation runs.
    :arg missing: Value applied to documents that have no value. Such
        documents are ignored by default.
    :arg script:
    """

    name = "median_absolute_deviation"

    def __init__(
        self,
        *,
        compression: Union[float, "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "compression": compression,
            "format": format,
            "field": field,
            "missing": missing,
            "script": script,
        }
        super().__init__(**options, **kwargs)


class Min(Agg[_R]):
    """
    Single-value metrics aggregation returning the minimum among the
    numeric values extracted from the aggregated documents.

    :arg format:
    :arg field: The field on which the aggregation runs.
    :arg missing: Value applied to documents that have no value. Such
        documents are ignored by default.
    :arg script:
    """

    name = "min"

    def __init__(
        self,
        *,
        format: Union[str, "DefaultType"] = DEFAULT,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "format": format,
            "field": field,
            "missing": missing,
            "script": script,
        }
        super().__init__(**options, **kwargs)


class MinBucket(Pipeline[_R]):
    """
    Sibling pipeline aggregation that locates the bucket(s) holding the
    minimum value of a given metric in a sibling aggregation, and reports
    both that value and the key(s) of the bucket(s).

    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    name = "min_bucket"

    def __init__(
        self,
        *,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class Missing(Bucket[_R]):
    """
    Field-data based single bucket aggregation. It builds one bucket out
    of every document in the current document set context that lacks a
    field value (that is, the field is missing or holds the configured
    NULL value).

    :arg field: The name of the field.
    :arg missing:
    """

    name = "missing"

    def __init__(
        self,
        *,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {"field": field, "missing": missing}
        super().__init__(**options, **kwargs)


class MovingAvg(Pipeline[_R]):
    """
    Pipeline aggregation registered under the name `moving_avg`.

    It declares no options of its own; every keyword argument is handed
    to the parent constructor unchanged.
    """

    name = "moving_avg"

    def __init__(self, **kwargs: Any):
        super().__init__(**kwargs)


class LinearMovingAverageAggregation(MovingAvg[_R]):
    """
    :arg model: (required)
    :arg settings: (required)
    :arg minimize:
    :arg predict:
    :arg window:
    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    def __init__(
        self,
        *,
        model: Any = DEFAULT,
        settings: Union["types.EmptyObject", Dict[str, Any], "DefaultType"] = DEFAULT,
        minimize: Union[bool, "DefaultType"] = DEFAULT,
        predict: Union[int, "DefaultType"] = DEFAULT,
        window: Union[int, "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "model": model,
            "settings": settings,
            "minimize": minimize,
            "predict": predict,
            "window": window,
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class SimpleMovingAverageAggregation(MovingAvg[_R]):
    """
    :arg model: (required)
    :arg settings: (required)
    :arg minimize:
    :arg predict:
    :arg window:
    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    def __init__(
        self,
        *,
        model: Any = DEFAULT,
        settings: Union["types.EmptyObject", Dict[str, Any], "DefaultType"] = DEFAULT,
        minimize: Union[bool, "DefaultType"] = DEFAULT,
        predict: Union[int, "DefaultType"] = DEFAULT,
        window: Union[int, "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "model": model,
            "settings": settings,
            "minimize": minimize,
            "predict": predict,
            "window": window,
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class EwmaMovingAverageAggregation(MovingAvg[_R]):
    """
    :arg model: (required)
    :arg settings: (required)
    :arg minimize:
    :arg predict:
    :arg window:
    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    def __init__(
        self,
        *,
        model: Any = DEFAULT,
        settings: Union[
            "types.EwmaModelSettings", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        minimize: Union[bool, "DefaultType"] = DEFAULT,
        predict: Union[int, "DefaultType"] = DEFAULT,
        window: Union[int, "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "model": model,
            "settings": settings,
            "minimize": minimize,
            "predict": predict,
            "window": window,
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class HoltMovingAverageAggregation(MovingAvg[_R]):
    """
    :arg model: (required)
    :arg settings: (required)
    :arg minimize:
    :arg predict:
    :arg window:
    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    def __init__(
        self,
        *,
        model: Any = DEFAULT,
        settings: Union[
            "types.HoltLinearModelSettings", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        minimize: Union[bool, "DefaultType"] = DEFAULT,
        predict: Union[int, "DefaultType"] = DEFAULT,
        window: Union[int, "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "model": model,
            "settings": settings,
            "minimize": minimize,
            "predict": predict,
            "window": window,
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class HoltWintersMovingAverageAggregation(MovingAvg[_R]):
    """
    :arg model: (required)
    :arg settings: (required)
    :arg minimize:
    :arg predict:
    :arg window:
    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    def __init__(
        self,
        *,
        model: Any = DEFAULT,
        settings: Union[
            "types.HoltWintersModelSettings", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        minimize: Union[bool, "DefaultType"] = DEFAULT,
        predict: Union[int, "DefaultType"] = DEFAULT,
        window: Union[int, "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "model": model,
            "settings": settings,
            "minimize": minimize,
            "predict": predict,
            "window": window,
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class MovingPercentiles(Pipeline[_R]):
    """
    Takes an ordered series of percentiles, "slides" a window across them
    and computes cumulative percentiles.

    :arg window: The size of window to "slide" across the histogram.
    :arg shift: By default, the window is made of the last n values,
        excluding the current bucket. Each increment of `shift` moves the
        starting window position one step to the right.
    :arg keyed:
    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    name = "moving_percentiles"

    def __init__(
        self,
        *,
        window: Union[int, "DefaultType"] = DEFAULT,
        shift: Union[int, "DefaultType"] = DEFAULT,
        keyed: Union[bool, "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "window": window,
            "shift": shift,
            "keyed": keyed,
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class MovingFn(Pipeline[_R]):
    """
    Takes an ordered series of data, "slides" a window across it and runs
    a custom script on each window of data. For convenience, several
    common functions are predefined, such as `min`, `max`, and moving
    averages.

    :arg script: The script to execute on each window of data.
    :arg shift: By default, the window is made of the last n values,
        excluding the current bucket. Each increment of `shift` moves the
        starting window position one step to the right.
    :arg window: The size of window to "slide" across the histogram.
    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    name = "moving_fn"

    def __init__(
        self,
        *,
        script: Union[str, "DefaultType"] = DEFAULT,
        shift: Union[int, "DefaultType"] = DEFAULT,
        window: Union[int, "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "script": script,
            "shift": shift,
            "window": window,
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class MultiTerms(Bucket[_R]):
    """
    Multi-bucket, value-source based aggregation whose buckets are built
    dynamically, one per unique set of values.

    :arg terms: (required) The field from which to generate sets of terms.
    :arg collect_mode: Strategy used for data collection. Defaults to
        `breadth_first` if omitted.
    :arg order: Sort order of the buckets. Defaults to sorting by
        descending document count.
    :arg min_doc_count: Minimum number of documents a bucket needs in
        order to be returned. Defaults to `1` if omitted.
    :arg shard_min_doc_count: Minimum number of documents a bucket needs
        on each shard in order to be returned. Defaults to `1` if omitted.
    :arg shard_size: Number of candidate terms produced by each shard. By
        default, `shard_size` is estimated automatically from the number
        of shards and the `size` parameter.
    :arg show_term_doc_count_error: Calculates the doc count error on a
        per-term basis.
    :arg size: Number of term buckets to return out of the overall terms
        list. Defaults to `10` if omitted.
    """

    name = "multi_terms"

    def __init__(
        self,
        *,
        terms: Union[
            Sequence["types.MultiTermLookup"], Sequence[Dict[str, Any]], "DefaultType"
        ] = DEFAULT,
        collect_mode: Union[
            Literal["depth_first", "breadth_first"], "DefaultType"
        ] = DEFAULT,
        order: Union[
            Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]],
            Sequence[Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]]],
            "DefaultType",
        ] = DEFAULT,
        min_doc_count: Union[int, "DefaultType"] = DEFAULT,
        shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT,
        shard_size: Union[int, "DefaultType"] = DEFAULT,
        show_term_doc_count_error: Union[bool, "DefaultType"] = DEFAULT,
        size: Union[int, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "terms": terms,
            "collect_mode": collect_mode,
            "order": order,
            "min_doc_count": min_doc_count,
            "shard_min_doc_count": shard_min_doc_count,
            "shard_size": shard_size,
            "show_term_doc_count_error": show_term_doc_count_error,
            "size": size,
        }
        super().__init__(**options, **kwargs)


class Nested(Bucket[_R]):
    """
    Special single bucket aggregation that makes it possible to aggregate
    nested documents.

    :arg path: The path to the field of type `nested`.
    """

    name = "nested"

    def __init__(
        self,
        path: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {"path": path}
        super().__init__(**options, **kwargs)


class Normalize(Pipeline[_R]):
    """
    Parent pipeline aggregation that calculates the specific
    normalized/rescaled value for a specific bucket value.

    :arg method: The specific method to apply.
    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    name = "normalize"

    def __init__(
        self,
        *,
        method: Union[
            Literal[
                "rescale_0_1",
                "rescale_0_100",
                "percent_of_sum",
                "mean",
                "z-score",
                "softmax",
            ],
            "DefaultType",
        ] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "method": method,
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class Parent(Bucket[_R]):
    """
    Special single bucket aggregation that selects the parent documents
    having the specified type, as defined in a `join` field.

    :arg type: The child type that should be selected.
    """

    name = "parent"

    def __init__(self, type: Union[str, "DefaultType"] = DEFAULT, **kwargs: Any):
        options = {"type": type}
        super().__init__(**options, **kwargs)


class PercentileRanks(Agg[_R]):
    """
    Multi-value metrics aggregation that calculates one or more percentile
    ranks over the numeric values extracted from the aggregated documents.

    :arg keyed: By default, the aggregation associates a unique string key
        with each bucket and returns the ranges as a hash rather than an
        array. Set to `false` to disable this behavior. Defaults to `True`
        if omitted.
    :arg values: An array of values for which the percentile ranks are
        calculated.
    :arg hdr: Uses the alternative High Dynamic Range Histogram algorithm
        to calculate percentile ranks.
    :arg tdigest: Sets parameters for the default TDigest algorithm used
        to calculate percentile ranks.
    :arg format:
    :arg field: The field on which the aggregation runs.
    :arg missing: Value applied to documents that have no value. Such
        documents are ignored by default.
    :arg script:
    """

    name = "percentile_ranks"

    def __init__(
        self,
        *,
        keyed: Union[bool, "DefaultType"] = DEFAULT,
        values: Union[Sequence[float], None, "DefaultType"] = DEFAULT,
        hdr: Union["types.HdrMethod", Dict[str, Any], "DefaultType"] = DEFAULT,
        tdigest: Union["types.TDigest", Dict[str, Any], "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "keyed": keyed,
            "values": values,
            "hdr": hdr,
            "tdigest": tdigest,
            "format": format,
            "field": field,
            "missing": missing,
            "script": script,
        }
        super().__init__(**options, **kwargs)


class Percentiles(Agg[_R]):
    """
    Multi-value metrics aggregation that calculates one or more
    percentiles over the numeric values extracted from the aggregated
    documents.

    :arg keyed: By default, the aggregation associates a unique string key
        with each bucket and returns the ranges as a hash rather than an
        array. Set to `false` to disable this behavior. Defaults to `True`
        if omitted.
    :arg percents: The percentiles to calculate.
    :arg hdr: Uses the alternative High Dynamic Range Histogram algorithm
        to calculate percentiles.
    :arg tdigest: Sets parameters for the default TDigest algorithm used
        to calculate percentiles.
    :arg format:
    :arg field: The field on which the aggregation runs.
    :arg missing: Value applied to documents that have no value. Such
        documents are ignored by default.
    :arg script:
    """

    name = "percentiles"

    def __init__(
        self,
        *,
        keyed: Union[bool, "DefaultType"] = DEFAULT,
        percents: Union[Sequence[float], "DefaultType"] = DEFAULT,
        hdr: Union["types.HdrMethod", Dict[str, Any], "DefaultType"] = DEFAULT,
        tdigest: Union["types.TDigest", Dict[str, Any], "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "keyed": keyed,
            "percents": percents,
            "hdr": hdr,
            "tdigest": tdigest,
            "format": format,
            "field": field,
            "missing": missing,
            "script": script,
        }
        super().__init__(**options, **kwargs)


class PercentilesBucket(Pipeline[_R]):
    """
    Sibling pipeline aggregation that calculates percentiles across all
    buckets of a specified metric in a sibling aggregation.

    :arg percents: The list of percentiles to calculate.
    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    name = "percentiles_bucket"

    def __init__(
        self,
        *,
        percents: Union[Sequence[float], "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "percents": percents,
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class Range(Bucket[_R]):
    """
    Multi-bucket, value-source based aggregation that lets the user define
    a set of ranges, each one representing a bucket.

    :arg field: The date field whose values are used to build ranges.
    :arg missing: Value applied to documents that have no value. Such
        documents are ignored by default.
    :arg ranges: An array of ranges used to bucket documents.
    :arg script:
    :arg keyed: Set to `true` to associate a unique string key with each
        bucket and return the ranges as a hash rather than an array.
    :arg format:
    """

    name = "range"

    def __init__(
        self,
        *,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        missing: Union[int, "DefaultType"] = DEFAULT,
        ranges: Union[
            Sequence["types.AggregationRange"], Sequence[Dict[str, Any]], "DefaultType"
        ] = DEFAULT,
        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        keyed: Union[bool, "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "field": field,
            "missing": missing,
            "ranges": ranges,
            "script": script,
            "keyed": keyed,
            "format": format,
        }
        super().__init__(**options, **kwargs)


class RareTerms(Bucket[_R]):
    """
    Multi-bucket, value-source based aggregation that finds "rare" terms:
    terms sitting at the long tail of the distribution, which are not
    frequent.

    :arg exclude: Terms that should be excluded from the aggregation.
    :arg field: The field from which to return rare terms.
    :arg include: Terms that should be included in the aggregation.
    :arg max_doc_count: The maximum number of documents a term should
        appear in. Defaults to `1` if omitted.
    :arg missing: Value applied to documents that have no value. Such
        documents are ignored by default.
    :arg precision: The precision of the internal CuckooFilters. A smaller
        precision gives a better approximation at the cost of higher
        memory usage. Defaults to `0.001` if omitted.
    :arg value_type:
    """

    name = "rare_terms"

    def __init__(
        self,
        *,
        exclude: Union[str, Sequence[str], "DefaultType"] = DEFAULT,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        include: Union[
            str, Sequence[str], "types.TermsPartition", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        max_doc_count: Union[int, "DefaultType"] = DEFAULT,
        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
        precision: Union[float, "DefaultType"] = DEFAULT,
        value_type: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "exclude": exclude,
            "field": field,
            "include": include,
            "max_doc_count": max_doc_count,
            "missing": missing,
            "precision": precision,
            "value_type": value_type,
        }
        super().__init__(**options, **kwargs)


class Rate(Agg[_R]):
    """
    Calculates a rate of documents or of a field in each bucket. Usable
    only inside a `date_histogram` or `composite` aggregation.

    :arg unit: The interval used to calculate the rate. By default, the
        interval of the `date_histogram` is used.
    :arg mode: How the rate is calculated. Defaults to `sum` if omitted.
    :arg format:
    :arg field: The field on which the aggregation runs.
    :arg missing: Value applied to documents that have no value. Such
        documents are ignored by default.
    :arg script:
    """

    name = "rate"

    def __init__(
        self,
        *,
        unit: Union[
            Literal[
                "second", "minute", "hour", "day", "week", "month", "quarter", "year"
            ],
            "DefaultType",
        ] = DEFAULT,
        mode: Union[Literal["sum", "value_count"], "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "unit": unit,
            "mode": mode,
            "format": format,
            "field": field,
            "missing": missing,
            "script": script,
        }
        super().__init__(**options, **kwargs)


class ReverseNested(Bucket[_R]):
    """
    Special single bucket aggregation that makes it possible to aggregate
    on parent documents from nested documents. It should only be defined
    inside a `nested` aggregation.

    :arg path: The nested object field to join back to. The default is
        empty, meaning the join goes back to the root/main document
        level.
    """

    name = "reverse_nested"

    def __init__(
        self,
        path: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {"path": path}
        super().__init__(**options, **kwargs)


class RandomSampler(Bucket[_R]):
    """
    Single bucket aggregation that randomly includes documents in the
    aggregated results. Sampling trades accuracy for a significant speed
    improvement.

    :arg probability: (required) The probability that a document will be
        included in the aggregated data. Must be greater than 0, less than
        0.5, or exactly 1. The lower the probability, the fewer documents
        are matched.
    :arg seed: The seed used to generate the random sampling of documents.
        When a seed is provided, the random subset of documents is the
        same between calls.
    :arg shard_seed: When combined with seed, setting shard_seed ensures
        100% consistent sampling over shards where data is exactly the
        same.
    """

    name = "random_sampler"

    def __init__(
        self,
        *,
        probability: Union[float, "DefaultType"] = DEFAULT,
        seed: Union[int, "DefaultType"] = DEFAULT,
        shard_seed: Union[int, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "probability": probability,
            "seed": seed,
            "shard_seed": shard_seed,
        }
        super().__init__(**options, **kwargs)


class Sampler(Bucket[_R]):
    """
    Filtering aggregation that restricts the processing of any sub
    aggregations to a sample of the top-scoring documents.

    :arg shard_size: Limits how many top-scoring documents are collected
        in the sample processed on each shard. Defaults to `100` if
        omitted.
    """

    name = "sampler"

    def __init__(self, shard_size: Union[int, "DefaultType"] = DEFAULT, **kwargs: Any):
        options = {"shard_size": shard_size}
        super().__init__(**options, **kwargs)


class ScriptedMetric(Agg[_R]):
    """
    Metric aggregation that relies on scripts to produce a metric output.

    :arg combine_script: Runs once on each shard after document collection
        is complete. Lets the aggregation consolidate the state returned
        from each shard.
    :arg init_script: Runs prior to any collection of documents. Lets the
        aggregation set up any initial state.
    :arg map_script: Run once per document collected. If no
        `combine_script` is specified, the resulting state needs to be
        stored in the `state` object.
    :arg params: A global object with script parameters for the `init`,
        `map` and `combine` scripts. It is shared between the scripts.
    :arg reduce_script: Runs once on the coordinating node after all
        shards have returned their results. The script has access to a
        variable `states`, which is an array of the result of the
        `combine_script` on each shard.
    :arg field: The field on which the aggregation runs.
    :arg missing: Value applied to documents that have no value. Such
        documents are ignored by default.
    :arg script:
    """

    name = "scripted_metric"

    def __init__(
        self,
        *,
        combine_script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        init_script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        map_script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        params: Union[Mapping[str, Any], "DefaultType"] = DEFAULT,
        reduce_script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "combine_script": combine_script,
            "init_script": init_script,
            "map_script": map_script,
            "params": params,
            "reduce_script": reduce_script,
            "field": field,
            "missing": missing,
            "script": script,
        }
        super().__init__(**options, **kwargs)


class SerialDiff(Pipeline[_R]):
    """
    Aggregation that subtracts values in a time series from themselves at
    different time lags or periods.

    :arg lag: The historical bucket to subtract from the current value.
        Must be a positive, non-zero integer.
    :arg format: `DecimalFormat` pattern for the output value. When set,
        the formatted value is returned in the `value_as_string` property
        of the aggregation.
    :arg gap_policy: Policy applied when gaps are found in the data.
        Defaults to `skip` if omitted.
    :arg buckets_path: Path to the buckets that hold one set of values to
        correlate.
    """

    name = "serial_diff"

    def __init__(
        self,
        *,
        lag: Union[int, "DefaultType"] = DEFAULT,
        format: Union[str, "DefaultType"] = DEFAULT,
        gap_policy: Union[
            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
        ] = DEFAULT,
        buckets_path: Union[
            str, Sequence[str], Mapping[str, str], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "lag": lag,
            "format": format,
            "gap_policy": gap_policy,
            "buckets_path": buckets_path,
        }
        super().__init__(**options, **kwargs)


class SignificantTerms(Bucket[_R]):
    """
    Returns interesting or unusual occurrences of terms in a set.

    :arg background_filter: A background filter used to focus on
        significant terms within a narrower context, instead of the
        entire index.
    :arg chi_square: Use Chi square, as described in "Information
        Retrieval", Manning et al., Chapter 13.5.2, as the significance
        score.
    :arg exclude: Terms to exclude.
    :arg execution_hint: Mechanism by which the aggregation should be
        executed: using field values directly or using global ordinals.
    :arg field: The field from which to return significant terms.
    :arg gnd: Use Google normalized distance as described in "The Google
        Similarity Distance", Cilibrasi and Vitanyi, 2007, as the
        significance score.
    :arg include: Terms to include.
    :arg jlh: Use JLH score as the significance score.
    :arg min_doc_count: Only return terms that are found in more than
        `min_doc_count` hits. Defaults to `3` if omitted.
    :arg mutual_information: Use mutual information as described in
        "Information Retrieval", Manning et al., Chapter 13.5.1, as the
        significance score.
    :arg percentage: A simple calculation of the number of documents in
        the foreground sample with a term divided by the number of
        documents in the background with the term.
    :arg script_heuristic: Customized score, implemented via a script.
    :arg shard_min_doc_count: Regulates the certainty a shard has about
        whether the term should actually be added to the candidate list
        with respect to the `min_doc_count`. Terms are only considered if
        their local shard frequency within the set is higher than the
        `shard_min_doc_count`.
    :arg shard_size: Controls the volume of candidate terms produced by
        each shard. By default, `shard_size` is estimated automatically
        from the number of shards and the `size` parameter.
    :arg size: The number of buckets returned out of the overall terms
        list.
    """

    name = "significant_terms"
    _param_defs = {
        "background_filter": {"type": "query"},
    }

    def __init__(
        self,
        *,
        background_filter: Union[Query, "DefaultType"] = DEFAULT,
        chi_square: Union[
            "types.ChiSquareHeuristic", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        exclude: Union[str, Sequence[str], "DefaultType"] = DEFAULT,
        execution_hint: Union[
            Literal[
                "map",
                "global_ordinals",
                "global_ordinals_hash",
                "global_ordinals_low_cardinality",
            ],
            "DefaultType",
        ] = DEFAULT,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        gnd: Union[
            "types.GoogleNormalizedDistanceHeuristic", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        include: Union[
            str, Sequence[str], "types.TermsPartition", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        jlh: Union["types.EmptyObject", Dict[str, Any], "DefaultType"] = DEFAULT,
        min_doc_count: Union[int, "DefaultType"] = DEFAULT,
        mutual_information: Union[
            "types.MutualInformationHeuristic", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        percentage: Union[
            "types.PercentageScoreHeuristic", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        script_heuristic: Union[
            "types.ScriptedHeuristic", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT,
        shard_size: Union[int, "DefaultType"] = DEFAULT,
        size: Union[int, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "background_filter": background_filter,
            "chi_square": chi_square,
            "exclude": exclude,
            "execution_hint": execution_hint,
            "field": field,
            "gnd": gnd,
            "include": include,
            "jlh": jlh,
            "min_doc_count": min_doc_count,
            "mutual_information": mutual_information,
            "percentage": percentage,
            "script_heuristic": script_heuristic,
            "shard_min_doc_count": shard_min_doc_count,
            "shard_size": shard_size,
            "size": size,
        }
        super().__init__(**options, **kwargs)


class SignificantText(Bucket[_R]):
    """
    Returns interesting or unusual occurrences of free-text terms in a
    set.

    :arg background_filter: A background filter used to focus on
        significant terms within a narrower context, instead of the
        entire index.
    :arg chi_square: Use Chi square, as described in "Information
        Retrieval", Manning et al., Chapter 13.5.2, as the significance
        score.
    :arg exclude: Values to exclude.
    :arg execution_hint: Determines whether the aggregation will use field
        values directly or global ordinals.
    :arg field: The field from which to return significant text.
    :arg filter_duplicate_text: Whether to filter out duplicate text to
        deal with noisy data.
    :arg gnd: Use Google normalized distance as described in "The Google
        Similarity Distance", Cilibrasi and Vitanyi, 2007, as the
        significance score.
    :arg include: Values to include.
    :arg jlh: Use JLH score as the significance score.
    :arg min_doc_count: Only return values that are found in more than
        `min_doc_count` hits. Defaults to `3` if omitted.
    :arg mutual_information: Use mutual information as described in
        "Information Retrieval", Manning et al., Chapter 13.5.1, as the
        significance score.
    :arg percentage: A simple calculation of the number of documents in
        the foreground sample with a term divided by the number of
        documents in the background with the term.
    :arg script_heuristic: Customized score, implemented via a script.
    :arg shard_min_doc_count: Regulates the certainty a shard has about
        whether the values should actually be added to the candidate list
        with respect to the min_doc_count. Values are only considered if
        their local shard frequency within the set is higher than the
        `shard_min_doc_count`.
    :arg shard_size: The number of candidate terms produced by each shard.
        By default, `shard_size` is estimated automatically from the
        number of shards and the `size` parameter.
    :arg size: The number of buckets returned out of the overall terms
        list.
    :arg source_fields: Overrides the JSON `_source` fields from which
        text will be analyzed.
    """

    name = "significant_text"
    _param_defs = {
        "background_filter": {"type": "query"},
    }

    def __init__(
        self,
        *,
        background_filter: Union[Query, "DefaultType"] = DEFAULT,
        chi_square: Union[
            "types.ChiSquareHeuristic", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        exclude: Union[str, Sequence[str], "DefaultType"] = DEFAULT,
        execution_hint: Union[
            Literal[
                "map",
                "global_ordinals",
                "global_ordinals_hash",
                "global_ordinals_low_cardinality",
            ],
            "DefaultType",
        ] = DEFAULT,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        filter_duplicate_text: Union[bool, "DefaultType"] = DEFAULT,
        gnd: Union[
            "types.GoogleNormalizedDistanceHeuristic", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        include: Union[
            str, Sequence[str], "types.TermsPartition", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        jlh: Union["types.EmptyObject", Dict[str, Any], "DefaultType"] = DEFAULT,
        min_doc_count: Union[int, "DefaultType"] = DEFAULT,
        mutual_information: Union[
            "types.MutualInformationHeuristic", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        percentage: Union[
            "types.PercentageScoreHeuristic", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        script_heuristic: Union[
            "types.ScriptedHeuristic", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT,
        shard_size: Union[int, "DefaultType"] = DEFAULT,
        size: Union[int, "DefaultType"] = DEFAULT,
        source_fields: Union[
            Union[str, "InstrumentedField"],
            Sequence[Union[str, "InstrumentedField"]],
            "DefaultType",
        ] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "background_filter": background_filter,
            "chi_square": chi_square,
            "exclude": exclude,
            "execution_hint": execution_hint,
            "field": field,
            "filter_duplicate_text": filter_duplicate_text,
            "gnd": gnd,
            "include": include,
            "jlh": jlh,
            "min_doc_count": min_doc_count,
            "mutual_information": mutual_information,
            "percentage": percentage,
            "script_heuristic": script_heuristic,
            "shard_min_doc_count": shard_min_doc_count,
            "shard_size": shard_size,
            "size": size,
            "source_fields": source_fields,
        }
        super().__init__(**options, **kwargs)


class Stats(Agg[_R]):
    """
    Multi-value metrics aggregation that computes stats over the numeric
    values extracted from the aggregated documents.

    :arg format:
    :arg field: The field on which the aggregation runs.
    :arg missing: Value applied to documents that have no value. Such
        documents are ignored by default.
    :arg script:
    """

    name = "stats"

    def __init__(
        self,
        *,
        format: Union[str, "DefaultType"] = DEFAULT,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        options = {
            "format": format,
            "field": field,
            "missing": missing,
            "script": script,
        }
        super().__init__(**options, **kwargs)
+ Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "stats_bucket" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class StringStats(Agg[_R]): + """ + A multi-value metrics aggregation that computes statistics over string + values extracted from the aggregated documents. + + :arg show_distribution: Shows the probability distribution for all + characters. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "string_stats" + + def __init__( + self, + *, + show_distribution: Union[bool, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + show_distribution=show_distribution, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class Sum(Agg[_R]): + """ + A single-value metrics aggregation that sums numeric values that are + extracted from the aggregated documents. + + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. 
+ :arg script: + """ + + name = "sum" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, field=field, missing=missing, script=script, **kwargs + ) + + +class SumBucket(Pipeline[_R]): + """ + A sibling pipeline aggregation which calculates the sum of a specified + metric across all buckets in a sibling aggregation. + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "sum_bucket" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class Terms(Bucket[_R]): + """ + A multi-bucket value source based aggregation where buckets are + dynamically built - one per unique value. + + :arg collect_mode: Determines how child aggregations should be + calculated: breadth-first or depth-first. + :arg exclude: Values to exclude. Accepts regular expressions and + partitions. + :arg execution_hint: Determines whether the aggregation will use field + values directly or global ordinals. + :arg field: The field from which to return terms. + :arg include: Values to include. Accepts regular expressions and + partitions. 
+ :arg min_doc_count: Only return values that are found in more than + `min_doc_count` hits. Defaults to `1` if omitted. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg missing_order: + :arg missing_bucket: + :arg value_type: Coerces unmapped fields into the specified type. + :arg order: Specifies the sort order of the buckets. Defaults to + sorting by descending document count. + :arg script: + :arg shard_min_doc_count: Regulates the certainty a shard has if the + term should actually be added to the candidate list or not with + respect to the `min_doc_count`. Terms will only be considered if + their local shard frequency within the set is higher than the + `shard_min_doc_count`. + :arg shard_size: The number of candidate terms produced by each shard. + By default, `shard_size` will be automatically estimated based on + the number of shards and the `size` parameter. + :arg show_term_doc_count_error: Set to `true` to return the + `doc_count_error_upper_bound`, which is an upper bound to the + error on the `doc_count` returned by each shard. + :arg size: The number of buckets returned out of the overall terms + list. Defaults to `10` if omitted. 
+ :arg format: + """ + + name = "terms" + + def __init__( + self, + *, + collect_mode: Union[ + Literal["depth_first", "breadth_first"], "DefaultType" + ] = DEFAULT, + exclude: Union[str, Sequence[str], "DefaultType"] = DEFAULT, + execution_hint: Union[ + Literal[ + "map", + "global_ordinals", + "global_ordinals_hash", + "global_ordinals_low_cardinality", + ], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + include: Union[ + str, Sequence[str], "types.TermsPartition", Dict[str, Any], "DefaultType" + ] = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + missing_order: Union[ + Literal["first", "last", "default"], "DefaultType" + ] = DEFAULT, + missing_bucket: Union[bool, "DefaultType"] = DEFAULT, + value_type: Union[str, "DefaultType"] = DEFAULT, + order: Union[ + Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]], + Sequence[Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]]], + "DefaultType", + ] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + show_term_doc_count_error: Union[bool, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + collect_mode=collect_mode, + exclude=exclude, + execution_hint=execution_hint, + field=field, + include=include, + min_doc_count=min_doc_count, + missing=missing, + missing_order=missing_order, + missing_bucket=missing_bucket, + value_type=value_type, + order=order, + script=script, + shard_min_doc_count=shard_min_doc_count, + shard_size=shard_size, + show_term_doc_count_error=show_term_doc_count_error, + size=size, + format=format, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> 
AttrDict[Any]: + return FieldBucketData(self, search, data) + + +class TimeSeries(Bucket[_R]): + """ + The time series aggregation queries data created using a time series + index. This is typically data such as metrics or other data streams + with a time component, and requires creating an index using the time + series mode. + + :arg size: The maximum number of results to return. Defaults to + `10000` if omitted. + :arg keyed: Set to `true` to associate a unique string key with each + bucket and returns the ranges as a hash rather than an array. + """ + + name = "time_series" + + def __init__( + self, + *, + size: Union[int, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(size=size, keyed=keyed, **kwargs) + + +class TopHits(Agg[_R]): + """ + A metric aggregation that returns the top matching documents per + bucket. + + :arg docvalue_fields: Fields for which to return doc values. + :arg explain: If `true`, returns detailed information about score + computation as part of a hit. + :arg fields: Array of wildcard (*) patterns. The request returns + values for field names matching these patterns in the hits.fields + property of the response. + :arg from: Starting document offset. + :arg highlight: Specifies the highlighter to use for retrieving + highlighted snippets from one or more fields in the search + results. + :arg script_fields: Returns the result of one or more script + evaluations for each hit. + :arg size: The maximum number of top matching hits to return per + bucket. Defaults to `3` if omitted. + :arg sort: Sort order of the top matching hits. By default, the hits + are sorted by the score of the main query. + :arg _source: Selects the fields of the source that are returned. + :arg stored_fields: Returns values for the specified stored fields + (fields that use the `store` mapping option). 
+ :arg track_scores: If `true`, calculates and returns document scores, + even if the scores are not used for sorting. + :arg version: If `true`, returns document version as part of a hit. + :arg seq_no_primary_term: If `true`, returns sequence number and + primary term of the last modification of each hit. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "top_hits" + + def __init__( + self, + *, + docvalue_fields: Union[ + Sequence["types.FieldAndFormat"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + explain: Union[bool, "DefaultType"] = DEFAULT, + fields: Union[ + Sequence["types.FieldAndFormat"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + from_: Union[int, "DefaultType"] = DEFAULT, + highlight: Union["types.Highlight", Dict[str, Any], "DefaultType"] = DEFAULT, + script_fields: Union[ + Mapping[str, "types.ScriptField"], Dict[str, Any], "DefaultType" + ] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + sort: Union[ + Union[Union[str, "InstrumentedField"], "types.SortOptions"], + Sequence[Union[Union[str, "InstrumentedField"], "types.SortOptions"]], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + _source: Union[ + bool, "types.SourceFilter", Dict[str, Any], "DefaultType" + ] = DEFAULT, + stored_fields: Union[ + Union[str, "InstrumentedField"], + Sequence[Union[str, "InstrumentedField"]], + "DefaultType", + ] = DEFAULT, + track_scores: Union[bool, "DefaultType"] = DEFAULT, + version: Union[bool, "DefaultType"] = DEFAULT, + seq_no_primary_term: Union[bool, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + docvalue_fields=docvalue_fields, + 
explain=explain, + fields=fields, + from_=from_, + highlight=highlight, + script_fields=script_fields, + size=size, + sort=sort, + _source=_source, + stored_fields=stored_fields, + track_scores=track_scores, + version=version, + seq_no_primary_term=seq_no_primary_term, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return TopHitsData(self, search, data) + + +class TTest(Agg[_R]): + """ + A metrics aggregation that performs a statistical hypothesis test in + which the test statistic follows a Student’s t-distribution under the + null hypothesis on numeric values extracted from the aggregated + documents. + + :arg a: Test population A. + :arg b: Test population B. + :arg type: The type of test. Defaults to `heteroscedastic` if omitted. + """ + + name = "t_test" + + def __init__( + self, + *, + a: Union["types.TestPopulation", Dict[str, Any], "DefaultType"] = DEFAULT, + b: Union["types.TestPopulation", Dict[str, Any], "DefaultType"] = DEFAULT, + type: Union[ + Literal["paired", "homoscedastic", "heteroscedastic"], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__(a=a, b=b, type=type, **kwargs) + + +class TopMetrics(Agg[_R]): + """ + A metric aggregation that selects metrics from the document with the + largest or smallest sort value. + + :arg metrics: The fields of the top document to return. + :arg size: The number of top documents from which to return metrics. + Defaults to `1` if omitted. + :arg sort: The sort order of the documents. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. 
+ :arg script: + """ + + name = "top_metrics" + + def __init__( + self, + *, + metrics: Union[ + "types.TopMetricsValue", + Sequence["types.TopMetricsValue"], + Sequence[Dict[str, Any]], + "DefaultType", + ] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + sort: Union[ + Union[Union[str, "InstrumentedField"], "types.SortOptions"], + Sequence[Union[Union[str, "InstrumentedField"], "types.SortOptions"]], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + metrics=metrics, + size=size, + sort=sort, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class ValueCount(Agg[_R]): + """ + A single-value metrics aggregation that counts the number of values + that are extracted from the aggregated documents. + + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "value_count" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, field=field, missing=missing, script=script, **kwargs + ) + + +class WeightedAvg(Agg[_R]): + """ + A single-value metrics aggregation that computes the weighted average + of numeric values that are extracted from the aggregated documents. + + :arg format: A numeric response formatter. + :arg value: Configuration for the field that provides the values. 
+ :arg value_type: + :arg weight: Configuration for the field or script that provides the + weights. + """ + + name = "weighted_avg" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + value: Union[ + "types.WeightedAverageValue", Dict[str, Any], "DefaultType" + ] = DEFAULT, + value_type: Union[ + Literal[ + "string", + "long", + "double", + "number", + "date", + "date_nanos", + "ip", + "numeric", + "geo_point", + "boolean", + ], + "DefaultType", + ] = DEFAULT, + weight: Union[ + "types.WeightedAverageValue", Dict[str, Any], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, value=value, value_type=value_type, weight=weight, **kwargs + ) + + +class VariableWidthHistogram(Bucket[_R]): + """ + A multi-bucket aggregation similar to the histogram, except instead of + providing an interval to use as the width of each bucket, a target + number of buckets is provided. + + :arg field: The name of the field. + :arg buckets: The target number of buckets. Defaults to `10` if + omitted. + :arg shard_size: The number of buckets that the coordinating node will + request from each shard. Defaults to `buckets * 50`. + :arg initial_buffer: Specifies the number of individual documents that + will be stored in memory on a shard before the initial bucketing + algorithm is run. Defaults to `min(10 * shard_size, 50000)`. 
+ :arg script: + """ + + name = "variable_width_histogram" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + buckets: Union[int, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + initial_buffer: Union[int, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + buckets=buckets, + shard_size=shard_size, + initial_buffer=initial_buffer, + script=script, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return FieldBucketData(self, search, data) diff --git a/elasticsearch/dsl/analysis.py b/elasticsearch/dsl/analysis.py new file mode 100644 index 000000000..a810064e0 --- /dev/null +++ b/elasticsearch/dsl/analysis.py @@ -0,0 +1,341 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, ClassVar, Dict, List, Optional, Union, cast + +from . 
import async_connections, connections +from .utils import AsyncUsingType, AttrDict, DslBase, UsingType, merge + +__all__ = ["tokenizer", "analyzer", "char_filter", "token_filter", "normalizer"] + + +class AnalysisBase: + @classmethod + def _type_shortcut( + cls, + name_or_instance: Union[str, "AnalysisBase"], + type: Optional[str] = None, + **kwargs: Any, + ) -> DslBase: + if isinstance(name_or_instance, cls): + if type or kwargs: + raise ValueError(f"{cls.__name__}() cannot accept parameters.") + return name_or_instance # type: ignore[return-value] + + if not (type or kwargs): + return cls.get_dsl_class("builtin")(name_or_instance) # type: ignore + + return cls.get_dsl_class(type, "custom")( # type: ignore + name_or_instance, type or "custom", **kwargs + ) + + +class CustomAnalysis: + name = "custom" + + def __init__(self, filter_name: str, builtin_type: str = "custom", **kwargs: Any): + self._builtin_type = builtin_type + self._name = filter_name + super().__init__(**kwargs) + + def to_dict(self) -> Dict[str, Any]: + # only name to present in lists + return self._name # type: ignore + + def get_definition(self) -> Dict[str, Any]: + d = super().to_dict() # type: ignore + d = d.pop(self.name) + d["type"] = self._builtin_type + return d # type: ignore + + +class CustomAnalysisDefinition(CustomAnalysis): + _type_name: str + _param_defs: ClassVar[Dict[str, Any]] + filter: List[Any] + char_filter: List[Any] + + def get_analysis_definition(self) -> Dict[str, Any]: + out = {self._type_name: {self._name: self.get_definition()}} + + t = cast("Tokenizer", getattr(self, "tokenizer", None)) + if "tokenizer" in self._param_defs and hasattr(t, "get_definition"): + out["tokenizer"] = {t._name: t.get_definition()} + + filters = { + f._name: f.get_definition() + for f in self.filter + if hasattr(f, "get_definition") + } + if filters: + out["filter"] = filters + + # any sub filter definitions like multiplexers etc? 
+ for f in self.filter: + if hasattr(f, "get_analysis_definition"): + d = f.get_analysis_definition() + if d: + merge(out, d, True) + + char_filters = { + f._name: f.get_definition() + for f in self.char_filter + if hasattr(f, "get_definition") + } + if char_filters: + out["char_filter"] = char_filters + + return out + + +class BuiltinAnalysis: + name = "builtin" + + def __init__(self, name: str): + self._name = name + super().__init__() + + def to_dict(self) -> Dict[str, Any]: + # only name to present in lists + return self._name # type: ignore + + +class Analyzer(AnalysisBase, DslBase): + _type_name = "analyzer" + name = "" + + +class BuiltinAnalyzer(BuiltinAnalysis, Analyzer): + def get_analysis_definition(self) -> Dict[str, Any]: + return {} + + +class CustomAnalyzer(CustomAnalysisDefinition, Analyzer): + _param_defs = { + "filter": {"type": "token_filter", "multi": True}, + "char_filter": {"type": "char_filter", "multi": True}, + "tokenizer": {"type": "tokenizer"}, + } + + def _get_body( + self, text: str, explain: bool, attributes: Optional[Dict[str, Any]] + ) -> Dict[str, Any]: + body = {"text": text, "explain": explain} + if attributes: + body["attributes"] = attributes + + definition = self.get_analysis_definition() + analyzer_def = self.get_definition() + + for section in ("tokenizer", "char_filter", "filter"): + if section not in analyzer_def: + continue + sec_def = definition.get(section, {}) + sec_names = analyzer_def[section] + + if isinstance(sec_names, str): + body[section] = sec_def.get(sec_names, sec_names) + else: + body[section] = [ + sec_def.get(sec_name, sec_name) for sec_name in sec_names + ] + + if self._builtin_type != "custom": + body["analyzer"] = self._builtin_type + + return body + + def simulate( + self, + text: str, + using: UsingType = "default", + explain: bool = False, + attributes: Optional[Dict[str, Any]] = None, + ) -> AttrDict[Any]: + """ + Use the Analyze API of elasticsearch to test the outcome of this analyzer. 
+ + :arg text: Text to be analyzed + :arg using: connection alias to use, defaults to ``'default'`` + :arg explain: will output all token attributes for each token. You can + filter token attributes you want to output by setting ``attributes`` + option. + :arg attributes: if ``explain`` is specified, filter the token + attributes to return. + """ + es = connections.get_connection(using) + return AttrDict( + cast( + Dict[str, Any], + es.indices.analyze(body=self._get_body(text, explain, attributes)), + ) + ) + + async def async_simulate( + self, + text: str, + using: AsyncUsingType = "default", + explain: bool = False, + attributes: Optional[Dict[str, Any]] = None, + ) -> AttrDict[Any]: + """ + Use the Analyze API of elasticsearch to test the outcome of this analyzer. + + :arg text: Text to be analyzed + :arg using: connection alias to use, defaults to ``'default'`` + :arg explain: will output all token attributes for each token. You can + filter token attributes you want to output by setting ``attributes`` + option. + :arg attributes: if ``explain`` is specified, filter the token + attributes to return. 
+ """ + es = async_connections.get_connection(using) + return AttrDict( + cast( + Dict[str, Any], + await es.indices.analyze( + body=self._get_body(text, explain, attributes) + ), + ) + ) + + +class Normalizer(AnalysisBase, DslBase): + _type_name = "normalizer" + name = "" + + +class BuiltinNormalizer(BuiltinAnalysis, Normalizer): + def get_analysis_definition(self) -> Dict[str, Any]: + return {} + + +class CustomNormalizer(CustomAnalysisDefinition, Normalizer): + _param_defs = { + "filter": {"type": "token_filter", "multi": True}, + "char_filter": {"type": "char_filter", "multi": True}, + } + + +class Tokenizer(AnalysisBase, DslBase): + _type_name = "tokenizer" + name = "" + + +class BuiltinTokenizer(BuiltinAnalysis, Tokenizer): + pass + + +class CustomTokenizer(CustomAnalysis, Tokenizer): + pass + + +class TokenFilter(AnalysisBase, DslBase): + _type_name = "token_filter" + name = "" + + +class BuiltinTokenFilter(BuiltinAnalysis, TokenFilter): + pass + + +class CustomTokenFilter(CustomAnalysis, TokenFilter): + pass + + +class MultiplexerTokenFilter(CustomTokenFilter): + name = "multiplexer" + + def get_definition(self) -> Dict[str, Any]: + d = super(CustomTokenFilter, self).get_definition() + + if "filters" in d: + d["filters"] = [ + # comma delimited string given by user + ( + fs + if isinstance(fs, str) + else + # list of strings or TokenFilter objects + ", ".join(f.to_dict() if hasattr(f, "to_dict") else f for f in fs) + ) + for fs in self.filters + ] + return d + + def get_analysis_definition(self) -> Dict[str, Any]: + if not hasattr(self, "filters"): + return {} + + fs: Dict[str, Any] = {} + d = {"filter": fs} + for filters in self.filters: + if isinstance(filters, str): + continue + fs.update( + { + f._name: f.get_definition() + for f in filters + if hasattr(f, "get_definition") + } + ) + return d + + +class ConditionalTokenFilter(CustomTokenFilter): + name = "condition" + + def get_definition(self) -> Dict[str, Any]: + d = super(CustomTokenFilter, 
self).get_definition() + if "filter" in d: + d["filter"] = [ + f.to_dict() if hasattr(f, "to_dict") else f for f in self.filter + ] + return d + + def get_analysis_definition(self) -> Dict[str, Any]: + if not hasattr(self, "filter"): + return {} + + return { + "filter": { + f._name: f.get_definition() + for f in self.filter + if hasattr(f, "get_definition") + } + } + + +class CharFilter(AnalysisBase, DslBase): + _type_name = "char_filter" + name = "" + + +class BuiltinCharFilter(BuiltinAnalysis, CharFilter): + pass + + +class CustomCharFilter(CustomAnalysis, CharFilter): + pass + + +# shortcuts for direct use +analyzer = Analyzer._type_shortcut +tokenizer = Tokenizer._type_shortcut +token_filter = TokenFilter._type_shortcut +char_filter = CharFilter._type_shortcut +normalizer = Normalizer._type_shortcut diff --git a/elasticsearch/dsl/async_connections.py b/elasticsearch/dsl/async_connections.py new file mode 100644 index 000000000..8a23d3828 --- /dev/null +++ b/elasticsearch/dsl/async_connections.py @@ -0,0 +1,37 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
_T = TypeVar("_T")


class Connections(Generic[_T]):
    """
    Class responsible for holding connections to different clusters. Used as a
    singleton in this module.

    Two registries are kept per alias: ``_kwargs`` holds the configuration
    used to build a client lazily and ``_conns`` holds the clients that have
    already been built or registered.
    """

    def __init__(self, *, elasticsearch_class: Type[_T]):
        self._kwargs: Dict[str, Any] = {}
        self._conns: Dict[str, _T] = {}
        self.elasticsearch_class: Type[_T] = elasticsearch_class

    def configure(self, **kwargs: Any) -> None:
        """
        Configure multiple connections at once, useful for passing in config
        dictionaries obtained from other sources, like Django's settings or a
        configuration management tool.

        Example::

            connections.configure(
                default={'hosts': 'localhost'},
                dev={'hosts': ['esdev1.example.com:9200'], 'sniff_on_start': True},
            )

        Connections will only be constructed lazily when requested through
        ``get_connection``.
        """
        for k in list(self._conns):
            # try and preserve existing client to keep the persistent connections alive
            if k in self._kwargs and kwargs.get(k, None) == self._kwargs[k]:
                continue
            del self._conns[k]
        self._kwargs = kwargs

    def add_connection(self, alias: str, conn: _T) -> None:
        """
        Add a connection object, it will be passed through as-is (apart from
        the user agent header injected by ``_with_user_agent``).
        """
        self._conns[alias] = self._with_user_agent(conn)

    def remove_connection(self, alias: str) -> None:
        """
        Remove connection from the registry. Raises ``KeyError`` if connection
        wasn't found.
        """
        errors = 0
        for d in (self._conns, self._kwargs):
            try:
                del d[alias]
            except KeyError:
                errors += 1

        # the alias only counts as missing when neither registry knew it
        if errors == 2:
            raise KeyError(f"There is no connection with alias {alias!r}.")

    def create_connection(self, alias: str = "default", **kwargs: Any) -> _T:
        """
        Construct an instance of ``self.elasticsearch_class`` and register
        it under given alias.
        """
        kwargs.setdefault("serializer", serializer)
        conn = self._conns[alias] = self.elasticsearch_class(**kwargs)
        return self._with_user_agent(conn)

    def get_connection(self, alias: Union[str, _T] = "default") -> _T:
        """
        Retrieve a connection, construct it if necessary (only configuration
        was passed to us). If a non-string alias has been passed through we
        assume it's already a client instance and will just return it as-is.

        Raises ``KeyError`` if no client (or its definition) is registered
        under the alias.
        """
        # do not check isinstance(Elasticsearch) so that people can wrap their
        # clients
        if not isinstance(alias, str):
            return self._with_user_agent(alias)

        # connection already established
        try:
            return self._conns[alias]
        except KeyError:
            pass

        # if not, look up its configuration; only this lookup is guarded so
        # that a KeyError raised while building the client is not misreported
        # as a missing alias
        try:
            kwargs = self._kwargs[alias]
        except KeyError:
            # no connection and no kwargs to set one up
            raise KeyError(f"There is no connection with alias {alias!r}.") from None
        return self.create_connection(alias, **kwargs)

    def _with_user_agent(self, conn: _T) -> _T:
        """Inject the DSL user agent into ``conn`` when it exposes ``_headers``."""
        from . import __versionstr__  # this is here to avoid circular imports

        # try to inject our user agent
        if hasattr(conn, "_headers"):
            is_frozen = conn._headers.frozen
            if is_frozen:
                # frozen headers cannot be updated in place, work on a copy
                conn._headers = conn._headers.copy()
            conn._headers.update(
                {"user-agent": f"elasticsearch-dsl-py/{__versionstr__}"}
            )
            if is_frozen:
                conn._headers.freeze()
        return conn
Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ._async.document import AsyncDocument # noqa: F401 +from ._sync.document import Document # noqa: F401 +from .document_base import InnerDoc, MetaField # noqa: F401 diff --git a/elasticsearch/dsl/document_base.py b/elasticsearch/dsl/document_base.py new file mode 100644 index 000000000..a7026778a --- /dev/null +++ b/elasticsearch/dsl/document_base.py @@ -0,0 +1,444 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
class InstrumentedField:
    """Proxy object for a mapped document field.

    An object of this instance is returned when a field is accessed as a class
    attribute of a ``Document`` or ``InnerDoc`` subclass. These objects can
    be used in any situation in which a reference to a field is required, such
    as when specifying sort options in a search::

        class MyDocument(Document):
            name: str

        s = MyDocument.search()
        s = s.sort(-MyDocument.name)  # sort by name in descending order
    """

    def __init__(self, name: str, field: "Field"):
        self._name = name
        self._field = field

    # note that the return value type here assumes classes will only be used to
    # access fields (I haven't found a way to make this type dynamic based on a
    # decision taken at runtime)
    def __getattr__(self, attr: str) -> "InstrumentedField":
        """Resolve ``attr`` as a sub-field, falling back to the wrapped field.

        Raises ``AttributeError`` when neither this proxy nor the wrapped
        field knows the attribute.
        """
        # __getattr__ only runs when normal lookup failed, so reaching this
        # point for our own slots means the instance is not initialized yet
        # (e.g. while being copied or unpickled). Resolving them through the
        # proxy logic below would recurse without bound.
        if attr in ("_name", "_field"):
            raise AttributeError(attr)
        try:
            # first let's see if this is an attribute of this object
            return super().__getattribute__(attr)  # type: ignore
        except AttributeError:
            try:
                # next we see if we have a sub-field with this name
                return InstrumentedField(f"{self._name}.{attr}", self._field[attr])
            except KeyError:
                # lastly we let the wrapped field resolve this attribute
                return getattr(self._field, attr)  # type: ignore

    def __pos__(self) -> str:
        """Return the field name representation for ascending sort order"""
        return f"{self._name}"

    def __neg__(self) -> str:
        """Return the field name representation for descending sort order"""
        return f"-{self._name}"

    def __str__(self) -> str:
        return self._name

    def __repr__(self) -> str:
        return f"InstrumentedField[{self._name}]"
def __init__(self, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]):
    """Build the mapping for a document class from its class attributes.

    :param name: name of the class being created.
    :param bases: base classes; mappings found on them are merged in.
    :param attrs: class namespace. It is modified in place: ``Meta`` and the
        attributes that were turned into mapping fields are removed and a
        ``_defaults`` entry with the dataclass-style defaults is added.
    :raises TypeError: for unsupported unions and for fields for which no
        field type could be determined.
    """
    meta = attrs.pop("Meta", None)

    # create the mapping instance
    self.mapping: Mapping = getattr(meta, "mapping", Mapping())

    # register the document's fields, which can be given in a few formats:
    #
    # class MyDocument(Document):
    #     # required field using native typing
    #     # (str, int, float, bool, datetime, date)
    #     field1: str
    #
    #     # optional field using native typing
    #     field2: Optional[datetime]
    #
    #     # array field using native typing
    #     field3: list[int]
    #
    #     # sub-object, same as Object(MyInnerDoc)
    #     field4: MyInnerDoc
    #
    #     # nested sub-objects, same as Nested(MyInnerDoc)
    #     field5: list[MyInnerDoc]
    #
    #     # use typing, but override with any stock or custom field
    #     field6: bool = MyCustomField()
    #
    #     # best mypy and pyright support and dataclass-like behavior
    #     field7: M[date]
    #     field8: M[str] = mapped_field(MyCustomText(), default="foo")
    #
    #     # legacy format without Python typing
    #     field9 = Text()
    #
    #     # ignore attributes
    #     field10: ClassVar[string] = "a regular class variable"
    annotations = attrs.get("__annotations__", {})
    fields = {n for n in attrs if isinstance(attrs[n], Field)}
    fields.update(annotations.keys())
    field_defaults = {}
    for name in fields:
        value: Any = None
        required = None
        multi = None
        if name in annotations:
            # the field has a type annotation, so next we try to figure out
            # what field type we can use
            type_ = annotations[name]
            skip = False
            required = True
            multi = False
            while hasattr(type_, "__origin__"):
                if type_.__origin__ == ClassVar:
                    skip = True
                    break
                elif type_.__origin__ == Mapped:
                    # M[type] -> extract the wrapped type
                    type_ = type_.__args__[0]
                elif type_.__origin__ == Union:
                    if len(type_.__args__) == 2 and type_.__args__[1] is type(None):
                        # Optional[type] -> mark instance as optional
                        required = False
                        type_ = type_.__args__[0]
                    else:
                        raise TypeError("Unsupported union")
                elif type_.__origin__ in [list, List]:
                    # List[type] -> mark instance as multi
                    multi = True
                    required = False
                    type_ = type_.__args__[0]
                else:
                    break
            if skip or type_ == ClassVar:
                # skip ClassVar attributes
                continue
            if type(type_) is UnionType:
                # a union given with the pipe syntax
                args = get_args(type_)
                if len(args) == 2 and args[1] is type(None):
                    required = False
                    type_ = type_.__args__[0]
                else:
                    raise TypeError("Unsupported union")
            field = None
            field_args: List[Any] = []
            field_kwargs: Dict[str, Any] = {}
            if isinstance(type_, type) and issubclass(type_, InnerDoc):
                # object or nested field
                field = Nested if multi else Object
                field_args = [type_]
            elif type_ in self.type_annotation_map:
                # use best field type for the type hint provided
                field, field_kwargs = self.type_annotation_map[type_]  # type: ignore

            if field:
                field_kwargs = {
                    "multi": multi,
                    "required": required,
                    **field_kwargs,
                }
                value = field(*field_args, **field_kwargs)

        if name in attrs:
            # this field has a right-side value, which can be field
            # instance on its own or wrapped with mapped_field()
            attr_value = attrs[name]
            if isinstance(attr_value, dict):
                # the mapped_field() wrapper function was used so we need
                # to look for the field instance and also record any
                # dataclass-style defaults
                spec = attrs[name]
                attr_value = spec.get("_field")
                # ``None`` is the "not given" marker used by mapped_field();
                # compare against it explicitly so that falsy defaults such
                # as 0, False or "" are not silently discarded
                default_value = spec.get("default")
                if default_value is None:
                    default_value = spec.get("default_factory")
                if default_value is not None:
                    field_defaults[name] = default_value
            if attr_value:
                value = attr_value
                if required is not None:
                    value._required = required
                if multi is not None:
                    value._multi = multi

        if value is None:
            raise TypeError(f"Cannot map field {name}")

        self.mapping.field(name, value)
        if name in attrs:
            del attrs[name]

    # store dataclass-style defaults for ObjectBase.__init__ to assign
    attrs["_defaults"] = field_defaults

    # add all the mappings for meta fields
    for name in dir(meta):
        if isinstance(getattr(meta, name, None), MetaField):
            params = getattr(meta, name)
            self.mapping.meta(name, *params.args, **params.kwargs)

    # document inheritance - include the fields from parents' mappings
    for b in bases:
        if hasattr(b, "_doc_type") and hasattr(b._doc_type, "mapping"):
            self.mapping.update(b._doc_type.mapping, update_only=True)
def mapped_field(
    field: Optional[Field] = None,
    *,
    init: bool = True,
    default: Any = None,
    default_factory: Optional[Callable[[], Any]] = None,
    **kwargs: Any,
) -> Any:
    """Describe a document field together with dataclass-style options.

    Intended for the right-hand side of a document field definition, either
    to wrap a field instance or to supply dataclass-compatible options. The
    options are returned packaged in a plain dictionary.

    :param field: The instance of ``Field`` to use for this field. If not provided,
        an instance that is appropriate for the type given to the field is used.
    :param init: a value of ``True`` adds this field to the constructor, and a
        value of ``False`` omits it from it. The default is ``True``.
    :param default: a default value to use for this field when one is not provided
        explicitly.
    :param default_factory: a callable that returns a default value for the field,
        when one isn't provided explicitly. Only one of ``default`` and
        ``default_factory`` can be used.
    :param kwargs: any further options; they are stored in the result as given.
    """
    spec: Dict[str, Any] = dict(
        _field=field,
        init=init,
        default=default,
        default_factory=default_factory,
    )
    # extra options are merged last so they win on a key clash
    spec.update(kwargs)
    return spec
+ """ + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + if cls._index._name is None: + return True + return fnmatch(hit.get("_index", ""), cls._index._name) + + @classmethod + def _default_index(cls, index: Optional[str] = None) -> str: + return index or cls._index._name + + def _get_index( + self, index: Optional[str] = None, required: bool = True + ) -> Optional[str]: + if index is None: + index = getattr(self.meta, "index", None) + if index is None: + index = getattr(self._index, "_name", None) + if index is None and required: + raise ValidationException("No index") + if index and "*" in index: + raise ValidationException("You cannot write to a wildcard index.") + return index + + def __repr__(self) -> str: + return "{}({})".format( + self.__class__.__name__, + ", ".join( + f"{key}={getattr(self.meta, key)!r}" + for key in ("index", "id") + if key in self.meta + ), + ) + + def to_dict(self, include_meta: bool = False, skip_empty: bool = True) -> Dict[str, Any]: # type: ignore[override] + """ + Serialize the instance into a dictionary so that it can be saved in elasticsearch. + + :arg include_meta: if set to ``True`` will include all the metadata + (``_index``, ``_id`` etc). Otherwise just the document's + data is serialized. This is useful when passing multiple instances into + ``elasticsearch.helpers.bulk``. + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in elasticsearch. 
class ElasticsearchDslException(Exception):
    """Root of the exception hierarchy defined by this module."""


class UnknownDslObject(ElasticsearchDslException):
    """Error type for DSL objects that are not known.

    NOTE(review): the raising sites are outside this module - confirm the
    exact conditions there.
    """


class ValidationException(ValueError, ElasticsearchDslException):
    """Validation error; also a ``ValueError`` so either type can be caught."""


class IllegalOperation(ElasticsearchDslException):
    """Error type for operations that are not allowed.

    NOTE(review): the raising sites are outside this module - confirm the
    exact conditions there.
    """
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ._async.faceted_search import AsyncFacetedSearch # noqa: F401 +from ._sync.faceted_search import FacetedSearch # noqa: F401 +from .faceted_search_base import ( # noqa: F401 + DateHistogramFacet, + Facet, + FacetedResponse, + HistogramFacet, + NestedFacet, + RangeFacet, + TermsFacet, +) diff --git a/elasticsearch/dsl/faceted_search_base.py b/elasticsearch/dsl/faceted_search_base.py new file mode 100644 index 000000000..ee6fed2f9 --- /dev/null +++ b/elasticsearch/dsl/faceted_search_base.py @@ -0,0 +1,489 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
def add_filter(self, filter_values: List[FilterValueType]) -> Optional[Query]:
    """
    Construct a filter matching any of the given values.

    One filter is built per value with ``get_value_filter`` and the results
    are combined with ``|``. Returns ``None`` when no values are given.
    """
    if not filter_values:
        return None

    first, *remaining = filter_values
    combined = self.get_value_filter(first)
    for value in remaining:
        combined |= self.get_value_filter(value)
    return combined
+ """ + out = [] + for bucket in data.buckets: + b = cast("BucketData[_R]", bucket) + key = self.get_value(b) + out.append((key, self.get_metric(b), self.is_filtered(key, filter_values))) + return out + + +class TermsFacet(Facet[_R]): + agg_type = "terms" + + def add_filter(self, filter_values: List[FilterValueType]) -> Optional[Query]: + """Create a terms filter instead of bool containing term filters.""" + if filter_values: + return Terms(self._params["field"], filter_values, _expand__to_dot=False) + return None + + +class RangeFacet(Facet[_R]): + agg_type = "range" + + def _range_to_dict( + self, range: Tuple[Any, Tuple[Optional[int], Optional[int]]] + ) -> Dict[str, Any]: + key, _range = range + out: Dict[str, Any] = {"key": key} + if _range[0] is not None: + out["from"] = _range[0] + if _range[1] is not None: + out["to"] = _range[1] + return out + + def __init__( + self, + ranges: Sequence[Tuple[Any, Tuple[Optional[int], Optional[int]]]], + **kwargs: Any, + ): + super().__init__(**kwargs) + self._params["ranges"] = list(map(self._range_to_dict, ranges)) + self._params["keyed"] = False + self._ranges = dict(ranges) + + def get_value_filter(self, filter_value: FilterValueType) -> Query: + f, t = self._ranges[filter_value] + limits: Dict[str, Any] = {} + if f is not None: + limits["gte"] = f + if t is not None: + limits["lt"] = t + + return Range(self._params["field"], limits, _expand__to_dot=False) + + +class HistogramFacet(Facet[_R]): + agg_type = "histogram" + + def get_value_filter(self, filter_value: FilterValueType) -> Range: + return Range( + self._params["field"], + { + "gte": filter_value, + "lt": filter_value + self._params["interval"], + }, + _expand__to_dot=False, + ) + + +def _date_interval_year(d: datetime) -> datetime: + return d.replace( + year=d.year + 1, day=(28 if d.month == 2 and d.day == 29 else d.day) + ) + + +def _date_interval_month(d: datetime) -> datetime: + return (d + timedelta(days=32)).replace(day=1) + + +def _date_interval_week(d: 
def get_value(self, bucket: "BucketData[_R]") -> Any:
    """Return the bucket key as a naive UTC ``datetime``.

    Keys that already are ``datetime`` objects are returned unchanged. Any
    other key is read as a number of milliseconds since 1970-01-01 UTC. The
    bucket itself is not modified.
    """
    key = bucket["key"]
    if isinstance(key, datetime):
        return key

    # Elasticsearch returns key=None instead of 0 for date 1970-01-01,
    # so we need to treat it as 0 to avoid a TypeError exception
    if key is None:
        key = 0

    # Integer millisecond arithmetic from the epoch preserves milliseconds
    # exactly, avoids the deprecated datetime.utcfromtimestamp() and also
    # works for keys before 1970 on every platform.
    return datetime(1970, 1, 1) + timedelta(milliseconds=int(cast(int, key)))
class FacetedResponse(Response[_R]):
    """Response that exposes the facet values of a faceted search."""

    if TYPE_CHECKING:
        _faceted_search: "FacetedSearchBase[_R]"
        _facets: Dict[str, List[Tuple[Any, int, bool]]]

    @property
    def query_string(self) -> Optional[Union[str, Query]]:
        """The query the originating faceted search was created with."""
        return self._faceted_search._query

    @property
    def facets(self) -> Dict[str, List[Tuple[Any, int, bool]]]:
        """Facet values per facet name, computed on first access and cached."""
        if hasattr(self, "_facets"):
            return self._facets

        # store the container with the plain attribute setter found past
        # AttrDict in the MRO, before filling it in
        super(AttrDict, self).__setattr__("_facets", AttrDict({}))
        owner = self._faceted_search
        for facet_name, facet in owner.facets.items():
            # each facet aggregation sits under a "_filter_<name>" wrapper
            wrapper = getattr(self.aggregations, "_filter_" + facet_name)
            selected = owner.filter_values.get(facet_name, [])
            self._facets[facet_name] = facet.get_values(
                getattr(wrapper, facet_name), selected
            )
        return self._facets
% ( + category, + hit_count, + ' and is chosen' if is_selected else '' + ) + ) + + """ + + index: Optional[str] = None + doc_types: Optional[List[Union[str, Type["DocumentBase"]]]] = None + fields: Sequence[str] = [] + facets: Dict[str, Facet[_R]] = {} + using = "default" + + if TYPE_CHECKING: + + def search(self) -> "SearchBase[_R]": ... + + def __init__( + self, + query: Optional[Union[str, Query]] = None, + filters: Dict[str, FilterValueType] = {}, + sort: Sequence[str] = [], + ): + """ + :arg query: the text to search for + :arg filters: facet values to filter + :arg sort: sort information to be passed to :class:`~elasticsearch.dsl.Search` + """ + self._query = query + self._filters: Dict[str, Query] = {} + self._sort = sort + self.filter_values: Dict[str, List[FilterValueType]] = {} + for name, value in filters.items(): + self.add_filter(name, value) + + self._s = self.build_search() + + def __getitem__(self, k: Union[int, slice]) -> Self: + self._s = self._s[k] + return self + + def add_filter( + self, name: str, filter_values: Union[FilterValueType, List[FilterValueType]] + ) -> None: + """ + Add a filter for a facet. + """ + # normalize the value into a list + if not isinstance(filter_values, (tuple, list)): + if filter_values is None: + return + filter_values = [ + filter_values, + ] + + # remember the filter values for use in FacetedResponse + self.filter_values[name] = filter_values # type: ignore[assignment] + + # get the filter from the facet + f = self.facets[name].add_filter(filter_values) # type: ignore[arg-type] + if f is None: + return + + self._filters[name] = f + + def query( + self, search: "SearchBase[_R]", query: Union[str, Query] + ) -> "SearchBase[_R]": + """ + Add query part to ``search``. + + Override this if you wish to customize the query used. 
def highlight(self, search: "SearchBase[_R]") -> "SearchBase[_R]":
    """
    Request highlighting for every entry of ``self.fields``.

    Entries containing ``^`` (e.g. ``title^5``) are cut at the first ``^``
    so only the part before it is passed to ``search.highlight``.
    """
    names = []
    for spec in self.fields:
        if "^" in spec:
            spec = spec.split("^", 1)[0]
        names.append(spec)
    return search.highlight(*names)
+ """ + s = self.search() + if self._query is not None: + s = self.query(s, self._query) + s = self.filter(s) + if self.fields: + s = self.highlight(s) + s = self.sort(s) + self.aggregate(s) + return s diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py new file mode 100644 index 000000000..55ab4f7f9 --- /dev/null +++ b/elasticsearch/dsl/field.py @@ -0,0 +1,587 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
def construct_field(
    name_or_field: Union[
        str,
        "Field",
        Dict[str, Any],
    ],
    **params: Any,
) -> "Field":
    """
    Build a :class:`Field` from one of three notations.

    :arg name_or_field: a mapping such as ``{"type": "text", ...}``, an
        existing ``Field`` instance (returned unchanged), or a type name
        such as ``"text"``
    :arg params: keyword arguments for the field class; only allowed
        together with a type name
    :raises ValueError: if ``params`` are combined with a mapping or a
        ``Field`` instance, or if a mapping has neither a ``"type"`` nor a
        ``"properties"`` key
    """
    # {"type": "text", "analyzer": "snowball"}
    if isinstance(name_or_field, collections.abc.Mapping):
        if params:
            raise ValueError(
                "construct_field() cannot accept parameters when passing in a dict."
            )
        # work on a copy so popping "type" does not alter the caller's mapping
        params = deepcopy(name_or_field)
        if "type" not in params:
            # inner object can be implicitly defined
            if "properties" in params:
                name = "object"
            else:
                raise ValueError('construct_field() needs to have a "type" key.')
        else:
            name = params.pop("type")
        return Field.get_dsl_class(name)(**params)

    # Text()
    if isinstance(name_or_field, Field):
        if params:
            # the offending argument is a Field instance, so name that type
            raise ValueError(
                "construct_field() cannot accept parameters "
                "when passing in a Field object."
            )
        return name_or_field

    # "text", analyzer="snowball"
    return Field.get_dsl_class(name_or_field)(**params)
Any]: + if isinstance(self.builtin_type, Field): + return self.builtin_type.to_dict() + + d = super().to_dict() + d["type"] = self.builtin_type + return d + + +class Object(Field): + name = "object" + _coerce = True + + def __init__( + self, + doc_class: Optional[Type["InnerDoc"]] = None, + dynamic: Optional[Union[bool, str]] = None, + properties: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ): + """ + :arg document.InnerDoc doc_class: base doc class that handles mapping. + If no `doc_class` is provided, new instance of `InnerDoc` will be created, + populated with `properties` and used. Can not be provided together with `properties` + :arg dynamic: whether new properties may be created dynamically. + Valid values are `True`, `False`, `'strict'`. + Can not be provided together with `doc_class`. + See https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic.html + for more details + :arg dict properties: used to construct underlying mapping if no `doc_class` is provided. + Can not be provided together with `doc_class` + """ + if doc_class and (properties or dynamic is not None): + raise ValidationException( + "doc_class and properties/dynamic should not be provided together" + ) + if doc_class: + self._doc_class: Type["InnerDoc"] = doc_class + else: + # FIXME import + from .document import InnerDoc + + # no InnerDoc subclass, creating one instead... 
def _serialize(
    self, data: Optional[Union[Dict[str, Any], "InnerDoc"]]
) -> Optional[Dict[str, Any]]:
    """
    Serialize a single value of this field.

    ``None`` and raw mappings are returned untouched (a plain dict assigned
    to the field is tolerated); anything else is serialized through its own
    ``to_dict()``.
    """
    passthrough = data is None or isinstance(data, collections.abc.Mapping)
    return data if passthrough else data.to_dict()
class Date(Field):
    """Field for ``date`` values, deserialized to ``datetime``/``date`` objects."""

    name = "date"
    _coerce = True

    def __init__(
        self,
        default_timezone: Optional[Union[str, "tzinfo"]] = None,
        *args: Any,
        **kwargs: Any,
    ):
        """
        :arg default_timezone: timezone that will be automatically used for tz-naive values
            May be instance of `datetime.tzinfo` or a string, which is
            resolved through ``dateutil.tz.gettz``
        """
        if isinstance(default_timezone, str):
            self._default_timezone = tz.gettz(default_timezone)
        else:
            self._default_timezone = default_timezone
        super().__init__(*args, **kwargs)

    def _deserialize(self, data: Any) -> Union[datetime, date]:
        """
        Convert ``data`` into a ``datetime`` or ``date``.

        Strings are parsed with ``dateutil``; when the field's ``format`` is
        ``yyyy-MM-dd`` the parsed value is reduced to a ``date``. Naive
        ``datetime`` values get ``default_timezone`` attached when one is
        configured. Integers are interpreted as milliseconds since the epoch
        and returned as naive UTC datetimes (``default_timezone`` is not
        applied to them).

        :raises ValidationException: if the value cannot be interpreted
        """
        # local import: ``timezone`` is only needed for the epoch branch
        from datetime import timezone

        if isinstance(data, str):
            try:
                data = parser.parse(data)
            except Exception as e:
                # chain explicitly so the parser error is kept as the cause
                raise ValidationException(
                    f"Could not parse date from the value ({data!r})", e
                ) from e
            # we treat the yyyy-MM-dd format as a special case
            if hasattr(self, "format") and self.format == "yyyy-MM-dd":
                data = data.date()

        if isinstance(data, datetime):
            if self._default_timezone and data.tzinfo is None:
                data = data.replace(tzinfo=self._default_timezone)
            return data
        if isinstance(data, date):
            return data
        if isinstance(data, int):
            # Divide by a float to preserve milliseconds on the datetime.
            # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12;
            # build the aware UTC value and strip tzinfo to return the same
            # naive UTC datetime as before.
            return datetime.fromtimestamp(data / 1000.0, tz=timezone.utc).replace(
                tzinfo=None
            )

        raise ValidationException(f"Could not parse date from the value ({data!r})")
class Ip(Field):
    """Field for ``ip`` values, exposed as ``ipaddress`` address objects."""

    name = "ip"
    _coerce = True

    def _deserialize(self, data: Any) -> Union["IPv4Address", "IPv6Address"]:
        """Parse ``data`` into an IPv4 or IPv6 address object."""
        # the ipaddress library for pypy only accepts unicode.
        text = unicode(data)
        return ipaddress.ip_address(text)

    def _serialize(self, data: Any) -> Optional[str]:
        """Return the string form of ``data``; ``None`` stays ``None``."""
        return None if data is None else str(data)
class RangeField(Field):
    """
    Base class for range fields; each bound is (de)serialized through the
    subclass's ``_core_field``.
    """

    _coerce = True
    _core_field: Optional[Field] = None

    def _deserialize(self, data: Any) -> Range["_SupportsComparison"]:
        """Wrap ``data`` in a ``Range``, deserializing every value first."""
        if isinstance(data, Range):
            # already wrapped, nothing to do
            return data
        bounds = {}
        for key, raw in data.items():
            bounds[key] = self._core_field.deserialize(raw)  # type: ignore
        return Range(bounds)

    def _serialize(self, data: Any) -> Optional[Dict[str, Any]]:
        """Serialize a ``Range``-like object or raw mapping into a plain dict."""
        if data is None:
            return None
        # non-mapping values are converted via to_dict(); mappings are used as is
        source = data if isinstance(data, collections.abc.Mapping) else data.to_dict()
        serialized = {}
        for key, value in source.items():
            serialized[key] = self._core_field.serialize(value)  # type: ignore
        return serialized
name = "token_count" + + +class Murmur3(Field): + name = "murmur3" + + +class SemanticText(Field): + name = "semantic_text" diff --git a/elasticsearch/dsl/function.py b/elasticsearch/dsl/function.py new file mode 100644 index 000000000..9744e6f8b --- /dev/null +++ b/elasticsearch/dsl/function.py @@ -0,0 +1,180 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import collections.abc +from copy import deepcopy +from typing import ( + Any, + ClassVar, + Dict, + Literal, + MutableMapping, + Optional, + Union, + overload, +) + +from elastic_transport.client_utils import DEFAULT, DefaultType + +from .utils import AttrDict, DslBase + + +@overload +def SF(name_or_sf: MutableMapping[str, Any]) -> "ScoreFunction": ... + + +@overload +def SF(name_or_sf: "ScoreFunction") -> "ScoreFunction": ... + + +@overload +def SF(name_or_sf: str, **params: Any) -> "ScoreFunction": ... 
def SF(
    name_or_sf: Union[str, "ScoreFunction", MutableMapping[str, Any]],
    **params: Any,
) -> "ScoreFunction":
    """
    Shortcut for building a ``ScoreFunction`` from a dict, an existing
    instance (returned unchanged) or a function name plus parameters.

    :raises ValueError: if ``params`` are combined with a dict or an
        instance, or if the dict names more than one function
    """
    # {"script_score": {"script": "_score"}, "filter": {}}
    if isinstance(name_or_sf, collections.abc.MutableMapping):
        if params:
            raise ValueError("SF() cannot accept parameters when passing in a dict.")

        remaining = deepcopy(name_or_sf)
        # pull the params known to every score function out of the copy
        kwargs: Dict[str, Any] = {
            key: remaining.pop(key)
            for key in ScoreFunction._param_defs
            if key in name_or_sf
        }

        if len(remaining) > 1:
            raise ValueError(
                f"SF() got an unexpected fields in the dictionary: {remaining!r}"
            )
        if remaining:
            # {'FUNCTION': {...}}
            name, sf_params = remaining.popitem()
        else:
            # nothing left, so just filter+weight, which used to be boost factor
            name, sf_params = "boost_factor", params

        # boost factor special case, see elasticsearch #6343
        if not isinstance(sf_params, collections.abc.Mapping):
            sf_params = {"value": sf_params}

        # mix known params (from _param_defs) and from inside the function
        kwargs.update(sf_params)
        return ScoreFunction.get_dsl_class(name)(**kwargs)

    # ScriptScore(script="_score", filter=Q())
    if isinstance(name_or_sf, ScoreFunction):
        if params:
            raise ValueError(
                "SF() cannot accept parameters when passing in a ScoreFunction object."
            )
        return name_or_sf

    # "script_score", script="_score", filter=Q()
    return ScoreFunction.get_dsl_class(name_or_sf)(**params)
super().__init__(kwargs) diff --git a/elasticsearch/dsl/index.py b/elasticsearch/dsl/index.py new file mode 100644 index 000000000..368e58d42 --- /dev/null +++ b/elasticsearch/dsl/index.py @@ -0,0 +1,23 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ._async.index import ( # noqa: F401 + AsyncComposableIndexTemplate, + AsyncIndex, + AsyncIndexTemplate, +) +from ._sync.index import ComposableIndexTemplate, Index, IndexTemplate # noqa: F401 diff --git a/elasticsearch/dsl/index_base.py b/elasticsearch/dsl/index_base.py new file mode 100644 index 000000000..71ff50339 --- /dev/null +++ b/elasticsearch/dsl/index_base.py @@ -0,0 +1,178 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
def resolve_field(self, field_path: str) -> Optional["Field"]:
    """
    Look up ``field_path`` in the mappings of the associated documents, in
    registration order, and then in the index's own mapping.

    Returns the first result that is not ``None``, or ``None`` when nothing
    resolves the path.
    """
    # lazy: stop asking document mappings as soon as one resolves the path
    lookups = (
        doc._doc_type.mapping.resolve_field(field_path) for doc in self._doc_types
    )
    found = next((hit for hit in lookups if hit is not None), None)
    if found is not None:
        return found
    return self._mapping.resolve_field(field_path) if self._mapping else None
def document(self, document: "DocumentMeta") -> "DocumentMeta":
    """
    Register a :class:`~elasticsearch.dsl.Document` subclass with this
    index and return the class, which makes the method usable as a
    decorator::

        i = Index('blog')

        @i.document
        class Post(Document):
            title = Text()

    If the document's current index has no name, this index becomes the
    document's index.
    """
    self._doc_types.append(document)

    # an unnamed document index means the user never assigned one, so this
    # index takes its place
    has_named_index = document._index._name is not None
    if not has_named_index:
        document._index = self

    return document
def to_dict(self) -> Dict[str, Any]:
    """
    Serialize the index definition.

    The result may contain ``settings``, ``aliases`` and ``mappings`` keys;
    analysis definitions collected from the mappings and added through
    ``analyzer()`` end up under ``settings.analysis``.

    The index's own ``_settings`` dict and the dict returned by the explicit
    mapping's ``to_dict()`` are copied (shallowly) before being written to,
    so serializing does not modify them.
    """
    out: Dict[str, Any] = {}
    if self._settings:
        # copy: "analysis" is written into this dict further down and must
        # not leak back into self._settings
        out["settings"] = dict(self._settings)
    if self._aliases:
        out["aliases"] = self._aliases
    # copy: document mappings are merged into this dict in place below
    mappings = dict(self._mapping.to_dict()) if self._mapping else {}
    analysis_def = self._mapping._collect_analysis() if self._mapping else {}
    for d in self._doc_types:
        mapping = d._doc_type.mapping
        merge(mappings, mapping.to_dict(), True)
        merge(analysis_def, mapping._collect_analysis(), True)
    if mappings:
        out["mappings"] = mappings
    if analysis_def or self._analysis:
        merge(analysis_def, self._analysis)
        out.setdefault("settings", {})["analysis"] = analysis_def
    return out
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ._async.mapping import AsyncMapping # noqa: F401 +from ._sync.mapping import Mapping # noqa: F401 diff --git a/elasticsearch/dsl/mapping_base.py b/elasticsearch/dsl/mapping_base.py new file mode 100644 index 000000000..658cf6cfc --- /dev/null +++ b/elasticsearch/dsl/mapping_base.py @@ -0,0 +1,219 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
class Properties(DslBase):
    """Container for the field definitions of a mapping, keyed by field name."""

    name = "properties"
    _param_defs = {"properties": {"type": "field", "hash": True}}

    properties: Dict[str, Field]

    def __init__(self) -> None:
        super().__init__()

    def __repr__(self) -> str:
        return "Properties()"

    def __getitem__(self, name: str) -> Field:
        return self.properties[name]

    def __contains__(self, name: str) -> bool:
        return name in self.properties

    def to_dict(self) -> Dict[str, Any]:
        """Return the serialized form with the outer ``properties`` level removed."""
        serialized = super().to_dict()
        return cast(Dict[str, Field], serialized["properties"])

    def field(self, name: str, *args: Any, **kwargs: Any) -> Self:
        """Define field ``name`` from ``construct_field`` arguments; returns ``self``."""
        self.properties[name] = construct_field(*args, **kwargs)
        return self

    def _collect_fields(self) -> Iterator[Field]:
        """Yield every Field object within, including multi fields."""
        declared = cast(Dict[str, Field], self.properties.to_dict())  # type: ignore
        for declared_field in declared.values():
            yield declared_field
            # multi fields
            if hasattr(declared_field, "fields"):
                for sub_field in declared_field.fields.to_dict().values():
                    yield sub_field
            # nested and inner objects
            if hasattr(declared_field, "_collect_fields"):
                for inner_field in declared_field._collect_fields():
                    yield inner_field

    def update(self, other_object: Any) -> None:
        """
        Merge the fields of ``other_object`` into this container.

        Fields missing here are added; fields present on both sides are
        merged through the existing field's ``update`` when it has one and
        are otherwise left untouched.
        """
        if not hasattr(other_object, "properties"):
            # not an inner/nested object, no merge possible
            return

        ours = self.properties
        theirs = other_object.properties
        for name in theirs:
            if name not in ours:
                ours[name] = theirs[name]
            elif hasattr(ours[name], "update"):
                ours[name].update(theirs[name])
__repr__(self) -> str: + return "Mapping()" + + def _clone(self) -> Self: + m = self.__class__() + m.properties._params = self.properties._params.copy() + return m + + def resolve_nested( + self, field_path: str + ) -> Tuple[List[str], Optional["MappingBase"]]: + field = self + nested = [] + parts = field_path.split(".") + for i, step in enumerate(parts): + try: + field = field[step] # type: ignore[assignment] + except KeyError: + return [], None + if isinstance(field, Nested): + nested.append(".".join(parts[: i + 1])) + return nested, field + + def resolve_field(self, field_path: str) -> Optional[Field]: + field = self + for step in field_path.split("."): + try: + field = field[step] # type: ignore[assignment] + except KeyError: + return None + return cast(Field, field) + + def _collect_analysis(self) -> Dict[str, Any]: + analysis: Dict[str, Any] = {} + fields = [] + if "_all" in self._meta: + fields.append(Text(**self._meta["_all"])) + + for f in chain(fields, self.properties._collect_fields()): + for analyzer_name in ( + "analyzer", + "normalizer", + "search_analyzer", + "search_quote_analyzer", + ): + if not hasattr(f, analyzer_name): + continue + analyzer = getattr(f, analyzer_name) + d = analyzer.get_analysis_definition() + # empty custom analyzer, probably already defined out of our control + if not d: + continue + + # merge the definition + # TODO: conflict detection/resolution + for key in d: + analysis.setdefault(key, {}).update(d[key]) + + return analysis + + def _update_from_dict(self, raw: Dict[str, Any]) -> None: + for name, definition in raw.get("properties", {}).items(): + self.field(name, definition) + + # metadata like _all etc + for name, value in raw.items(): + if name != "properties": + if isinstance(value, collections.abc.Mapping): + self.meta(name, **value) + else: + self.meta(name, value) + + def update(self, mapping: "MappingBase", update_only: bool = False) -> None: + for name in mapping: + if update_only and name in self: + # nested and 
inner objects, merge recursively + if hasattr(self[name], "update"): + # FIXME only merge subfields, not the settings + self[name].update(mapping[name], update_only) + continue + self.field(name, mapping[name]) + + if update_only: + for name in mapping._meta: + if name not in self._meta: + self._meta[name] = mapping._meta[name] + else: + self._meta.update(mapping._meta) + + def __contains__(self, name: str) -> bool: + return name in self.properties.properties + + def __getitem__(self, name: str) -> Field: + return self.properties.properties[name] + + def __iter__(self) -> Iterator[str]: + return iter(self.properties.properties) + + def field(self, *args: Any, **kwargs: Any) -> Self: + self.properties.field(*args, **kwargs) + return self + + def meta(self, name: str, params: Any = None, **kwargs: Any) -> Self: + if not name.startswith("_") and name not in META_FIELDS: + name = "_" + name + + if params and kwargs: + raise ValueError("Meta configs cannot have both value and a dictionary.") + + self._meta[name] = kwargs if params is None else params + return self + + def to_dict(self) -> Dict[str, Any]: + meta = self._meta + + # hard coded serialization of analyzers in _all + if "_all" in meta: + meta = meta.copy() + _all = meta["_all"] = meta["_all"].copy() + for f in ("analyzer", "search_analyzer", "search_quote_analyzer"): + if hasattr(_all.get(f, None), "to_dict"): + _all[f] = _all[f].to_dict() + meta.update(self.properties.to_dict()) + return meta diff --git a/elasticsearch/dsl/py.typed b/elasticsearch/dsl/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/elasticsearch/dsl/query.py b/elasticsearch/dsl/query.py new file mode 100644 index 000000000..1b3d9f22b --- /dev/null +++ b/elasticsearch/dsl/query.py @@ -0,0 +1,2795 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. 
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import collections.abc +from copy import deepcopy +from itertools import chain +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Dict, + List, + Literal, + Mapping, + MutableMapping, + Optional, + Protocol, + Sequence, + TypeVar, + Union, + cast, + overload, +) + +from elastic_transport.client_utils import DEFAULT + +# 'SF' looks unused but the test suite assumes it's available +# from this module so others are liable to do so as well. +from .function import SF # noqa: F401 +from .function import ScoreFunction +from .utils import DslBase + +if TYPE_CHECKING: + from elastic_transport.client_utils import DefaultType + + from . import types, wrappers + + from .document_base import InstrumentedField + +_T = TypeVar("_T") +_M = TypeVar("_M", bound=Mapping[str, Any]) + + +class QProxiedProtocol(Protocol[_T]): + _proxied: _T + + +@overload +def Q(name_or_query: MutableMapping[str, _M]) -> "Query": ... + + +@overload +def Q(name_or_query: "Query") -> "Query": ... + + +@overload +def Q(name_or_query: QProxiedProtocol[_T]) -> _T: ... + + +@overload +def Q(name_or_query: str = "match_all", **params: Any) -> "Query": ... 
def Q(
    name_or_query: Union[
        str,
        "Query",
        QProxiedProtocol[_T],
        MutableMapping[str, _M],
    ] = "match_all",
    **params: Any,
) -> Union["Query", _T]:
    """
    Build (or pass through) a query from any of its accepted descriptions.

    :arg name_or_query: one of: a single-key mapping in serialized form
        (``{"match": {"title": "python"}}``), an existing ``Query`` object
        (returned unchanged), an object exposing a ``_proxied`` attribute
        (that attribute is returned), or the name of a query type.
    :arg params: parameters for the query; only allowed together with a
        query name.
    :raises ValueError: if ``params`` are combined with a mapping or a
        ``Query`` object, or if the mapping does not hold exactly one key.
    """
    # serialized form: {"match": {"title": "python"}}
    if isinstance(name_or_query, collections.abc.MutableMapping):
        if params:
            raise ValueError("Q() cannot accept parameters when passing in a dict.")
        if len(name_or_query) != 1:
            raise ValueError(
                'Q() can only accept dict with a single query ({"match": {...}}). '
                "Instead it got (%r)" % name_or_query
            )
        # take the single entry from a deep copy so the caller's mapping is
        # left untouched
        serialized = deepcopy(name_or_query)
        query_type, body = serialized.popitem()
        query_cls = Query.get_dsl_class(query_type)
        return query_cls(_expand__to_dot=False, **body)

    # already a query object: MatchAll()
    if isinstance(name_or_query, Query):
        if not params:
            return name_or_query
        raise ValueError(
            "Q() cannot accept parameters when passing in a Query object."
        )

    # proxy object: s.query = Q('filtered', query=s.query)
    if hasattr(name_or_query, "_proxied"):
        proxy = cast(QProxiedProtocol[_T], name_or_query)
        return proxy._proxied

    # query name plus parameters: "match", title="python"
    return Query.get_dsl_class(name_or_query)(**params)
class Bool(Query):
    """
    Matches documents matching boolean combinations of other queries.

    Besides holding the four clause lists, this class implements the
    ``+``, ``|``, ``&`` and ``~`` operators so that combining a ``Bool``
    with another query merges clauses where possible instead of nesting.

    :arg filter: The clause (query) must appear in matching documents.
        However, unlike `must`, the score of the query will be ignored.
    :arg minimum_should_match: Specifies the number or percentage of
        `should` clauses returned documents must match.
    :arg must: The clause (query) must appear in matching documents and
        will contribute to the score.
    :arg must_not: The clause (query) must not appear in the matching
        documents. Because scoring is ignored, a score of `0` is returned
        for all documents.
    :arg should: The clause (query) should appear in the matching
        document.
    :arg boost: Floating point number used to decrease or increase the
        relevance scores of the query. Boost values are relative to the
        default value of 1.0. A boost value between 0 and 1.0 decreases
        the relevance score. A value greater than 1.0 increases the
        relevance score. Defaults to `1` if omitted.
    :arg _name:
    """

    name = "bool"
    # all four clause parameters hold lists of queries
    _param_defs = {
        "filter": {"type": "query", "multi": True},
        "must": {"type": "query", "multi": True},
        "must_not": {"type": "query", "multi": True},
        "should": {"type": "query", "multi": True},
    }

    def __init__(
        self,
        *,
        filter: Union[Query, Sequence[Query], "DefaultType"] = DEFAULT,
        minimum_should_match: Union[int, str, "DefaultType"] = DEFAULT,
        must: Union[Query, Sequence[Query], "DefaultType"] = DEFAULT,
        must_not: Union[Query, Sequence[Query], "DefaultType"] = DEFAULT,
        should: Union[Query, Sequence[Query], "DefaultType"] = DEFAULT,
        boost: Union[float, "DefaultType"] = DEFAULT,
        _name: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        super().__init__(
            filter=filter,
            minimum_should_match=minimum_should_match,
            must=must,
            must_not=must_not,
            should=should,
            boost=boost,
            _name=_name,
            **kwargs,
        )

    def __add__(self, other: Query) -> "Bool":
        """
        Return a clone of this query with ``other`` merged in: the clause
        lists of another ``Bool`` are concatenated onto ours, any other
        query is appended to ``must``.
        """
        q = self._clone()
        if isinstance(other, Bool):
            q.must += other.must
            q.should += other.should
            q.must_not += other.must_not
            q.filter += other.filter
        else:
            q.must.append(other)
        return q

    __radd__ = __add__

    def __or__(self, other: Query) -> Query:
        """
        Combine with ``other`` using OR.

        If either operand is a ``Bool`` that has no ``must``, ``must_not``,
        ``filter`` or ``minimum_should_match`` set, a clone of that operand
        is returned with the remaining operand folded into its ``should``
        list. Otherwise both operands are wrapped in a new
        ``Bool(should=[...])``.
        """
        for q in (self, other):
            if isinstance(q, Bool) and not any(
                (q.must, q.must_not, q.filter, getattr(q, "minimum_should_match", None))
            ):
                # from here on `other` names the operand that is NOT `q`
                other = self if q is other else other
                q = q._clone()
                if isinstance(other, Bool) and not any(
                    (
                        other.must,
                        other.must_not,
                        other.filter,
                        getattr(other, "minimum_should_match", None),
                    )
                ):
                    # both operands only carry `should` clauses: flatten them
                    q.should.extend(other.should)
                else:
                    q.should.append(other)
                return q

        return Bool(should=[self, other])

    __ror__ = __or__

    @property
    def _min_should_match(self) -> int:
        """
        The ``minimum_should_match`` attribute when ``getattr`` finds one;
        otherwise ``0`` when there are no ``should`` clauses or when ``must``
        or ``filter`` clauses are present, else ``1``.
        """
        return getattr(
            self,
            "minimum_should_match",
            0 if not self.should or (self.must or self.filter) else 1,
        )

    def __invert__(self) -> Query:
        """
        Return the negation of this query.

        Each ``must``/``filter`` clause is negated, each ``must_not`` clause
        is used as is, and the ``should`` clauses (when at least one of them
        is required) become a single ``Bool(must_not=...)``. A single
        resulting clause is returned directly; several are combined in
        ``Bool(should=...)``.
        """
        # Because an empty Bool query is treated like
        # MatchAll the inverse should be MatchNone
        if not any(chain(self.must, self.filter, self.should, self.must_not)):
            return MatchNone()

        negations: List[Query] = []
        for q in chain(self.must, self.filter):
            negations.append(~q)

        for q in self.must_not:
            negations.append(q)

        if self.should and self._min_should_match:
            # pass a copy of the list so the new query does not share ours
            negations.append(Bool(must_not=self.should[:]))

        if len(negations) == 1:
            return negations[0]
        return Bool(should=negations)

    def __and__(self, other: Query) -> Query:
        """
        Combine with ``other`` using AND and return the result as a clone.

        With another ``Bool`` the ``must``, ``must_not`` and ``filter`` lists
        are concatenated and the ``should`` clauses of both operands are
        redistributed according to each operand's effective
        ``minimum_should_match``. Any other query is appended to ``must``.

        :raises ValueError: if an operand's ``minimum_should_match`` is not
            a non-negative integer (for example a percentage string).
        """
        q = self._clone()
        if isinstance(other, Bool):
            q.must += other.must
            q.must_not += other.must_not
            q.filter += other.filter
            # `should` is rebuilt from both operands in the loop below
            q.should = []

            # reset minimum_should_match as it will get calculated below
            if "minimum_should_match" in q._params:
                del q._params["minimum_should_match"]

            for qx in (self, other):
                min_should_match = qx._min_should_match
                # TODO: percentages or negative numbers will fail here
                # for now we report an error
                if not isinstance(min_should_match, int) or min_should_match < 0:
                    raise ValueError(
                        "Can only combine queries with positive integer values for minimum_should_match"
                    )
                # all subqueries are required
                if len(qx.should) <= min_should_match:
                    q.must.extend(qx.should)
                # not all of them are required, use it and remember min_should_match
                elif not q.should:
                    q.minimum_should_match = min_should_match
                    q.should = qx.should
                # all queries are optional, just extend should
                elif q._min_should_match == 0 and min_should_match == 0:
                    q.should.extend(qx.should)
                # not all are required, add a should list to the must with proper min_should_match
                else:
                    q.must.append(
                        Bool(should=qx.should, minimum_should_match=min_should_match)
                    )
        else:
            # with only `should` clauses so far, pin minimum_should_match to 1
            # (unless already set) before the first `must` clause is added
            if not (q.must or q.filter) and q.should:
                q._params.setdefault("minimum_should_match", 1)
            q.must.append(other)
        return q

    __rand__ = __and__
class Common(Query):
    """
    The `common` query, expressed as a single field/value pair.

    :arg _field: The field to use in this query.
    :arg _value: The query value for the field.
    """

    name = "common"

    def __init__(
        self,
        _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        _value: Union[
            "types.CommonTermsQuery", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        **kwargs: Any,
    ):
        # the field/value pair is passed on as one more keyword parameter,
        # keyed by the field's string form
        if _field is DEFAULT:
            field_clause: Dict[str, Any] = {}
        else:
            field_clause = {str(_field): _value}
        super().__init__(**{**kwargs, **field_clause})
Only `text` fields are supported, and they + must all have the same search `analyzer`. + :arg query: (required) Text to search for in the provided `fields`. + The `combined_fields` query analyzes the provided text before + performing a search. + :arg auto_generate_synonyms_phrase_query: If true, match phrase + queries are automatically created for multi-term synonyms. + Defaults to `True` if omitted. + :arg operator: Boolean logic used to interpret text in the query + value. Defaults to `or` if omitted. + :arg minimum_should_match: Minimum number of clauses that must match + for a document to be returned. + :arg zero_terms_query: Indicates whether no documents are returned if + the analyzer removes all tokens, such as when using a `stop` + filter. Defaults to `none` if omitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
class ConstantScore(Query):
    """
    Wraps a filter query; every matching document is returned with a
    relevance score equal to the `boost` parameter value.

    :arg filter: (required) The filter query to run. Returned documents
        must match it. Filter queries do not calculate relevance scores,
        and Elasticsearch automatically caches frequently used filter
        queries to speed up performance.
    :arg boost: Floating point number used to decrease or increase the
        relevance scores of the query, relative to the default value of
        1.0: a value between 0 and 1.0 decreases the score, a value
        greater than 1.0 increases it. Defaults to `1` if omitted.
    :arg _name:
    """

    name = "constant_score"
    _param_defs = {"filter": {"type": "query"}}

    def __init__(
        self,
        *,
        filter: Union[Query, "DefaultType"] = DEFAULT,
        boost: Union[float, "DefaultType"] = DEFAULT,
        _name: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        # plain pass-through of the declared parameters
        super().__init__(
            filter=filter,
            boost=boost,
            _name=_name,
            **kwargs,
        )
For example, you can use this query to give more weight + to documents closer to a certain date or location. + + :arg origin: (required) Date or point of origin used to calculate + distances. If the `field` value is a `date` or `date_nanos` field, + the `origin` value must be a date. Date Math, such as `now-1h`, is + supported. If the field value is a `geo_point` field, the `origin` + value must be a geopoint. + :arg pivot: (required) Distance from the `origin` at which relevance + scores receive half of the `boost` value. If the `field` value is + a `date` or `date_nanos` field, the `pivot` value must be a time + unit, such as `1h` or `10d`. If the `field` value is a `geo_point` + field, the `pivot` value must be a distance unit, such as `1km` or + `12m`. + :arg field: (required) Name of the field used to calculate distances. + This field must meet the following criteria: be a `date`, + `date_nanos` or `geo_point` field; have an `index` mapping + parameter value of `true`, which is the default; have an + `doc_values` mapping parameter value of `true`, which is the + default. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "distance_feature" + + def __init__( + self, + *, + origin: Any = DEFAULT, + pivot: Any = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + origin=origin, pivot=pivot, field=field, boost=boost, _name=_name, **kwargs + ) + + +class Exists(Query): + """ + Returns documents that contain an indexed value for a field. + + :arg field: (required) Name of the field you wish to search. 
class FunctionScore(Query):
    """
    The `function_score` query modifies the score of the documents
    retrieved by a query.

    When `functions` is not given, keyword arguments named after an entry
    of ``ScoreFunction._classes`` are removed from the remaining keyword
    arguments and turned into the list of functions.

    :arg boost_mode: Defines how the newly computed score is combined
        with the score of the query. Defaults to `multiply` if omitted.
    :arg functions: One or more functions that compute a new score for
        each document returned by the query.
    :arg max_boost: Restricts the new score to not exceed the provided
        limit.
    :arg min_score: Excludes documents that do not meet the provided score
        threshold.
    :arg query: A query that determines the documents for which a new
        score is computed.
    :arg score_mode: Specifies how the computed scores are combined.
        Defaults to `multiply` if omitted.
    :arg boost: Floating point number used to decrease or increase the
        relevance scores of the query, relative to the default value of
        1.0: a value between 0 and 1.0 decreases the score, a value
        greater than 1.0 increases it. Defaults to `1` if omitted.
    :arg _name:
    """

    name = "function_score"
    _param_defs = {
        "functions": {"type": "score_function", "multi": True},
        "query": {"type": "query"},
        "filter": {"type": "query"},
    }

    def __init__(
        self,
        *,
        boost_mode: Union[
            Literal["multiply", "replace", "sum", "avg", "max", "min"], "DefaultType"
        ] = DEFAULT,
        functions: Union[Sequence[ScoreFunction], "DefaultType"] = DEFAULT,
        max_boost: Union[float, "DefaultType"] = DEFAULT,
        min_score: Union[float, "DefaultType"] = DEFAULT,
        query: Union[Query, "DefaultType"] = DEFAULT,
        score_mode: Union[
            Literal["multiply", "sum", "avg", "first", "max", "min"], "DefaultType"
        ] = DEFAULT,
        boost: Union[float, "DefaultType"] = DEFAULT,
        _name: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        if functions is DEFAULT:
            # shortcut form: collect score functions passed by name, in the
            # iteration order of ScoreFunction._classes, and take them out
            # of kwargs so they are not forwarded a second time
            shortcut_names = [
                fn_name for fn_name in ScoreFunction._classes if fn_name in kwargs
            ]
            functions = [
                {fn_name: kwargs.pop(fn_name)}  # type: ignore
                for fn_name in shortcut_names
            ]
        super().__init__(
            boost_mode=boost_mode,
            functions=functions,
            max_boost=max_boost,
            min_score=min_score,
            query=query,
            score_mode=score_mode,
            boost=boost,
            _name=_name,
            **kwargs,
        )
class GeoDistance(Query):
    """
    Matches `geo_point` and `geo_shape` values within a given distance of
    a geopoint.

    :arg _field: The field to use in this query.
    :arg _value: The query value for the field.
    :arg distance: (required) The radius of the circle centred on the
        specified location. Points that fall into this circle are
        considered to be matches.
    :arg distance_type: How to compute the distance. Set to `plane` for a
        faster calculation that is inaccurate on long distances and close
        to the poles. Defaults to `'arc'` if omitted.
    :arg validation_method: Set to `IGNORE_MALFORMED` to accept geo points
        with invalid latitude or longitude. Set to `COERCE` to also try to
        infer correct latitude or longitude. Defaults to `'strict'` if
        omitted.
    :arg ignore_unmapped: Set to `true` to ignore an unmapped field and
        not match any documents for this query. Set to `false` to throw an
        exception if the field is not mapped.
    :arg boost: Floating point number used to decrease or increase the
        relevance scores of the query, relative to the default value of
        1.0: a value between 0 and 1.0 decreases the score, a value
        greater than 1.0 increases it. Defaults to `1` if omitted.
    :arg _name:
    """

    name = "geo_distance"

    def __init__(
        self,
        _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        _value: Union[
            "types.LatLonGeoLocation",
            "types.GeoHashLocation",
            Sequence[float],
            str,
            Dict[str, Any],
            "DefaultType",
        ] = DEFAULT,
        *,
        distance: Union[str, "DefaultType"] = DEFAULT,
        distance_type: Union[Literal["arc", "plane"], "DefaultType"] = DEFAULT,
        validation_method: Union[
            Literal["coerce", "ignore_malformed", "strict"], "DefaultType"
        ] = DEFAULT,
        ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT,
        boost: Union[float, "DefaultType"] = DEFAULT,
        _name: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        field_given = _field is not DEFAULT
        if field_given:
            # the field/value pair travels as one more keyword parameter
            kwargs[str(_field)] = _value
        super().__init__(
            distance=distance,
            distance_type=distance_type,
            validation_method=validation_method,
            ignore_unmapped=ignore_unmapped,
            boost=boost,
            _name=_name,
            **kwargs,
        )
class GeoShape(Query):
    """
    Filters documents indexed using either the `geo_shape` or the
    `geo_point` type.

    :arg _field: The field to use in this query.
    :arg _value: The query value for the field.
    :arg ignore_unmapped: Set to `true` to ignore an unmapped field and
        not match any documents for this query. Set to `false` to throw an
        exception if the field is not mapped.
    :arg boost: Floating point number used to decrease or increase the
        relevance scores of the query, relative to the default value of
        1.0: a value between 0 and 1.0 decreases the score, a value
        greater than 1.0 increases it. Defaults to `1` if omitted.
    :arg _name:
    """

    name = "geo_shape"

    def __init__(
        self,
        _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        _value: Union[
            "types.GeoShapeFieldQuery", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        *,
        ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT,
        boost: Union[float, "DefaultType"] = DEFAULT,
        _name: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        field_given = _field is not DEFAULT
        if field_given:
            # the field/value pair travels as one more keyword parameter
            kwargs[str(_field)] = _value
        super().__init__(
            ignore_unmapped=ignore_unmapped,
            boost=boost,
            _name=_name,
            **kwargs,
        )
class HasParent(Query):
    """
    Query returning the child documents whose joined parent document
    matches a provided query.

    :arg parent_type: (required) Name of the parent relationship mapped
        for the `join` field.
    :arg query: (required) Query to run on parent documents of the
        `parent_type` field. When a parent document matches the search,
        its child documents are returned.
    :arg ignore_unmapped: Whether an unmapped `parent_type` is ignored,
        returning no documents, instead of producing an error. This can
        be used to query multiple indices that may not contain the
        `parent_type`.
    :arg inner_hits: When defined, each search hit contains inner hits.
    :arg score: Whether the relevance score of a matching parent
        document is aggregated into its child documents.
    :arg boost: Floating point multiplier applied to the relevance
        scores of the query, relative to the default value of 1.0.
        Values between 0 and 1.0 lower the relevance score; values
        greater than 1.0 raise it. Defaults to `1` if omitted.
    :arg _name:
    """

    name = "has_parent"
    _param_defs = {"query": {"type": "query"}}

    def __init__(
        self,
        *,
        parent_type: Union[str, "DefaultType"] = DEFAULT,
        query: Union[Query, "DefaultType"] = DEFAULT,
        ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT,
        inner_hits: Union["types.InnerHits", Dict[str, Any], "DefaultType"] = DEFAULT,
        score: Union[bool, "DefaultType"] = DEFAULT,
        boost: Union[float, "DefaultType"] = DEFAULT,
        _name: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        # Gather the declared options in declaration order, then forward
        # them to the base class followed by any extra keyword arguments.
        declared = {
            "parent_type": parent_type,
            "query": query,
            "ignore_unmapped": ignore_unmapped,
            "inner_hits": inner_hits,
            "score": score,
            "boost": boost,
            "_name": _name,
        }
        super().__init__(**declared, **kwargs)
class Knn(Query):
    """
    Query that finds the k nearest vectors to a query vector, as measured
    by a similarity metric. The knn query locates nearest vectors through
    approximate search on indexed dense_vectors.

    :arg field: (required) Name of the vector field to search against.
    :arg query_vector: The query vector.
    :arg query_vector_builder: The query vector builder. Either a
        query_vector_builder or a query_vector must be provided, but not
        both.
    :arg num_candidates: Number of nearest neighbor candidates to
        consider per shard.
    :arg k: Final number of nearest neighbors to return as top hits.
    :arg filter: Filters for the kNN search query.
    :arg similarity: Minimum similarity a vector needs to be considered
        a match.
    :arg boost: Floating point multiplier applied to the relevance
        scores of the query, relative to the default value of 1.0.
        Values between 0 and 1.0 lower the relevance score; values
        greater than 1.0 raise it. Defaults to `1` if omitted.
    :arg _name:
    """

    name = "knn"
    _param_defs = {"filter": {"type": "query", "multi": True}}

    def __init__(
        self,
        *,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        query_vector: Union[Sequence[float], "DefaultType"] = DEFAULT,
        query_vector_builder: Union[
            "types.QueryVectorBuilder", Dict[str, Any], "DefaultType"
        ] = DEFAULT,
        num_candidates: Union[int, "DefaultType"] = DEFAULT,
        k: Union[int, "DefaultType"] = DEFAULT,
        filter: Union[Query, Sequence[Query], "DefaultType"] = DEFAULT,
        similarity: Union[float, "DefaultType"] = DEFAULT,
        boost: Union[float, "DefaultType"] = DEFAULT,
        _name: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        # Gather the declared options in declaration order, then forward
        # them to the base class followed by any extra keyword arguments.
        declared = {
            "field": field,
            "query_vector": query_vector,
            "query_vector_builder": query_vector_builder,
            "num_candidates": num_candidates,
            "k": k,
            "filter": filter,
            "similarity": similarity,
            "boost": boost,
            "_name": _name,
        }
        super().__init__(**declared, **kwargs)
class MatchNone(Query):
    """
    Query that matches no documents.

    :arg boost: Floating point multiplier applied to the relevance
        scores of the query, relative to the default value of 1.0.
        Values between 0 and 1.0 lower the relevance score; values
        greater than 1.0 raise it. Defaults to `1` if omitted.
    :arg _name:
    """

    name = "match_none"

    def __init__(
        self,
        *,
        boost: Union[float, "DefaultType"] = DEFAULT,
        _name: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        declared = {"boost": boost, "_name": _name}
        super().__init__(**declared, **kwargs)

    def __add__(self, other: "Query") -> "MatchNone":
        """Return this query unchanged; `other` is not consulted."""
        return self

    # The conjunction operators and their reflected forms all share the
    # `__add__` implementation.
    __and__ = __add__
    __rand__ = __add__
    __radd__ = __add__

    def __or__(self, other: "Query") -> "Query":
        """Return the result of `other._clone()`."""
        cloned = other._clone()
        return cloned

    __ror__ = __or__

    def __invert__(self) -> MatchAll:
        """Return a newly constructed `MatchAll` query."""
        return MatchAll()
+ :arg analyzer: The analyzer that is used to analyze the free form + text. Defaults to the analyzer associated with the first field in + fields. + :arg boost_terms: Each term in the formed query could be further + boosted by their tf-idf score. This sets the boost factor to use + when using this feature. Defaults to deactivated (0). + :arg fail_on_unsupported_field: Controls whether the query should fail + (throw an exception) if any of the specified fields are not of the + supported types (`text` or `keyword`). Defaults to `True` if + omitted. + :arg fields: A list of fields to fetch and analyze the text from. + Defaults to the `index.query.default_field` index setting, which + has a default value of `*`. + :arg include: Specifies whether the input documents should also be + included in the search results returned. + :arg max_doc_freq: The maximum document frequency above which the + terms are ignored from the input document. + :arg max_query_terms: The maximum number of query terms that can be + selected. Defaults to `25` if omitted. + :arg max_word_length: The maximum word length above which the terms + are ignored. Defaults to unbounded (`0`). + :arg min_doc_freq: The minimum document frequency below which the + terms are ignored from the input document. Defaults to `5` if + omitted. + :arg minimum_should_match: After the disjunctive query has been + formed, this parameter controls the number of terms that must + match. + :arg min_term_freq: The minimum term frequency below which the terms + are ignored from the input document. Defaults to `2` if omitted. + :arg min_word_length: The minimum word length below which the terms + are ignored. + :arg routing: + :arg stop_words: An array of stop words. Any word in this set is + ignored. + :arg unlike: Used in combination with `like` to exclude documents that + match a set of terms. + :arg version: + :arg version_type: Defaults to `'internal'` if omitted. 
+ :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "more_like_this" + + def __init__( + self, + *, + like: Union[ + Union[str, "types.LikeDocument"], + Sequence[Union[str, "types.LikeDocument"]], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + analyzer: Union[str, "DefaultType"] = DEFAULT, + boost_terms: Union[float, "DefaultType"] = DEFAULT, + fail_on_unsupported_field: Union[bool, "DefaultType"] = DEFAULT, + fields: Union[ + Sequence[Union[str, "InstrumentedField"]], "DefaultType" + ] = DEFAULT, + include: Union[bool, "DefaultType"] = DEFAULT, + max_doc_freq: Union[int, "DefaultType"] = DEFAULT, + max_query_terms: Union[int, "DefaultType"] = DEFAULT, + max_word_length: Union[int, "DefaultType"] = DEFAULT, + min_doc_freq: Union[int, "DefaultType"] = DEFAULT, + minimum_should_match: Union[int, str, "DefaultType"] = DEFAULT, + min_term_freq: Union[int, "DefaultType"] = DEFAULT, + min_word_length: Union[int, "DefaultType"] = DEFAULT, + routing: Union[str, "DefaultType"] = DEFAULT, + stop_words: Union[str, Sequence[str], "DefaultType"] = DEFAULT, + unlike: Union[ + Union[str, "types.LikeDocument"], + Sequence[Union[str, "types.LikeDocument"]], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + version: Union[int, "DefaultType"] = DEFAULT, + version_type: Union[ + Literal["internal", "external", "external_gte", "force"], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + like=like, + analyzer=analyzer, + boost_terms=boost_terms, + fail_on_unsupported_field=fail_on_unsupported_field, + fields=fields, + include=include, + max_doc_freq=max_doc_freq, + 
class MultiMatch(Query):
    """
    Query that searches multiple fields for a provided text, number, date
    or boolean value. The provided text is analyzed before matching.

    :arg query: (required) Text, number, boolean value or date to find
        in the provided field.
    :arg analyzer: Analyzer that converts the text in the query value
        into tokens.
    :arg auto_generate_synonyms_phrase_query: When `true`, match phrase
        queries are automatically created for multi-term synonyms.
        Defaults to `True` if omitted.
    :arg cutoff_frequency:
    :arg fields: Fields to be queried. Defaults to the
        `index.query.default_field` index settings, which in turn
        defaults to `*`.
    :arg fuzziness: Maximum edit distance allowed for matching.
    :arg fuzzy_rewrite: Method used to rewrite the query.
    :arg fuzzy_transpositions: When `true`, edits for fuzzy matching
        include transpositions of two adjacent characters (for example,
        `ab` to `ba`). Can be applied to the term subqueries constructed
        for all terms but the final term. Defaults to `True` if omitted.
    :arg lenient: When `true`, format-based errors, such as providing a
        text query value for a numeric field, are ignored.
    :arg max_expansions: Maximum number of terms to which the query will
        expand. Defaults to `50` if omitted.
    :arg minimum_should_match: Minimum number of clauses that must match
        for a document to be returned.
    :arg operator: Boolean logic used to interpret text in the query
        value. Defaults to `'or'` if omitted.
    :arg prefix_length: Number of beginning characters left unchanged
        for fuzzy matching.
    :arg slop: Maximum number of positions allowed between matching
        tokens.
    :arg tie_breaker: Determines how scores for each per-term blended
        query and scores across groups are combined.
    :arg type: How the `multi_match` query is executed internally.
        Defaults to `'best_fields'` if omitted.
    :arg zero_terms_query: Whether no documents are returned when the
        `analyzer` removes all tokens, such as when using a `stop`
        filter. Defaults to `'none'` if omitted.
    :arg boost: Floating point multiplier applied to the relevance
        scores of the query, relative to the default value of 1.0.
        Values between 0 and 1.0 lower the relevance score; values
        greater than 1.0 raise it. Defaults to `1` if omitted.
    :arg _name:
    """

    name = "multi_match"

    def __init__(
        self,
        *,
        query: Union[str, "DefaultType"] = DEFAULT,
        analyzer: Union[str, "DefaultType"] = DEFAULT,
        auto_generate_synonyms_phrase_query: Union[bool, "DefaultType"] = DEFAULT,
        cutoff_frequency: Union[float, "DefaultType"] = DEFAULT,
        fields: Union[
            Union[str, "InstrumentedField"],
            Sequence[Union[str, "InstrumentedField"]],
            "DefaultType",
        ] = DEFAULT,
        fuzziness: Union[str, int, "DefaultType"] = DEFAULT,
        fuzzy_rewrite: Union[str, "DefaultType"] = DEFAULT,
        fuzzy_transpositions: Union[bool, "DefaultType"] = DEFAULT,
        lenient: Union[bool, "DefaultType"] = DEFAULT,
        max_expansions: Union[int, "DefaultType"] = DEFAULT,
        minimum_should_match: Union[int, str, "DefaultType"] = DEFAULT,
        operator: Union[Literal["and", "or"], "DefaultType"] = DEFAULT,
        prefix_length: Union[int, "DefaultType"] = DEFAULT,
        slop: Union[int, "DefaultType"] = DEFAULT,
        tie_breaker: Union[float, "DefaultType"] = DEFAULT,
        type: Union[
            Literal[
                "best_fields",
                "most_fields",
                "cross_fields",
                "phrase",
                "phrase_prefix",
                "bool_prefix",
            ],
            "DefaultType",
        ] = DEFAULT,
        zero_terms_query: Union[Literal["all", "none"], "DefaultType"] = DEFAULT,
        boost: Union[float, "DefaultType"] = DEFAULT,
        _name: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        # Gather the declared options in declaration order, then forward
        # them to the base class followed by any extra keyword arguments.
        declared = {
            "query": query,
            "analyzer": analyzer,
            "auto_generate_synonyms_phrase_query": auto_generate_synonyms_phrase_query,
            "cutoff_frequency": cutoff_frequency,
            "fields": fields,
            "fuzziness": fuzziness,
            "fuzzy_rewrite": fuzzy_rewrite,
            "fuzzy_transpositions": fuzzy_transpositions,
            "lenient": lenient,
            "max_expansions": max_expansions,
            "minimum_should_match": minimum_should_match,
            "operator": operator,
            "prefix_length": prefix_length,
            "slop": slop,
            "tie_breaker": tie_breaker,
            "type": type,
            "zero_terms_query": zero_terms_query,
            "boost": boost,
            "_name": _name,
        }
        super().__init__(**declared, **kwargs)
class Percolate(Query):
    """
    Query that matches queries stored in an index.

    :arg field: (required) Field holding the indexed queries. The field
        must use the `percolator` mapping type.
    :arg document: Source of the document being percolated.
    :arg documents: Array of sources of the documents being percolated.
    :arg id: ID of a stored document to percolate.
    :arg index: Index of a stored document to percolate.
    :arg name: Suffix used for the `_percolator_document_slot` field
        when multiple `percolate` queries are specified.
    :arg preference: Preference used to fetch the document to percolate.
    :arg routing: Routing used to fetch the document to percolate.
    :arg version: Expected version of a stored document to percolate.
    :arg boost: Floating point multiplier applied to the relevance
        scores of the query, relative to the default value of 1.0.
        Values between 0 and 1.0 lower the relevance score; values
        greater than 1.0 raise it. Defaults to `1` if omitted.
    :arg _name:
    """

    name = "percolate"

    def __init__(
        self,
        *,
        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        document: Any = DEFAULT,
        documents: Union[Sequence[Any], "DefaultType"] = DEFAULT,
        id: Union[str, "DefaultType"] = DEFAULT,
        index: Union[str, "DefaultType"] = DEFAULT,
        name: Union[str, "DefaultType"] = DEFAULT,
        preference: Union[str, "DefaultType"] = DEFAULT,
        routing: Union[str, "DefaultType"] = DEFAULT,
        version: Union[int, "DefaultType"] = DEFAULT,
        boost: Union[float, "DefaultType"] = DEFAULT,
        _name: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        # Gather the declared options in declaration order, then forward
        # them to the base class followed by any extra keyword arguments.
        declared = {
            "field": field,
            "document": document,
            "documents": documents,
            "id": id,
            "index": index,
            "name": name,
            "preference": preference,
            "routing": routing,
            "version": version,
            "boost": boost,
            "_name": _name,
        }
        super().__init__(**declared, **kwargs)
class QueryString(Query):
    """
    Query returning documents based on a provided query string, parsed
    with a strict syntax.

    :arg query: (required) Query string to parse and use for search.
    :arg allow_leading_wildcard: When `true`, the wildcard characters
        `*` and `?` are allowed as the first character of the query
        string. Defaults to `True` if omitted.
    :arg analyzer: Analyzer that converts text in the query string into
        tokens.
    :arg analyze_wildcard: When `true`, the query attempts to analyze
        wildcard terms in the query string.
    :arg auto_generate_synonyms_phrase_query: When `true`, match phrase
        queries are automatically created for multi-term synonyms.
        Defaults to `True` if omitted.
    :arg default_field: Field searched when the query string names no
        field. Supports wildcards (`*`). Defaults to the
        `index.query.default_field` index setting, which has a default
        value of `*`.
    :arg default_operator: Boolean logic used to interpret text in the
        query string when no operators are specified. Defaults to
        `'or'` if omitted.
    :arg enable_position_increments: When `true`, position increments
        are enabled in queries constructed from a `query_string` search.
        Defaults to `True` if omitted.
    :arg escape:
    :arg fields: Array of fields to search. Supports wildcards (`*`).
    :arg fuzziness: Maximum edit distance allowed for fuzzy matching.
    :arg fuzzy_max_expansions: Maximum number of terms to which the
        query expands for fuzzy matching. Defaults to `50` if omitted.
    :arg fuzzy_prefix_length: Number of beginning characters left
        unchanged for fuzzy matching.
    :arg fuzzy_rewrite: Method used to rewrite the query.
    :arg fuzzy_transpositions: When `true`, edits for fuzzy matching
        include transpositions of two adjacent characters (for example,
        `ab` to `ba`). Defaults to `True` if omitted.
    :arg lenient: When `true`, format-based errors, such as providing a
        text value for a numeric field, are ignored.
    :arg max_determinized_states: Maximum number of automaton states
        required for the query. Defaults to `10000` if omitted.
    :arg minimum_should_match: Minimum number of clauses that must match
        for a document to be returned.
    :arg phrase_slop: Maximum number of positions allowed between
        matching tokens for phrases.
    :arg quote_analyzer: Analyzer that converts quoted text in the query
        string into tokens. For quoted text, this overrides the analyzer
        given in the `analyzer` parameter.
    :arg quote_field_suffix: Suffix appended to quoted text in the query
        string. It can be used to apply a different analysis method for
        exact matches.
    :arg rewrite: Method used to rewrite the query.
    :arg tie_breaker: How to combine the queries generated from the
        individual search terms in the resulting `dis_max` query.
    :arg time_zone: Coordinated Universal Time (UTC) offset or IANA time
        zone used to convert date values in the query string to UTC.
    :arg type: Determines how the query matches and scores documents.
        Defaults to `'best_fields'` if omitted.
    :arg boost: Floating point multiplier applied to the relevance
        scores of the query, relative to the default value of 1.0.
        Values between 0 and 1.0 lower the relevance score; values
        greater than 1.0 raise it. Defaults to `1` if omitted.
    :arg _name:
    """

    name = "query_string"

    def __init__(
        self,
        *,
        query: Union[str, "DefaultType"] = DEFAULT,
        allow_leading_wildcard: Union[bool, "DefaultType"] = DEFAULT,
        analyzer: Union[str, "DefaultType"] = DEFAULT,
        analyze_wildcard: Union[bool, "DefaultType"] = DEFAULT,
        auto_generate_synonyms_phrase_query: Union[bool, "DefaultType"] = DEFAULT,
        default_field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
        default_operator: Union[Literal["and", "or"], "DefaultType"] = DEFAULT,
        enable_position_increments: Union[bool, "DefaultType"] = DEFAULT,
        escape: Union[bool, "DefaultType"] = DEFAULT,
        fields: Union[
            Sequence[Union[str, "InstrumentedField"]], "DefaultType"
        ] = DEFAULT,
        fuzziness: Union[str, int, "DefaultType"] = DEFAULT,
        fuzzy_max_expansions: Union[int, "DefaultType"] = DEFAULT,
        fuzzy_prefix_length: Union[int, "DefaultType"] = DEFAULT,
        fuzzy_rewrite: Union[str, "DefaultType"] = DEFAULT,
        fuzzy_transpositions: Union[bool, "DefaultType"] = DEFAULT,
        lenient: Union[bool, "DefaultType"] = DEFAULT,
        max_determinized_states: Union[int, "DefaultType"] = DEFAULT,
        minimum_should_match: Union[int, str, "DefaultType"] = DEFAULT,
        phrase_slop: Union[float, "DefaultType"] = DEFAULT,
        quote_analyzer: Union[str, "DefaultType"] = DEFAULT,
        quote_field_suffix: Union[str, "DefaultType"] = DEFAULT,
        rewrite: Union[str, "DefaultType"] = DEFAULT,
        tie_breaker: Union[float, "DefaultType"] = DEFAULT,
        time_zone: Union[str, "DefaultType"] = DEFAULT,
        type: Union[
            Literal[
                "best_fields",
                "most_fields",
                "cross_fields",
                "phrase",
                "phrase_prefix",
                "bool_prefix",
            ],
            "DefaultType",
        ] = DEFAULT,
        boost: Union[float, "DefaultType"] = DEFAULT,
        _name: Union[str, "DefaultType"] = DEFAULT,
        **kwargs: Any,
    ):
        # Gather the declared options in declaration order, then forward
        # them to the base class followed by any extra keyword arguments.
        declared = {
            "query": query,
            "allow_leading_wildcard": allow_leading_wildcard,
            "analyzer": analyzer,
            "analyze_wildcard": analyze_wildcard,
            "auto_generate_synonyms_phrase_query": auto_generate_synonyms_phrase_query,
            "default_field": default_field,
            "default_operator": default_operator,
            "enable_position_increments": enable_position_increments,
            "escape": escape,
            "fields": fields,
            "fuzziness": fuzziness,
            "fuzzy_max_expansions": fuzzy_max_expansions,
            "fuzzy_prefix_length": fuzzy_prefix_length,
            "fuzzy_rewrite": fuzzy_rewrite,
            "fuzzy_transpositions": fuzzy_transpositions,
            "lenient": lenient,
            "max_determinized_states": max_determinized_states,
            "minimum_should_match": minimum_should_match,
            "phrase_slop": phrase_slop,
            "quote_analyzer": quote_analyzer,
            "quote_field_suffix": quote_field_suffix,
            "rewrite": rewrite,
            "tie_breaker": tie_breaker,
            "time_zone": time_zone,
            "type": type,
            "boost": boost,
            "_name": _name,
        }
        super().__init__(**declared, **kwargs)
+ :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "rank_feature" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + saturation: Union[ + "types.RankFeatureFunctionSaturation", Dict[str, Any], "DefaultType" + ] = DEFAULT, + log: Union[ + "types.RankFeatureFunctionLogarithm", Dict[str, Any], "DefaultType" + ] = DEFAULT, + linear: Union[ + "types.RankFeatureFunctionLinear", Dict[str, Any], "DefaultType" + ] = DEFAULT, + sigmoid: Union[ + "types.RankFeatureFunctionSigmoid", Dict[str, Any], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + saturation=saturation, + log=log, + linear=linear, + sigmoid=sigmoid, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Regexp(Query): + """ + Returns documents that contain terms matching a regular expression. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "regexp" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.RegexpQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class Rule(Query): + """ + :arg organic: (required) + :arg ruleset_ids: (required) + :arg match_criteria: (required) + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. 
A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "rule" + _param_defs = { + "organic": {"type": "query"}, + } + + def __init__( + self, + *, + organic: Union[Query, "DefaultType"] = DEFAULT, + ruleset_ids: Union[Sequence[str], "DefaultType"] = DEFAULT, + match_criteria: Any = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + organic=organic, + ruleset_ids=ruleset_ids, + match_criteria=match_criteria, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Script(Query): + """ + Filters documents based on a provided script. The script query is + typically used in a filter context. + + :arg script: (required) Contains a script to run as a query. This + script must return a boolean value, `true` or `false`. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "script" + + def __init__( + self, + *, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(script=script, boost=boost, _name=_name, **kwargs) + + +class ScriptScore(Query): + """ + Uses a script to provide a custom score for returned documents. + + :arg query: (required) Query used to return documents. + :arg script: (required) Script used to compute the score of documents + returned by the query. Important: final relevance scores from the + `script_score` query cannot be negative. + :arg min_score: Documents with a score lower than this floating point + number are excluded from the search results. 
+ :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "script_score" + _param_defs = { + "query": {"type": "query"}, + } + + def __init__( + self, + *, + query: Union[Query, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + min_score: Union[float, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + query=query, + script=script, + min_score=min_score, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Semantic(Query): + """ + A semantic query to semantic_text field types + + :arg field: (required) The field to query, which must be a + semantic_text field type + :arg query: (required) The query text + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "semantic" + + def __init__( + self, + *, + field: Union[str, "DefaultType"] = DEFAULT, + query: Union[str, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(field=field, query=query, boost=boost, _name=_name, **kwargs) + + +class Shape(Query): + """ + Queries documents that contain fields indexed using the `shape` type. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. 
+ :arg ignore_unmapped: When set to `true` the query ignores an unmapped + field and will not match any documents. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "shape" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.ShapeFieldQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + *, + ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__( + ignore_unmapped=ignore_unmapped, boost=boost, _name=_name, **kwargs + ) + + +class SimpleQueryString(Query): + """ + Returns documents based on a provided query string, using a parser + with a limited but fault-tolerant syntax. + + :arg query: (required) Query string in the simple query string syntax + you wish to parse and use for search. + :arg analyzer: Analyzer used to convert text in the query string into + tokens. + :arg analyze_wildcard: If `true`, the query attempts to analyze + wildcard terms in the query string. + :arg auto_generate_synonyms_phrase_query: If `true`, the parser + creates a match_phrase query for each multi-position token. + Defaults to `True` if omitted. + :arg default_operator: Default boolean logic used to interpret text in + the query string if no operators are specified. Defaults to `'or'` + if omitted. + :arg fields: Array of fields you wish to search. Accepts wildcard + expressions. You also can boost relevance scores for matches to + particular fields using a caret (`^`) notation. 
Defaults to the + `index.query.default_field index` setting, which has a default + value of `*`. + :arg flags: List of enabled operators for the simple query string + syntax. Defaults to `ALL` if omitted. + :arg fuzzy_max_expansions: Maximum number of terms to which the query + expands for fuzzy matching. Defaults to `50` if omitted. + :arg fuzzy_prefix_length: Number of beginning characters left + unchanged for fuzzy matching. + :arg fuzzy_transpositions: If `true`, edits for fuzzy matching include + transpositions of two adjacent characters (for example, `ab` to + `ba`). + :arg lenient: If `true`, format-based errors, such as providing a text + value for a numeric field, are ignored. + :arg minimum_should_match: Minimum number of clauses that must match + for a document to be returned. + :arg quote_field_suffix: Suffix appended to quoted text in the query + string. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "simple_query_string" + + def __init__( + self, + *, + query: Union[str, "DefaultType"] = DEFAULT, + analyzer: Union[str, "DefaultType"] = DEFAULT, + analyze_wildcard: Union[bool, "DefaultType"] = DEFAULT, + auto_generate_synonyms_phrase_query: Union[bool, "DefaultType"] = DEFAULT, + default_operator: Union[Literal["and", "or"], "DefaultType"] = DEFAULT, + fields: Union[ + Sequence[Union[str, "InstrumentedField"]], "DefaultType" + ] = DEFAULT, + flags: Union[ + "types.PipeSeparatedFlags", Dict[str, Any], "DefaultType" + ] = DEFAULT, + fuzzy_max_expansions: Union[int, "DefaultType"] = DEFAULT, + fuzzy_prefix_length: Union[int, "DefaultType"] = DEFAULT, + fuzzy_transpositions: Union[bool, "DefaultType"] = DEFAULT, + lenient: Union[bool, "DefaultType"] = DEFAULT, + minimum_should_match: Union[int, str, "DefaultType"] = DEFAULT, + quote_field_suffix: Union[str, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + query=query, + analyzer=analyzer, + analyze_wildcard=analyze_wildcard, + auto_generate_synonyms_phrase_query=auto_generate_synonyms_phrase_query, + default_operator=default_operator, + fields=fields, + flags=flags, + fuzzy_max_expansions=fuzzy_max_expansions, + fuzzy_prefix_length=fuzzy_prefix_length, + fuzzy_transpositions=fuzzy_transpositions, + lenient=lenient, + minimum_should_match=minimum_should_match, + quote_field_suffix=quote_field_suffix, + boost=boost, + _name=_name, + **kwargs, + ) + + +class SpanContaining(Query): + """ + Returns matches which enclose another span query. + + :arg big: (required) Can be any span query. Matching spans from `big` + that contain matches from `little` are returned. + :arg little: (required) Can be any span query. Matching spans from + `big` that contain matches from `little` are returned. 
+ :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_containing" + + def __init__( + self, + *, + big: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + little: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(big=big, little=little, boost=boost, _name=_name, **kwargs) + + +class SpanFieldMasking(Query): + """ + Wrapper to allow span queries to participate in composite single-field + span queries by _lying_ about their search field. + + :arg field: (required) + :arg query: (required) + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_field_masking" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + query: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(field=field, query=query, boost=boost, _name=_name, **kwargs) + + +class SpanFirst(Query): + """ + Matches spans near the beginning of a field. + + :arg end: (required) Controls the maximum end position permitted in a + match. + :arg match: (required) Can be any other span type query. 
+ :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_first" + + def __init__( + self, + *, + end: Union[int, "DefaultType"] = DEFAULT, + match: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(end=end, match=match, boost=boost, _name=_name, **kwargs) + + +class SpanMulti(Query): + """ + Allows you to wrap a multi term query (one of `wildcard`, `fuzzy`, + `prefix`, `range`, or `regexp` query) as a `span` query, so it can be + nested. + + :arg match: (required) Should be a multi term query (one of + `wildcard`, `fuzzy`, `prefix`, `range`, or `regexp` query). + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_multi" + _param_defs = { + "match": {"type": "query"}, + } + + def __init__( + self, + *, + match: Union[Query, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(match=match, boost=boost, _name=_name, **kwargs) + + +class SpanNear(Query): + """ + Matches spans which are near one another. You can specify `slop`, the + maximum number of intervening unmatched positions, as well as whether + matches are required to be in-order. + + :arg clauses: (required) Array of one or more other span type queries. 
+ :arg in_order: Controls whether matches are required to be in-order. + :arg slop: Controls the maximum number of intervening unmatched + positions permitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_near" + + def __init__( + self, + *, + clauses: Union[ + Sequence["types.SpanQuery"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + in_order: Union[bool, "DefaultType"] = DEFAULT, + slop: Union[int, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + clauses=clauses, + in_order=in_order, + slop=slop, + boost=boost, + _name=_name, + **kwargs, + ) + + +class SpanNot(Query): + """ + Removes matches which overlap with another span query or which are + within x tokens before (controlled by the parameter `pre`) or y tokens + after (controlled by the parameter `post`) another span query. + + :arg exclude: (required) Span query whose matches must not overlap + those returned. + :arg include: (required) Span query whose matches are filtered. + :arg dist: The number of tokens from within the include span that + can’t have overlap with the exclude span. Equivalent to setting + both `pre` and `post`. + :arg post: The number of tokens after the include span that can’t have + overlap with the exclude span. + :arg pre: The number of tokens before the include span that can’t have + overlap with the exclude span. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. 
A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_not" + + def __init__( + self, + *, + exclude: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + include: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + dist: Union[int, "DefaultType"] = DEFAULT, + post: Union[int, "DefaultType"] = DEFAULT, + pre: Union[int, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + exclude=exclude, + include=include, + dist=dist, + post=post, + pre=pre, + boost=boost, + _name=_name, + **kwargs, + ) + + +class SpanOr(Query): + """ + Matches the union of its span clauses. + + :arg clauses: (required) Array of one or more other span type queries. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_or" + + def __init__( + self, + *, + clauses: Union[ + Sequence["types.SpanQuery"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(clauses=clauses, boost=boost, _name=_name, **kwargs) + + +class SpanTerm(Query): + """ + Matches spans containing a term. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. 
+ """ + + name = "span_term" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.SpanTermQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class SpanWithin(Query): + """ + Returns matches which are enclosed inside another span query. + + :arg big: (required) Can be any span query. Matching spans from + `little` that are enclosed within `big` are returned. + :arg little: (required) Can be any span query. Matching spans from + `little` that are enclosed within `big` are returned. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_within" + + def __init__( + self, + *, + big: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + little: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(big=big, little=little, boost=boost, _name=_name, **kwargs) + + +class SparseVector(Query): + """ + Using input query vectors or a natural language processing model to + convert a query into a list of token-weight pairs, queries against a + sparse vector field. + + :arg field: (required) The name of the field that contains the token- + weight pairs to be searched against. This field must be a mapped + sparse_vector field. + :arg query_vector: Dictionary of precomputed sparse vectors and their + associated weights. Only one of inference_id or query_vector may + be supplied in a request. 
+ :arg inference_id: The inference ID to use to convert the query text + into token-weight pairs. It must be the same inference ID that was + used to create the tokens from the input text. Only one of + inference_id and query_vector is allowed. If inference_id is + specified, query must also be specified. Only one of inference_id + or query_vector may be supplied in a request. + :arg query: The query text you want to use for search. If inference_id + is specified, query must also be specified. + :arg prune: Whether to perform pruning, omitting the non-significant + tokens from the query to improve query performance. If prune is + true but the pruning_config is not specified, pruning will occur + but default values will be used. Default: false + :arg pruning_config: Optional pruning configuration. If enabled, this + will omit non-significant tokens from the query in order to + improve query performance. This is only used if prune is set to + true. If prune is set to true but pruning_config is not specified, + default values will be used. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "sparse_vector" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + query_vector: Union[Mapping[str, float], "DefaultType"] = DEFAULT, + inference_id: Union[str, "DefaultType"] = DEFAULT, + query: Union[str, "DefaultType"] = DEFAULT, + prune: Union[bool, "DefaultType"] = DEFAULT, + pruning_config: Union[ + "types.TokenPruningConfig", Dict[str, Any], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + query_vector=query_vector, + inference_id=inference_id, + query=query, + prune=prune, + pruning_config=pruning_config, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Term(Query): + """ + Returns documents that contain an exact term in a provided field. To + return a document, the query term must exactly match the queried + field's value, including whitespace and capitalization. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "term" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.TermQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class Terms(Query): + """ + Returns documents that contain one or more exact terms in a provided + field. To return a document, one or more terms must exactly match a + field value, including whitespace and capitalization. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. 
A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "terms" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + Sequence[Union[int, float, str, bool, None, Any]], + "types.TermsLookup", + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + *, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(boost=boost, _name=_name, **kwargs) + + def _setattr(self, name: str, value: Any) -> None: + # here we convert any iterables that are not strings to lists + if hasattr(value, "__iter__") and not isinstance(value, (str, list, dict)): + value = list(value) + super()._setattr(name, value) + + +class TermsSet(Query): + """ + Returns documents that contain a minimum number of exact terms in a + provided field. To return a document, a required number of terms must + exactly match the field values, including whitespace and + capitalization. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "terms_set" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.TermsSetQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class TextExpansion(Query): + """ + Uses a natural language processing model to convert the query text + into a list of token-weight pairs which are then used in a query + against a sparse vector or rank features field. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. 
+ """ + + name = "text_expansion" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.TextExpansionQuery", Dict[str, Any], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class WeightedTokens(Query): + """ + Supports returning text_expansion query results by sending in + precomputed tokens with the query. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "weighted_tokens" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.WeightedTokensQuery", Dict[str, Any], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class Wildcard(Query): + """ + Returns documents that contain terms matching a wildcard pattern. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "wildcard" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.WildcardQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class Wrapper(Query): + """ + A query that accepts any other query as base64 encoded string. + + :arg query: (required) A base64 encoded query. The binary data format + can be any of JSON, YAML, CBOR or SMILE encodings + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "wrapper" + + def __init__( + self, + *, + query: Union[str, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(query=query, boost=boost, _name=_name, **kwargs) + + +class Type(Query): + """ + :arg value: (required) + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "type" + + def __init__( + self, + *, + value: Union[str, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(value=value, boost=boost, _name=_name, **kwargs) diff --git a/elasticsearch/dsl/response/__init__.py b/elasticsearch/dsl/response/__init__.py new file mode 100644 index 000000000..eea1b87f9 --- /dev/null +++ b/elasticsearch/dsl/response/__init__.py @@ -0,0 +1,354 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Generic,
    Iterator,
    List,
    Mapping,
    Optional,
    Sequence,
    Tuple,
    Union,
    cast,
)

from ..utils import _R, AttrDict, AttrList, _wrap
from .hit import Hit, HitMeta

if TYPE_CHECKING:
    from .. import types
    from ..aggs import Agg
    from ..faceted_search_base import FacetedSearchBase
    from ..search_base import Request, SearchBase
    from ..update_by_query_base import UpdateByQueryBase

__all__ = [
    "Response",
    "AggResponse",
    "UpdateByQueryResponse",
    "Hit",
    "HitMeta",
    "AggregateResponseType",
]


class Response(AttrDict[Any], Generic[_R]):
    """An Elasticsearch search response.

    Iteration, ``len()``, truthiness and integer/slice indexing all operate
    on :attr:`hits`; string keys are looked up in the raw response.

    :arg took: (required)
    :arg timed_out: (required)
    :arg _shards: (required)
    :arg hits: search results
    :arg aggregations: aggregation results
    :arg _clusters:
    :arg fields:
    :arg max_score:
    :arg num_reduce_phases:
    :arg profile:
    :arg pit_id:
    :arg _scroll_id:
    :arg suggest:
    :arg terminated_early:
    """

    _search: "SearchBase[_R]"
    _faceted_search: "FacetedSearchBase[_R]"
    _doc_class: Optional[_R]
    _hits: List[_R]

    took: int
    timed_out: bool
    _shards: "types.ShardStatistics"
    _clusters: "types.ClusterStatistics"
    fields: Mapping[str, Any]
    max_score: float
    num_reduce_phases: int
    profile: "types.Profile"
    pit_id: str
    _scroll_id: str
    suggest: Mapping[
        str,
        Sequence[
            Union["types.CompletionSuggest", "types.PhraseSuggest", "types.TermSuggest"]
        ],
    ]
    terminated_early: bool

    def __init__(
        self,
        search: "Request[_R]",
        response: Dict[str, Any],
        doc_class: Optional[_R] = None,
    ):
        """Wrap the raw ``response`` dict produced by ``search``.

        :arg search: the request object that produced this response
        :arg response: the raw response body
        :arg doc_class: optional document class, stored as ``_doc_class``
        """
        # Bypass AttrDict.__setattr__ so these bookkeeping attributes are set
        # on the instance itself rather than stored in the response data.
        super(AttrDict, self).__setattr__("_search", search)
        super(AttrDict, self).__setattr__("_doc_class", doc_class)
        super().__init__(response)

    def __iter__(self) -> Iterator[_R]:  # type: ignore[override]
        """Iterate over the hits."""
        return iter(self.hits)

    def __getitem__(self, key: Union[slice, int, str]) -> Any:
        """Index into the hits (``int``/``slice``) or the raw response (``str``)."""
        if isinstance(key, (slice, int)):
            # for slicing etc
            return self.hits[key]
        return super().__getitem__(key)

    def __bool__(self) -> bool:
        """A response is truthy when it has at least one hit."""
        return bool(self.hits)

    # Legacy spelling kept so existing callers of ``__nonzero__`` keep working.
    __nonzero__ = __bool__

    def __repr__(self) -> str:
        """Show the hits, or the aggregations when there are no hits."""
        # The format string was empty before, so the repr carried no
        # information; include the class marker and the payload.
        return f"<Response: {(self.hits or self.aggregations)!r}>"

    def __len__(self) -> int:
        """Number of hits contained in this response."""
        return len(self.hits)

    def __getstate__(self) -> Tuple[Dict[str, Any], "Request[_R]", Optional[_R]]:  # type: ignore[override]
        """Pickle support: raw data, originating search and document class."""
        return self._d_, self._search, self._doc_class

    def __setstate__(
        self, state: Tuple[Dict[str, Any], "Request[_R]", Optional[_R]]  # type: ignore[override]
    ) -> None:
        """Restore the three attributes saved by :meth:`__getstate__`."""
        super(AttrDict, self).__setattr__("_d_", state[0])
        super(AttrDict, self).__setattr__("_search", state[1])
        super(AttrDict, self).__setattr__("_doc_class", state[2])

    def success(self) -> bool:
        """Return ``True`` when every shard succeeded and the search did not time out."""
        return self._shards.total == self._shards.successful and not self.timed_out

    @property
    def hits(self) -> List[_R]:
        """The search hits, built lazily on first access and then cached.

        Each raw hit is converted with the search's ``_get_result``; the other
        keys of the raw ``hits`` section are copied onto the resulting list as
        attributes.

        :raises TypeError: if converting the hits raises ``AttributeError``
        """
        if not hasattr(self, "_hits"):
            h = cast(AttrDict[Any], self._d_["hits"])

            try:
                hits = AttrList(list(map(self._search._get_result, h["hits"])))
            except AttributeError as e:
                # avoid raising AttributeError since it will be hidden by the property
                raise TypeError("Could not parse hits.", e) from e

            # avoid assigning _hits into self._d_
            super(AttrDict, self).__setattr__("_hits", hits)
            for k in h:
                setattr(self._hits, k, _wrap(h[k]))
        return self._hits

    @property
    def aggregations(self) -> "AggResponse[_R]":
        """Alias of :attr:`aggs`."""
        return self.aggs

    @property
    def aggs(self) -> "AggResponse[_R]":
        """The aggregation results, built lazily on first access and then cached."""
        if not hasattr(self, "_aggs"):
            aggs = AggResponse[_R](
                cast("Agg[_R]", self._search.aggs),
                self._search,
                cast(Dict[str, Any], self._d_.get("aggregations", {})),
            )

            # avoid assigning _aggs into self._d_
            super(AttrDict, self).__setattr__("_aggs", aggs)
        return cast("AggResponse[_R]", self._aggs)

    def search_after(self) -> "SearchBase[_R]":
        """
        Return a ``Search`` instance that retrieves the next page of results.

        This method provides an easy way to paginate a long list of results using
        the ``search_after`` option. For example::

            page_size = 20
            s = Search()[:page_size].sort("date")

            while True:
                # get a page of results
                r = await s.execute()

                # do something with this page of results

                # exit the loop if we reached the end
                if len(r.hits) < page_size:
                    break

                # get a search object with the next page of results
                s = r.search_after()

        Note that the ``search_after`` option requires the search to have an
        explicit ``sort`` order.

        :raises ValueError: if there are no hits, or the last hit carries no
            ``sort`` metadata
        """
        if len(self.hits) == 0:
            raise ValueError("Cannot use search_after when there are no search results")
        if not hasattr(self.hits[-1].meta, "sort"):  # type: ignore
            raise ValueError("Cannot use search_after when results are not sorted")
        return self._search.extra(search_after=self.hits[-1].meta.sort)  # type: ignore


# Union of the aggregate result types that an aggregation response item may
# take; used as the return annotation for aggregation lookups.
AggregateResponseType = Union[
    "types.CardinalityAggregate",
    "types.HdrPercentilesAggregate",
    "types.HdrPercentileRanksAggregate",
    "types.TDigestPercentilesAggregate",
    "types.TDigestPercentileRanksAggregate",
    "types.PercentilesBucketAggregate",
    "types.MedianAbsoluteDeviationAggregate",
    "types.MinAggregate",
    "types.MaxAggregate",
    "types.SumAggregate",
    "types.AvgAggregate",
    "types.WeightedAvgAggregate",
    "types.ValueCountAggregate",
    "types.SimpleValueAggregate",
    "types.DerivativeAggregate",
    "types.BucketMetricValueAggregate",
    "types.StatsAggregate",
    "types.StatsBucketAggregate",
    "types.ExtendedStatsAggregate",
    "types.ExtendedStatsBucketAggregate",
    "types.GeoBoundsAggregate",
    "types.GeoCentroidAggregate",
    "types.HistogramAggregate",
    "types.DateHistogramAggregate",
    "types.AutoDateHistogramAggregate",
    "types.VariableWidthHistogramAggregate",
    "types.StringTermsAggregate",
    "types.LongTermsAggregate",
    "types.DoubleTermsAggregate",
    "types.UnmappedTermsAggregate",
    "types.LongRareTermsAggregate",
    "types.StringRareTermsAggregate",
    "types.UnmappedRareTermsAggregate",
    "types.MultiTermsAggregate",
    "types.MissingAggregate",
    "types.NestedAggregate",
    "types.ReverseNestedAggregate",
    "types.GlobalAggregate",
    "types.FilterAggregate",
    "types.ChildrenAggregate",
    "types.ParentAggregate",
    "types.SamplerAggregate",
    "types.UnmappedSamplerAggregate",
    "types.GeoHashGridAggregate",
    "types.GeoTileGridAggregate",
    "types.GeoHexGridAggregate",
    "types.RangeAggregate",
    "types.DateRangeAggregate",
    "types.GeoDistanceAggregate",
    "types.IpRangeAggregate",
    "types.IpPrefixAggregate",
    "types.FiltersAggregate",
    "types.AdjacencyMatrixAggregate",
    "types.SignificantLongTermsAggregate",
    "types.SignificantStringTermsAggregate",
    "types.UnmappedSignificantTermsAggregate",
    "types.CompositeAggregate",
    "types.FrequentItemSetsAggregate",
    "types.TimeSeriesAggregate",
    "types.ScriptedMetricAggregate",
    "types.TopHitsAggregate",
    "types.InferenceAggregate",
    "types.StringStatsAggregate",
    "types.BoxPlotAggregate",
    "types.TopMetricsAggregate",
    "types.TTestAggregate",
    "types.RateAggregate",
    "types.CumulativeCardinalityAggregate",
    "types.MatrixStatsAggregate",
    "types.GeoLineAggregate",
]
class UpdateByQueryResponse(AttrDict[Any], Generic[_R]):
    """An Elasticsearch update by query response.

    :arg batches:
    :arg failures:
    :arg noops:
    :arg deleted:
    :arg requests_per_second:
    :arg retries:
    :arg task:
    :arg timed_out:
    :arg took:
    :arg total:
    :arg updated:
    :arg version_conflicts:
    :arg throttled:
    :arg throttled_millis:
    :arg throttled_until:
    :arg throttled_until_millis:
    """

    _search: "UpdateByQueryBase[_R]"

    batches: int
    failures: Sequence["types.BulkIndexByScrollFailure"]
    noops: int
    deleted: int
    requests_per_second: float
    retries: "types.Retries"
    task: Union[str, int]
    timed_out: bool
    took: Any
    total: int
    updated: int
    version_conflicts: int
    throttled: Any
    throttled_millis: Any
    throttled_until: Any
    throttled_until_millis: Any

    def __init__(
        self,
        search: "Request[_R]",
        response: Dict[str, Any],
        doc_class: Optional[_R] = None,
    ):
        """
        :arg search: the request that produced this response
        :arg response: the raw response body
        :arg doc_class: optional document class stored with the response
        """
        # stored as plain attributes (set through the class that follows
        # AttrDict in the MRO) before the body itself is wrapped
        plain_attributes = (("_search", search), ("_doc_class", doc_class))
        for attribute, value in plain_attributes:
            super(AttrDict, self).__setattr__(attribute, value)
        super().__init__(response)

    def success(self) -> bool:
        """Return ``True`` if the request neither timed out nor reported failures."""
        return not (self.timed_out or self.failures)
class BucketData(AggResponse[_R]):
    """Result of a bucket aggregation.

    The raw ``buckets`` section is wrapped lazily: every bucket is turned
    into an instance of ``_bucket_class`` the first time ``buckets`` is read,
    and the wrapped collection is cached. Iteration, ``len()`` and
    ``int``/``slice`` indexing operate on that collection.
    """

    _bucket_class = Bucket
    _buckets: Union[AttrDict[Any], AttrList[Any]]

    def _wrap_bucket(self, data: Dict[str, Any]) -> Bucket[_R]:
        """Wrap one raw bucket, passing on the field stored in ``_meta`` (if any)."""
        meta = self._meta
        return self._bucket_class(
            meta["aggs"], meta["search"], data, field=meta.get("field")
        )

    def __iter__(self) -> Iterator["Agg"]:  # type: ignore[override]
        """Iterate over the wrapped buckets."""
        wrapped = self.buckets
        return iter(wrapped)  # type: ignore

    def __len__(self) -> int:
        """Number of buckets."""
        return len(self.buckets)

    def __getitem__(self, key: Any) -> Any:
        """Index into the buckets for ``int``/``slice`` keys, defer to the parent otherwise."""
        if not isinstance(key, (int, slice)):
            return super().__getitem__(key)
        return cast(AttrList[Any], self.buckets)[key]

    @property
    def buckets(self) -> Union[AttrDict[Any], AttrList[Any]]:
        """
        The wrapped buckets: an ``AttrList`` when the raw section is a list,
        an ``AttrDict`` keyed like the raw section otherwise.
        """
        if hasattr(self, "_buckets"):
            return self._buckets

        meta = self._meta
        field_name = getattr(meta["aggs"], "field", None)
        if field_name:
            # resolved once here and picked up by ``_wrap_bucket``
            meta["field"] = meta["search"]._resolve_field(field_name)

        raw = cast(Union[Dict[str, Any], List[Any]], self._d_["buckets"])
        if isinstance(raw, list):
            wrapped = AttrList(raw, obj_wrapper=self._wrap_bucket)
        else:
            wrapped = AttrDict[Any]({name: self._wrap_bucket(raw[name]) for name in raw})  # type: ignore

        # set as a plain attribute so it does not end up in the wrapped data
        super(AttrDict, self).__setattr__("_buckets", wrapped)
        return self._buckets
class Hit(AttrDict[Any]):
    """A single search hit.

    The hit's data is the raw document's ``_source`` (if present) updated
    with its ``fields`` (if present). The raw document is additionally
    wrapped in a ``HitMeta`` that is available as the ``meta`` attribute.
    """

    def __init__(self, document: Dict[str, Any]):
        """
        :arg document: one raw hit as found in the ``hits`` list of a response
        """
        data: Dict[str, Any] = {}
        if "_source" in document:
            data = cast(Dict[str, Any], document["_source"])
        if "fields" in document:
            # NOTE: when ``_source`` is present this updates that very
            # dictionary in place, it is not copied first
            data.update(cast(Dict[str, Any], document["fields"]))

        super().__init__(data)
        # assign meta as attribute and not as key in self._d_
        super(AttrDict, self).__setattr__("meta", HitMeta(document))

    def __getstate__(self) -> Tuple[Dict[str, Any], HitMeta]:  # type: ignore[override]
        """Return the parent's state tuple with ``meta`` appended."""
        # add self.meta since it is not in self.__dict__
        return super().__getstate__() + (self.meta,)

    def __setstate__(self, state: Tuple[Dict[str, Any], HitMeta]) -> None:  # type: ignore[override]
        """Restore ``meta`` from the last item and hand the rest to the parent."""
        super(AttrDict, self).__setattr__("meta", state[-1])
        super().__setstate__(state[:-1])

    def __dir__(self) -> List[str]:
        """List the parent's names plus ``meta``."""
        # be sure to expose meta in dir(self)
        return super().__dir__() + ["meta"]

    def __repr__(self) -> str:
        """
        Return ``<Hit(index/id): data>``, where the first part joins whichever
        of ``index`` and ``id`` are present in ``meta``.
        """
        # the template must contain both placeholders: calling ``format`` on
        # an empty string literal discards the arguments and yields ""
        return "<Hit({}): {}>".format(
            "/".join(
                getattr(self.meta, key) for key in ("index", "id") if key in self.meta
            ),
            super().__repr__(),
        )
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ._async.search import ( # noqa: F401 + AsyncEmptySearch, + AsyncMultiSearch, + AsyncSearch, +) +from ._sync.search import ( # noqa: F401 + EmptySearch, + MultiSearch, + Search, +) +from .search_base import Q # noqa: F401 diff --git a/elasticsearch/dsl/search_base.py b/elasticsearch/dsl/search_base.py new file mode 100644 index 000000000..ad4a56059 --- /dev/null +++ b/elasticsearch/dsl/search_base.py @@ -0,0 +1,1040 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import collections.abc +import copy +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Generic, + Iterator, + List, + Optional, + Protocol, + Tuple, + Type, + Union, + cast, + overload, +) + +from typing_extensions import Self, TypeVar + +from .aggs import A, Agg, AggBase +from .document_base import InstrumentedField +from .exceptions import IllegalOperation +from .query import Bool, Q, Query +from .response import Hit, Response +from .utils import _R, AnyUsingType, AttrDict, DslBase, recursive_to_dict + +if TYPE_CHECKING: + from .field import Field, Object + + +class SupportsClone(Protocol): + def _clone(self) -> Self: ... + + +_S = TypeVar("_S", bound=SupportsClone) + + +class QueryProxy(Generic[_S]): + """ + Simple proxy around DSL objects (queries) that can be called + (to add query/post_filter) and also allows attribute access which is proxied to + the wrapped query. + """ + + def __init__(self, search: _S, attr_name: str): + self._search = search + self._proxied: Optional[Query] = None + self._attr_name = attr_name + + def __nonzero__(self) -> bool: + return self._proxied is not None + + __bool__ = __nonzero__ + + def __call__(self, *args: Any, **kwargs: Any) -> _S: + s = self._search._clone() + + # we cannot use self._proxied since we just cloned self._search and + # need to access the new self on the clone + proxied = getattr(s, self._attr_name) + if proxied._proxied is None: + proxied._proxied = Q(*args, **kwargs) + else: + proxied._proxied &= Q(*args, **kwargs) + + # always return search to be chainable + return s + + def __getattr__(self, attr_name: str) -> Any: + return getattr(self._proxied, attr_name) + + def __setattr__(self, attr_name: str, value: Any) -> None: + if not attr_name.startswith("_"): + if self._proxied is not None: + self._proxied = Q(self._proxied.to_dict()) + setattr(self._proxied, attr_name, value) + super().__setattr__(attr_name, value) + + def __getstate__(self) -> Tuple[_S, Optional[Query], str]: + return 
self._search, self._proxied, self._attr_name + + def __setstate__(self, state: Tuple[_S, Optional[Query], str]) -> None: + self._search, self._proxied, self._attr_name = state + + +class ProxyDescriptor(Generic[_S]): + """ + Simple descriptor to enable setting of queries and filters as: + + s = Search() + s.query = Q(...) + + """ + + def __init__(self, name: str): + self._attr_name = f"_{name}_proxy" + + def __get__(self, instance: Any, owner: object) -> QueryProxy[_S]: + return cast(QueryProxy[_S], getattr(instance, self._attr_name)) + + def __set__(self, instance: _S, value: Dict[str, Any]) -> None: + proxy: QueryProxy[_S] = getattr(instance, self._attr_name) + proxy._proxied = Q(value) + + +class AggsProxy(AggBase[_R], DslBase): + name = "aggs" + + def __init__(self, search: "SearchBase[_R]"): + self._base = cast("Agg[_R]", self) + self._search = search + self._params = {"aggs": {}} + + def to_dict(self) -> Dict[str, Any]: + return cast(Dict[str, Any], super().to_dict().get("aggs", {})) + + +class Request(Generic[_R]): + def __init__( + self, + using: AnyUsingType = "default", + index: Optional[Union[str, List[str]]] = None, + doc_type: Optional[ + Union[type, str, List[Union[type, str]], Dict[str, Union[type, str]]] + ] = None, + extra: Optional[Dict[str, Any]] = None, + ): + self._using = using + + self._index = None + if isinstance(index, (tuple, list)): + self._index = list(index) + elif index: + self._index = [index] + + self._doc_type: List[Union[type, str]] = [] + self._doc_type_map: Dict[str, Any] = {} + if isinstance(doc_type, (tuple, list)): + self._doc_type.extend(doc_type) + elif isinstance(doc_type, collections.abc.Mapping): + self._doc_type.extend(doc_type.keys()) + self._doc_type_map.update(doc_type) + elif doc_type: + self._doc_type.append(doc_type) + + self._params: Dict[str, Any] = {} + self._extra: Dict[str, Any] = extra or {} + + def __eq__(self, other: Any) -> bool: + return ( + isinstance(other, Request) + and other._params == self._params 
+ and other._index == self._index + and other._doc_type == self._doc_type + and other.to_dict() == self.to_dict() + ) + + def __copy__(self) -> Self: + return self._clone() + + def params(self, **kwargs: Any) -> Self: + """ + Specify query params to be used when executing the search. All the + keyword arguments will override the current values. See + https://elasticsearch-py.readthedocs.io/en/latest/api/elasticsearch.html#elasticsearch.Elasticsearch.search + for all available parameters. + + Example:: + + s = Search() + s = s.params(routing='user-1', preference='local') + """ + s = self._clone() + s._params.update(kwargs) + return s + + def index(self, *index: Union[str, List[str], Tuple[str, ...]]) -> Self: + """ + Set the index for the search. If called empty it will remove all information. + + Example:: + + s = Search() + s = s.index('twitter-2015.01.01', 'twitter-2015.01.02') + s = s.index(['twitter-2015.01.01', 'twitter-2015.01.02']) + """ + # .index() resets + s = self._clone() + if not index: + s._index = None + else: + indexes = [] + for i in index: + if isinstance(i, str): + indexes.append(i) + elif isinstance(i, list): + indexes += i + elif isinstance(i, tuple): + indexes += list(i) + + s._index = (self._index or []) + indexes + + return s + + def _resolve_field(self, path: str) -> Optional["Field"]: + for dt in self._doc_type: + if not hasattr(dt, "_index"): + continue + field = dt._index.resolve_field(path) + if field is not None: + return cast("Field", field) + return None + + def _resolve_nested( + self, hit: AttrDict[Any], parent_class: Optional[type] = None + ) -> Type[_R]: + doc_class = Hit + + nested_path = [] + nesting = hit["_nested"] + while nesting and "field" in nesting: + nested_path.append(nesting["field"]) + nesting = nesting.get("_nested") + nested_path_str = ".".join(nested_path) + + nested_field: Optional["Object"] + if parent_class is not None and hasattr(parent_class, "_index"): + nested_field = cast( + Optional["Object"], 
def _get_result(
    self, hit: AttrDict[Any], parent_class: Optional[type] = None
) -> _R:
    """
    Turn one raw hit into a result object.

    The class used to build the result is chosen as follows:

    * for a hit carrying ``_nested``: whatever ``_resolve_nested`` returns;
    * else, if the hit's ``_type`` is a key of the doc type map: the mapped
      callback;
    * else the first registered doc type whose ``_matches(hit)`` is true;
    * ``Hit`` when nothing else applies.

    Every entry of the hit's ``inner_hits`` is replaced, in place, by a
    ``Response`` created with the chosen class as ``doc_class``. The result
    is built with the class's ``from_es`` if it has one, otherwise by calling
    the class itself.

    :arg hit: the raw hit
    :arg parent_class: passed through to ``_resolve_nested`` for nested hits
    """
    doc_class: Any = Hit
    hit_type = hit.get("_type")

    if "_nested" in hit:
        doc_class = self._resolve_nested(hit, parent_class)
    elif hit_type in self._doc_type_map:
        doc_class = self._doc_type_map[hit_type]
    else:
        matching = (
            candidate
            for candidate in self._doc_type
            if hasattr(candidate, "_matches") and candidate._matches(hit)
        )
        doc_class = next(matching, Hit)

    for name in hit.get("inner_hits", ()):
        hit["inner_hits"][name] = Response[_R](
            self, hit["inner_hits"][name], doc_class=doc_class
        )

    build = getattr(doc_class, "from_es", doc_class)
    return cast(_R, build(hit))
+ + :arg client: an instance of ``elasticsearch.Elasticsearch`` to use or + an alias to look up in ``elasticsearch.dsl.connections`` + + """ + s = self._clone() + s._using = client + return s + + def extra(self, **kwargs: Any) -> Self: + """ + Add extra keys to the request body. Mostly here for backwards + compatibility. + """ + s = self._clone() + if "from_" in kwargs: + kwargs["from"] = kwargs.pop("from_") + s._extra.update(kwargs) + return s + + def _clone(self) -> Self: + s = self.__class__( + using=self._using, index=self._index, doc_type=self._doc_type + ) + s._doc_type_map = self._doc_type_map.copy() + s._extra = self._extra.copy() + s._params = self._params.copy() + return s + + if TYPE_CHECKING: + + def to_dict(self) -> Dict[str, Any]: ... + + +class SearchBase(Request[_R]): + query = ProxyDescriptor[Self]("query") + post_filter = ProxyDescriptor[Self]("post_filter") + _response: Response[_R] + + def __init__(self, **kwargs: Any): + """ + Search request to elasticsearch. + + :arg using: `Elasticsearch` instance to use + :arg index: limit the search to index + :arg doc_type: only query this type. + + All the parameters supplied (or omitted) at creation type can be later + overridden by methods (`using`, `index` and `doc_type` respectively). 
def __getitem__(self, n: Union[int, slice]) -> Self:
    """
    Support slicing the `Search` instance for pagination.

    Slicing equates to the from/size parameters. E.g.::

        s = Search().query(...)[0:25]

    is equivalent to::

        s = Search().query(...).extra(from_=0, size=25)

    An integer ``n`` is treated like the slice ``[n:n+1]``. The slice is
    applied relative to a ``from``/``size`` window already present on the
    search and never extends past the end of that window.

    :raises ValueError: for a negative index or negative slice bounds
    """
    s = self._clone()

    if isinstance(n, slice):
        # If negative slicing, abort.
        if any(bound and bound < 0 for bound in (n.start, n.stop)):
            raise ValueError("Search does not support negative slicing.")
        slice_start, slice_stop = n.start, n.stop
    else:
        # This is an index lookup, equivalent to slicing by [n:n+1].
        # If negative index, abort.
        if n < 0:
            raise ValueError("Search does not support negative indexing.")
        slice_start, slice_stop = n, n + 1

    # window currently configured on the search, as [old_from, old_to)
    old_from = s._extra.get("from")
    offset = old_from or 0
    old_to = offset + s._extra["size"] if "size" in s._extra else None

    new_from = old_from if slice_start is None else offset + slice_start

    new_to = old_to
    if slice_stop is not None:
        new_to = offset + slice_stop
        if old_to is not None:
            # do not reach beyond the end of the existing window
            new_to = min(new_to, old_to)

    if new_from is not None:
        s._extra["from"] = new_from
    if new_to is not None:
        s._extra["size"] = max(0, new_to - (new_from or 0))
    return s
+ """ + s = super()._clone() + + s._response_class = self._response_class + s._knn = [knn.copy() for knn in self._knn] + s._rank = self._rank.copy() + s._collapse = self._collapse.copy() + s._sort = self._sort[:] + s._source = copy.copy(self._source) if self._source is not None else None + s._highlight = self._highlight.copy() + s._highlight_opts = self._highlight_opts.copy() + s._suggest = self._suggest.copy() + s._script_fields = self._script_fields.copy() + for x in ("query", "post_filter"): + getattr(s, x)._proxied = getattr(self, x)._proxied + + # copy top-level bucket definitions + if self.aggs._params.get("aggs"): + s.aggs._params = {"aggs": self.aggs._params["aggs"].copy()} + return s + + def response_class(self, cls: Type[Response[_R]]) -> Self: + """ + Override the default wrapper used for the response. + """ + s = self._clone() + s._response_class = cls + return s + + def update_from_dict(self, d: Dict[str, Any]) -> Self: + """ + Apply options from a serialized body to the current instance. Modifies + the object in-place. Used mostly by ``from_dict``. 
+ """ + d = d.copy() + if "query" in d: + self.query._proxied = Q(d.pop("query")) + if "post_filter" in d: + self.post_filter._proxied = Q(d.pop("post_filter")) + + aggs = d.pop("aggs", d.pop("aggregations", {})) + if aggs: + self.aggs._params = { + "aggs": {name: A(value) for (name, value) in aggs.items()} + } + if "knn" in d: + self._knn = d.pop("knn") + if isinstance(self._knn, dict): + self._knn = [self._knn] + if "rank" in d: + self._rank = d.pop("rank") + if "collapse" in d: + self._collapse = d.pop("collapse") + if "sort" in d: + self._sort = d.pop("sort") + if "_source" in d: + self._source = d.pop("_source") + if "highlight" in d: + high = d.pop("highlight").copy() + self._highlight = high.pop("fields") + self._highlight_opts = high + if "suggest" in d: + self._suggest = d.pop("suggest") + if "text" in self._suggest: + text = self._suggest.pop("text") + for s in self._suggest.values(): + s.setdefault("text", text) + if "script_fields" in d: + self._script_fields = d.pop("script_fields") + self._extra.update(d) + return self + + def script_fields(self, **kwargs: Any) -> Self: + """ + Define script fields to be calculated on hits. See + https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-script-fields.html + for more details. 
def knn(
    self,
    field: Union[str, "InstrumentedField"],
    k: int,
    num_candidates: int,
    query_vector: Optional[List[float]] = None,
    query_vector_builder: Optional[Dict[str, Any]] = None,
    boost: Optional[float] = None,
    filter: Optional[Query] = None,
    similarity: Optional[float] = None,
    inner_hits: Optional[Dict[str, Any]] = None,
) -> Self:
    """
    Add a k-nearest neighbor (kNN) search.

    Returns a copy of the search with one more kNN entry; exactly one of
    ``query_vector`` and ``query_vector_builder`` has to be given.

    :arg field: the vector field to search against as a string or document class attribute
    :arg k: number of nearest neighbors to return as top hits
    :arg num_candidates: number of nearest neighbor candidates to consider per shard
    :arg query_vector: the vector to search for
    :arg query_vector_builder: A dictionary indicating how to build a query vector
    :arg boost: A floating-point boost factor for kNN scores
    :arg filter: query to filter the documents that can match
    :arg similarity: the minimum similarity required for a document to be considered a match, as a float value
    :arg inner_hits: retrieve hits from nested field
    :raises ValueError: if neither or both of ``query_vector`` and
        ``query_vector_builder`` are given

    Example::

        s = Search()
        s = s.knn(field='embedding', k=5, num_candidates=10, query_vector=vector,
                  filter=Q('term', category='blog'))
    """
    s = self._clone()
    entry: Dict[str, Any] = {
        "field": str(field),  # str() is for InstrumentedField instances
        "k": k,
        "num_candidates": num_candidates,
    }
    # ``entry`` stays the last element of the list, so filling it in below
    # fills in the new kNN search
    s._knn.append(entry)

    has_vector = query_vector is not None
    has_builder = query_vector_builder is not None
    if not (has_vector or has_builder):
        raise ValueError("one of query_vector and query_vector_builder is required")
    if has_vector and has_builder:
        raise ValueError(
            "only one of query_vector and query_vector_builder must be given"
        )

    # exactly one of the two is set at this point
    if has_vector:
        entry["query_vector"] = cast(Any, query_vector)
    else:
        entry["query_vector_builder"] = query_vector_builder

    if boost is not None:
        entry["boost"] = boost
    if filter is not None:
        # query objects are serialized, anything else is stored as given
        entry["filter"] = filter.to_dict() if isinstance(filter, Query) else filter
    if similarity is not None:
        entry["similarity"] = similarity
    if inner_hits is not None:
        entry["inner_hits"] = inner_hits
    return s
If ``fields`` is a string or list of strings, the field names or field + wildcards given will be included. If ``fields`` is a dictionary with keys of + 'includes' and/or 'excludes' the fields will be either included or excluded + appropriately. + + Calling this multiple times with the same named parameter will override the + previous values with the new ones. + + Example:: + + s = Search() + s = s.source(includes=['obj1.*'], excludes=["*.description"]) + + s = Search() + s = s.source(includes=['obj1.*']).source(excludes=["*.description"]) + + """ + s = self._clone() + + if fields and kwargs: + raise ValueError("You cannot specify fields and kwargs at the same time.") + + @overload + def ensure_strings(fields: str) -> str: ... + + @overload + def ensure_strings(fields: "InstrumentedField") -> str: ... + + @overload + def ensure_strings( + fields: List[Union[str, "InstrumentedField"]] + ) -> List[str]: ... + + @overload + def ensure_strings( + fields: Dict[str, List[Union[str, "InstrumentedField"]]] + ) -> Dict[str, List[str]]: ... 
+ + def ensure_strings( + fields: Union[ + str, + "InstrumentedField", + List[Union[str, "InstrumentedField"]], + Dict[str, List[Union[str, "InstrumentedField"]]], + ] + ) -> Union[str, List[str], Dict[str, List[str]]]: + if isinstance(fields, dict): + return {k: ensure_strings(v) for k, v in fields.items()} + elif not isinstance(fields, (str, InstrumentedField)): + # we assume that if `fields` is not a any of [dict, str, + # InstrumentedField] then it is an iterable of strings or + # InstrumentedFields, so we convert them to a plain list of + # strings + return [str(f) for f in fields] + else: + return str(fields) + + if fields is not None: + s._source = fields if isinstance(fields, bool) else ensure_strings(fields) # type: ignore[assignment] + return s + + if kwargs and not isinstance(s._source, dict): + s._source = {} + + if isinstance(s._source, dict): + for key, value in kwargs.items(): + if value is None: + try: + del s._source[key] + except KeyError: + pass + else: + s._source[key] = ensure_strings(value) + + return s + + def sort( + self, *keys: Union[str, "InstrumentedField", Dict[str, Dict[str, str]]] + ) -> Self: + """ + Add sorting information to the search request. If called without + arguments it will remove all sort requirements. Otherwise it will + replace them. Acceptable arguments are:: + + 'some.field' + '-some.other.field' + {'different.field': {'any': 'dict'}} + + so for example:: + + s = Search().sort( + 'category', + '-title', + {"price" : {"order" : "asc", "mode" : "avg"}} + ) + + will sort by ``category``, ``title`` (in descending order) and + ``price`` in ascending order using the ``avg`` mode. + + The API returns a copy of the Search object and can thus be chained. 
def collapse(
    self,
    field: Optional[Union[str, "InstrumentedField"]] = None,
    inner_hits: Optional[Dict[str, Any]] = None,
    max_concurrent_group_searches: Optional[int] = None,
) -> Self:
    """
    Set the collapsing information of the search request.

    Without ``field`` every collapse setting is dropped; with it, the
    previous settings are replaced by the arguments given here. Falsy
    ``inner_hits`` and ``max_concurrent_group_searches`` values are left out.

    The API returns a copy of the Search object and can thus be chained.
    """
    s = self._clone()

    if field is None:
        s._collapse = {}
        return s

    settings: Dict[str, Any] = {"field": str(field)}
    if inner_hits:
        settings["inner_hits"] = inner_hits
    if max_concurrent_group_searches:
        settings["max_concurrent_group_searches"] = max_concurrent_group_searches

    s._collapse = settings
    return s
def suggest(
    self,
    name: str,
    text: Optional[str] = None,
    regex: Optional[str] = None,
    **kwargs: Any,
) -> Self:
    """
    Add a suggestions request to the search.

    :arg name: name of the suggestion
    :arg text: text to suggest on; stored as ``prefix`` when a ``completion``
        keyword argument is given and as ``text`` otherwise
    :arg regex: regular expression for the completion suggester; requires a
        ``completion`` keyword argument

    All keyword arguments will be added to the suggestions body. For example::

        s = Search()
        s = s.suggest('suggestion-1', 'Elasticsearch', term={'field': 'body'})

        # regex query for Completion Suggester
        s = Search()
        s = s.suggest('suggestion-1', regex='py[thon|py]', completion={'field': 'body'})

    An empty string is accepted: it is used as the suggestion input when the
    other argument is omitted or empty as well.

    :raises ValueError: if neither ``text`` nor ``regex`` is given, if both are
        non-empty, or if ``regex`` is used without ``completion``.
    """
    if text is None and regex is None:
        raise ValueError('You have to pass "text" or "regex" argument.')
    if text and regex:
        raise ValueError('You can only pass either "text" or "regex" argument.')

    # Decide on the input explicitly instead of relying on truthiness alone:
    # an empty string used to match neither branch, so the final ``update``
    # hit a missing key (or an older suggestion stored under the same name).
    use_regex = bool(regex) or text is None
    if use_regex and "completion" not in kwargs:
        raise ValueError(
            '"regex" argument must be passed with "completion" keyword argument.'
        )

    suggestion: Dict[str, Any]
    if use_regex:
        suggestion = {"regex": regex}
    elif "completion" in kwargs:
        suggestion = {"prefix": text}
    else:
        suggestion = {"text": text}
    suggestion.update(kwargs)

    s = self._clone()
    s._suggest[name] = suggestion
    return s
def to_dict(self, count: bool = False, **kwargs: Any) -> Dict[str, Any]:
    """
    Serialize the search into the dictionary that will be sent over as the
    request's body.

    :arg count: a flag to specify if we are interested in a body for count -
        no aggregations, no pagination bounds etc.

    All additional keyword arguments will be included into the dictionary;
    they are merged last.
    """
    body: Dict[str, Any] = {}

    if self.query:
        body["query"] = recursive_to_dict(self.query)

    if self._knn:
        # a single kNN clause is emitted as an object, several as a list
        body["knn"] = self._knn[0] if len(self._knn) == 1 else self._knn

    if self._rank:
        body["rank"] = self._rank

    # everything below is irrelevant for a count request
    if not count:
        if self.post_filter:
            body["post_filter"] = recursive_to_dict(self.post_filter.to_dict())

        if self.aggs.aggs:
            body.update(recursive_to_dict(self.aggs.to_dict()))

        if self._sort:
            body["sort"] = self._sort

        if self._collapse:
            body["collapse"] = self._collapse

        body.update(recursive_to_dict(self._extra))

        if self._source not in (None, {}):
            body["_source"] = self._source

        if self._highlight:
            # per-field settings first, then the global highlight options
            highlight: Dict[str, Any] = {"fields": self._highlight}
            highlight.update(self._highlight_opts)
            body["highlight"] = highlight

        if self._suggest:
            body["suggest"] = self._suggest

        if self._script_fields:
            body["script_fields"] = self._script_fields

    body.update(recursive_to_dict(kwargs))
    return body
+ """ + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + self._searches: List[SearchBase[_R]] = [] + + def __getitem__(self, key: Union[int, slice]) -> Any: + return self._searches[key] + + def __iter__(self) -> Iterator[SearchBase[_R]]: + return iter(self._searches) + + def _clone(self) -> Self: + ms = super()._clone() + ms._searches = self._searches[:] + return ms + + def add(self, search: SearchBase[_R]) -> Self: + """ + Adds a new :class:`~elasticsearch.dsl.Search` object to the request:: + + ms = MultiSearch(index='my-index') + ms = ms.add(Search(doc_type=Category).filter('term', category='python')) + ms = ms.add(Search(doc_type=Blog)) + """ + ms = self._clone() + ms._searches.append(search) + return ms + + def to_dict(self) -> List[Dict[str, Any]]: # type: ignore[override] + out: List[Dict[str, Any]] = [] + for s in self._searches: + meta: Dict[str, Any] = {} + if s._index: + meta["index"] = cast(Any, s._index) + meta.update(s._params) + + out.append(meta) + out.append(s.to_dict()) + + return out diff --git a/elasticsearch/dsl/serializer.py b/elasticsearch/dsl/serializer.py new file mode 100644 index 000000000..3080f1dad --- /dev/null +++ b/elasticsearch/dsl/serializer.py @@ -0,0 +1,34 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
class AttrJSONSerializer(JSONSerializer):
    """
    ``JSONSerializer`` that also knows how to encode DSL helper objects.
    """

    def default(self, data: Any) -> Any:
        """
        Return a serializable stand-in for ``data``.

        ``AttrList`` instances yield their ``_l_`` attribute (presumably the
        wrapped plain list - confirm in ``utils``), objects exposing
        ``to_dict`` yield its result, everything else is left to the parent
        serializer.
        """
        if isinstance(data, AttrList):
            return data._l_
        if not hasattr(data, "to_dict"):
            return super().default(data)
        return data.to_dict()
class BucketCorrelationFunction(AttrDict[Any]):
    """
    Arguments left at ``DEFAULT`` are not stored.

    :arg count_correlation: (required) The configuration to calculate a
        count correlation. This function is designed for determining the
        correlation of a term value and a given metric.
    """

    count_correlation: Union[
        "BucketCorrelationFunctionCountCorrelation", Dict[str, Any], DefaultType
    ]

    def __init__(
        self,
        *,
        count_correlation: Union[
            "BucketCorrelationFunctionCountCorrelation", Dict[str, Any], DefaultType
        ] = DEFAULT,
        **kwargs: Any,
    ):
        explicit = {"count_correlation": count_correlation}
        # forward only what the caller actually supplied
        kwargs.update(
            {name: value for name, value in explicit.items() if value is not DEFAULT}
        )
        super().__init__(kwargs)
class ChiSquareHeuristic(AttrDict[Any]):
    """
    Arguments left at ``DEFAULT`` are not stored.

    :arg background_is_superset: (required) Set to `false` if you defined
        a custom background filter that represents a different set of
        documents that you want to compare to.
    :arg include_negatives: (required) Set to `false` to filter out the
        terms that appear less often in the subset than in documents
        outside the subset.
    """

    background_is_superset: Union[bool, DefaultType]
    include_negatives: Union[bool, DefaultType]

    def __init__(
        self,
        *,
        background_is_superset: Union[bool, DefaultType] = DEFAULT,
        include_negatives: Union[bool, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        explicit = {
            "background_is_superset": background_is_superset,
            "include_negatives": include_negatives,
        }
        # forward only what the caller actually supplied
        kwargs.update(
            {name: value for name, value in explicit.items() if value is not DEFAULT}
        )
        super().__init__(kwargs)
class CommonTermsQuery(AttrDict[Any]):
    """
    Arguments left at ``DEFAULT`` are not stored.

    :arg query: (required)
    :arg analyzer:
    :arg cutoff_frequency:
    :arg high_freq_operator:
    :arg low_freq_operator:
    :arg minimum_should_match:
    :arg boost: Floating point number used to decrease or increase the
        relevance scores of the query. Boost values are relative to the
        default value of 1.0. A boost value between 0 and 1.0 decreases
        the relevance score. A value greater than 1.0 increases the
        relevance score. Defaults to `1` if omitted.
    :arg _name:
    """

    query: Union[str, DefaultType]
    analyzer: Union[str, DefaultType]
    cutoff_frequency: Union[float, DefaultType]
    high_freq_operator: Union[Literal["and", "or"], DefaultType]
    low_freq_operator: Union[Literal["and", "or"], DefaultType]
    minimum_should_match: Union[int, str, DefaultType]
    boost: Union[float, DefaultType]
    _name: Union[str, DefaultType]

    def __init__(
        self,
        *,
        query: Union[str, DefaultType] = DEFAULT,
        analyzer: Union[str, DefaultType] = DEFAULT,
        cutoff_frequency: Union[float, DefaultType] = DEFAULT,
        high_freq_operator: Union[Literal["and", "or"], DefaultType] = DEFAULT,
        low_freq_operator: Union[Literal["and", "or"], DefaultType] = DEFAULT,
        minimum_should_match: Union[int, str, DefaultType] = DEFAULT,
        boost: Union[float, DefaultType] = DEFAULT,
        _name: Union[str, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        explicit = {
            "query": query,
            "analyzer": analyzer,
            "cutoff_frequency": cutoff_frequency,
            "high_freq_operator": high_freq_operator,
            "low_freq_operator": low_freq_operator,
            "minimum_should_match": minimum_should_match,
            "boost": boost,
            "_name": _name,
        }
        # forward only what the caller actually supplied
        kwargs.update(
            {name: value for name, value in explicit.items() if value is not DEFAULT}
        )
        super().__init__(kwargs)
class CustomCategorizeTextAnalyzer(AttrDict[Any]):
    """
    Arguments left at ``DEFAULT`` are not stored.

    :arg char_filter:
    :arg tokenizer:
    :arg filter:
    """

    char_filter: Union[Sequence[str], DefaultType]
    tokenizer: Union[str, DefaultType]
    filter: Union[Sequence[str], DefaultType]

    def __init__(
        self,
        *,
        char_filter: Union[Sequence[str], DefaultType] = DEFAULT,
        tokenizer: Union[str, DefaultType] = DEFAULT,
        filter: Union[Sequence[str], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        explicit = {
            "char_filter": char_filter,
            "tokenizer": tokenizer,
            "filter": filter,
        }
        # forward only what the caller actually supplied
        kwargs.update(
            {name: value for name, value in explicit.items() if value is not DEFAULT}
        )
        super().__init__(kwargs)
class ExtendedBounds(AttrDict[Any]):
    """
    Arguments left at ``DEFAULT`` are not stored.

    :arg max: Maximum value for the bound.
    :arg min: Minimum value for the bound.
    """

    max: Any
    min: Any

    def __init__(self, *, max: Any = DEFAULT, min: Any = DEFAULT, **kwargs: Any):
        explicit = {"max": max, "min": min}
        # forward only what the caller actually supplied
        kwargs.update(
            {name: value for name, value in explicit.items() if value is not DEFAULT}
        )
        super().__init__(kwargs)
class FieldCollapse(AttrDict[Any]):
    """
    Arguments left at ``DEFAULT`` are not stored; ``field`` is stored in its
    string form.

    :arg field: (required) The field to collapse the result set on
    :arg inner_hits: The number of inner hits and their sort order
    :arg max_concurrent_group_searches: The number of concurrent requests
        allowed to retrieve the inner_hits per group
    :arg collapse:
    """

    field: Union[str, InstrumentedField, DefaultType]
    inner_hits: Union[
        "InnerHits", Sequence["InnerHits"], Sequence[Dict[str, Any]], DefaultType
    ]
    max_concurrent_group_searches: Union[int, DefaultType]
    collapse: Union["FieldCollapse", Dict[str, Any], DefaultType]

    def __init__(
        self,
        *,
        field: Union[str, InstrumentedField, DefaultType] = DEFAULT,
        inner_hits: Union[
            "InnerHits", Sequence["InnerHits"], Sequence[Dict[str, Any]], DefaultType
        ] = DEFAULT,
        max_concurrent_group_searches: Union[int, DefaultType] = DEFAULT,
        collapse: Union["FieldCollapse", Dict[str, Any], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        # field references are normalised to plain strings before storing
        if field is not DEFAULT:
            kwargs["field"] = str(field)
        explicit = {
            "inner_hits": inner_hits,
            "max_concurrent_group_searches": max_concurrent_group_searches,
            "collapse": collapse,
        }
        # forward only what the caller actually supplied
        kwargs.update(
            {name: value for name, value in explicit.items() if value is not DEFAULT}
        )
        super().__init__(kwargs)
# Accepted values of ``FieldSort.unmapped_type``; shared by the attribute
# annotation and the constructor signature.
_FieldSortUnmappedType = Literal[
    "none",
    "geo_point",
    "geo_shape",
    "ip",
    "binary",
    "keyword",
    "text",
    "search_as_you_type",
    "date",
    "date_nanos",
    "boolean",
    "completion",
    "nested",
    "object",
    "version",
    "murmur3",
    "token_count",
    "percolator",
    "integer",
    "long",
    "short",
    "byte",
    "float",
    "half_float",
    "scaled_float",
    "double",
    "integer_range",
    "float_range",
    "long_range",
    "double_range",
    "date_range",
    "ip_range",
    "alias",
    "join",
    "rank_feature",
    "rank_features",
    "flattened",
    "shape",
    "histogram",
    "constant_keyword",
    "aggregate_metric_double",
    "dense_vector",
    "semantic_text",
    "sparse_vector",
    "match_only_text",
    "icu_collation_keyword",
]


class FieldSort(AttrDict[Any]):
    """
    Arguments left at ``DEFAULT`` are not stored.

    :arg missing:
    :arg mode:
    :arg nested:
    :arg order:
    :arg unmapped_type:
    :arg numeric_type:
    :arg format:
    """

    missing: Union[str, int, float, bool, DefaultType]
    mode: Union[Literal["min", "max", "sum", "avg", "median"], DefaultType]
    nested: Union["NestedSortValue", Dict[str, Any], DefaultType]
    order: Union[Literal["asc", "desc"], DefaultType]
    unmapped_type: Union[_FieldSortUnmappedType, DefaultType]
    numeric_type: Union[Literal["long", "double", "date", "date_nanos"], DefaultType]
    format: Union[str, DefaultType]

    def __init__(
        self,
        *,
        missing: Union[str, int, float, bool, DefaultType] = DEFAULT,
        mode: Union[
            Literal["min", "max", "sum", "avg", "median"], DefaultType
        ] = DEFAULT,
        nested: Union["NestedSortValue", Dict[str, Any], DefaultType] = DEFAULT,
        order: Union[Literal["asc", "desc"], DefaultType] = DEFAULT,
        unmapped_type: Union[_FieldSortUnmappedType, DefaultType] = DEFAULT,
        numeric_type: Union[
            Literal["long", "double", "date", "date_nanos"], DefaultType
        ] = DEFAULT,
        format: Union[str, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        explicit = {
            "missing": missing,
            "mode": mode,
            "nested": nested,
            "order": order,
            "unmapped_type": unmapped_type,
            "numeric_type": numeric_type,
            "format": format,
        }
        # forward only what the caller actually supplied
        kwargs.update(
            {name: value for name, value in explicit.items() if value is not DEFAULT}
        )
        super().__init__(kwargs)
class FuzzyQuery(AttrDict[Any]):
    """
    Arguments left at ``DEFAULT`` are not stored.

    :arg value: (required) Term you wish to find in the provided field.
    :arg max_expansions: Maximum number of variations created. Defaults to
        `50` if omitted.
    :arg prefix_length: Number of beginning characters left unchanged when
        creating expansions.
    :arg rewrite: Method used to rewrite the query. Defaults to
        `constant_score` if omitted.
    :arg transpositions: Indicates whether edits include transpositions of
        two adjacent characters (for example `ab` to `ba`). Defaults to
        `True` if omitted.
    :arg fuzziness: Maximum edit distance allowed for matching.
    :arg boost: Floating point number used to decrease or increase the
        relevance scores of the query. Boost values are relative to the
        default value of 1.0. A boost value between 0 and 1.0 decreases
        the relevance score. A value greater than 1.0 increases the
        relevance score. Defaults to `1` if omitted.
    :arg _name:
    """

    value: Union[str, float, bool, DefaultType]
    max_expansions: Union[int, DefaultType]
    prefix_length: Union[int, DefaultType]
    rewrite: Union[str, DefaultType]
    transpositions: Union[bool, DefaultType]
    fuzziness: Union[str, int, DefaultType]
    boost: Union[float, DefaultType]
    _name: Union[str, DefaultType]

    def __init__(
        self,
        *,
        value: Union[str, float, bool, DefaultType] = DEFAULT,
        max_expansions: Union[int, DefaultType] = DEFAULT,
        prefix_length: Union[int, DefaultType] = DEFAULT,
        rewrite: Union[str, DefaultType] = DEFAULT,
        transpositions: Union[bool, DefaultType] = DEFAULT,
        fuzziness: Union[str, int, DefaultType] = DEFAULT,
        boost: Union[float, DefaultType] = DEFAULT,
        _name: Union[str, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        explicit = {
            "value": value,
            "max_expansions": max_expansions,
            "prefix_length": prefix_length,
            "rewrite": rewrite,
            "transpositions": transpositions,
            "fuzziness": fuzziness,
            "boost": boost,
            "_name": _name,
        }
        # forward only what the caller actually supplied
        kwargs.update(
            {name: value_ for name, value_ in explicit.items() if value_ is not DEFAULT}
        )
        super().__init__(kwargs)
+ :arg mode: + :arg distance_type: + :arg ignore_unmapped: + :arg order: + :arg unit: + :arg nested: + """ + + _field: Union[str, "InstrumentedField", "DefaultType"] + _value: Union[ + Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str], + Sequence[Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str]], + Dict[str, Any], + "DefaultType", + ] + mode: Union[Literal["min", "max", "sum", "avg", "median"], DefaultType] + distance_type: Union[Literal["arc", "plane"], DefaultType] + ignore_unmapped: Union[bool, DefaultType] + order: Union[Literal["asc", "desc"], DefaultType] + unit: Union[ + Literal["in", "ft", "yd", "mi", "nmi", "km", "m", "cm", "mm"], DefaultType + ] + nested: Union["NestedSortValue", Dict[str, Any], DefaultType] + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str], + Sequence[ + Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str] + ], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + *, + mode: Union[ + Literal["min", "max", "sum", "avg", "median"], DefaultType + ] = DEFAULT, + distance_type: Union[Literal["arc", "plane"], DefaultType] = DEFAULT, + ignore_unmapped: Union[bool, DefaultType] = DEFAULT, + order: Union[Literal["asc", "desc"], DefaultType] = DEFAULT, + unit: Union[ + Literal["in", "ft", "yd", "mi", "nmi", "km", "m", "cm", "mm"], DefaultType + ] = DEFAULT, + nested: Union["NestedSortValue", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + if mode is not DEFAULT: + kwargs["mode"] = mode + if distance_type is not DEFAULT: + kwargs["distance_type"] = distance_type + if ignore_unmapped is not DEFAULT: + kwargs["ignore_unmapped"] = ignore_unmapped + if order is not DEFAULT: + kwargs["order"] = order + if unit is not DEFAULT: + kwargs["unit"] = unit + if nested is not DEFAULT: + kwargs["nested"] = 
class GeoHashLocation(AttrDict[Any]):
    """
    Stores ``geohash`` in the underlying attribute dictionary when it is
    explicitly supplied.

    :arg geohash: (required)
    """

    geohash: Union[str, DefaultType]

    def __init__(self, *, geohash: Union[str, DefaultType] = DEFAULT, **kwargs: Any):
        supplied = {} if geohash is DEFAULT else {"geohash": geohash}
        kwargs.update(supplied)
        super().__init__(kwargs)


class GeoLinePoint(AttrDict[Any]):
    """
    Stores the stringified ``field`` in the underlying attribute dictionary
    when it is explicitly supplied.

    :arg field: (required) The name of the geo_point field.
    """

    field: Union[str, InstrumentedField, DefaultType]

    def __init__(
        self,
        *,
        field: Union[str, InstrumentedField, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        # str() is only evaluated when a field was actually given.
        supplied = {} if field is DEFAULT else {"field": str(field)}
        kwargs.update(supplied)
        super().__init__(kwargs)


class GeoLineSort(AttrDict[Any]):
    """
    Stores the stringified ``field`` in the underlying attribute dictionary
    when it is explicitly supplied.

    :arg field: (required) The name of the numeric field to use as the
        sort key for ordering the points.
    """

    field: Union[str, InstrumentedField, DefaultType]

    def __init__(
        self,
        *,
        field: Union[str, InstrumentedField, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        # str() is only evaluated when a field was actually given.
        supplied = {} if field is DEFAULT else {"field": str(field)}
        kwargs.update(supplied)
        super().__init__(kwargs)


class GeoPolygonPoints(AttrDict[Any]):
    """
    Stores ``points`` in the underlying attribute dictionary when it is
    explicitly supplied.

    :arg points: (required)
    """

    points: Union[
        Sequence[Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str]],
        Dict[str, Any],
        DefaultType,
    ]

    def __init__(
        self,
        *,
        points: Union[
            Sequence[
                Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str]
            ],
            Dict[str, Any],
            DefaultType,
        ] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = {} if points is DEFAULT else {"points": points}
        kwargs.update(supplied)
        super().__init__(kwargs)


class GeoShapeFieldQuery(AttrDict[Any]):
    """
    Stores every explicitly supplied option in the underlying attribute
    dictionary under its own name.

    :arg shape:
    :arg indexed_shape: Query using an indexed shape retrieved from the
        specified document and path.
    :arg relation: Spatial relation operator used to search a geo field.
        Defaults to `intersects` if omitted.
    """

    shape: Any
    indexed_shape: Union["FieldLookup", Dict[str, Any], DefaultType]
    relation: Union[
        Literal["intersects", "disjoint", "within", "contains"], DefaultType
    ]

    def __init__(
        self,
        *,
        shape: Any = DEFAULT,
        indexed_shape: Union["FieldLookup", Dict[str, Any], DefaultType] = DEFAULT,
        relation: Union[
            Literal["intersects", "disjoint", "within", "contains"], DefaultType
        ] = DEFAULT,
        **kwargs: Any,
    ):
        candidates = (
            ("shape", shape),
            ("indexed_shape", indexed_shape),
            ("relation", relation),
        )
        for option_name, option_value in candidates:
            if option_value is not DEFAULT:
                kwargs[option_name] = option_value
        super().__init__(kwargs)
class GoogleNormalizedDistanceHeuristic(AttrDict[Any]):
    """
    Stores ``background_is_superset`` in the underlying attribute
    dictionary when it is explicitly supplied.

    :arg background_is_superset: Set to `false` if you defined a custom
        background filter that represents a different set of documents
        that you want to compare to.
    """

    background_is_superset: Union[bool, DefaultType]

    def __init__(
        self,
        *,
        background_is_superset: Union[bool, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        if background_is_superset is not DEFAULT:
            kwargs["background_is_superset"] = background_is_superset
        super().__init__(kwargs)


class HdrMethod(AttrDict[Any]):
    """
    Stores ``number_of_significant_value_digits`` in the underlying
    attribute dictionary when it is explicitly supplied.

    :arg number_of_significant_value_digits: Specifies the resolution of
        values for the histogram in number of significant digits.
    """

    number_of_significant_value_digits: Union[int, DefaultType]

    def __init__(
        self,
        *,
        number_of_significant_value_digits: Union[int, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        if number_of_significant_value_digits is not DEFAULT:
            kwargs["number_of_significant_value_digits"] = (
                number_of_significant_value_digits
            )
        super().__init__(kwargs)


class Highlight(AttrDict[Any]):
    """
    Stores every explicitly supplied option in the underlying attribute
    dictionary under its own name. When ``fields`` is a mapping, its keys
    are converted to strings and its values are kept unchanged.

    :arg fields: (required)
    :arg encoder:
    :arg type:
    :arg boundary_chars: A string that contains each boundary character.
        Defaults to `.,!? \\t\\n` if omitted.
    :arg boundary_max_scan: How far to scan for boundary characters.
        Defaults to `20` if omitted.
    :arg boundary_scanner: Specifies how to break the highlighted
        fragments: chars, sentence, or word. Only valid for the unified
        and fvh highlighters. Defaults to `sentence` for the `unified`
        highlighter. Defaults to `chars` for the `fvh` highlighter.
    :arg boundary_scanner_locale: Controls which locale is used to search
        for sentence and word boundaries. This parameter takes a form of a
        language tag, for example: `"en-US"`, `"fr-FR"`, `"ja-JP"`.
        Defaults to `Locale.ROOT` if omitted.
    :arg force_source:
    :arg fragmenter: Specifies how text should be broken up in highlight
        snippets: `simple` or `span`. Only valid for the `plain`
        highlighter. Defaults to `span` if omitted.
    :arg fragment_size: The size of the highlighted fragment in
        characters. Defaults to `100` if omitted.
    :arg highlight_filter:
    :arg highlight_query: Highlight matches for a query other than the
        search query. This is especially useful if you use a rescore query
        because those are not taken into account by highlighting by
        default.
    :arg max_fragment_length:
    :arg max_analyzed_offset: If set to a non-negative value, highlighting
        stops at this defined maximum limit. The rest of the text is not
        processed, thus not highlighted and no error is returned. The
        `max_analyzed_offset` query setting does not override the
        `index.highlight.max_analyzed_offset` setting, which prevails when
        it is set to a lower value than the query setting.
    :arg no_match_size: The amount of text you want to return from the
        beginning of the field if there are no matching fragments to
        highlight.
    :arg number_of_fragments: The maximum number of fragments to return.
        If the number of fragments is set to `0`, no fragments are
        returned. Instead, the entire field contents are highlighted and
        returned. This can be handy when you need to highlight short texts
        such as a title or address, but fragmentation is not required. If
        `number_of_fragments` is `0`, `fragment_size` is ignored. Defaults
        to `5` if omitted.
    :arg options:
    :arg order: Sorts highlighted fragments by score when set to `score`.
        By default, fragments will be output in the order they appear in
        the field (order: `none`). Setting this option to `score` will
        output the most relevant fragments first. Each highlighter applies
        its own logic to compute relevancy scores. Defaults to `none` if
        omitted.
    :arg phrase_limit: Controls the number of matching phrases in a
        document that are considered. Prevents the `fvh` highlighter from
        analyzing too many phrases and consuming too much memory. When
        using `matched_fields`, `phrase_limit` phrases per matched field
        are considered. Raising the limit increases query time and
        consumes more memory. Only supported by the `fvh` highlighter.
        Defaults to `256` if omitted.
    :arg post_tags: Use in conjunction with `pre_tags` to define the HTML
        tags to use for the highlighted text. By default, highlighted text
        is wrapped in `<em>` and `</em>` tags.
    :arg pre_tags: Use in conjunction with `post_tags` to define the HTML
        tags to use for the highlighted text. By default, highlighted text
        is wrapped in `<em>` and `</em>` tags.
    :arg require_field_match: By default, only fields that contains a
        query match are highlighted. Set to `false` to highlight all
        fields. Defaults to `True` if omitted.
    :arg tags_schema: Set to `styled` to use the built-in tag schema.
    """

    fields: Union[
        Mapping[Union[str, InstrumentedField], "HighlightField"],
        Dict[str, Any],
        DefaultType,
    ]
    encoder: Union[Literal["default", "html"], DefaultType]
    type: Union[Literal["plain", "fvh", "unified"], DefaultType]
    boundary_chars: Union[str, DefaultType]
    boundary_max_scan: Union[int, DefaultType]
    boundary_scanner: Union[Literal["chars", "sentence", "word"], DefaultType]
    boundary_scanner_locale: Union[str, DefaultType]
    force_source: Union[bool, DefaultType]
    fragmenter: Union[Literal["simple", "span"], DefaultType]
    fragment_size: Union[int, DefaultType]
    highlight_filter: Union[bool, DefaultType]
    highlight_query: Union[Query, DefaultType]
    max_fragment_length: Union[int, DefaultType]
    max_analyzed_offset: Union[int, DefaultType]
    no_match_size: Union[int, DefaultType]
    number_of_fragments: Union[int, DefaultType]
    options: Union[Mapping[str, Any], DefaultType]
    order: Union[Literal["score"], DefaultType]
    phrase_limit: Union[int, DefaultType]
    post_tags: Union[Sequence[str], DefaultType]
    pre_tags: Union[Sequence[str], DefaultType]
    require_field_match: Union[bool, DefaultType]
    tags_schema: Union[Literal["styled"], DefaultType]

    def __init__(
        self,
        *,
        fields: Union[
            Mapping[Union[str, InstrumentedField], "HighlightField"],
            Dict[str, Any],
            DefaultType,
        ] = DEFAULT,
        encoder: Union[Literal["default", "html"], DefaultType] = DEFAULT,
        type: Union[Literal["plain", "fvh", "unified"], DefaultType] = DEFAULT,
        boundary_chars: Union[str, DefaultType] = DEFAULT,
        boundary_max_scan: Union[int, DefaultType] = DEFAULT,
        boundary_scanner: Union[
            Literal["chars", "sentence", "word"], DefaultType
        ] = DEFAULT,
        boundary_scanner_locale: Union[str, DefaultType] = DEFAULT,
        force_source: Union[bool, DefaultType] = DEFAULT,
        fragmenter: Union[Literal["simple", "span"], DefaultType] = DEFAULT,
        fragment_size: Union[int, DefaultType] = DEFAULT,
        highlight_filter: Union[bool, DefaultType] = DEFAULT,
        highlight_query: Union[Query, DefaultType] = DEFAULT,
        max_fragment_length: Union[int, DefaultType] = DEFAULT,
        max_analyzed_offset: Union[int, DefaultType] = DEFAULT,
        no_match_size: Union[int, DefaultType] = DEFAULT,
        number_of_fragments: Union[int, DefaultType] = DEFAULT,
        options: Union[Mapping[str, Any], DefaultType] = DEFAULT,
        order: Union[Literal["score"], DefaultType] = DEFAULT,
        phrase_limit: Union[int, DefaultType] = DEFAULT,
        post_tags: Union[Sequence[str], DefaultType] = DEFAULT,
        pre_tags: Union[Sequence[str], DefaultType] = DEFAULT,
        require_field_match: Union[bool, DefaultType] = DEFAULT,
        tags_schema: Union[Literal["styled"], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        if fields is not DEFAULT:
            if isinstance(fields, Mapping):
                # Only the keys can be field objects. Calling str() on the
                # whole mapping would store its Python repr (a string)
                # instead of a per-field structure, so just the keys are
                # converted and the per-field options are kept as given.
                kwargs["fields"] = {
                    str(field_name): field_options
                    for field_name, field_options in fields.items()
                }
            else:
                # Anything that is not a mapping keeps the previous
                # behaviour.
                kwargs["fields"] = str(fields)
        plain_options = (
            ("encoder", encoder),
            ("type", type),
            ("boundary_chars", boundary_chars),
            ("boundary_max_scan", boundary_max_scan),
            ("boundary_scanner", boundary_scanner),
            ("boundary_scanner_locale", boundary_scanner_locale),
            ("force_source", force_source),
            ("fragmenter", fragmenter),
            ("fragment_size", fragment_size),
            ("highlight_filter", highlight_filter),
            ("highlight_query", highlight_query),
            ("max_fragment_length", max_fragment_length),
            ("max_analyzed_offset", max_analyzed_offset),
            ("no_match_size", no_match_size),
            ("number_of_fragments", number_of_fragments),
            ("options", options),
            ("order", order),
            ("phrase_limit", phrase_limit),
            ("post_tags", post_tags),
            ("pre_tags", pre_tags),
            ("require_field_match", require_field_match),
            ("tags_schema", tags_schema),
        )
        # Options left at DEFAULT are omitted entirely.
        for option_name, option_value in plain_options:
            if option_value is not DEFAULT:
                kwargs[option_name] = option_value
        super().__init__(kwargs)
class HighlightField(AttrDict[Any]):
    """
    Stores every explicitly supplied option in the underlying attribute
    dictionary under its own name. ``matched_fields`` is stored as a
    string for a single field and as a list of strings for a list or
    tuple of fields.

    :arg fragment_offset:
    :arg matched_fields:
    :arg type:
    :arg boundary_chars: A string that contains each boundary character.
        Defaults to `.,!? \\t\\n` if omitted.
    :arg boundary_max_scan: How far to scan for boundary characters.
        Defaults to `20` if omitted.
    :arg boundary_scanner: Specifies how to break the highlighted
        fragments: chars, sentence, or word. Only valid for the unified
        and fvh highlighters. Defaults to `sentence` for the `unified`
        highlighter. Defaults to `chars` for the `fvh` highlighter.
    :arg boundary_scanner_locale: Controls which locale is used to search
        for sentence and word boundaries. This parameter takes a form of a
        language tag, for example: `"en-US"`, `"fr-FR"`, `"ja-JP"`.
        Defaults to `Locale.ROOT` if omitted.
    :arg force_source:
    :arg fragmenter: Specifies how text should be broken up in highlight
        snippets: `simple` or `span`. Only valid for the `plain`
        highlighter. Defaults to `span` if omitted.
    :arg fragment_size: The size of the highlighted fragment in
        characters. Defaults to `100` if omitted.
    :arg highlight_filter:
    :arg highlight_query: Highlight matches for a query other than the
        search query. This is especially useful if you use a rescore query
        because those are not taken into account by highlighting by
        default.
    :arg max_fragment_length:
    :arg max_analyzed_offset: If set to a non-negative value, highlighting
        stops at this defined maximum limit. The rest of the text is not
        processed, thus not highlighted and no error is returned. The
        `max_analyzed_offset` query setting does not override the
        `index.highlight.max_analyzed_offset` setting, which prevails when
        it is set to a lower value than the query setting.
    :arg no_match_size: The amount of text you want to return from the
        beginning of the field if there are no matching fragments to
        highlight.
    :arg number_of_fragments: The maximum number of fragments to return.
        If the number of fragments is set to `0`, no fragments are
        returned. Instead, the entire field contents are highlighted and
        returned. This can be handy when you need to highlight short texts
        such as a title or address, but fragmentation is not required. If
        `number_of_fragments` is `0`, `fragment_size` is ignored. Defaults
        to `5` if omitted.
    :arg options:
    :arg order: Sorts highlighted fragments by score when set to `score`.
        By default, fragments will be output in the order they appear in
        the field (order: `none`). Setting this option to `score` will
        output the most relevant fragments first. Each highlighter applies
        its own logic to compute relevancy scores. Defaults to `none` if
        omitted.
    :arg phrase_limit: Controls the number of matching phrases in a
        document that are considered. Prevents the `fvh` highlighter from
        analyzing too many phrases and consuming too much memory. When
        using `matched_fields`, `phrase_limit` phrases per matched field
        are considered. Raising the limit increases query time and
        consumes more memory. Only supported by the `fvh` highlighter.
        Defaults to `256` if omitted.
    :arg post_tags: Use in conjunction with `pre_tags` to define the HTML
        tags to use for the highlighted text. By default, highlighted text
        is wrapped in `<em>` and `</em>` tags.
    :arg pre_tags: Use in conjunction with `post_tags` to define the HTML
        tags to use for the highlighted text. By default, highlighted text
        is wrapped in `<em>` and `</em>` tags.
    :arg require_field_match: By default, only fields that contains a
        query match are highlighted. Set to `false` to highlight all
        fields. Defaults to `True` if omitted.
    :arg tags_schema: Set to `styled` to use the built-in tag schema.
    """

    fragment_offset: Union[int, DefaultType]
    matched_fields: Union[
        Union[str, InstrumentedField],
        Sequence[Union[str, InstrumentedField]],
        DefaultType,
    ]
    type: Union[Literal["plain", "fvh", "unified"], DefaultType]
    boundary_chars: Union[str, DefaultType]
    boundary_max_scan: Union[int, DefaultType]
    boundary_scanner: Union[Literal["chars", "sentence", "word"], DefaultType]
    boundary_scanner_locale: Union[str, DefaultType]
    force_source: Union[bool, DefaultType]
    fragmenter: Union[Literal["simple", "span"], DefaultType]
    fragment_size: Union[int, DefaultType]
    highlight_filter: Union[bool, DefaultType]
    highlight_query: Union[Query, DefaultType]
    max_fragment_length: Union[int, DefaultType]
    max_analyzed_offset: Union[int, DefaultType]
    no_match_size: Union[int, DefaultType]
    number_of_fragments: Union[int, DefaultType]
    options: Union[Mapping[str, Any], DefaultType]
    order: Union[Literal["score"], DefaultType]
    phrase_limit: Union[int, DefaultType]
    post_tags: Union[Sequence[str], DefaultType]
    pre_tags: Union[Sequence[str], DefaultType]
    require_field_match: Union[bool, DefaultType]
    tags_schema: Union[Literal["styled"], DefaultType]

    def __init__(
        self,
        *,
        fragment_offset: Union[int, DefaultType] = DEFAULT,
        matched_fields: Union[
            Union[str, InstrumentedField],
            Sequence[Union[str, InstrumentedField]],
            DefaultType,
        ] = DEFAULT,
        type: Union[Literal["plain", "fvh", "unified"], DefaultType] = DEFAULT,
        boundary_chars: Union[str, DefaultType] = DEFAULT,
        boundary_max_scan: Union[int, DefaultType] = DEFAULT,
        boundary_scanner: Union[
            Literal["chars", "sentence", "word"], DefaultType
        ] = DEFAULT,
        boundary_scanner_locale: Union[str, DefaultType] = DEFAULT,
        force_source: Union[bool, DefaultType] = DEFAULT,
        fragmenter: Union[Literal["simple", "span"], DefaultType] = DEFAULT,
        fragment_size: Union[int, DefaultType] = DEFAULT,
        highlight_filter: Union[bool, DefaultType] = DEFAULT,
        highlight_query: Union[Query, DefaultType] = DEFAULT,
        max_fragment_length: Union[int, DefaultType] = DEFAULT,
        max_analyzed_offset: Union[int, DefaultType] = DEFAULT,
        no_match_size: Union[int, DefaultType] = DEFAULT,
        number_of_fragments: Union[int, DefaultType] = DEFAULT,
        options: Union[Mapping[str, Any], DefaultType] = DEFAULT,
        order: Union[Literal["score"], DefaultType] = DEFAULT,
        phrase_limit: Union[int, DefaultType] = DEFAULT,
        post_tags: Union[Sequence[str], DefaultType] = DEFAULT,
        pre_tags: Union[Sequence[str], DefaultType] = DEFAULT,
        require_field_match: Union[bool, DefaultType] = DEFAULT,
        tags_schema: Union[Literal["styled"], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        if fragment_offset is not DEFAULT:
            kwargs["fragment_offset"] = fragment_offset
        if matched_fields is not DEFAULT:
            if isinstance(matched_fields, (list, tuple)):
                # str() on the whole sequence would store its Python repr
                # (e.g. "['title', 'body']") rather than a list of names,
                # so each element is converted individually.
                kwargs["matched_fields"] = [
                    str(field_name) for field_name in matched_fields
                ]
            else:
                # A single field (plain string or field object) is
                # stringified exactly as before.
                kwargs["matched_fields"] = str(matched_fields)
        plain_options = (
            ("type", type),
            ("boundary_chars", boundary_chars),
            ("boundary_max_scan", boundary_max_scan),
            ("boundary_scanner", boundary_scanner),
            ("boundary_scanner_locale", boundary_scanner_locale),
            ("force_source", force_source),
            ("fragmenter", fragmenter),
            ("fragment_size", fragment_size),
            ("highlight_filter", highlight_filter),
            ("highlight_query", highlight_query),
            ("max_fragment_length", max_fragment_length),
            ("max_analyzed_offset", max_analyzed_offset),
            ("no_match_size", no_match_size),
            ("number_of_fragments", number_of_fragments),
            ("options", options),
            ("order", order),
            ("phrase_limit", phrase_limit),
            ("post_tags", post_tags),
            ("pre_tags", pre_tags),
            ("require_field_match", require_field_match),
            ("tags_schema", tags_schema),
        )
        # Options left at DEFAULT are omitted entirely.
        for option_name, option_value in plain_options:
            if option_value is not DEFAULT:
                kwargs[option_name] = option_value
        super().__init__(kwargs)
class HoltLinearModelSettings(AttrDict[Any]):
    """
    Stores every explicitly supplied option in the underlying attribute
    dictionary under its own name.

    :arg alpha:
    :arg beta:
    """

    alpha: Union[float, DefaultType]
    beta: Union[float, DefaultType]

    def __init__(
        self,
        *,
        alpha: Union[float, DefaultType] = DEFAULT,
        beta: Union[float, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        for setting_name, setting_value in (("alpha", alpha), ("beta", beta)):
            if setting_value is not DEFAULT:
                kwargs[setting_name] = setting_value
        super().__init__(kwargs)


class HoltWintersModelSettings(AttrDict[Any]):
    """
    Stores every explicitly supplied option in the underlying attribute
    dictionary under its own name.

    :arg alpha:
    :arg beta:
    :arg gamma:
    :arg pad:
    :arg period:
    :arg type:
    """

    alpha: Union[float, DefaultType]
    beta: Union[float, DefaultType]
    gamma: Union[float, DefaultType]
    pad: Union[bool, DefaultType]
    period: Union[int, DefaultType]
    type: Union[Literal["add", "mult"], DefaultType]

    def __init__(
        self,
        *,
        alpha: Union[float, DefaultType] = DEFAULT,
        beta: Union[float, DefaultType] = DEFAULT,
        gamma: Union[float, DefaultType] = DEFAULT,
        pad: Union[bool, DefaultType] = DEFAULT,
        period: Union[int, DefaultType] = DEFAULT,
        type: Union[Literal["add", "mult"], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        settings = (
            ("alpha", alpha),
            ("beta", beta),
            ("gamma", gamma),
            ("pad", pad),
            ("period", period),
            ("type", type),
        )
        for setting_name, setting_value in settings:
            if setting_value is not DEFAULT:
                kwargs[setting_name] = setting_value
        super().__init__(kwargs)
class InferenceConfigContainer(AttrDict[Any]):
    """
    Stores every explicitly supplied option in the underlying attribute
    dictionary under its own name.

    :arg regression: Regression configuration for inference.
    :arg classification: Classification configuration for inference.
    """

    regression: Union["RegressionInferenceOptions", Dict[str, Any], DefaultType]
    classification: Union["ClassificationInferenceOptions", Dict[str, Any], DefaultType]

    def __init__(
        self,
        *,
        regression: Union[
            "RegressionInferenceOptions", Dict[str, Any], DefaultType
        ] = DEFAULT,
        classification: Union[
            "ClassificationInferenceOptions", Dict[str, Any], DefaultType
        ] = DEFAULT,
        **kwargs: Any,
    ):
        if regression is not DEFAULT:
            kwargs["regression"] = regression
        if classification is not DEFAULT:
            kwargs["classification"] = classification
        super().__init__(kwargs)


class InnerHits(AttrDict[Any]):
    """
    Stores every explicitly supplied option in the underlying attribute
    dictionary. Field references are converted to strings; containers of
    field references (``script_fields`` keys, ``fields`` / ``stored_fields``
    / ``sort`` elements) are converted element-wise so that the container
    structure is preserved.

    :arg name: The name for the particular inner hit definition in the
        response. Useful when a search request contains multiple inner
        hits.
    :arg size: The maximum number of hits to return per `inner_hits`.
        Defaults to `3` if omitted.
    :arg from_: Inner hit starting document offset.
    :arg collapse:
    :arg docvalue_fields:
    :arg explain:
    :arg highlight:
    :arg ignore_unmapped:
    :arg script_fields:
    :arg seq_no_primary_term:
    :arg fields:
    :arg sort: How the inner hits should be sorted per `inner_hits`. By
        default, inner hits are sorted by score.
    :arg _source:
    :arg stored_fields:
    :arg track_scores:
    :arg version:
    """

    name: Union[str, DefaultType]
    size: Union[int, DefaultType]
    from_: Union[int, DefaultType]
    collapse: Union["FieldCollapse", Dict[str, Any], DefaultType]
    docvalue_fields: Union[
        Sequence["FieldAndFormat"], Sequence[Dict[str, Any]], DefaultType
    ]
    explain: Union[bool, DefaultType]
    highlight: Union["Highlight", Dict[str, Any], DefaultType]
    ignore_unmapped: Union[bool, DefaultType]
    script_fields: Union[
        Mapping[Union[str, InstrumentedField], "ScriptField"],
        Dict[str, Any],
        DefaultType,
    ]
    seq_no_primary_term: Union[bool, DefaultType]
    fields: Union[
        Union[str, InstrumentedField],
        Sequence[Union[str, InstrumentedField]],
        DefaultType,
    ]
    sort: Union[
        Union[Union[str, InstrumentedField], "SortOptions"],
        Sequence[Union[Union[str, InstrumentedField], "SortOptions"]],
        Dict[str, Any],
        DefaultType,
    ]
    _source: Union[bool, "SourceFilter", Dict[str, Any], DefaultType]
    stored_fields: Union[
        Union[str, InstrumentedField],
        Sequence[Union[str, InstrumentedField]],
        DefaultType,
    ]
    track_scores: Union[bool, DefaultType]
    version: Union[bool, DefaultType]

    def __init__(
        self,
        *,
        name: Union[str, DefaultType] = DEFAULT,
        size: Union[int, DefaultType] = DEFAULT,
        from_: Union[int, DefaultType] = DEFAULT,
        collapse: Union["FieldCollapse", Dict[str, Any], DefaultType] = DEFAULT,
        docvalue_fields: Union[
            Sequence["FieldAndFormat"], Sequence[Dict[str, Any]], DefaultType
        ] = DEFAULT,
        explain: Union[bool, DefaultType] = DEFAULT,
        highlight: Union["Highlight", Dict[str, Any], DefaultType] = DEFAULT,
        ignore_unmapped: Union[bool, DefaultType] = DEFAULT,
        script_fields: Union[
            Mapping[Union[str, InstrumentedField], "ScriptField"],
            Dict[str, Any],
            DefaultType,
        ] = DEFAULT,
        seq_no_primary_term: Union[bool, DefaultType] = DEFAULT,
        fields: Union[
            Union[str, InstrumentedField],
            Sequence[Union[str, InstrumentedField]],
            DefaultType,
        ] = DEFAULT,
        sort: Union[
            Union[Union[str, InstrumentedField], "SortOptions"],
            Sequence[Union[Union[str, InstrumentedField], "SortOptions"]],
            Dict[str, Any],
            DefaultType,
        ] = DEFAULT,
        _source: Union[bool, "SourceFilter", Dict[str, Any], DefaultType] = DEFAULT,
        stored_fields: Union[
            Union[str, InstrumentedField],
            Sequence[Union[str, InstrumentedField]],
            DefaultType,
        ] = DEFAULT,
        track_scores: Union[bool, DefaultType] = DEFAULT,
        version: Union[bool, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        if name is not DEFAULT:
            kwargs["name"] = name
        if size is not DEFAULT:
            kwargs["size"] = size
        if from_ is not DEFAULT:
            # NOTE(review): the value is stored under "from_", not "from".
            # Whether AttrDict maps this back to "from" on serialization is
            # not visible from here - confirm.
            kwargs["from_"] = from_
        if collapse is not DEFAULT:
            kwargs["collapse"] = collapse
        if docvalue_fields is not DEFAULT:
            kwargs["docvalue_fields"] = docvalue_fields
        if explain is not DEFAULT:
            kwargs["explain"] = explain
        if highlight is not DEFAULT:
            kwargs["highlight"] = highlight
        if ignore_unmapped is not DEFAULT:
            kwargs["ignore_unmapped"] = ignore_unmapped
        if script_fields is not DEFAULT:
            if isinstance(script_fields, Mapping):
                # Only the keys can be field objects; str() on the whole
                # mapping would store its Python repr instead of a
                # per-field structure.
                kwargs["script_fields"] = {
                    str(field_name): script_field
                    for field_name, script_field in script_fields.items()
                }
            else:
                kwargs["script_fields"] = str(script_fields)
        if seq_no_primary_term is not DEFAULT:
            kwargs["seq_no_primary_term"] = seq_no_primary_term
        if fields is not DEFAULT:
            if isinstance(fields, (list, tuple)):
                # Convert each field reference, keeping the list shape.
                kwargs["fields"] = [str(field_name) for field_name in fields]
            else:
                kwargs["fields"] = str(fields)
        if sort is not DEFAULT:
            # Structured sort specifications are passed through untouched;
            # only bare field references are stringified.
            # Presumably "SortOptions" derives from AttrDict like the other
            # option classes in this module - confirm. If it does not, it
            # falls back to str() exactly as before.
            structured = (Mapping, AttrDict)
            if isinstance(sort, (list, tuple)):
                kwargs["sort"] = [
                    entry if isinstance(entry, structured) else str(entry)
                    for entry in sort
                ]
            elif isinstance(sort, structured):
                kwargs["sort"] = sort
            else:
                kwargs["sort"] = str(sort)
        if _source is not DEFAULT:
            kwargs["_source"] = _source
        if stored_fields is not DEFAULT:
            if isinstance(stored_fields, (list, tuple)):
                # Convert each field reference, keeping the list shape.
                kwargs["stored_fields"] = [
                    str(field_name) for field_name in stored_fields
                ]
            else:
                kwargs["stored_fields"] = str(stored_fields)
        if track_scores is not DEFAULT:
            kwargs["track_scores"] = track_scores
        if version is not DEFAULT:
            kwargs["version"] = version
        super().__init__(kwargs)
class IntervalsAllOf(AttrDict[Any]):
    """
    Stores every explicitly supplied option in the underlying attribute
    dictionary under its own name.

    :arg intervals: (required) An array of rules to combine. All rules
        must produce a match in a document for the overall source to
        match.
    :arg max_gaps: Maximum number of positions between the matching terms.
        Intervals produced by the rules further apart than this are not
        considered matches. Defaults to `-1` if omitted.
    :arg ordered: If `true`, intervals produced by the rules should appear
        in the order in which they are specified.
    :arg filter: Rule used to filter returned intervals.
    """

    intervals: Union[
        Sequence["IntervalsContainer"], Sequence[Dict[str, Any]], DefaultType
    ]
    max_gaps: Union[int, DefaultType]
    ordered: Union[bool, DefaultType]
    filter: Union["IntervalsFilter", Dict[str, Any], DefaultType]

    def __init__(
        self,
        *,
        intervals: Union[
            Sequence["IntervalsContainer"], Sequence[Dict[str, Any]], DefaultType
        ] = DEFAULT,
        max_gaps: Union[int, DefaultType] = DEFAULT,
        ordered: Union[bool, DefaultType] = DEFAULT,
        filter: Union["IntervalsFilter", Dict[str, Any], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        rule_options = (
            ("intervals", intervals),
            ("max_gaps", max_gaps),
            ("ordered", ordered),
            ("filter", filter),
        )
        for option_name, option_value in rule_options:
            if option_value is not DEFAULT:
                kwargs[option_name] = option_value
        super().__init__(kwargs)
class IntervalsAnyOf(AttrDict[Any]):
    """
    Stores every explicitly supplied option in the underlying attribute
    dictionary under its own name.

    :arg intervals: (required) An array of rules to match.
    :arg filter: Rule used to filter returned intervals.
    """

    intervals: Union[
        Sequence["IntervalsContainer"], Sequence[Dict[str, Any]], DefaultType
    ]
    filter: Union["IntervalsFilter", Dict[str, Any], DefaultType]

    def __init__(
        self,
        *,
        intervals: Union[
            Sequence["IntervalsContainer"], Sequence[Dict[str, Any]], DefaultType
        ] = DEFAULT,
        filter: Union["IntervalsFilter", Dict[str, Any], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        for option_name, option_value in (
            ("intervals", intervals),
            ("filter", filter),
        ):
            if option_value is not DEFAULT:
                kwargs[option_name] = option_value
        super().__init__(kwargs)


class IntervalsContainer(AttrDict[Any]):
    """
    Stores every explicitly supplied rule in the underlying attribute
    dictionary under its own name.

    :arg all_of: Returns matches that span a combination of other rules.
    :arg any_of: Returns intervals produced by any of its sub-rules.
    :arg fuzzy: Matches analyzed text.
    :arg match: Matches analyzed text.
    :arg prefix: Matches terms that start with a specified set of
        characters.
    :arg wildcard: Matches terms using a wildcard pattern.
    """

    all_of: Union["IntervalsAllOf", Dict[str, Any], DefaultType]
    any_of: Union["IntervalsAnyOf", Dict[str, Any], DefaultType]
    fuzzy: Union["IntervalsFuzzy", Dict[str, Any], DefaultType]
    match: Union["IntervalsMatch", Dict[str, Any], DefaultType]
    prefix: Union["IntervalsPrefix", Dict[str, Any], DefaultType]
    wildcard: Union["IntervalsWildcard", Dict[str, Any], DefaultType]

    def __init__(
        self,
        *,
        all_of: Union["IntervalsAllOf", Dict[str, Any], DefaultType] = DEFAULT,
        any_of: Union["IntervalsAnyOf", Dict[str, Any], DefaultType] = DEFAULT,
        fuzzy: Union["IntervalsFuzzy", Dict[str, Any], DefaultType] = DEFAULT,
        match: Union["IntervalsMatch", Dict[str, Any], DefaultType] = DEFAULT,
        prefix: Union["IntervalsPrefix", Dict[str, Any], DefaultType] = DEFAULT,
        wildcard: Union["IntervalsWildcard", Dict[str, Any], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        rules = (
            ("all_of", all_of),
            ("any_of", any_of),
            ("fuzzy", fuzzy),
            ("match", match),
            ("prefix", prefix),
            ("wildcard", wildcard),
        )
        for rule_name, rule_value in rules:
            if rule_value is not DEFAULT:
                kwargs[rule_name] = rule_value
        super().__init__(kwargs)
class IntervalsFilter(AttrDict[Any]):
    """
    Stores every explicitly supplied option in the underlying attribute
    dictionary under its own name.

    :arg after: Query used to return intervals that follow an interval
        from the `filter` rule.
    :arg before: Query used to return intervals that occur before an
        interval from the `filter` rule.
    :arg contained_by: Query used to return intervals contained by an
        interval from the `filter` rule.
    :arg containing: Query used to return intervals that contain an
        interval from the `filter` rule.
    :arg not_contained_by: Query used to return intervals that are **not**
        contained by an interval from the `filter` rule.
    :arg not_containing: Query used to return intervals that do **not**
        contain an interval from the `filter` rule.
    :arg not_overlapping: Query used to return intervals that do **not**
        overlap with an interval from the `filter` rule.
    :arg overlapping: Query used to return intervals that overlap with an
        interval from the `filter` rule.
    :arg script: Script used to return matching documents. This script
        must return a boolean value: `true` or `false`.
    """

    after: Union["IntervalsContainer", Dict[str, Any], DefaultType]
    before: Union["IntervalsContainer", Dict[str, Any], DefaultType]
    contained_by: Union["IntervalsContainer", Dict[str, Any], DefaultType]
    containing: Union["IntervalsContainer", Dict[str, Any], DefaultType]
    not_contained_by: Union["IntervalsContainer", Dict[str, Any], DefaultType]
    not_containing: Union["IntervalsContainer", Dict[str, Any], DefaultType]
    not_overlapping: Union["IntervalsContainer", Dict[str, Any], DefaultType]
    overlapping: Union["IntervalsContainer", Dict[str, Any], DefaultType]
    script: Union["Script", Dict[str, Any], DefaultType]

    def __init__(
        self,
        *,
        after: Union["IntervalsContainer", Dict[str, Any], DefaultType] = DEFAULT,
        before: Union["IntervalsContainer", Dict[str, Any], DefaultType] = DEFAULT,
        contained_by: Union[
            "IntervalsContainer", Dict[str, Any], DefaultType
        ] = DEFAULT,
        containing: Union["IntervalsContainer", Dict[str, Any], DefaultType] = DEFAULT,
        not_contained_by: Union[
            "IntervalsContainer", Dict[str, Any], DefaultType
        ] = DEFAULT,
        not_containing: Union[
            "IntervalsContainer", Dict[str, Any], DefaultType
        ] = DEFAULT,
        not_overlapping: Union[
            "IntervalsContainer", Dict[str, Any], DefaultType
        ] = DEFAULT,
        overlapping: Union["IntervalsContainer", Dict[str, Any], DefaultType] = DEFAULT,
        script: Union["Script", Dict[str, Any], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        filter_rules = (
            ("after", after),
            ("before", before),
            ("contained_by", contained_by),
            ("containing", containing),
            ("not_contained_by", not_contained_by),
            ("not_containing", not_containing),
            ("not_overlapping", not_overlapping),
            ("overlapping", overlapping),
            ("script", script),
        )
        for rule_name, rule_value in filter_rules:
            if rule_value is not DEFAULT:
                kwargs[rule_name] = rule_value
        super().__init__(kwargs)
kwargs["before"] = before + if contained_by is not DEFAULT: + kwargs["contained_by"] = contained_by + if containing is not DEFAULT: + kwargs["containing"] = containing + if not_contained_by is not DEFAULT: + kwargs["not_contained_by"] = not_contained_by + if not_containing is not DEFAULT: + kwargs["not_containing"] = not_containing + if not_overlapping is not DEFAULT: + kwargs["not_overlapping"] = not_overlapping + if overlapping is not DEFAULT: + kwargs["overlapping"] = overlapping + if script is not DEFAULT: + kwargs["script"] = script + super().__init__(kwargs) + + +class IntervalsFuzzy(AttrDict[Any]): + """ + :arg term: (required) The term to match. + :arg analyzer: Analyzer used to normalize the term. + :arg fuzziness: Maximum edit distance allowed for matching. Defaults + to `auto` if omitted. + :arg prefix_length: Number of beginning characters left unchanged when + creating expansions. + :arg transpositions: Indicates whether edits include transpositions of + two adjacent characters (for example, `ab` to `ba`). Defaults to + `True` if omitted. + :arg use_field: If specified, match intervals from this field rather + than the top-level field. The `term` is normalized using the + search analyzer from this field, unless `analyzer` is specified + separately. 
+ """ + + term: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + fuzziness: Union[str, int, DefaultType] + prefix_length: Union[int, DefaultType] + transpositions: Union[bool, DefaultType] + use_field: Union[str, InstrumentedField, DefaultType] + + def __init__( + self, + *, + term: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + fuzziness: Union[str, int, DefaultType] = DEFAULT, + prefix_length: Union[int, DefaultType] = DEFAULT, + transpositions: Union[bool, DefaultType] = DEFAULT, + use_field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if term is not DEFAULT: + kwargs["term"] = term + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if fuzziness is not DEFAULT: + kwargs["fuzziness"] = fuzziness + if prefix_length is not DEFAULT: + kwargs["prefix_length"] = prefix_length + if transpositions is not DEFAULT: + kwargs["transpositions"] = transpositions + if use_field is not DEFAULT: + kwargs["use_field"] = str(use_field) + super().__init__(kwargs) + + +class IntervalsMatch(AttrDict[Any]): + """ + :arg query: (required) Text you wish to find in the provided field. + :arg analyzer: Analyzer used to analyze terms in the query. + :arg max_gaps: Maximum number of positions between the matching terms. + Terms further apart than this are not considered matches. Defaults + to `-1` if omitted. + :arg ordered: If `true`, matching terms must appear in their specified + order. + :arg use_field: If specified, match intervals from this field rather + than the top-level field. The `term` is normalized using the + search analyzer from this field, unless `analyzer` is specified + separately. + :arg filter: An optional interval filter. 
+ """ + + query: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + max_gaps: Union[int, DefaultType] + ordered: Union[bool, DefaultType] + use_field: Union[str, InstrumentedField, DefaultType] + filter: Union["IntervalsFilter", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + query: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + max_gaps: Union[int, DefaultType] = DEFAULT, + ordered: Union[bool, DefaultType] = DEFAULT, + use_field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + filter: Union["IntervalsFilter", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if query is not DEFAULT: + kwargs["query"] = query + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if max_gaps is not DEFAULT: + kwargs["max_gaps"] = max_gaps + if ordered is not DEFAULT: + kwargs["ordered"] = ordered + if use_field is not DEFAULT: + kwargs["use_field"] = str(use_field) + if filter is not DEFAULT: + kwargs["filter"] = filter + super().__init__(kwargs) + + +class IntervalsPrefix(AttrDict[Any]): + """ + :arg prefix: (required) Beginning characters of terms you wish to find + in the top-level field. + :arg analyzer: Analyzer used to analyze the `prefix`. + :arg use_field: If specified, match intervals from this field rather + than the top-level field. The `prefix` is normalized using the + search analyzer from this field, unless `analyzer` is specified + separately. 
+ """ + + prefix: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + use_field: Union[str, InstrumentedField, DefaultType] + + def __init__( + self, + *, + prefix: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + use_field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if prefix is not DEFAULT: + kwargs["prefix"] = prefix + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if use_field is not DEFAULT: + kwargs["use_field"] = str(use_field) + super().__init__(kwargs) + + +class IntervalsQuery(AttrDict[Any]): + """ + :arg all_of: Returns matches that span a combination of other rules. + :arg any_of: Returns intervals produced by any of its sub-rules. + :arg fuzzy: Matches terms that are similar to the provided term, + within an edit distance defined by `fuzziness`. + :arg match: Matches analyzed text. + :arg prefix: Matches terms that start with a specified set of + characters. + :arg wildcard: Matches terms using a wildcard pattern. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + all_of: Union["IntervalsAllOf", Dict[str, Any], DefaultType] + any_of: Union["IntervalsAnyOf", Dict[str, Any], DefaultType] + fuzzy: Union["IntervalsFuzzy", Dict[str, Any], DefaultType] + match: Union["IntervalsMatch", Dict[str, Any], DefaultType] + prefix: Union["IntervalsPrefix", Dict[str, Any], DefaultType] + wildcard: Union["IntervalsWildcard", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + all_of: Union["IntervalsAllOf", Dict[str, Any], DefaultType] = DEFAULT, + any_of: Union["IntervalsAnyOf", Dict[str, Any], DefaultType] = DEFAULT, + fuzzy: Union["IntervalsFuzzy", Dict[str, Any], DefaultType] = DEFAULT, + match: Union["IntervalsMatch", Dict[str, Any], DefaultType] = DEFAULT, + prefix: Union["IntervalsPrefix", Dict[str, Any], DefaultType] = DEFAULT, + wildcard: Union["IntervalsWildcard", Dict[str, Any], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if all_of is not DEFAULT: + kwargs["all_of"] = all_of + if any_of is not DEFAULT: + kwargs["any_of"] = any_of + if fuzzy is not DEFAULT: + kwargs["fuzzy"] = fuzzy + if match is not DEFAULT: + kwargs["match"] = match + if prefix is not DEFAULT: + kwargs["prefix"] = prefix + if wildcard is not DEFAULT: + kwargs["wildcard"] = wildcard + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class IntervalsWildcard(AttrDict[Any]): + """ + :arg pattern: (required) Wildcard pattern used to find matching terms. + :arg analyzer: Analyzer used to analyze the `pattern`. Defaults to the + top-level field's analyzer. + :arg use_field: If specified, match intervals from this field rather + than the top-level field. The `pattern` is normalized using the + search analyzer from this field, unless `analyzer` is specified + separately. 
+ """ + + pattern: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + use_field: Union[str, InstrumentedField, DefaultType] + + def __init__( + self, + *, + pattern: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + use_field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if pattern is not DEFAULT: + kwargs["pattern"] = pattern + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if use_field is not DEFAULT: + kwargs["use_field"] = str(use_field) + super().__init__(kwargs) + + +class IpRangeAggregationRange(AttrDict[Any]): + """ + :arg from: Start of the range. + :arg mask: IP range defined as a CIDR mask. + :arg to: End of the range. + """ + + from_: Union[str, None, DefaultType] + mask: Union[str, DefaultType] + to: Union[str, None, DefaultType] + + def __init__( + self, + *, + from_: Union[str, None, DefaultType] = DEFAULT, + mask: Union[str, DefaultType] = DEFAULT, + to: Union[str, None, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if from_ is not DEFAULT: + kwargs["from_"] = from_ + if mask is not DEFAULT: + kwargs["mask"] = mask + if to is not DEFAULT: + kwargs["to"] = to + super().__init__(kwargs) + + +class LatLonGeoLocation(AttrDict[Any]): + """ + :arg lat: (required) Latitude + :arg lon: (required) Longitude + """ + + lat: Union[float, DefaultType] + lon: Union[float, DefaultType] + + def __init__( + self, + *, + lat: Union[float, DefaultType] = DEFAULT, + lon: Union[float, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if lat is not DEFAULT: + kwargs["lat"] = lat + if lon is not DEFAULT: + kwargs["lon"] = lon + super().__init__(kwargs) + + +class LikeDocument(AttrDict[Any]): + """ + :arg doc: A document not present in the index. + :arg fields: + :arg _id: ID of a document. + :arg _index: Index of a document. + :arg per_field_analyzer: Overrides the default analyzer. + :arg routing: + :arg version: + :arg version_type: Defaults to `'internal'` if omitted. 
+ """ + + doc: Any + fields: Union[Sequence[Union[str, InstrumentedField]], DefaultType] + _id: Union[str, DefaultType] + _index: Union[str, DefaultType] + per_field_analyzer: Union[Mapping[Union[str, InstrumentedField], str], DefaultType] + routing: Union[str, DefaultType] + version: Union[int, DefaultType] + version_type: Union[ + Literal["internal", "external", "external_gte", "force"], DefaultType + ] + + def __init__( + self, + *, + doc: Any = DEFAULT, + fields: Union[Sequence[Union[str, InstrumentedField]], DefaultType] = DEFAULT, + _id: Union[str, DefaultType] = DEFAULT, + _index: Union[str, DefaultType] = DEFAULT, + per_field_analyzer: Union[ + Mapping[Union[str, InstrumentedField], str], DefaultType + ] = DEFAULT, + routing: Union[str, DefaultType] = DEFAULT, + version: Union[int, DefaultType] = DEFAULT, + version_type: Union[ + Literal["internal", "external", "external_gte", "force"], DefaultType + ] = DEFAULT, + **kwargs: Any, + ): + if doc is not DEFAULT: + kwargs["doc"] = doc + if fields is not DEFAULT: + kwargs["fields"] = str(fields) + if _id is not DEFAULT: + kwargs["_id"] = _id + if _index is not DEFAULT: + kwargs["_index"] = _index + if per_field_analyzer is not DEFAULT: + kwargs["per_field_analyzer"] = str(per_field_analyzer) + if routing is not DEFAULT: + kwargs["routing"] = routing + if version is not DEFAULT: + kwargs["version"] = version + if version_type is not DEFAULT: + kwargs["version_type"] = version_type + super().__init__(kwargs) + + +class MatchBoolPrefixQuery(AttrDict[Any]): + """ + :arg query: (required) Terms you wish to find in the provided field. + The last term is used in a prefix query. + :arg analyzer: Analyzer used to convert the text in the query value + into tokens. + :arg fuzziness: Maximum edit distance allowed for matching. Can be + applied to the term subqueries constructed for all terms but the + final term. + :arg fuzzy_rewrite: Method used to rewrite the query. 
Can be applied + to the term subqueries constructed for all terms but the final + term. + :arg fuzzy_transpositions: If `true`, edits for fuzzy matching include + transpositions of two adjacent characters (for example, `ab` to + `ba`). Can be applied to the term subqueries constructed for all + terms but the final term. Defaults to `True` if omitted. + :arg max_expansions: Maximum number of terms to which the query will + expand. Can be applied to the term subqueries constructed for all + terms but the final term. Defaults to `50` if omitted. + :arg minimum_should_match: Minimum number of clauses that must match + for a document to be returned. Applied to the constructed bool + query. + :arg operator: Boolean logic used to interpret text in the query + value. Applied to the constructed bool query. Defaults to `'or'` + if omitted. + :arg prefix_length: Number of beginning characters left unchanged for + fuzzy matching. Can be applied to the term subqueries constructed + for all terms but the final term. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + query: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + fuzziness: Union[str, int, DefaultType] + fuzzy_rewrite: Union[str, DefaultType] + fuzzy_transpositions: Union[bool, DefaultType] + max_expansions: Union[int, DefaultType] + minimum_should_match: Union[int, str, DefaultType] + operator: Union[Literal["and", "or"], DefaultType] + prefix_length: Union[int, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + query: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + fuzziness: Union[str, int, DefaultType] = DEFAULT, + fuzzy_rewrite: Union[str, DefaultType] = DEFAULT, + fuzzy_transpositions: Union[bool, DefaultType] = DEFAULT, + max_expansions: Union[int, DefaultType] = DEFAULT, + minimum_should_match: Union[int, str, DefaultType] = DEFAULT, + operator: Union[Literal["and", "or"], DefaultType] = DEFAULT, + prefix_length: Union[int, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if query is not DEFAULT: + kwargs["query"] = query + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if fuzziness is not DEFAULT: + kwargs["fuzziness"] = fuzziness + if fuzzy_rewrite is not DEFAULT: + kwargs["fuzzy_rewrite"] = fuzzy_rewrite + if fuzzy_transpositions is not DEFAULT: + kwargs["fuzzy_transpositions"] = fuzzy_transpositions + if max_expansions is not DEFAULT: + kwargs["max_expansions"] = max_expansions + if minimum_should_match is not DEFAULT: + kwargs["minimum_should_match"] = minimum_should_match + if operator is not DEFAULT: + kwargs["operator"] = operator + if prefix_length is not DEFAULT: + kwargs["prefix_length"] = prefix_length + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class MatchPhrasePrefixQuery(AttrDict[Any]): + """ + :arg query: (required) 
Text you wish to find in the provided field. + :arg analyzer: Analyzer used to convert text in the query value into + tokens. + :arg max_expansions: Maximum number of terms to which the last + provided term of the query value will expand. Defaults to `50` if + omitted. + :arg slop: Maximum number of positions allowed between matching + tokens. + :arg zero_terms_query: Indicates whether no documents are returned if + the analyzer removes all tokens, such as when using a `stop` + filter. Defaults to `none` if omitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + query: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + max_expansions: Union[int, DefaultType] + slop: Union[int, DefaultType] + zero_terms_query: Union[Literal["all", "none"], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + query: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + max_expansions: Union[int, DefaultType] = DEFAULT, + slop: Union[int, DefaultType] = DEFAULT, + zero_terms_query: Union[Literal["all", "none"], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if query is not DEFAULT: + kwargs["query"] = query + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if max_expansions is not DEFAULT: + kwargs["max_expansions"] = max_expansions + if slop is not DEFAULT: + kwargs["slop"] = slop + if zero_terms_query is not DEFAULT: + kwargs["zero_terms_query"] = zero_terms_query + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + 
class MatchPhraseQuery(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``.

    :arg query: (required) Query terms that are analyzed and turned into a
        phrase query.
    :arg analyzer: Analyzer used to convert the text in the query value
        into tokens.
    :arg slop: Maximum number of positions allowed between matching
        tokens.
    :arg zero_terms_query: Indicates whether no documents are returned if
        the `analyzer` removes all tokens, such as when using a `stop`
        filter. Defaults to `'none'` if omitted.
    :arg boost: Floating point number used to decrease or increase the
        relevance scores of the query. Boost values are relative to the
        default value of 1.0. A boost value between 0 and 1.0 decreases
        the relevance score. A value greater than 1.0 increases the
        relevance score. Defaults to `1` if omitted.
    :arg _name:
    """

    query: Union[str, DefaultType]
    analyzer: Union[str, DefaultType]
    slop: Union[int, DefaultType]
    zero_terms_query: Union[Literal["all", "none"], DefaultType]
    boost: Union[float, DefaultType]
    _name: Union[str, DefaultType]

    def __init__(
        self,
        *,
        query: Union[str, DefaultType] = DEFAULT,
        analyzer: Union[str, DefaultType] = DEFAULT,
        slop: Union[int, DefaultType] = DEFAULT,
        zero_terms_query: Union[Literal["all", "none"], DefaultType] = DEFAULT,
        boost: Union[float, DefaultType] = DEFAULT,
        _name: Union[str, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (
            ("query", query),
            ("analyzer", analyzer),
            ("slop", slop),
            ("zero_terms_query", zero_terms_query),
            ("boost", boost),
            ("_name", _name),
        )
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class MatchQuery(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``.

    :arg query: (required) Text, number, boolean value or date you wish to
        find in the provided field.
    :arg analyzer: Analyzer used to convert the text in the query value
        into tokens.
    :arg auto_generate_synonyms_phrase_query: If `true`, match phrase
        queries are automatically created for multi-term synonyms.
        Defaults to `True` if omitted.
    :arg cutoff_frequency:
    :arg fuzziness: Maximum edit distance allowed for matching.
    :arg fuzzy_rewrite: Method used to rewrite the query.
    :arg fuzzy_transpositions: If `true`, edits for fuzzy matching include
        transpositions of two adjacent characters (for example, `ab` to
        `ba`). Defaults to `True` if omitted.
    :arg lenient: If `true`, format-based errors, such as providing a text
        query value for a numeric field, are ignored.
    :arg max_expansions: Maximum number of terms to which the query will
        expand. Defaults to `50` if omitted.
    :arg minimum_should_match: Minimum number of clauses that must match
        for a document to be returned.
    :arg operator: Boolean logic used to interpret text in the query
        value. Defaults to `'or'` if omitted.
    :arg prefix_length: Number of beginning characters left unchanged for
        fuzzy matching.
    :arg zero_terms_query: Indicates whether no documents are returned if
        the `analyzer` removes all tokens, such as when using a `stop`
        filter. Defaults to `'none'` if omitted.
    :arg boost: Floating point number used to decrease or increase the
        relevance scores of the query. Boost values are relative to the
        default value of 1.0. A boost value between 0 and 1.0 decreases
        the relevance score. A value greater than 1.0 increases the
        relevance score. Defaults to `1` if omitted.
    :arg _name:
    """

    query: Union[str, float, bool, DefaultType]
    analyzer: Union[str, DefaultType]
    auto_generate_synonyms_phrase_query: Union[bool, DefaultType]
    cutoff_frequency: Union[float, DefaultType]
    fuzziness: Union[str, int, DefaultType]
    fuzzy_rewrite: Union[str, DefaultType]
    fuzzy_transpositions: Union[bool, DefaultType]
    lenient: Union[bool, DefaultType]
    max_expansions: Union[int, DefaultType]
    minimum_should_match: Union[int, str, DefaultType]
    operator: Union[Literal["and", "or"], DefaultType]
    prefix_length: Union[int, DefaultType]
    zero_terms_query: Union[Literal["all", "none"], DefaultType]
    boost: Union[float, DefaultType]
    _name: Union[str, DefaultType]

    def __init__(
        self,
        *,
        query: Union[str, float, bool, DefaultType] = DEFAULT,
        analyzer: Union[str, DefaultType] = DEFAULT,
        auto_generate_synonyms_phrase_query: Union[bool, DefaultType] = DEFAULT,
        cutoff_frequency: Union[float, DefaultType] = DEFAULT,
        fuzziness: Union[str, int, DefaultType] = DEFAULT,
        fuzzy_rewrite: Union[str, DefaultType] = DEFAULT,
        fuzzy_transpositions: Union[bool, DefaultType] = DEFAULT,
        lenient: Union[bool, DefaultType] = DEFAULT,
        max_expansions: Union[int, DefaultType] = DEFAULT,
        minimum_should_match: Union[int, str, DefaultType] = DEFAULT,
        operator: Union[Literal["and", "or"], DefaultType] = DEFAULT,
        prefix_length: Union[int, DefaultType] = DEFAULT,
        zero_terms_query: Union[Literal["all", "none"], DefaultType] = DEFAULT,
        boost: Union[float, DefaultType] = DEFAULT,
        _name: Union[str, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (
            ("query", query),
            ("analyzer", analyzer),
            (
                "auto_generate_synonyms_phrase_query",
                auto_generate_synonyms_phrase_query,
            ),
            ("cutoff_frequency", cutoff_frequency),
            ("fuzziness", fuzziness),
            ("fuzzy_rewrite", fuzzy_rewrite),
            ("fuzzy_transpositions", fuzzy_transpositions),
            ("lenient", lenient),
            ("max_expansions", max_expansions),
            ("minimum_should_match", minimum_should_match),
            ("operator", operator),
            ("prefix_length", prefix_length),
            ("zero_terms_query", zero_terms_query),
            ("boost", boost),
            ("_name", _name),
        )
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class MultiTermLookup(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``; ``field`` is stored as a
    string.

    :arg field: (required) A field from which to retrieve terms.
    :arg missing: The value to apply to documents that do not have a
        value. By default, documents without a value are ignored.
    """

    field: Union[str, InstrumentedField, DefaultType]
    missing: Union[str, int, float, bool, DefaultType]

    def __init__(
        self,
        *,
        field: Union[str, InstrumentedField, DefaultType] = DEFAULT,
        missing: Union[str, int, float, bool, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        for key, value in (("field", field), ("missing", missing)):
            if value is DEFAULT:
                continue
            # ``field`` may be an InstrumentedField; store it as a string.
            kwargs[key] = str(value) if key == "field" else value
        super().__init__(kwargs)


class MutualInformationHeuristic(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``.

    :arg background_is_superset: Set to `false` if you defined a custom
        background filter that represents a different set of documents
        that you want to compare to.
    :arg include_negatives: Set to `false` to filter out the terms that
        appear less often in the subset than in documents outside the
        subset.
    """

    background_is_superset: Union[bool, DefaultType]
    include_negatives: Union[bool, DefaultType]

    def __init__(
        self,
        *,
        background_is_superset: Union[bool, DefaultType] = DEFAULT,
        include_negatives: Union[bool, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (
            ("background_is_superset", background_is_superset),
            ("include_negatives", include_negatives),
        )
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class NestedSortValue(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``; ``path`` is stored as a
    string.

    :arg path: (required)
    :arg filter:
    :arg max_children:
    :arg nested:
    """

    path: Union[str, InstrumentedField, DefaultType]
    filter: Union[Query, DefaultType]
    max_children: Union[int, DefaultType]
    nested: Union["NestedSortValue", Dict[str, Any], DefaultType]

    def __init__(
        self,
        *,
        path: Union[str, InstrumentedField, DefaultType] = DEFAULT,
        filter: Union[Query, DefaultType] = DEFAULT,
        max_children: Union[int, DefaultType] = DEFAULT,
        nested: Union["NestedSortValue", Dict[str, Any], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (
            ("path", path),
            ("filter", filter),
            ("max_children", max_children),
            ("nested", nested),
        )
        for key, value in supplied:
            if value is DEFAULT:
                continue
            # ``path`` may be an InstrumentedField; store it as a string.
            kwargs[key] = str(value) if key == "path" else value
        super().__init__(kwargs)


class PercentageScoreHeuristic(AttrDict[Any]):
    """Declares no fields of its own; everything is inherited from ``AttrDict``."""


class PinnedDoc(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``.

    :arg _id: (required) The unique document ID.
    :arg _index: (required) The index that contains the document.
    """

    _id: Union[str, DefaultType]
    _index: Union[str, DefaultType]

    def __init__(
        self,
        *,
        _id: Union[str, DefaultType] = DEFAULT,
        _index: Union[str, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (("_id", _id), ("_index", _index))
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class PrefixQuery(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``.

    :arg value: (required) Beginning characters of terms you wish to find
        in the provided field.
    :arg rewrite: Method used to rewrite the query.
    :arg case_insensitive: Allows ASCII case insensitive matching of the
        value with the indexed field values when set to `true`. Default is
        `false` which means the case sensitivity of matching depends on
        the underlying field’s mapping.
    :arg boost: Floating point number used to decrease or increase the
        relevance scores of the query. Boost values are relative to the
        default value of 1.0. A boost value between 0 and 1.0 decreases
        the relevance score. A value greater than 1.0 increases the
        relevance score. Defaults to `1` if omitted.
    :arg _name:
    """

    value: Union[str, DefaultType]
    rewrite: Union[str, DefaultType]
    case_insensitive: Union[bool, DefaultType]
    boost: Union[float, DefaultType]
    _name: Union[str, DefaultType]

    def __init__(
        self,
        *,
        value: Union[str, DefaultType] = DEFAULT,
        rewrite: Union[str, DefaultType] = DEFAULT,
        case_insensitive: Union[bool, DefaultType] = DEFAULT,
        boost: Union[float, DefaultType] = DEFAULT,
        _name: Union[str, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (
            ("value", value),
            ("rewrite", rewrite),
            ("case_insensitive", case_insensitive),
            ("boost", boost),
            ("_name", _name),
        )
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class QueryVectorBuilder(AttrDict[Any]):
    """
    Keyword-only options bag. ``text_embedding`` is omitted from the
    mapping handed to ``AttrDict`` when left at ``DEFAULT``.

    :arg text_embedding:
    """

    text_embedding: Union["TextEmbedding", Dict[str, Any], DefaultType]

    def __init__(
        self,
        *,
        text_embedding: Union["TextEmbedding", Dict[str, Any], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (("text_embedding", text_embedding),)
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class RankFeatureFunctionLinear(AttrDict[Any]):
    """Declares no fields of its own; everything is inherited from ``AttrDict``."""


class RankFeatureFunctionLogarithm(AttrDict[Any]):
    """
    Keyword-only options bag. ``scaling_factor`` is omitted from the
    mapping handed to ``AttrDict`` when left at ``DEFAULT``.

    :arg scaling_factor: (required) Configurable scaling factor.
    """

    scaling_factor: Union[float, DefaultType]

    def __init__(
        self, *, scaling_factor: Union[float, DefaultType] = DEFAULT, **kwargs: Any
    ):
        supplied = (("scaling_factor", scaling_factor),)
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class RankFeatureFunctionSaturation(AttrDict[Any]):
    """
    Keyword-only options bag. ``pivot`` is omitted from the mapping handed
    to ``AttrDict`` when left at ``DEFAULT``.

    :arg pivot: Configurable pivot value so that the result will be less
        than 0.5.
    """

    pivot: Union[float, DefaultType]

    def __init__(self, *, pivot: Union[float, DefaultType] = DEFAULT, **kwargs: Any):
        supplied = (("pivot", pivot),)
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class RankFeatureFunctionSigmoid(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``.

    :arg pivot: (required) Configurable pivot value so that the result
        will be less than 0.5.
    :arg exponent: (required) Configurable Exponent.
    """

    pivot: Union[float, DefaultType]
    exponent: Union[float, DefaultType]

    def __init__(
        self,
        *,
        pivot: Union[float, DefaultType] = DEFAULT,
        exponent: Union[float, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (("pivot", pivot), ("exponent", exponent))
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class RegexpQuery(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``.

    :arg value: (required) Regular expression for terms you wish to find
        in the provided field.
    :arg case_insensitive: Allows case insensitive matching of the regular
        expression value with the indexed field values when set to `true`.
        When `false`, case sensitivity of matching depends on the
        underlying field’s mapping.
    :arg flags: Enables optional operators for the regular expression.
    :arg max_determinized_states: Maximum number of automaton states
        required for the query. Defaults to `10000` if omitted.
    :arg rewrite: Method used to rewrite the query.
    :arg boost: Floating point number used to decrease or increase the
        relevance scores of the query. Boost values are relative to the
        default value of 1.0. A boost value between 0 and 1.0 decreases
        the relevance score. A value greater than 1.0 increases the
        relevance score. Defaults to `1` if omitted.
    :arg _name:
    """

    value: Union[str, DefaultType]
    case_insensitive: Union[bool, DefaultType]
    flags: Union[str, DefaultType]
    max_determinized_states: Union[int, DefaultType]
    rewrite: Union[str, DefaultType]
    boost: Union[float, DefaultType]
    _name: Union[str, DefaultType]

    def __init__(
        self,
        *,
        value: Union[str, DefaultType] = DEFAULT,
        case_insensitive: Union[bool, DefaultType] = DEFAULT,
        flags: Union[str, DefaultType] = DEFAULT,
        max_determinized_states: Union[int, DefaultType] = DEFAULT,
        rewrite: Union[str, DefaultType] = DEFAULT,
        boost: Union[float, DefaultType] = DEFAULT,
        _name: Union[str, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (
            ("value", value),
            ("case_insensitive", case_insensitive),
            ("flags", flags),
            ("max_determinized_states", max_determinized_states),
            ("rewrite", rewrite),
            ("boost", boost),
            ("_name", _name),
        )
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class RegressionInferenceOptions(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``; ``results_field`` is stored
    as a string.

    :arg results_field: The field that is added to incoming documents to
        contain the inference prediction. Defaults to predicted_value.
    :arg num_top_feature_importance_values: Specifies the maximum number
        of feature importance values per document.
    """

    results_field: Union[str, InstrumentedField, DefaultType]
    num_top_feature_importance_values: Union[int, DefaultType]

    def __init__(
        self,
        *,
        results_field: Union[str, InstrumentedField, DefaultType] = DEFAULT,
        num_top_feature_importance_values: Union[int, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (
            ("results_field", results_field),
            ("num_top_feature_importance_values", num_top_feature_importance_values),
        )
        for key, value in supplied:
            if value is DEFAULT:
                continue
            # ``results_field`` may be an InstrumentedField; store it as a string.
            kwargs[key] = str(value) if key == "results_field" else value
        super().__init__(kwargs)


class ScoreSort(AttrDict[Any]):
    """
    Keyword-only options bag. ``order`` is omitted from the mapping handed
    to ``AttrDict`` when left at ``DEFAULT``.

    :arg order:
    """

    order: Union[Literal["asc", "desc"], DefaultType]

    def __init__(
        self,
        *,
        order: Union[Literal["asc", "desc"], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (("order", order),)
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class Script(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``.

    :arg source: The script source.
    :arg id: The `id` for a stored script.
    :arg params: Specifies any named parameters that are passed into the
        script as variables. Use parameters instead of hard-coded values
        to decrease compile time.
    :arg lang: Specifies the language the script is written in. Defaults
        to `painless` if omitted.
    :arg options:
    """

    source: Union[str, DefaultType]
    id: Union[str, DefaultType]
    params: Union[Mapping[str, Any], DefaultType]
    lang: Union[Literal["painless", "expression", "mustache", "java"], DefaultType]
    options: Union[Mapping[str, str], DefaultType]

    def __init__(
        self,
        *,
        source: Union[str, DefaultType] = DEFAULT,
        id: Union[str, DefaultType] = DEFAULT,
        params: Union[Mapping[str, Any], DefaultType] = DEFAULT,
        lang: Union[
            Literal["painless", "expression", "mustache", "java"], DefaultType
        ] = DEFAULT,
        options: Union[Mapping[str, str], DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (
            ("source", source),
            ("id", id),
            ("params", params),
            ("lang", lang),
            ("options", options),
        )
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)


class ScriptField(AttrDict[Any]):
    """
    Keyword-only options bag. Arguments left at ``DEFAULT`` are omitted
    from the mapping handed to ``AttrDict``.

    :arg script: (required)
    :arg ignore_failure:
    """

    script: Union["Script", Dict[str, Any], DefaultType]
    ignore_failure: Union[bool, DefaultType]

    def __init__(
        self,
        *,
        script: Union["Script", Dict[str, Any], DefaultType] = DEFAULT,
        ignore_failure: Union[bool, DefaultType] = DEFAULT,
        **kwargs: Any,
    ):
        supplied = (("script", script), ("ignore_failure", ignore_failure))
        kwargs.update((k, v) for k, v in supplied if v is not DEFAULT)
        super().__init__(kwargs)
order: Union[Literal["asc", "desc"], DefaultType] = DEFAULT, + type: Union[Literal["string", "number", "version"], DefaultType] = DEFAULT, + mode: Union[ + Literal["min", "max", "sum", "avg", "median"], DefaultType + ] = DEFAULT, + nested: Union["NestedSortValue", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if script is not DEFAULT: + kwargs["script"] = script + if order is not DEFAULT: + kwargs["order"] = order + if type is not DEFAULT: + kwargs["type"] = type + if mode is not DEFAULT: + kwargs["mode"] = mode + if nested is not DEFAULT: + kwargs["nested"] = nested + super().__init__(kwargs) + + +class ScriptedHeuristic(AttrDict[Any]): + """ + :arg script: (required) + """ + + script: Union["Script", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + script: Union["Script", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if script is not DEFAULT: + kwargs["script"] = script + super().__init__(kwargs) + + +class ShapeFieldQuery(AttrDict[Any]): + """ + :arg indexed_shape: Queries using a pre-indexed shape. + :arg relation: Spatial relation between the query shape and the + document shape. + :arg shape: Queries using an inline shape definition in GeoJSON or + Well Known Text (WKT) format. 
+ """ + + indexed_shape: Union["FieldLookup", Dict[str, Any], DefaultType] + relation: Union[ + Literal["intersects", "disjoint", "within", "contains"], DefaultType + ] + shape: Any + + def __init__( + self, + *, + indexed_shape: Union["FieldLookup", Dict[str, Any], DefaultType] = DEFAULT, + relation: Union[ + Literal["intersects", "disjoint", "within", "contains"], DefaultType + ] = DEFAULT, + shape: Any = DEFAULT, + **kwargs: Any, + ): + if indexed_shape is not DEFAULT: + kwargs["indexed_shape"] = indexed_shape + if relation is not DEFAULT: + kwargs["relation"] = relation + if shape is not DEFAULT: + kwargs["shape"] = shape + super().__init__(kwargs) + + +class SortOptions(AttrDict[Any]): + """ + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + :arg _score: + :arg _doc: + :arg _geo_distance: + :arg _script: + """ + + _field: Union[str, "InstrumentedField", "DefaultType"] + _value: Union["FieldSort", Dict[str, Any], "DefaultType"] + _score: Union["ScoreSort", Dict[str, Any], DefaultType] + _doc: Union["ScoreSort", Dict[str, Any], DefaultType] + _geo_distance: Union["GeoDistanceSort", Dict[str, Any], DefaultType] + _script: Union["ScriptSort", Dict[str, Any], DefaultType] + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["FieldSort", Dict[str, Any], "DefaultType"] = DEFAULT, + *, + _score: Union["ScoreSort", Dict[str, Any], DefaultType] = DEFAULT, + _doc: Union["ScoreSort", Dict[str, Any], DefaultType] = DEFAULT, + _geo_distance: Union["GeoDistanceSort", Dict[str, Any], DefaultType] = DEFAULT, + _script: Union["ScriptSort", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + if _score is not DEFAULT: + kwargs["_score"] = _score + if _doc is not DEFAULT: + kwargs["_doc"] = _doc + if _geo_distance is not DEFAULT: + kwargs["_geo_distance"] = _geo_distance + if _script is not DEFAULT: + 
kwargs["_script"] = _script + super().__init__(kwargs) + + +class SourceFilter(AttrDict[Any]): + """ + :arg excludes: + :arg includes: + """ + + excludes: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] + includes: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] + + def __init__( + self, + *, + excludes: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] = DEFAULT, + includes: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] = DEFAULT, + **kwargs: Any, + ): + if excludes is not DEFAULT: + kwargs["excludes"] = str(excludes) + if includes is not DEFAULT: + kwargs["includes"] = str(includes) + super().__init__(kwargs) + + +class SpanContainingQuery(AttrDict[Any]): + """ + :arg big: (required) Can be any span query. Matching spans from `big` + that contain matches from `little` are returned. + :arg little: (required) Can be any span query. Matching spans from + `big` that contain matches from `little` are returned. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + big: Union["SpanQuery", Dict[str, Any], DefaultType] + little: Union["SpanQuery", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + big: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + little: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if big is not DEFAULT: + kwargs["big"] = big + if little is not DEFAULT: + kwargs["little"] = little + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanFieldMaskingQuery(AttrDict[Any]): + """ + :arg field: (required) + :arg query: (required) + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + field: Union[str, InstrumentedField, DefaultType] + query: Union["SpanQuery", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + query: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + if query is not DEFAULT: + kwargs["query"] = query + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanFirstQuery(AttrDict[Any]): + """ + :arg end: (required) Controls the maximum end position permitted in a + match. 
+ :arg match: (required) Can be any other span type query. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + end: Union[int, DefaultType] + match: Union["SpanQuery", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + end: Union[int, DefaultType] = DEFAULT, + match: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if end is not DEFAULT: + kwargs["end"] = end + if match is not DEFAULT: + kwargs["match"] = match + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanMultiTermQuery(AttrDict[Any]): + """ + :arg match: (required) Should be a multi term query (one of + `wildcard`, `fuzzy`, `prefix`, `range`, or `regexp` query). + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + match: Union[Query, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + match: Union[Query, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if match is not DEFAULT: + kwargs["match"] = match + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanNearQuery(AttrDict[Any]): + """ + :arg clauses: (required) Array of one or more other span type queries. + :arg in_order: Controls whether matches are required to be in-order. + :arg slop: Controls the maximum number of intervening unmatched + positions permitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + clauses: Union[Sequence["SpanQuery"], Sequence[Dict[str, Any]], DefaultType] + in_order: Union[bool, DefaultType] + slop: Union[int, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + clauses: Union[ + Sequence["SpanQuery"], Sequence[Dict[str, Any]], DefaultType + ] = DEFAULT, + in_order: Union[bool, DefaultType] = DEFAULT, + slop: Union[int, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if clauses is not DEFAULT: + kwargs["clauses"] = clauses + if in_order is not DEFAULT: + kwargs["in_order"] = in_order + if slop is not DEFAULT: + kwargs["slop"] = slop + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanNotQuery(AttrDict[Any]): + """ + :arg exclude: (required) Span query whose matches must not overlap + those returned. + :arg include: (required) Span query whose matches are filtered. + :arg dist: The number of tokens from within the include span that + can’t have overlap with the exclude span. Equivalent to setting + both `pre` and `post`. + :arg post: The number of tokens after the include span that can’t have + overlap with the exclude span. + :arg pre: The number of tokens before the include span that can’t have + overlap with the exclude span. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + exclude: Union["SpanQuery", Dict[str, Any], DefaultType] + include: Union["SpanQuery", Dict[str, Any], DefaultType] + dist: Union[int, DefaultType] + post: Union[int, DefaultType] + pre: Union[int, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + exclude: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + include: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + dist: Union[int, DefaultType] = DEFAULT, + post: Union[int, DefaultType] = DEFAULT, + pre: Union[int, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if exclude is not DEFAULT: + kwargs["exclude"] = exclude + if include is not DEFAULT: + kwargs["include"] = include + if dist is not DEFAULT: + kwargs["dist"] = dist + if post is not DEFAULT: + kwargs["post"] = post + if pre is not DEFAULT: + kwargs["pre"] = pre + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanOrQuery(AttrDict[Any]): + """ + :arg clauses: (required) Array of one or more other span type queries. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + clauses: Union[Sequence["SpanQuery"], Sequence[Dict[str, Any]], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + clauses: Union[ + Sequence["SpanQuery"], Sequence[Dict[str, Any]], DefaultType + ] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if clauses is not DEFAULT: + kwargs["clauses"] = clauses + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanQuery(AttrDict[Any]): + """ + :arg span_containing: Accepts a list of span queries, but only returns + those spans which also match a second span query. + :arg span_field_masking: Allows queries like `span_near` or `span_or` + across different fields. + :arg span_first: Accepts another span query whose matches must appear + within the first N positions of the field. + :arg span_gap: + :arg span_multi: Wraps a `term`, `range`, `prefix`, `wildcard`, + `regexp`, or `fuzzy` query. + :arg span_near: Accepts multiple span queries whose matches must be + within the specified distance of each other, and possibly in the + same order. + :arg span_not: Wraps another span query, and excludes any documents + which match that query. + :arg span_or: Combines multiple span queries and returns documents + which match any of the specified queries. + :arg span_term: The equivalent of the `term` query but for use with + other span queries. + :arg span_within: The result from a single span query is returned as + long is its span falls within the spans returned by a list of + other span queries. 
+ """ + + span_containing: Union["SpanContainingQuery", Dict[str, Any], DefaultType] + span_field_masking: Union["SpanFieldMaskingQuery", Dict[str, Any], DefaultType] + span_first: Union["SpanFirstQuery", Dict[str, Any], DefaultType] + span_gap: Union[Mapping[Union[str, InstrumentedField], int], DefaultType] + span_multi: Union["SpanMultiTermQuery", Dict[str, Any], DefaultType] + span_near: Union["SpanNearQuery", Dict[str, Any], DefaultType] + span_not: Union["SpanNotQuery", Dict[str, Any], DefaultType] + span_or: Union["SpanOrQuery", Dict[str, Any], DefaultType] + span_term: Union[ + Mapping[Union[str, InstrumentedField], "SpanTermQuery"], + Dict[str, Any], + DefaultType, + ] + span_within: Union["SpanWithinQuery", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + span_containing: Union[ + "SpanContainingQuery", Dict[str, Any], DefaultType + ] = DEFAULT, + span_field_masking: Union[ + "SpanFieldMaskingQuery", Dict[str, Any], DefaultType + ] = DEFAULT, + span_first: Union["SpanFirstQuery", Dict[str, Any], DefaultType] = DEFAULT, + span_gap: Union[ + Mapping[Union[str, InstrumentedField], int], DefaultType + ] = DEFAULT, + span_multi: Union["SpanMultiTermQuery", Dict[str, Any], DefaultType] = DEFAULT, + span_near: Union["SpanNearQuery", Dict[str, Any], DefaultType] = DEFAULT, + span_not: Union["SpanNotQuery", Dict[str, Any], DefaultType] = DEFAULT, + span_or: Union["SpanOrQuery", Dict[str, Any], DefaultType] = DEFAULT, + span_term: Union[ + Mapping[Union[str, InstrumentedField], "SpanTermQuery"], + Dict[str, Any], + DefaultType, + ] = DEFAULT, + span_within: Union["SpanWithinQuery", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if span_containing is not DEFAULT: + kwargs["span_containing"] = span_containing + if span_field_masking is not DEFAULT: + kwargs["span_field_masking"] = span_field_masking + if span_first is not DEFAULT: + kwargs["span_first"] = span_first + if span_gap is not DEFAULT: + kwargs["span_gap"] = str(span_gap) + if 
span_multi is not DEFAULT: + kwargs["span_multi"] = span_multi + if span_near is not DEFAULT: + kwargs["span_near"] = span_near + if span_not is not DEFAULT: + kwargs["span_not"] = span_not + if span_or is not DEFAULT: + kwargs["span_or"] = span_or + if span_term is not DEFAULT: + kwargs["span_term"] = str(span_term) + if span_within is not DEFAULT: + kwargs["span_within"] = span_within + super().__init__(kwargs) + + +class SpanTermQuery(AttrDict[Any]): + """ + :arg value: (required) + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + value: Union[str, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + value: Union[str, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if value is not DEFAULT: + kwargs["value"] = value + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanWithinQuery(AttrDict[Any]): + """ + :arg big: (required) Can be any span query. Matching spans from + `little` that are enclosed within `big` are returned. + :arg little: (required) Can be any span query. Matching spans from + `little` that are enclosed within `big` are returned. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + big: Union["SpanQuery", Dict[str, Any], DefaultType] + little: Union["SpanQuery", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + big: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + little: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if big is not DEFAULT: + kwargs["big"] = big + if little is not DEFAULT: + kwargs["little"] = little + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class TDigest(AttrDict[Any]): + """ + :arg compression: Limits the maximum number of nodes used by the + underlying TDigest algorithm to `20 * compression`, enabling + control of memory usage and approximation error. + """ + + compression: Union[int, DefaultType] + + def __init__( + self, *, compression: Union[int, DefaultType] = DEFAULT, **kwargs: Any + ): + if compression is not DEFAULT: + kwargs["compression"] = compression + super().__init__(kwargs) + + +class TermQuery(AttrDict[Any]): + """ + :arg value: (required) Term you wish to find in the provided field. + :arg case_insensitive: Allows ASCII case insensitive matching of the + value with the indexed field values when set to `true`. When + `false`, the case sensitivity of matching depends on the + underlying field’s mapping. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + value: Union[int, float, str, bool, None, Any, DefaultType] + case_insensitive: Union[bool, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + value: Union[int, float, str, bool, None, Any, DefaultType] = DEFAULT, + case_insensitive: Union[bool, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if value is not DEFAULT: + kwargs["value"] = value + if case_insensitive is not DEFAULT: + kwargs["case_insensitive"] = case_insensitive + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class TermsLookup(AttrDict[Any]): + """ + :arg index: (required) + :arg id: (required) + :arg path: (required) + :arg routing: + """ + + index: Union[str, DefaultType] + id: Union[str, DefaultType] + path: Union[str, InstrumentedField, DefaultType] + routing: Union[str, DefaultType] + + def __init__( + self, + *, + index: Union[str, DefaultType] = DEFAULT, + id: Union[str, DefaultType] = DEFAULT, + path: Union[str, InstrumentedField, DefaultType] = DEFAULT, + routing: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if index is not DEFAULT: + kwargs["index"] = index + if id is not DEFAULT: + kwargs["id"] = id + if path is not DEFAULT: + kwargs["path"] = str(path) + if routing is not DEFAULT: + kwargs["routing"] = routing + super().__init__(kwargs) + + +class TermsPartition(AttrDict[Any]): + """ + :arg num_partitions: (required) The number of partitions. + :arg partition: (required) The partition number for this request. 
+ """ + + num_partitions: Union[int, DefaultType] + partition: Union[int, DefaultType] + + def __init__( + self, + *, + num_partitions: Union[int, DefaultType] = DEFAULT, + partition: Union[int, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if num_partitions is not DEFAULT: + kwargs["num_partitions"] = num_partitions + if partition is not DEFAULT: + kwargs["partition"] = partition + super().__init__(kwargs) + + +class TermsSetQuery(AttrDict[Any]): + """ + :arg terms: (required) Array of terms you wish to find in the provided + field. + :arg minimum_should_match: Specification describing number of matching + terms required to return a document. + :arg minimum_should_match_field: Numeric field containing the number + of matching terms required to return a document. + :arg minimum_should_match_script: Custom script containing the number + of matching terms required to return a document. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + terms: Union[Sequence[str], DefaultType] + minimum_should_match: Union[int, str, DefaultType] + minimum_should_match_field: Union[str, InstrumentedField, DefaultType] + minimum_should_match_script: Union["Script", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + terms: Union[Sequence[str], DefaultType] = DEFAULT, + minimum_should_match: Union[int, str, DefaultType] = DEFAULT, + minimum_should_match_field: Union[ + str, InstrumentedField, DefaultType + ] = DEFAULT, + minimum_should_match_script: Union[ + "Script", Dict[str, Any], DefaultType + ] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if terms is not DEFAULT: + kwargs["terms"] = terms + if minimum_should_match is not DEFAULT: + kwargs["minimum_should_match"] = minimum_should_match + if minimum_should_match_field is not DEFAULT: + kwargs["minimum_should_match_field"] = str(minimum_should_match_field) + if minimum_should_match_script is not DEFAULT: + kwargs["minimum_should_match_script"] = minimum_should_match_script + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class TestPopulation(AttrDict[Any]): + """ + :arg field: (required) The field to aggregate. + :arg script: + :arg filter: A filter used to define a set of records to run unpaired + t-test on. 
+ """ + + field: Union[str, InstrumentedField, DefaultType] + script: Union["Script", Dict[str, Any], DefaultType] + filter: Union[Query, DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + script: Union["Script", Dict[str, Any], DefaultType] = DEFAULT, + filter: Union[Query, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + if script is not DEFAULT: + kwargs["script"] = script + if filter is not DEFAULT: + kwargs["filter"] = filter + super().__init__(kwargs) + + +class TextEmbedding(AttrDict[Any]): + """ + :arg model_id: (required) + :arg model_text: (required) + """ + + model_id: Union[str, DefaultType] + model_text: Union[str, DefaultType] + + def __init__( + self, + *, + model_id: Union[str, DefaultType] = DEFAULT, + model_text: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if model_id is not DEFAULT: + kwargs["model_id"] = model_id + if model_text is not DEFAULT: + kwargs["model_text"] = model_text + super().__init__(kwargs) + + +class TextExpansionQuery(AttrDict[Any]): + """ + :arg model_id: (required) The text expansion NLP model to use + :arg model_text: (required) The query text + :arg pruning_config: Token pruning configurations + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + model_id: Union[str, DefaultType] + model_text: Union[str, DefaultType] + pruning_config: Union["TokenPruningConfig", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + model_id: Union[str, DefaultType] = DEFAULT, + model_text: Union[str, DefaultType] = DEFAULT, + pruning_config: Union[ + "TokenPruningConfig", Dict[str, Any], DefaultType + ] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if model_id is not DEFAULT: + kwargs["model_id"] = model_id + if model_text is not DEFAULT: + kwargs["model_text"] = model_text + if pruning_config is not DEFAULT: + kwargs["pruning_config"] = pruning_config + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class TokenPruningConfig(AttrDict[Any]): + """ + :arg tokens_freq_ratio_threshold: Tokens whose frequency is more than + this threshold times the average frequency of all tokens in the + specified field are considered outliers and pruned. Defaults to + `5` if omitted. + :arg tokens_weight_threshold: Tokens whose weight is less than this + threshold are considered nonsignificant and pruned. Defaults to + `0.4` if omitted. + :arg only_score_pruned_tokens: Whether to only score pruned tokens, vs + only scoring kept tokens. 
+ """ + + tokens_freq_ratio_threshold: Union[int, DefaultType] + tokens_weight_threshold: Union[float, DefaultType] + only_score_pruned_tokens: Union[bool, DefaultType] + + def __init__( + self, + *, + tokens_freq_ratio_threshold: Union[int, DefaultType] = DEFAULT, + tokens_weight_threshold: Union[float, DefaultType] = DEFAULT, + only_score_pruned_tokens: Union[bool, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if tokens_freq_ratio_threshold is not DEFAULT: + kwargs["tokens_freq_ratio_threshold"] = tokens_freq_ratio_threshold + if tokens_weight_threshold is not DEFAULT: + kwargs["tokens_weight_threshold"] = tokens_weight_threshold + if only_score_pruned_tokens is not DEFAULT: + kwargs["only_score_pruned_tokens"] = only_score_pruned_tokens + super().__init__(kwargs) + + +class TopLeftBottomRightGeoBounds(AttrDict[Any]): + """ + :arg top_left: (required) + :arg bottom_right: (required) + """ + + top_left: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] + bottom_right: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] + + def __init__( + self, + *, + top_left: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] = DEFAULT, + bottom_right: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] = DEFAULT, + **kwargs: Any, + ): + if top_left is not DEFAULT: + kwargs["top_left"] = top_left + if bottom_right is not DEFAULT: + kwargs["bottom_right"] = bottom_right + super().__init__(kwargs) + + +class TopMetricsValue(AttrDict[Any]): + """ + :arg field: (required) A field to return as a metric. 
+ """ + + field: Union[str, InstrumentedField, DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + super().__init__(kwargs) + + +class TopRightBottomLeftGeoBounds(AttrDict[Any]): + """ + :arg top_right: (required) + :arg bottom_left: (required) + """ + + top_right: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] + bottom_left: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] + + def __init__( + self, + *, + top_right: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] = DEFAULT, + bottom_left: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] = DEFAULT, + **kwargs: Any, + ): + if top_right is not DEFAULT: + kwargs["top_right"] = top_right + if bottom_left is not DEFAULT: + kwargs["bottom_left"] = bottom_left + super().__init__(kwargs) + + +class WeightedAverageValue(AttrDict[Any]): + """ + :arg field: The field from which to extract the values or weights. + :arg missing: A value or weight to use if the field is missing. 
+ :arg script: + """ + + field: Union[str, InstrumentedField, DefaultType] + missing: Union[float, DefaultType] + script: Union["Script", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + missing: Union[float, DefaultType] = DEFAULT, + script: Union["Script", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + if missing is not DEFAULT: + kwargs["missing"] = missing + if script is not DEFAULT: + kwargs["script"] = script + super().__init__(kwargs) + + +class WeightedTokensQuery(AttrDict[Any]): + """ + :arg tokens: (required) The tokens representing this query + :arg pruning_config: Token pruning configurations + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + tokens: Union[Mapping[str, float], DefaultType] + pruning_config: Union["TokenPruningConfig", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + tokens: Union[Mapping[str, float], DefaultType] = DEFAULT, + pruning_config: Union[ + "TokenPruningConfig", Dict[str, Any], DefaultType + ] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if tokens is not DEFAULT: + kwargs["tokens"] = tokens + if pruning_config is not DEFAULT: + kwargs["pruning_config"] = pruning_config + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class WildcardQuery(AttrDict[Any]): + """ + :arg case_insensitive: Allows case insensitive matching of the pattern + with the indexed field values when set to true. Default is false + which means the case sensitivity of matching depends on the + underlying field’s mapping. + :arg rewrite: Method used to rewrite the query. + :arg value: Wildcard pattern for terms you wish to find in the + provided field. Required, when wildcard is not set. + :arg wildcard: Wildcard pattern for terms you wish to find in the + provided field. Required, when value is not set. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + case_insensitive: Union[bool, DefaultType] + rewrite: Union[str, DefaultType] + value: Union[str, DefaultType] + wildcard: Union[str, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + case_insensitive: Union[bool, DefaultType] = DEFAULT, + rewrite: Union[str, DefaultType] = DEFAULT, + value: Union[str, DefaultType] = DEFAULT, + wildcard: Union[str, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if case_insensitive is not DEFAULT: + kwargs["case_insensitive"] = case_insensitive + if rewrite is not DEFAULT: + kwargs["rewrite"] = rewrite + if value is not DEFAULT: + kwargs["value"] = value + if wildcard is not DEFAULT: + kwargs["wildcard"] = wildcard + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class WktGeoBounds(AttrDict[Any]): + """ + :arg wkt: (required) + """ + + wkt: Union[str, DefaultType] + + def __init__(self, *, wkt: Union[str, DefaultType] = DEFAULT, **kwargs: Any): + if wkt is not DEFAULT: + kwargs["wkt"] = wkt + super().__init__(kwargs) + + +class AdjacencyMatrixAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["AdjacencyMatrixBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "AdjacencyMatrixBucket"]: + return self.buckets # type: ignore + + +class AdjacencyMatrixBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: str + doc_count: int + + +class AggregationBreakdown(AttrDict[Any]): + """ + :arg build_aggregation: (required) + :arg build_aggregation_count: (required) + :arg build_leaf_collector: (required) + :arg build_leaf_collector_count: (required) + :arg collect: (required) + :arg collect_count: (required) + :arg 
initialize: (required) + :arg initialize_count: (required) + :arg reduce: (required) + :arg reduce_count: (required) + :arg post_collection: + :arg post_collection_count: + """ + + build_aggregation: int + build_aggregation_count: int + build_leaf_collector: int + build_leaf_collector_count: int + collect: int + collect_count: int + initialize: int + initialize_count: int + reduce: int + reduce_count: int + post_collection: int + post_collection_count: int + + +class AggregationProfile(AttrDict[Any]): + """ + :arg breakdown: (required) + :arg description: (required) + :arg time_in_nanos: (required) + :arg type: (required) + :arg debug: + :arg children: + """ + + breakdown: "AggregationBreakdown" + description: str + time_in_nanos: Any + type: str + debug: "AggregationProfileDebug" + children: Sequence["AggregationProfile"] + + +class AggregationProfileDebug(AttrDict[Any]): + """ + :arg segments_with_multi_valued_ords: + :arg collection_strategy: + :arg segments_with_single_valued_ords: + :arg total_buckets: + :arg built_buckets: + :arg result_strategy: + :arg has_filter: + :arg delegate: + :arg delegate_debug: + :arg chars_fetched: + :arg extract_count: + :arg extract_ns: + :arg values_fetched: + :arg collect_analyzed_ns: + :arg collect_analyzed_count: + :arg surviving_buckets: + :arg ordinals_collectors_used: + :arg ordinals_collectors_overhead_too_high: + :arg string_hashing_collectors_used: + :arg numeric_collectors_used: + :arg empty_collectors_used: + :arg deferred_aggregators: + :arg segments_with_doc_count_field: + :arg segments_with_deleted_docs: + :arg filters: + :arg segments_counted: + :arg segments_collected: + :arg map_reducer: + :arg brute_force_used: + :arg dynamic_pruning_attempted: + :arg dynamic_pruning_used: + :arg skipped_due_to_no_data: + """ + + segments_with_multi_valued_ords: int + collection_strategy: str + segments_with_single_valued_ords: int + total_buckets: int + built_buckets: int + result_strategy: str + has_filter: bool + delegate: 
str + delegate_debug: "AggregationProfileDebug" + chars_fetched: int + extract_count: int + extract_ns: int + values_fetched: int + collect_analyzed_ns: int + collect_analyzed_count: int + surviving_buckets: int + ordinals_collectors_used: int + ordinals_collectors_overhead_too_high: int + string_hashing_collectors_used: int + numeric_collectors_used: int + empty_collectors_used: int + deferred_aggregators: Sequence[str] + segments_with_doc_count_field: int + segments_with_deleted_docs: int + filters: Sequence["AggregationProfileDelegateDebugFilter"] + segments_counted: int + segments_collected: int + map_reducer: str + brute_force_used: int + dynamic_pruning_attempted: int + dynamic_pruning_used: int + skipped_due_to_no_data: int + + +class AggregationProfileDelegateDebugFilter(AttrDict[Any]): + """ + :arg results_from_metadata: + :arg query: + :arg specialized_for: + :arg segments_counted_in_constant_time: + """ + + results_from_metadata: int + query: str + specialized_for: str + segments_counted_in_constant_time: int + + +class ArrayPercentilesItem(AttrDict[Any]): + """ + :arg key: (required) + :arg value: (required) + :arg value_as_string: + """ + + key: str + value: Union[float, None] + value_as_string: str + + +class AutoDateHistogramAggregate(AttrDict[Any]): + """ + :arg interval: (required) + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + interval: str + buckets: Sequence["DateHistogramBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "DateHistogramBucket"]: + return self.buckets # type: ignore + + +class AvgAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. 
+ :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class BoxPlotAggregate(AttrDict[Any]): + """ + :arg min: (required) + :arg max: (required) + :arg q1: (required) + :arg q2: (required) + :arg q3: (required) + :arg lower: (required) + :arg upper: (required) + :arg min_as_string: + :arg max_as_string: + :arg q1_as_string: + :arg q2_as_string: + :arg q3_as_string: + :arg lower_as_string: + :arg upper_as_string: + :arg meta: + """ + + min: float + max: float + q1: float + q2: float + q3: float + lower: float + upper: float + min_as_string: str + max_as_string: str + q1_as_string: str + q2_as_string: str + q3_as_string: str + lower_as_string: str + upper_as_string: str + meta: Mapping[str, Any] + + +class BucketMetricValueAggregate(AttrDict[Any]): + """ + :arg keys: (required) + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. 
+ :arg value_as_string: + :arg meta: + """ + + keys: Sequence[str] # type: ignore[assignment] + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class BulkIndexByScrollFailure(AttrDict[Any]): + """ + :arg cause: (required) + :arg id: (required) + :arg index: (required) + :arg status: (required) + :arg type: (required) + """ + + cause: "ErrorCause" + id: str + index: str + status: int + type: str + + +class CardinalityAggregate(AttrDict[Any]): + """ + :arg value: (required) + :arg meta: + """ + + value: int + meta: Mapping[str, Any] + + +class ChildrenAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class ClusterDetails(AttrDict[Any]): + """ + :arg status: (required) + :arg indices: (required) + :arg timed_out: (required) + :arg took: + :arg _shards: + :arg failures: + """ + + status: Literal["running", "successful", "partial", "skipped", "failed"] + indices: str + timed_out: bool + took: Any + _shards: "ShardStatistics" + failures: Sequence["ShardFailure"] + + +class ClusterStatistics(AttrDict[Any]): + """ + :arg skipped: (required) + :arg successful: (required) + :arg total: (required) + :arg running: (required) + :arg partial: (required) + :arg failed: (required) + :arg details: + """ + + skipped: int + successful: int + total: int + running: int + partial: int + failed: int + details: Mapping[str, "ClusterDetails"] + + +class Collector(AttrDict[Any]): + """ + :arg name: (required) + :arg reason: (required) + :arg time_in_nanos: (required) + :arg children: + """ + + name: str + reason: str + time_in_nanos: Any + children: Sequence["Collector"] + + +class CompletionSuggest(AttrDict[Any]): + """ + :arg options: (required) + :arg length: (required) + :arg offset: (required) + :arg text: (required) + """ + + options: Sequence["CompletionSuggestOption"] + length: int + offset: int + text: str + + +class CompletionSuggestOption(AttrDict[Any]): + """ + :arg 
text: (required) + :arg collate_match: + :arg contexts: + :arg fields: + :arg _id: + :arg _index: + :arg _routing: + :arg _score: + :arg _source: + :arg score: + """ + + text: str + collate_match: bool + contexts: Mapping[ + str, + Sequence[ + Union[ + str, Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str] + ] + ], + ] + fields: Mapping[str, Any] + _id: str + _index: str + _routing: str + _score: float + _source: Any + score: float + + +class CompositeAggregate(AttrDict[Any]): + """ + :arg after_key: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + after_key: Mapping[str, Union[int, float, str, bool, None, Any]] + buckets: Sequence["CompositeBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "CompositeBucket"]: + return self.buckets # type: ignore + + +class CompositeBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: Mapping[str, Union[int, float, str, bool, None, Any]] + doc_count: int + + +class CumulativeCardinalityAggregate(AttrDict[Any]): + """ + Result of the `cumulative_cardinality` aggregation + + :arg value: (required) + :arg value_as_string: + :arg meta: + """ + + value: int + value_as_string: str + meta: Mapping[str, Any] + + +class DateHistogramAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["DateHistogramBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "DateHistogramBucket"]: + return self.buckets # type: ignore + + +class DateHistogramBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg key_as_string: + """ + + key: Any + doc_count: int + key_as_string: str + + +class DateRangeAggregate(AttrDict[Any]): + """ + Result of a `date_range` aggregation. 
Same format as for a `range` + aggregation: `from` and `to` in `buckets` are milliseconds since the + Epoch, represented as a floating point number. + + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["RangeBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "RangeBucket"]: + return self.buckets # type: ignore + + +class DerivativeAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg normalized_value: + :arg normalized_value_as_string: + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + normalized_value: float + normalized_value_as_string: str + value_as_string: str + meta: Mapping[str, Any] + + +class DfsKnnProfile(AttrDict[Any]): + """ + :arg query: (required) + :arg rewrite_time: (required) + :arg collector: (required) + :arg vector_operations_count: + """ + + query: Sequence["KnnQueryProfileResult"] + rewrite_time: int + collector: Sequence["KnnCollectorResult"] + vector_operations_count: int + + +class DfsProfile(AttrDict[Any]): + """ + :arg statistics: + :arg knn: + """ + + statistics: "DfsStatisticsProfile" + knn: Sequence["DfsKnnProfile"] + + +class DfsStatisticsBreakdown(AttrDict[Any]): + """ + :arg collection_statistics: (required) + :arg collection_statistics_count: (required) + :arg create_weight: (required) + :arg create_weight_count: (required) + :arg rewrite: (required) + :arg rewrite_count: (required) + :arg term_statistics: (required) + :arg term_statistics_count: (required) + """ + + collection_statistics: int + collection_statistics_count: int + create_weight: int + create_weight_count: int + rewrite: int + rewrite_count: int + term_statistics: int + term_statistics_count: int + + +class DfsStatisticsProfile(AttrDict[Any]): + """ + :arg type: (required) + :arg description: (required) + :arg 
time_in_nanos: (required) + :arg breakdown: (required) + :arg time: + :arg debug: + :arg children: + """ + + type: str + description: str + time_in_nanos: Any + breakdown: "DfsStatisticsBreakdown" + time: Any + debug: Mapping[str, Any] + children: Sequence["DfsStatisticsProfile"] + + +class DoubleTermsAggregate(AttrDict[Any]): + """ + Result of a `terms` aggregation when the field is some kind of decimal + number like a float, double, or distance. + + :arg doc_count_error_upper_bound: + :arg sum_other_doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + doc_count_error_upper_bound: int + sum_other_doc_count: int + buckets: Sequence["DoubleTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "DoubleTermsBucket"]: + return self.buckets # type: ignore + + +class DoubleTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg key_as_string: + :arg doc_count_error_upper_bound: + """ + + key: float + doc_count: int + key_as_string: str + doc_count_error_upper_bound: int + + +class ErrorCause(AttrDict[Any]): + """ + Cause and details about a request failure. This class defines the + properties common to all error types. Additional details are also + provided, that depend on the error type. + + :arg type: (required) The type of error + :arg reason: A human-readable explanation of the error, in English + :arg stack_trace: The server stack trace. Present only if the + `error_trace=true` parameter was sent with the request. 
+ :arg caused_by: + :arg root_cause: + :arg suppressed: + """ + + type: str + reason: str + stack_trace: str + caused_by: "ErrorCause" + root_cause: Sequence["ErrorCause"] + suppressed: Sequence["ErrorCause"] + + +class Explanation(AttrDict[Any]): + """ + :arg description: (required) + :arg details: (required) + :arg value: (required) + """ + + description: str + details: Sequence["ExplanationDetail"] + value: float + + +class ExplanationDetail(AttrDict[Any]): + """ + :arg description: (required) + :arg value: (required) + :arg details: + """ + + description: str + value: float + details: Sequence["ExplanationDetail"] + + +class ExtendedStatsAggregate(AttrDict[Any]): + """ + :arg sum_of_squares: (required) + :arg variance: (required) + :arg variance_population: (required) + :arg variance_sampling: (required) + :arg std_deviation: (required) + :arg std_deviation_population: (required) + :arg std_deviation_sampling: (required) + :arg count: (required) + :arg min: (required) + :arg max: (required) + :arg avg: (required) + :arg sum: (required) + :arg std_deviation_bounds: + :arg sum_of_squares_as_string: + :arg variance_as_string: + :arg variance_population_as_string: + :arg variance_sampling_as_string: + :arg std_deviation_as_string: + :arg std_deviation_bounds_as_string: + :arg min_as_string: + :arg max_as_string: + :arg avg_as_string: + :arg sum_as_string: + :arg meta: + """ + + sum_of_squares: Union[float, None] + variance: Union[float, None] + variance_population: Union[float, None] + variance_sampling: Union[float, None] + std_deviation: Union[float, None] + std_deviation_population: Union[float, None] + std_deviation_sampling: Union[float, None] + count: int + min: Union[float, None] + max: Union[float, None] + avg: Union[float, None] + sum: float + std_deviation_bounds: "StandardDeviationBounds" + sum_of_squares_as_string: str + variance_as_string: str + variance_population_as_string: str + variance_sampling_as_string: str + std_deviation_as_string: str + 
std_deviation_bounds_as_string: "StandardDeviationBoundsAsString" + min_as_string: str + max_as_string: str + avg_as_string: str + sum_as_string: str + meta: Mapping[str, Any] + + +class ExtendedStatsBucketAggregate(AttrDict[Any]): + """ + :arg sum_of_squares: (required) + :arg variance: (required) + :arg variance_population: (required) + :arg variance_sampling: (required) + :arg std_deviation: (required) + :arg std_deviation_population: (required) + :arg std_deviation_sampling: (required) + :arg count: (required) + :arg min: (required) + :arg max: (required) + :arg avg: (required) + :arg sum: (required) + :arg std_deviation_bounds: + :arg sum_of_squares_as_string: + :arg variance_as_string: + :arg variance_population_as_string: + :arg variance_sampling_as_string: + :arg std_deviation_as_string: + :arg std_deviation_bounds_as_string: + :arg min_as_string: + :arg max_as_string: + :arg avg_as_string: + :arg sum_as_string: + :arg meta: + """ + + sum_of_squares: Union[float, None] + variance: Union[float, None] + variance_population: Union[float, None] + variance_sampling: Union[float, None] + std_deviation: Union[float, None] + std_deviation_population: Union[float, None] + std_deviation_sampling: Union[float, None] + count: int + min: Union[float, None] + max: Union[float, None] + avg: Union[float, None] + sum: float + std_deviation_bounds: "StandardDeviationBounds" + sum_of_squares_as_string: str + variance_as_string: str + variance_population_as_string: str + variance_sampling_as_string: str + std_deviation_as_string: str + std_deviation_bounds_as_string: "StandardDeviationBoundsAsString" + min_as_string: str + max_as_string: str + avg_as_string: str + sum_as_string: str + meta: Mapping[str, Any] + + +class FetchProfile(AttrDict[Any]): + """ + :arg type: (required) + :arg description: (required) + :arg time_in_nanos: (required) + :arg breakdown: (required) + :arg debug: + :arg children: + """ + + type: str + description: str + time_in_nanos: Any + breakdown: 
"FetchProfileBreakdown" + debug: "FetchProfileDebug" + children: Sequence["FetchProfile"] + + +class FetchProfileBreakdown(AttrDict[Any]): + """ + :arg load_source: + :arg load_source_count: + :arg load_stored_fields: + :arg load_stored_fields_count: + :arg next_reader: + :arg next_reader_count: + :arg process_count: + :arg process: + """ + + load_source: int + load_source_count: int + load_stored_fields: int + load_stored_fields_count: int + next_reader: int + next_reader_count: int + process_count: int + process: int + + +class FetchProfileDebug(AttrDict[Any]): + """ + :arg stored_fields: + :arg fast_path: + """ + + stored_fields: Sequence[str] + fast_path: int + + +class FilterAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class FiltersAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["FiltersBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "FiltersBucket"]: + return self.buckets # type: ignore + + +class FiltersBucket(AttrDict[Any]): + """ + :arg doc_count: (required) + """ + + doc_count: int + + +class FrequentItemSetsAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["FrequentItemSetsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "FrequentItemSetsBucket"]: + return self.buckets # type: ignore + + +class FrequentItemSetsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg support: (required) + :arg doc_count: (required) + """ + + key: Mapping[str, Sequence[str]] + support: float + doc_count: int + + +class GeoBoundsAggregate(AttrDict[Any]): + """ + :arg bounds: + :arg meta: + """ + + bounds: Union[ + "CoordsGeoBounds", + "TopLeftBottomRightGeoBounds", + "TopRightBottomLeftGeoBounds", + "WktGeoBounds", + ] + meta: 
Mapping[str, Any] + + +class GeoCentroidAggregate(AttrDict[Any]): + """ + :arg count: (required) + :arg location: + :arg meta: + """ + + count: int + location: Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str] + meta: Mapping[str, Any] + + +class GeoDistanceAggregate(AttrDict[Any]): + """ + Result of a `geo_distance` aggregation. The unit for `from` and `to` + is meters by default. + + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["RangeBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "RangeBucket"]: + return self.buckets # type: ignore + + +class GeoHashGridAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["GeoHashGridBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "GeoHashGridBucket"]: + return self.buckets # type: ignore + + +class GeoHashGridBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: str + doc_count: int + + +class GeoHexGridAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["GeoHexGridBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "GeoHexGridBucket"]: + return self.buckets # type: ignore + + +class GeoHexGridBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: str + doc_count: int + + +class GeoLine(AttrDict[Any]): + """ + A GeoJson GeoLine. 
+ + :arg type: (required) Always `"LineString"` + :arg coordinates: (required) Array of `[lon, lat]` coordinates + """ + + type: str + coordinates: Sequence[Sequence[float]] + + +class GeoLineAggregate(AttrDict[Any]): + """ + :arg type: (required) + :arg geometry: (required) + :arg properties: (required) + :arg meta: + """ + + type: str + geometry: "GeoLine" + properties: Any + meta: Mapping[str, Any] + + +class GeoTileGridAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["GeoTileGridBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "GeoTileGridBucket"]: + return self.buckets # type: ignore + + +class GeoTileGridBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: str + doc_count: int + + +class GlobalAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class HdrPercentileRanksAggregate(AttrDict[Any]): + """ + :arg values: (required) + :arg meta: + """ + + values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + meta: Mapping[str, Any] + + +class HdrPercentilesAggregate(AttrDict[Any]): + """ + :arg values: (required) + :arg meta: + """ + + values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + meta: Mapping[str, Any] + + +class HistogramAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["HistogramBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "HistogramBucket"]: + return self.buckets # type: ignore + + +class HistogramBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg key_as_string: + """ + + key: float + doc_count: int + key_as_string: str + + +class Hit(AttrDict[Any]): + """ + :arg index: 
(required) + :arg id: + :arg score: + :arg explanation: + :arg fields: + :arg highlight: + :arg inner_hits: + :arg matched_queries: + :arg nested: + :arg ignored: + :arg ignored_field_values: + :arg shard: + :arg node: + :arg routing: + :arg source: + :arg rank: + :arg seq_no: + :arg primary_term: + :arg version: + :arg sort: + """ + + index: str + id: str + score: Union[float, None] + explanation: "Explanation" + fields: Mapping[str, Any] + highlight: Mapping[str, Sequence[str]] + inner_hits: Mapping[str, "InnerHitsResult"] + matched_queries: Union[Sequence[str], Mapping[str, float]] + nested: "NestedIdentity" + ignored: Sequence[str] + ignored_field_values: Mapping[ + str, Sequence[Union[int, float, str, bool, None, Any]] + ] + shard: str + node: str + routing: str + source: Any + rank: int + seq_no: int + primary_term: int + version: int + sort: Sequence[Union[int, float, str, bool, None, Any]] + + +class HitsMetadata(AttrDict[Any]): + """ + :arg hits: (required) + :arg total: Total hit count information, present only if + `track_total_hits` wasn't `false` in the search request. 
+ :arg max_score: + """ + + hits: Sequence["Hit"] + total: Union["TotalHits", int] + max_score: Union[float, None] + + +class InferenceAggregate(AttrDict[Any]): + """ + :arg value: + :arg feature_importance: + :arg top_classes: + :arg warning: + :arg meta: + """ + + value: Union[int, float, str, bool, None, Any] + feature_importance: Sequence["InferenceFeatureImportance"] + top_classes: Sequence["InferenceTopClassEntry"] + warning: str + meta: Mapping[str, Any] + + +class InferenceClassImportance(AttrDict[Any]): + """ + :arg class_name: (required) + :arg importance: (required) + """ + + class_name: str + importance: float + + +class InferenceFeatureImportance(AttrDict[Any]): + """ + :arg feature_name: (required) + :arg importance: + :arg classes: + """ + + feature_name: str + importance: float + classes: Sequence["InferenceClassImportance"] + + +class InferenceTopClassEntry(AttrDict[Any]): + """ + :arg class_name: (required) + :arg class_probability: (required) + :arg class_score: (required) + """ + + class_name: Union[int, float, str, bool, None, Any] + class_probability: float + class_score: float + + +class InnerHitsResult(AttrDict[Any]): + """ + :arg hits: (required) + """ + + hits: "HitsMetadata" + + +class IpPrefixAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["IpPrefixBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "IpPrefixBucket"]: + return self.buckets # type: ignore + + +class IpPrefixBucket(AttrDict[Any]): + """ + :arg is_ipv6: (required) + :arg key: (required) + :arg prefix_length: (required) + :arg doc_count: (required) + :arg netmask: + """ + + is_ipv6: bool + key: str + prefix_length: int + doc_count: int + netmask: str + + +class IpRangeAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["IpRangeBucket"] + meta: Mapping[str, Any] + + 
@property + def buckets_as_dict(self) -> Mapping[str, "IpRangeBucket"]: + return self.buckets # type: ignore + + +class IpRangeBucket(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg key: + :arg from: + :arg to: + """ + + doc_count: int + key: str + from_: str + to: str + + +class KnnCollectorResult(AttrDict[Any]): + """ + :arg name: (required) + :arg reason: (required) + :arg time_in_nanos: (required) + :arg time: + :arg children: + """ + + name: str + reason: str + time_in_nanos: Any + time: Any + children: Sequence["KnnCollectorResult"] + + +class KnnQueryProfileBreakdown(AttrDict[Any]): + """ + :arg advance: (required) + :arg advance_count: (required) + :arg build_scorer: (required) + :arg build_scorer_count: (required) + :arg compute_max_score: (required) + :arg compute_max_score_count: (required) + :arg count_weight: (required) + :arg count_weight_count: (required) + :arg create_weight: (required) + :arg create_weight_count: (required) + :arg match: (required) + :arg match_count: (required) + :arg next_doc: (required) + :arg next_doc_count: (required) + :arg score: (required) + :arg score_count: (required) + :arg set_min_competitive_score: (required) + :arg set_min_competitive_score_count: (required) + :arg shallow_advance: (required) + :arg shallow_advance_count: (required) + """ + + advance: int + advance_count: int + build_scorer: int + build_scorer_count: int + compute_max_score: int + compute_max_score_count: int + count_weight: int + count_weight_count: int + create_weight: int + create_weight_count: int + match: int + match_count: int + next_doc: int + next_doc_count: int + score: int + score_count: int + set_min_competitive_score: int + set_min_competitive_score_count: int + shallow_advance: int + shallow_advance_count: int + + +class KnnQueryProfileResult(AttrDict[Any]): + """ + :arg type: (required) + :arg description: (required) + :arg time_in_nanos: (required) + :arg breakdown: (required) + :arg time: + :arg debug: + :arg children: + """ 
+ + type: str + description: str + time_in_nanos: Any + breakdown: "KnnQueryProfileBreakdown" + time: Any + debug: Mapping[str, Any] + children: Sequence["KnnQueryProfileResult"] + + +class LongRareTermsAggregate(AttrDict[Any]): + """ + Result of the `rare_terms` aggregation when the field is some kind of + whole number like a integer, long, or a date. + + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["LongRareTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "LongRareTermsBucket"]: + return self.buckets # type: ignore + + +class LongRareTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg key_as_string: + """ + + key: int + doc_count: int + key_as_string: str + + +class LongTermsAggregate(AttrDict[Any]): + """ + Result of a `terms` aggregation when the field is some kind of whole + number like a integer, long, or a date. + + :arg doc_count_error_upper_bound: + :arg sum_other_doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + doc_count_error_upper_bound: int + sum_other_doc_count: int + buckets: Sequence["LongTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "LongTermsBucket"]: + return self.buckets # type: ignore + + +class LongTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg key_as_string: + :arg doc_count_error_upper_bound: + """ + + key: int + doc_count: int + key_as_string: str + doc_count_error_upper_bound: int + + +class MatrixStatsAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg fields: + :arg meta: + """ + + doc_count: int + fields: Sequence["MatrixStatsFields"] + meta: Mapping[str, Any] + + +class MatrixStatsFields(AttrDict[Any]): + """ + :arg name: (required) + :arg count: (required) + :arg mean: (required) + :arg variance: (required) + :arg skewness: (required) 
+ :arg kurtosis: (required) + :arg covariance: (required) + :arg correlation: (required) + """ + + name: str + count: int + mean: float + variance: float + skewness: float + kurtosis: float + covariance: Mapping[str, float] + correlation: Mapping[str, float] + + +class MaxAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class MedianAbsoluteDeviationAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class MinAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. 
+ :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class MissingAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class MultiTermsAggregate(AttrDict[Any]): + """ + :arg doc_count_error_upper_bound: + :arg sum_other_doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + doc_count_error_upper_bound: int + sum_other_doc_count: int + buckets: Sequence["MultiTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "MultiTermsBucket"]: + return self.buckets # type: ignore + + +class MultiTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg key_as_string: + :arg doc_count_error_upper_bound: + """ + + key: Sequence[Union[int, float, str, bool, None, Any]] + doc_count: int + key_as_string: str + doc_count_error_upper_bound: int + + +class NestedAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class NestedIdentity(AttrDict[Any]): + """ + :arg field: (required) + :arg offset: (required) + :arg _nested: + """ + + field: str + offset: int + _nested: "NestedIdentity" + + +class ParentAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class PercentilesBucketAggregate(AttrDict[Any]): + """ + :arg values: (required) + :arg meta: + """ + + values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + meta: Mapping[str, Any] + + +class PhraseSuggest(AttrDict[Any]): + """ + :arg options: (required) + :arg length: (required) + :arg offset: (required) + :arg text: (required) + """ + + options: Sequence["PhraseSuggestOption"] + length: int + offset: int + text: str + + +class PhraseSuggestOption(AttrDict[Any]): + """ + :arg text: 
(required) + :arg score: (required) + :arg highlighted: + :arg collate_match: + """ + + text: str + score: float + highlighted: str + collate_match: bool + + +class Profile(AttrDict[Any]): + """ + :arg shards: (required) + """ + + shards: Sequence["ShardProfile"] + + +class QueryBreakdown(AttrDict[Any]): + """ + :arg advance: (required) + :arg advance_count: (required) + :arg build_scorer: (required) + :arg build_scorer_count: (required) + :arg create_weight: (required) + :arg create_weight_count: (required) + :arg match: (required) + :arg match_count: (required) + :arg shallow_advance: (required) + :arg shallow_advance_count: (required) + :arg next_doc: (required) + :arg next_doc_count: (required) + :arg score: (required) + :arg score_count: (required) + :arg compute_max_score: (required) + :arg compute_max_score_count: (required) + :arg count_weight: (required) + :arg count_weight_count: (required) + :arg set_min_competitive_score: (required) + :arg set_min_competitive_score_count: (required) + """ + + advance: int + advance_count: int + build_scorer: int + build_scorer_count: int + create_weight: int + create_weight_count: int + match: int + match_count: int + shallow_advance: int + shallow_advance_count: int + next_doc: int + next_doc_count: int + score: int + score_count: int + compute_max_score: int + compute_max_score_count: int + count_weight: int + count_weight_count: int + set_min_competitive_score: int + set_min_competitive_score_count: int + + +class QueryProfile(AttrDict[Any]): + """ + :arg breakdown: (required) + :arg description: (required) + :arg time_in_nanos: (required) + :arg type: (required) + :arg children: + """ + + breakdown: "QueryBreakdown" + description: str + time_in_nanos: Any + type: str + children: Sequence["QueryProfile"] + + +class RangeAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["RangeBucket"] + meta: Mapping[str, Any] + + @property + def 
buckets_as_dict(self) -> Mapping[str, "RangeBucket"]: + return self.buckets # type: ignore + + +class RangeBucket(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg from: + :arg to: + :arg from_as_string: + :arg to_as_string: + :arg key: The bucket key. Present if the aggregation is _not_ keyed + """ + + doc_count: int + from_: float + to: float + from_as_string: str + to_as_string: str + key: str + + +class RateAggregate(AttrDict[Any]): + """ + :arg value: (required) + :arg value_as_string: + :arg meta: + """ + + value: float + value_as_string: str + meta: Mapping[str, Any] + + +class Retries(AttrDict[Any]): + """ + :arg bulk: (required) + :arg search: (required) + """ + + bulk: int + search: int + + +class ReverseNestedAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class SamplerAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class ScriptedMetricAggregate(AttrDict[Any]): + """ + :arg value: (required) + :arg meta: + """ + + value: Any + meta: Mapping[str, Any] + + +class SearchProfile(AttrDict[Any]): + """ + :arg collector: (required) + :arg query: (required) + :arg rewrite_time: (required) + """ + + collector: Sequence["Collector"] + query: Sequence["QueryProfile"] + rewrite_time: int + + +class ShardFailure(AttrDict[Any]): + """ + :arg reason: (required) + :arg shard: (required) + :arg index: + :arg node: + :arg status: + """ + + reason: "ErrorCause" + shard: int + index: str + node: str + status: str + + +class ShardProfile(AttrDict[Any]): + """ + :arg aggregations: (required) + :arg cluster: (required) + :arg id: (required) + :arg index: (required) + :arg node_id: (required) + :arg searches: (required) + :arg shard_id: (required) + :arg dfs: + :arg fetch: + """ + + aggregations: Sequence["AggregationProfile"] + cluster: str + id: str + index: str + node_id: str + searches: 
Sequence["SearchProfile"] + shard_id: int + dfs: "DfsProfile" + fetch: "FetchProfile" + + +class ShardStatistics(AttrDict[Any]): + """ + :arg failed: (required) + :arg successful: (required) Indicates how many shards have + successfully run the search. + :arg total: (required) Indicates how many shards the search will run + on overall. + :arg failures: + :arg skipped: + """ + + failed: int + successful: int + total: int + failures: Sequence["ShardFailure"] + skipped: int + + +class SignificantLongTermsAggregate(AttrDict[Any]): + """ + :arg bg_count: + :arg doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + bg_count: int + doc_count: int + buckets: Sequence["SignificantLongTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "SignificantLongTermsBucket"]: + return self.buckets # type: ignore + + +class SignificantLongTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg score: (required) + :arg bg_count: (required) + :arg doc_count: (required) + :arg key_as_string: + """ + + key: int + score: float + bg_count: int + doc_count: int + key_as_string: str + + +class SignificantStringTermsAggregate(AttrDict[Any]): + """ + :arg bg_count: + :arg doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + bg_count: int + doc_count: int + buckets: Sequence["SignificantStringTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "SignificantStringTermsBucket"]: + return self.buckets # type: ignore + + +class SignificantStringTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg score: (required) + :arg bg_count: (required) + :arg doc_count: (required) + """ + + key: str + score: float + bg_count: int + doc_count: int + + +class SimpleValueAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. 
A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class StandardDeviationBounds(AttrDict[Any]): + """ + :arg upper: (required) + :arg lower: (required) + :arg upper_population: (required) + :arg lower_population: (required) + :arg upper_sampling: (required) + :arg lower_sampling: (required) + """ + + upper: Union[float, None] + lower: Union[float, None] + upper_population: Union[float, None] + lower_population: Union[float, None] + upper_sampling: Union[float, None] + lower_sampling: Union[float, None] + + +class StandardDeviationBoundsAsString(AttrDict[Any]): + """ + :arg upper: (required) + :arg lower: (required) + :arg upper_population: (required) + :arg lower_population: (required) + :arg upper_sampling: (required) + :arg lower_sampling: (required) + """ + + upper: str + lower: str + upper_population: str + lower_population: str + upper_sampling: str + lower_sampling: str + + +class StatsAggregate(AttrDict[Any]): + """ + Statistics aggregation result. `min`, `max` and `avg` are missing if + there were no values to process (`count` is zero). 
+ + :arg count: (required) + :arg min: (required) + :arg max: (required) + :arg avg: (required) + :arg sum: (required) + :arg min_as_string: + :arg max_as_string: + :arg avg_as_string: + :arg sum_as_string: + :arg meta: + """ + + count: int + min: Union[float, None] + max: Union[float, None] + avg: Union[float, None] + sum: float + min_as_string: str + max_as_string: str + avg_as_string: str + sum_as_string: str + meta: Mapping[str, Any] + + +class StatsBucketAggregate(AttrDict[Any]): + """ + :arg count: (required) + :arg min: (required) + :arg max: (required) + :arg avg: (required) + :arg sum: (required) + :arg min_as_string: + :arg max_as_string: + :arg avg_as_string: + :arg sum_as_string: + :arg meta: + """ + + count: int + min: Union[float, None] + max: Union[float, None] + avg: Union[float, None] + sum: float + min_as_string: str + max_as_string: str + avg_as_string: str + sum_as_string: str + meta: Mapping[str, Any] + + +class StringRareTermsAggregate(AttrDict[Any]): + """ + Result of the `rare_terms` aggregation when the field is a string. 
+ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["StringRareTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "StringRareTermsBucket"]: + return self.buckets # type: ignore + + +class StringRareTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: str + doc_count: int + + +class StringStatsAggregate(AttrDict[Any]): + """ + :arg count: (required) + :arg min_length: (required) + :arg max_length: (required) + :arg avg_length: (required) + :arg entropy: (required) + :arg distribution: + :arg min_length_as_string: + :arg max_length_as_string: + :arg avg_length_as_string: + :arg meta: + """ + + count: int + min_length: Union[int, None] + max_length: Union[int, None] + avg_length: Union[float, None] + entropy: Union[float, None] + distribution: Union[Mapping[str, float], None] + min_length_as_string: str + max_length_as_string: str + avg_length_as_string: str + meta: Mapping[str, Any] + + +class StringTermsAggregate(AttrDict[Any]): + """ + Result of a `terms` aggregation when the field is a string. + + :arg doc_count_error_upper_bound: + :arg sum_other_doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + doc_count_error_upper_bound: int + sum_other_doc_count: int + buckets: Sequence["StringTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "StringTermsBucket"]: + return self.buckets # type: ignore + + +class StringTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg doc_count_error_upper_bound: + """ + + key: Union[int, float, str, bool, None, Any] + doc_count: int + doc_count_error_upper_bound: int + + +class SumAggregate(AttrDict[Any]): + """ + Sum aggregation result. `value` is always present and is zero if there + were no values to process. + + :arg value: (required) The metric value. 
A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class TDigestPercentileRanksAggregate(AttrDict[Any]): + """ + :arg values: (required) + :arg meta: + """ + + values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + meta: Mapping[str, Any] + + +class TDigestPercentilesAggregate(AttrDict[Any]): + """ + :arg values: (required) + :arg meta: + """ + + values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + meta: Mapping[str, Any] + + +class TTestAggregate(AttrDict[Any]): + """ + :arg value: (required) + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class TermSuggest(AttrDict[Any]): + """ + :arg options: (required) + :arg length: (required) + :arg offset: (required) + :arg text: (required) + """ + + options: Sequence["TermSuggestOption"] + length: int + offset: int + text: str + + +class TermSuggestOption(AttrDict[Any]): + """ + :arg text: (required) + :arg score: (required) + :arg freq: (required) + :arg highlighted: + :arg collate_match: + """ + + text: str + score: float + freq: int + highlighted: str + collate_match: bool + + +class TimeSeriesAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["TimeSeriesBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "TimeSeriesBucket"]: + return self.buckets # type: ignore + + +class TimeSeriesBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: Mapping[str, Union[int, float, str, bool, None, Any]] + doc_count: int + + +class TopHitsAggregate(AttrDict[Any]): + """ + :arg hits: (required) + :arg meta: + """ + + hits: "HitsMetadata" + meta: Mapping[str, 
Any] + + +class TopMetrics(AttrDict[Any]): + """ + :arg sort: (required) + :arg metrics: (required) + """ + + sort: Sequence[Union[Union[int, float, str, bool, None, Any], None]] + metrics: Mapping[str, Union[Union[int, float, str, bool, None, Any], None]] + + +class TopMetricsAggregate(AttrDict[Any]): + """ + :arg top: (required) + :arg meta: + """ + + top: Sequence["TopMetrics"] + meta: Mapping[str, Any] + + +class TotalHits(AttrDict[Any]): + """ + :arg relation: (required) + :arg value: (required) + """ + + relation: Literal["eq", "gte"] + value: int + + +class UnmappedRareTermsAggregate(AttrDict[Any]): + """ + Result of a `rare_terms` aggregation when the field is unmapped. + `buckets` is always empty. + + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence[Any] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, Any]: + return self.buckets # type: ignore + + +class UnmappedSamplerAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class UnmappedSignificantTermsAggregate(AttrDict[Any]): + """ + Result of the `significant_terms` aggregation on an unmapped field. + `buckets` is always empty. + + :arg bg_count: + :arg doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + bg_count: int + doc_count: int + buckets: Sequence[Any] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, Any]: + return self.buckets # type: ignore + + +class UnmappedTermsAggregate(AttrDict[Any]): + """ + Result of a `terms` aggregation when the field is unmapped. `buckets` + is always empty. 
+ + :arg doc_count_error_upper_bound: + :arg sum_other_doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + doc_count_error_upper_bound: int + sum_other_doc_count: int + buckets: Sequence[Any] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, Any]: + return self.buckets # type: ignore + + +class ValueCountAggregate(AttrDict[Any]): + """ + Value count aggregation result. `value` is always present. + + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class VariableWidthHistogramAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["VariableWidthHistogramBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "VariableWidthHistogramBucket"]: + return self.buckets # type: ignore + + +class VariableWidthHistogramBucket(AttrDict[Any]): + """ + :arg min: (required) + :arg key: (required) + :arg max: (required) + :arg doc_count: (required) + :arg min_as_string: + :arg key_as_string: + :arg max_as_string: + """ + + min: float + key: float + max: float + doc_count: int + min_as_string: str + key_as_string: str + max_as_string: str + + +class WeightedAvgAggregate(AttrDict[Any]): + """ + Weighted average aggregation result. `value` is missing if the weight + was set to zero. + + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. 
+ :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] diff --git a/elasticsearch/dsl/update_by_query.py b/elasticsearch/dsl/update_by_query.py new file mode 100644 index 000000000..fdff22bc8 --- /dev/null +++ b/elasticsearch/dsl/update_by_query.py @@ -0,0 +1,19 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ._async.update_by_query import AsyncUpdateByQuery # noqa: F401 +from ._sync.update_by_query import UpdateByQuery # noqa: F401 diff --git a/elasticsearch/dsl/update_by_query_base.py b/elasticsearch/dsl/update_by_query_base.py new file mode 100644 index 000000000..e4490ddf6 --- /dev/null +++ b/elasticsearch/dsl/update_by_query_base.py @@ -0,0 +1,149 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
class UpdateByQueryBase(Request[_R]):
    """
    Common state and serialization logic for update-by-query requests.

    An instance carries three pieces of request body state: the query
    (reached through the ``query`` proxy), a single script definition and
    any extra top-level options stored in ``_extra``. ``to_dict`` merges
    them into the body that is sent.
    """

    query = ProxyDescriptor[Self]("query")

    def __init__(self, **kwargs: Any):
        """
        Update by query request to elasticsearch.

        :arg using: `Elasticsearch` instance to use
        :arg index: limit the search to index
        :arg doc_type: only query this type.

        All the parameters supplied (or omitted) at creation time can be
        later overridden by methods (`using`, `index` and `doc_type`
        respectively).
        """
        super().__init__(**kwargs)
        self._response_class = UpdateByQueryResponse[_R]
        # no script until ``script()`` or ``update_from_dict()`` provides one
        self._script: Dict[str, Any] = {}
        self._query_proxy = QueryProxy(self, "query")

    def filter(self, *args: Any, **kwargs: Any) -> Self:
        """
        Build a query from the arguments with ``Q`` and hand it to
        ``self.query`` wrapped in the ``filter`` clause of a ``Bool``.
        """
        condition = Q(*args, **kwargs)
        return self.query(Bool(filter=[condition]))

    def exclude(self, *args: Any, **kwargs: Any) -> Self:
        """
        Same as ``filter``, but the query built from the arguments is
        negated (``~``) before being placed in the ``filter`` clause.
        """
        negated = ~Q(*args, **kwargs)
        return self.query(Bool(filter=[negated]))

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> Self:
        """
        Construct a new `UpdateByQuery` instance from a raw dict containing
        the request body. Useful when migrating from raw dictionaries.

        Example::

            ubq = UpdateByQuery.from_dict({
                "query": {
                    "bool": {
                        "must": [...]
                    }
                },
                "script": {...}
            })
            ubq = ubq.filter('term', published=True)
        """
        instance = cls()
        instance.update_from_dict(d)
        return instance

    def _clone(self) -> Self:
        """
        Return a clone of the current request. The script dictionary is
        shallow-copied, while the response class and the proxied query
        object are shared with the original. Used internally by the state
        modifying APIs.
        """
        clone = super()._clone()

        clone._response_class = self._response_class
        clone._script = self._script.copy()
        clone.query._proxied = self.query._proxied
        return clone

    def response_class(self, cls: Type[UpdateByQueryResponse[_R]]) -> Self:
        """
        Return a clone that wraps its response in ``cls`` instead of the
        default response class.
        """
        clone = self._clone()
        clone._response_class = cls
        return clone

    def update_from_dict(self, d: Dict[str, Any]) -> Self:
        """
        Apply options from a serialized body to the current instance.
        Modifies the object in-place and returns it. Used mostly by
        ``from_dict``.

        ``query`` and ``script`` are extracted into their dedicated slots;
        every other key is stored as an extra body option.
        """
        # work on a shallow copy so the caller's dict keeps all of its keys
        remaining = d.copy()
        if "query" in remaining:
            self.query._proxied = Q(remaining.pop("query"))
        if "script" in remaining:
            self._script = remaining.pop("script")
        self._extra.update(remaining)
        return self

    def script(self, **kwargs: Any) -> Self:
        """
        Define the update action to take, see
        https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-using.html
        for more details.

        Note: the API only accepts a single script, so calling this method
        again replaces the previously defined script.

        Example::

            ubq = UpdateByQuery()
            ubq = ubq.script(source="ctx._source.likes++")
            ubq = ubq.script(source="ctx._source.likes += params.f",
                             lang="expression",
                             params={'f': 3})
        """
        clone = self._clone()
        if clone._script:
            # drop the script copied over from the original request
            clone._script = {}
        clone._script.update(kwargs)
        return clone

    def to_dict(self, **kwargs: Any) -> Dict[str, Any]:
        """
        Serialize the request into the dictionary that will be sent over as
        the request's body.

        All additional keyword arguments will be included into the
        dictionary. They are applied last, after the stored extra options,
        so on a key clash the keyword argument wins.
        """
        body: Dict[str, Any] = {}
        if self.query:
            body["query"] = self.query.to_dict()

        if self._script:
            body["script"] = self._script

        for options in (self._extra, kwargs):
            body.update(recursive_to_dict(options))
        return body
class AttrList(Generic[_ValT]):
    """
    List wrapper that wraps its items on the way out.

    Items obtained through indexing or iteration are passed through
    ``_wrap`` together with the optional ``obj_wrapper``; slices are
    returned as a new ``AttrList`` using the same ``obj_wrapper``. Any
    attribute not defined on this class is looked up on the underlying
    list, so list methods are reachable directly on the wrapper.

    :arg l: the data to wrap; a value that is not already a ``list`` is
        converted with ``list(l)``
    :arg obj_wrapper: optional callable forwarded to ``_wrap`` for every
        item that is read
    """

    def __init__(
        self, l: List[_ValT], obj_wrapper: Optional[Callable[[_ValT], Any]] = None
    ):
        # make iterables into lists
        if not isinstance(l, list):
            l = list(l)
        self._l_ = l
        self._obj_wrapper = obj_wrapper

    def __repr__(self) -> str:
        return repr(self._l_)

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, AttrList):
            return other._l_ == self._l_
        # make sure we still equal to a list with the same data
        return bool(other == self._l_)

    def __ne__(self, other: Any) -> bool:
        return not self == other

    def __getitem__(self, k: Union[int, slice]) -> Any:
        selected = self._l_[k]
        if isinstance(k, slice):
            # a slice stays an AttrList so its items keep being wrapped
            return AttrList[_ValT](selected, obj_wrapper=self._obj_wrapper)  # type: ignore[arg-type]
        return _wrap(selected, self._obj_wrapper)

    def __setitem__(self, k: int, value: _ValT) -> None:
        self._l_[k] = value

    def __iter__(self) -> Iterator[Any]:
        return (_wrap(item, self._obj_wrapper) for item in self._l_)

    def __len__(self) -> int:
        return len(self._l_)

    def __nonzero__(self) -> bool:
        return bool(self._l_)

    __bool__ = __nonzero__

    def __getattr__(self, name: str) -> Any:
        # ``__getattr__`` only runs when normal lookup failed. If the failing
        # name is ``_l_`` itself (instance created without ``__init__`` or
        # ``__setstate__``), delegating would look up ``self._l_`` again and
        # recurse until RecursionError; report a plain AttributeError instead.
        if name == "_l_":
            raise AttributeError(
                f"{self.__class__.__name__!r} object has no attribute {name!r}"
            )
        return getattr(self._l_, name)

    def __getstate__(self) -> Tuple[List[_ValT], Optional[Callable[[_ValT], Any]]]:
        # pickled state is the raw list plus the wrapper callable
        return self._l_, self._obj_wrapper

    def __setstate__(
        self, state: Tuple[List[_ValT], Optional[Callable[[_ValT], Any]]]
    ) -> None:
        self._l_, self._obj_wrapper = state

    def to_list(self) -> List[_ValT]:
        """Return the underlying list object itself (not a copy)."""
        return self._l_
+ """ + + _d_: Dict[str, _ValT] + RESERVED: Dict[str, str] = {"from_": "from"} + + def __init__(self, d: Dict[str, _ValT]): + # assign the inner dict manually to prevent __setattr__ from firing + super().__setattr__("_d_", d) + + def __contains__(self, key: object) -> bool: + return key in self._d_ + + def __nonzero__(self) -> bool: + return bool(self._d_) + + __bool__ = __nonzero__ + + def __dir__(self) -> List[str]: + # introspection for auto-complete in IPython etc + return list(self._d_.keys()) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, AttrDict): + return other._d_ == self._d_ + # make sure we still equal to a dict with the same data + return bool(other == self._d_) + + def __ne__(self, other: Any) -> bool: + return not self == other + + def __repr__(self) -> str: + r = repr(self._d_) + if len(r) > 60: + r = r[:60] + "...}" + return r + + def __getstate__(self) -> Tuple[Dict[str, _ValT]]: + return (self._d_,) + + def __setstate__(self, state: Tuple[Dict[str, _ValT]]) -> None: + super().__setattr__("_d_", state[0]) + + def __getattr__(self, attr_name: str) -> Any: + try: + return self.__getitem__(attr_name) + except KeyError: + raise AttributeError( + f"{self.__class__.__name__!r} object has no attribute {attr_name!r}" + ) + + def __delattr__(self, attr_name: str) -> None: + try: + del self._d_[self.RESERVED.get(attr_name, attr_name)] + except KeyError: + raise AttributeError( + f"{self.__class__.__name__!r} object has no attribute {attr_name!r}" + ) + + def __getitem__(self, key: str) -> Any: + return _wrap(self._d_[self.RESERVED.get(key, key)]) + + def __setitem__(self, key: str, value: _ValT) -> None: + self._d_[self.RESERVED.get(key, key)] = value + + def __delitem__(self, key: str) -> None: + del self._d_[self.RESERVED.get(key, key)] + + def __setattr__(self, name: str, value: _ValT) -> None: + # the __orig__class__ attribute has to be treated as an exception, as + # is it added to an object when it is instantiated with type arguments 
+ if ( + name in self._d_ or not hasattr(self.__class__, name) + ) and name != "__orig_class__": + self._d_[self.RESERVED.get(name, name)] = value + else: + # there is an attribute on the class (could be property, ..) - don't add it as field + super().__setattr__(name, value) + + def __iter__(self) -> Iterator[str]: + return iter(self._d_) + + def to_dict(self, recursive: bool = False) -> Dict[str, _ValT]: + return cast( + Dict[str, _ValT], _recursive_to_dict(self._d_) if recursive else self._d_ + ) + + def keys(self) -> Iterable[str]: + return self._d_.keys() + + def items(self) -> Iterable[Tuple[str, _ValT]]: + return self._d_.items() + + +class DslMeta(type): + """ + Base Metaclass for DslBase subclasses that builds a registry of all classes + for given DslBase subclass (== all the query types for the Query subclass + of DslBase). + + It then uses the information from that registry (as well as `name` and + `shortcut` attributes from the base class) to construct any subclass based + on it's name. + + For typical use see `QueryMeta` and `Query` in `elasticsearch.dsl.query`. 
+ """ + + name: str + _classes: Dict[str, type] + _type_name: str + _types: ClassVar[Dict[str, Type["DslBase"]]] = {} + + def __init__(cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]): + super().__init__(name, bases, attrs) + # skip for DslBase + if not hasattr(cls, "_type_shortcut"): + return + if not cls.name: + # abstract base class, register it's shortcut + cls._types[cls._type_name] = cls._type_shortcut + # and create a registry for subclasses + if not hasattr(cls, "_classes"): + cls._classes = {} + elif cls.name not in cls._classes: + # normal class, register it + cls._classes[cls.name] = cls + + @classmethod + def get_dsl_type(cls, name: str) -> Type["DslBase"]: + try: + return cls._types[name] + except KeyError: + raise UnknownDslObject(f"DSL type {name} does not exist.") + + +class DslBase(metaclass=DslMeta): + """ + Base class for all DSL objects - queries, filters, aggregations etc. Wraps + a dictionary representing the object's json. + + Provides several feature: + - attribute access to the wrapped dictionary (.field instead of ['field']) + - _clone method returning a copy of self + - to_dict method to serialize into dict (to be sent via elasticsearch-py) + - basic logical operators (&, | and ~) using a Bool(Filter|Query) TODO: + move into a class specific for Query/Filter + - respects the definition of the class and (de)serializes it's + attributes based on the `_param_defs` definition (for example turning + all values in the `must` attribute into Query objects) + """ + + _param_defs: ClassVar[Dict[str, Dict[str, Union[str, bool]]]] = {} + + @classmethod + def get_dsl_class( + cls: Type[Self], name: str, default: Optional[str] = None + ) -> Type[Self]: + try: + return cls._classes[name] + except KeyError: + if default is not None: + return cls._classes[default] + raise UnknownDslObject( + f"DSL class `{name}` does not exist in {cls._type_name}." 
+ ) + + def __init__(self, _expand__to_dot: Optional[bool] = None, **params: Any) -> None: + if _expand__to_dot is None: + _expand__to_dot = EXPAND__TO_DOT + self._params: Dict[str, Any] = {} + for pname, pvalue in params.items(): + if pvalue == DEFAULT: + continue + # expand "__" to dots + if "__" in pname and _expand__to_dot: + pname = pname.replace("__", ".") + # convert instrumented fields to string + if type(pvalue).__name__ == "InstrumentedField": + pvalue = str(pvalue) + self._setattr(pname, pvalue) + + def _repr_params(self) -> str: + """Produce a repr of all our parameters to be used in __repr__.""" + return ", ".join( + f"{n.replace('.', '__')}={v!r}" + for (n, v) in sorted(self._params.items()) + # make sure we don't include empty typed params + if "type" not in self._param_defs.get(n, {}) or v + ) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self._repr_params()})" + + def __eq__(self, other: Any) -> bool: + return isinstance(other, self.__class__) and other.to_dict() == self.to_dict() + + def __ne__(self, other: Any) -> bool: + return not self == other + + def __setattr__(self, name: str, value: Any) -> None: + if name.startswith("_"): + return super().__setattr__(name, value) + return self._setattr(name, value) + + def _setattr(self, name: str, value: Any) -> None: + # if this attribute has special type assigned to it... 
+ name = AttrDict.RESERVED.get(name, name) + if name in self._param_defs: + pinfo = self._param_defs[name] + + if "type" in pinfo: + # get the shortcut used to construct this type (query.Q, aggs.A, etc) + shortcut = self.__class__.get_dsl_type(str(pinfo["type"])) + + # list of dict(name -> DslBase) + if pinfo.get("multi") and pinfo.get("hash"): + if not isinstance(value, (tuple, list)): + value = (value,) + value = list( + {k: shortcut(v) for (k, v) in obj.items()} for obj in value + ) + elif pinfo.get("multi"): + if not isinstance(value, (tuple, list)): + value = (value,) + value = list(map(shortcut, value)) + + # dict(name -> DslBase), make sure we pickup all the objs + elif pinfo.get("hash"): + value = {k: shortcut(v) for (k, v) in value.items()} + + # single value object, just convert + else: + value = shortcut(value) + self._params[name] = value + + def __getattr__(self, name: str) -> Any: + if name.startswith("_"): + raise AttributeError( + f"{self.__class__.__name__!r} object has no attribute {name!r}" + ) + + value = None + try: + value = self._params[name] + except KeyError: + # compound types should never throw AttributeError and return empty + # container instead + if name in self._param_defs: + pinfo = self._param_defs[name] + if pinfo.get("multi"): + value = self._params.setdefault(name, []) + elif pinfo.get("hash"): + value = self._params.setdefault(name, {}) + if value is None: + raise AttributeError( + f"{self.__class__.__name__!r} object has no attribute {name!r}" + ) + + # wrap nested dicts in AttrDict for convenient access + if isinstance(value, dict): + return AttrDict(value) + return value + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize the DSL object to plain dict + """ + d = {} + for pname, value in self._params.items(): + pinfo = self._param_defs.get(pname) + + # typed param + if pinfo and "type" in pinfo: + # don't serialize empty lists and dicts for typed fields + if value in ({}, []): + continue + + # list of dict(name -> 
DslBase) + if pinfo.get("multi") and pinfo.get("hash"): + value = list( + {k: v.to_dict() for k, v in obj.items()} for obj in value + ) + + # multi-values are serialized as list of dicts + elif pinfo.get("multi"): + value = list(map(lambda x: x.to_dict(), value)) + + # squash all the hash values into one dict + elif pinfo.get("hash"): + value = {k: v.to_dict() for k, v in value.items()} + + # serialize single values + else: + value = value.to_dict() + + # serialize anything with to_dict method + elif hasattr(value, "to_dict"): + value = value.to_dict() + + d[pname] = value + return {self.name: d} + + def _clone(self) -> Self: + c = self.__class__() + for attr in self._params: + c._params[attr] = copy(self._params[attr]) + return c + + +if TYPE_CHECKING: + HitMetaBase = HitBaseType +else: + HitMetaBase = AttrDict[Any] + + +class HitMeta(HitMetaBase): + inner_hits: Mapping[str, Any] + + def __init__( + self, + document: Dict[str, Any], + exclude: Tuple[str, ...] = ("_source", "_fields"), + ): + d = { + k[1:] if k.startswith("_") else k: v + for (k, v) in document.items() + if k not in exclude + } + if "type" in d: + # make sure we are consistent everywhere in python + d["doc_type"] = d.pop("type") + super().__init__(d) + + +class ObjectBase(AttrDict[Any]): + _doc_type: "DocumentOptions" + _index: "IndexBase" + meta: HitMeta + + def __init__(self, meta: Optional[Dict[str, Any]] = None, **kwargs: Any): + meta = meta or {} + for k in list(kwargs): + if k.startswith("_") and k[1:] in META_FIELDS: + meta[k] = kwargs.pop(k) + + super(AttrDict, self).__setattr__("meta", HitMeta(meta)) + + # process field defaults + if hasattr(self, "_defaults"): + for name in self._defaults: + if name not in kwargs: + value = self._defaults[name] + if callable(value): + value = value() + kwargs[name] = value + + super().__init__(kwargs) + + @classmethod + def __list_fields(cls) -> Iterator[Tuple[str, "Field", bool]]: + """ + Get all the fields defined for our class, if we have an Index, try 
+ looking at the index mappings as well, mark the fields from Index as + optional. + """ + for name in cls._doc_type.mapping: + field = cls._doc_type.mapping[name] + yield name, field, False + + if hasattr(cls.__class__, "_index"): + if not cls._index._mapping: + return + for name in cls._index._mapping: + # don't return fields that are in _doc_type + if name in cls._doc_type.mapping: + continue + field = cls._index._mapping[name] + yield name, field, True + + @classmethod + def __get_field(cls, name: str) -> Optional["Field"]: + try: + return cls._doc_type.mapping[name] + except KeyError: + # fallback to fields on the Index + if hasattr(cls, "_index") and cls._index._mapping: + try: + return cls._index._mapping[name] + except KeyError: + pass + return None + + @classmethod + def from_es(cls, hit: Union[Dict[str, Any], "ObjectApiResponse[Any]"]) -> Self: + meta = hit.copy() + data = meta.pop("_source", {}) + doc = cls(meta=meta) + doc._from_dict(data) + return doc + + def _from_dict(self, data: Dict[str, Any]) -> None: + for k, v in data.items(): + f = self.__get_field(k) + if f and f._coerce: + v = f.deserialize(v) + setattr(self, k, v) + + def __getstate__(self) -> Tuple[Dict[str, Any], Dict[str, Any]]: # type: ignore[override] + return self.to_dict(), self.meta._d_ + + def __setstate__(self, state: Tuple[Dict[str, Any], Dict[str, Any]]) -> None: # type: ignore[override] + data, meta = state + super(AttrDict, self).__setattr__("_d_", {}) + super(AttrDict, self).__setattr__("meta", HitMeta(meta)) + self._from_dict(data) + + def __getattr__(self, name: str) -> Any: + try: + return super().__getattr__(name) + except AttributeError: + f = self.__get_field(name) + if f is not None and hasattr(f, "empty"): + value = f.empty() + if value not in SKIP_VALUES: + setattr(self, name, value) + value = getattr(self, name) + return value + raise + + def __setattr__(self, name: str, value: Any) -> None: + if name in self.__class__._doc_type.mapping: + self._d_[name] = value + 
else: + super().__setattr__(name, value) + + def to_dict(self, skip_empty: bool = True) -> Dict[str, Any]: + out = {} + for k, v in self._d_.items(): + # if this is a mapped field, + f = self.__get_field(k) + if f and f._coerce: + v = f.serialize(v) + + # if someone assigned AttrList, unwrap it + if isinstance(v, AttrList): + v = v._l_ + + if skip_empty: + # don't serialize empty values + # careful not to include numeric zeros + if v in ([], {}, None): + continue + + out[k] = v + return out + + def clean_fields(self, validate: bool = True) -> None: + errors: Dict[str, List[ValidationException]] = {} + for name, field, optional in self.__list_fields(): + data = self._d_.get(name, None) + if data is None and optional: + continue + try: + # save the cleaned value + data = field.clean(data) + except ValidationException as e: + errors.setdefault(name, []).append(e) + + if name in self._d_ or data not in ([], {}, None): + self._d_[name] = cast(Any, data) + + if validate and errors: + raise ValidationException(errors) + + def clean(self) -> None: + pass + + def full_clean(self) -> None: + self.clean_fields(validate=False) + self.clean() + self.clean_fields(validate=True) + + +def merge( + data: Union[Dict[str, Any], AttrDict[Any]], + new_data: Union[Dict[str, Any], AttrDict[Any]], + raise_on_conflict: bool = False, +) -> None: + if not ( + isinstance(data, (AttrDict, collections.abc.Mapping)) + and isinstance(new_data, (AttrDict, collections.abc.Mapping)) + ): + raise ValueError( + f"You can only merge two dicts! Got {data!r} and {new_data!r} instead." 
+ ) + + for key, value in new_data.items(): + if ( + key in data + and isinstance(data[key], (AttrDict, collections.abc.Mapping)) + and isinstance(value, (AttrDict, collections.abc.Mapping)) + ): + merge(data[key], value, raise_on_conflict) # type: ignore + elif key in data and data[key] != value and raise_on_conflict: + raise ValueError(f"Incompatible data for key {key!r}, cannot be merged.") + else: + data[key] = value + + +def recursive_to_dict(data: Any) -> Any: + """Recursively transform objects that potentially have .to_dict() + into dictionary literals by traversing AttrList, AttrDict, list, + tuple, and Mapping types. + """ + if isinstance(data, AttrList): + data = list(data._l_) + elif hasattr(data, "to_dict"): + data = data.to_dict() + if isinstance(data, (list, tuple)): + return type(data)(recursive_to_dict(inner) for inner in data) + elif isinstance(data, dict): + return {key: recursive_to_dict(val) for key, val in data.items()} + return data diff --git a/elasticsearch/dsl/wrappers.py b/elasticsearch/dsl/wrappers.py new file mode 100644 index 000000000..ecd2e1363 --- /dev/null +++ b/elasticsearch/dsl/wrappers.py @@ -0,0 +1,119 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+import operator
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    ClassVar,
+    Dict,
+    Literal,
+    Mapping,
+    Optional,
+    Tuple,
+    TypeVar,
+    Union,
+    cast,
+)
+
+if TYPE_CHECKING:
+    from _operator import _SupportsComparison
+
+from typing_extensions import TypeAlias
+
+from .utils import AttrDict
+
+ComparisonOperators: TypeAlias = Literal["lt", "lte", "gt", "gte"]
+RangeValT = TypeVar("RangeValT", bound="_SupportsComparison")
+
+__all__ = ["Range"]
+
+
+class Range(AttrDict[RangeValT]):
+    OPS: ClassVar[
+        Mapping[
+            ComparisonOperators,
+            Callable[["_SupportsComparison", "_SupportsComparison"], bool],
+        ]
+    ] = {
+        "lt": operator.lt,
+        "lte": operator.le,
+        "gt": operator.gt,
+        "gte": operator.ge,
+    }
+
+    def __init__(
+        self,
+        d: Optional[Dict[str, RangeValT]] = None,
+        /,
+        **kwargs: RangeValT,
+    ):
+        if d is not None and (kwargs or not isinstance(d, dict)):
+            raise ValueError(
+                "Range accepts a single dictionary or a set of keyword arguments."
+            )
+
+        if d is None:
+            data = kwargs
+        else:
+            data = d
+
+        for k in data:
+            if k not in self.OPS:
+                raise ValueError(f"Range received an unknown operator {k!r}")
+
+        if "gt" in data and "gte" in data:
+            raise ValueError("You cannot specify both gt and gte for Range.")
+
+        if "lt" in data and "lte" in data:
+            raise ValueError("You cannot specify both lt and lte for Range.")
+
+        super().__init__(data)
+
+    def __repr__(self) -> str:
+        return "Range(%s)" % ", ".join("%s=%r" % op for op in self._d_.items())
+
+    def __contains__(self, item: object) -> bool:
+        if isinstance(item, str):
+            return super().__contains__(item)
+
+        item_supports_comp = any(hasattr(item, f"__{op}__") for op in self.OPS)
+        if not item_supports_comp:
+            return False
+
+        for op in self.OPS:
+            if op in self._d_ and not self.OPS[op](
+                cast("_SupportsComparison", item), self._d_[op]
+            ):
+                return False
+        return True
+
+    @property
+    def upper(self) -> Union[Tuple[RangeValT, bool], Tuple[None, Literal[False]]]:
+        if "lt" in self._d_:
+            return self._d_["lt"], False
+        if "lte" in self._d_:
+            return self._d_["lte"], True
+        return None, False
+
+    @property
+    def lower(self) -> Union[Tuple[RangeValT, bool], Tuple[None, Literal[False]]]:
+        if "gt" in self._d_:
+            return self._d_["gt"], False
+        if "gte" in self._d_:
+            return self._d_["gte"], True
+        return None, False
diff --git a/examples/dsl/README.rst b/examples/dsl/README.rst
new file mode 100644
index 000000000..87bfe0ec0
--- /dev/null
+++ b/examples/dsl/README.rst
@@ -0,0 +1,47 @@
+Elasticsearch DSL Examples
+==========================
+
+In this directory you can see several complete examples demonstrating key
+concepts and patterns exposed by ``elasticsearch-dsl``.
+
+``alias_migration.py``
+----------------------
+
+The alias migration example shows a useful pattern where we use versioned
+indices (``test-blog-0``, ``test-blog-1``, ...) to manage schema changes and
+hide them behind an alias so that the application doesn't have to be aware of
+the versions and can just refer to the ``test-blog`` alias for both read and write
+operations.
+
+For simplicity we use a timestamp as version in the index name.
+
+``parent_child.py``
+-------------------
+
+A more complex example highlighting the possible relationships available in
+elasticsearch - `parent/child
+`_ and
+`nested
+`_.
+
+``composite_agg.py``
+--------------------
+
+A helper function using the `composite aggregation
+`_
+to paginate over aggregation results.
+
+``percolate.py``
+----------------
+
+A ``BlogPost`` document with automatic classification using the `percolator
+`_
+functionality.
+
+``completion.py``
+-----------------
+
+An example using `completion suggester
+`_
+to auto complete people's names.
+
diff --git a/examples/dsl/alias_migration.py b/examples/dsl/alias_migration.py
new file mode 100644
index 000000000..24355aded
--- /dev/null
+++ b/examples/dsl/alias_migration.py
@@ -0,0 +1,163 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Simple example with a single Document demonstrating how schema can be managed,
+including upgrading with reindexing.
+
+Key concepts:
+
+    * setup() function to first initialize the schema (as index template) in
+      elasticsearch. Can be called any time (recommended with every deploy of
+      your app).
+
+    * migrate() function to be called any time when the schema changes - it
+      will create a new index (by incrementing the version) and update the alias.
+      By default it will also (before flipping the alias) move the data from the
+      previous index to the new one.
+
+    * BlogPost._matches() class method is required for this code to work since
+      otherwise BlogPost will not be used to deserialize the documents as those
+      will have index set to the concrete index whereas the class refers to the
+      alias.
+"""
+import os
+from datetime import datetime
+from fnmatch import fnmatch
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
+
+from elasticsearch.dsl import Document, Keyword, connections, mapped_field
+
+ALIAS = "test-blog"
+PATTERN = ALIAS + "-*"
+PRIORITY = 100
+
+
+class BlogPost(Document):
+    if TYPE_CHECKING:
+        # definitions here help type checkers understand additional arguments
+        # that are allowed in the constructor
+        _id: int
+
+    title: str
+    tags: List[str] = mapped_field(Keyword())
+    content: str
+    published: Optional[datetime] = mapped_field(default=None)
+
+    def is_published(self) -> bool:
+        return bool(self.published and datetime.now() > self.published)
+
+    @classmethod
+    def _matches(cls, hit: Dict[str, Any]) -> bool:
+        # override _matches to match indices in a pattern instead of just ALIAS
+        # hit is the raw dict as returned by elasticsearch
+        return fnmatch(hit["_index"], PATTERN)
+
+    class Index:
+        # we will use an alias instead of the index
+        name = ALIAS
+        # set settings and possibly other attributes of the index like
+        # analyzers
+        settings = {"number_of_shards": 1, "number_of_replicas": 0}
+
+
+def setup() -> None:
+    """
+    Create the index template in elasticsearch specifying the mappings and any
+    settings to be used. This can be run at any time, ideally at every new code
+    deploy.
+    """
+    # create an index template
+    index_template = BlogPost._index.as_composable_template(
+        ALIAS, PATTERN, priority=PRIORITY
+    )
+    # upload the template into elasticsearch
+    # potentially overriding the one already there
+    index_template.save()
+
+    # create the first index if it doesn't exist
+    if not BlogPost._index.exists():
+        migrate(move_data=False)
+
+
+def migrate(move_data: bool = True, update_alias: bool = True) -> None:
+    """
+    Upgrade function that creates a new index for the data. Optionally it also can
+    (and by default will) reindex previous copy of the data into the new index
+    (specify ``move_data=False`` to skip this step) and update the alias to
+    point to the latest index (set ``update_alias=False`` to skip).
+
+    Note that while this function is running the application can still perform
+    any and all searches without any loss of functionality. It should, however,
+    not perform any writes at this time as those might be lost.
+    """
+    # construct a new index name by appending current timestamp
+    next_index = PATTERN.replace("*", datetime.now().strftime("%Y%m%d%H%M%S%f"))
+
+    # get the low level connection
+    es = connections.get_connection()
+
+    # create new index, it will use the settings from the template
+    es.indices.create(index=next_index)
+
+    if move_data:
+        # move data from current alias to the new index
+        es.options(request_timeout=3600).reindex(
+            body={"source": {"index": ALIAS}, "dest": {"index": next_index}}
+        )
+        # refresh the index to make the changes visible
+        es.indices.refresh(index=next_index)
+
+    if update_alias:
+        # repoint the alias to point to the newly created index
+        es.indices.update_aliases(
+            body={
+                "actions": [
+                    {"remove": {"alias": ALIAS, "index": PATTERN}},
+                    {"add": {"alias": ALIAS, "index": next_index}},
+                ]
+            }
+        )
+
+
+def main() -> None:
+    # initiate the default connection to elasticsearch
+    connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
+
+    # create the empty index
+    setup()
+
+    # create a new document, using this script's own source as its content;
+    # the with-block makes sure the file handle is closed again
+    with open(__file__) as source_file:
+        content = source_file.read()
+    bp = BlogPost(
+        _id=0,
+        title="Hello World!",
+        tags=["testing", "dummy"],
+        content=content,
+    )
+    bp.save(refresh=True)
+
+    # create new index
+    migrate()
+
+    # close the connection
+    connections.get_connection().close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/dsl/async/alias_migration.py b/examples/dsl/async/alias_migration.py
new file mode 100644
index 000000000..94bdd63ce
--- /dev/null
+++ b/examples/dsl/async/alias_migration.py
@@ -0,0 +1,164 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Simple example with a single Document demonstrating how schema can be managed,
+including upgrading with reindexing.
+
+Key concepts:
+
+    * setup() function to first initialize the schema (as index template) in
+      elasticsearch. Can be called any time (recommended with every deploy of
+      your app).
+
+    * migrate() function to be called any time when the schema changes - it
+      will create a new index (by incrementing the version) and update the alias.
+      By default it will also (before flipping the alias) move the data from the
+      previous index to the new one.
+
+    * BlogPost._matches() class method is required for this code to work since
+      otherwise BlogPost will not be used to deserialize the documents as those
+      will have index set to the concrete index whereas the class refers to the
+      alias.
+"""
+import asyncio
+import os
+from datetime import datetime
+from fnmatch import fnmatch
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
+
+from elasticsearch.dsl import AsyncDocument, Keyword, async_connections, mapped_field
+
+ALIAS = "test-blog"
+PATTERN = ALIAS + "-*"
+PRIORITY = 100
+
+
+class BlogPost(AsyncDocument):
+    if TYPE_CHECKING:
+        # definitions here help type checkers understand additional arguments
+        # that are allowed in the constructor
+        _id: int
+
+    title: str
+    tags: List[str] = mapped_field(Keyword())
+    content: str
+    published: Optional[datetime] = mapped_field(default=None)
+
+    def is_published(self) -> bool:
+        return bool(self.published and datetime.now() > self.published)
+
+    @classmethod
+    def _matches(cls, hit: Dict[str, Any]) -> bool:
+        # override _matches to match indices in a pattern instead of just ALIAS
+        # hit is the raw dict as returned by elasticsearch
+        return fnmatch(hit["_index"], PATTERN)
+
+    class Index:
+        # we will use an alias instead of the index
+        name = ALIAS
+        # set settings and possibly other attributes of the index like
+        # analyzers
+        settings = {"number_of_shards": 1, "number_of_replicas": 0}
+
+
+async def setup() -> None:
+    """
+    Create the index template in elasticsearch specifying the mappings and any
+    settings to be used. This can be run at any time, ideally at every new code
+    deploy.
+    """
+    # create an index template
+    index_template = BlogPost._index.as_composable_template(
+        ALIAS, PATTERN, priority=PRIORITY
+    )
+    # upload the template into elasticsearch
+    # potentially overriding the one already there
+    await index_template.save()
+
+    # create the first index if it doesn't exist
+    if not await BlogPost._index.exists():
+        await migrate(move_data=False)
+
+
+async def migrate(move_data: bool = True, update_alias: bool = True) -> None:
+    """
+    Upgrade function that creates a new index for the data. Optionally it also can
+    (and by default will) reindex previous copy of the data into the new index
+    (specify ``move_data=False`` to skip this step) and update the alias to
+    point to the latest index (set ``update_alias=False`` to skip).
+
+    Note that while this function is running the application can still perform
+    any and all searches without any loss of functionality. It should, however,
+    not perform any writes at this time as those might be lost.
+    """
+    # construct a new index name by appending current timestamp
+    next_index = PATTERN.replace("*", datetime.now().strftime("%Y%m%d%H%M%S%f"))
+
+    # get the low level connection
+    es = async_connections.get_connection()
+
+    # create new index, it will use the settings from the template
+    await es.indices.create(index=next_index)
+
+    if move_data:
+        # move data from current alias to the new index
+        await es.options(request_timeout=3600).reindex(
+            body={"source": {"index": ALIAS}, "dest": {"index": next_index}}
+        )
+        # refresh the index to make the changes visible
+        await es.indices.refresh(index=next_index)
+
+    if update_alias:
+        # repoint the alias to point to the newly created index
+        await es.indices.update_aliases(
+            body={
+                "actions": [
+                    {"remove": {"alias": ALIAS, "index": PATTERN}},
+                    {"add": {"alias": ALIAS, "index": next_index}},
+                ]
+            }
+        )
+
+
+async def main() -> None:
+    # initiate the default connection to elasticsearch
+    async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
+
+    # create the empty index
+    await setup()
+
+    # create a new document, using this script's own source as its content;
+    # the with-block makes sure the file handle is closed again
+    with open(__file__) as source_file:
+        content = source_file.read()
+    bp = BlogPost(
+        _id=0,
+        title="Hello World!",
+        tags=["testing", "dummy"],
+        content=content,
+    )
+    await bp.save(refresh=True)
+
+    # create new index
+    await migrate()
+
+    # close the connection
+    await async_connections.get_connection().close()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/dsl/async/completion.py b/examples/dsl/async/completion.py
new file mode 100644
index
000000000..1c5929b2b --- /dev/null +++ b/examples/dsl/async/completion.py @@ -0,0 +1,114 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Example ``Document`` with completion suggester. + +In the ``Person`` class we index the person's name to allow auto completing in +any order ("first last", "middle last first", ...). For the weight we use a +value from the ``popularity`` field which is a long. + +To make the suggestions work in different languages we added a custom analyzer +that does ascii folding. 
+""" + +import asyncio +import os +from itertools import permutations +from typing import TYPE_CHECKING, Any, Dict, Optional + +from elasticsearch.dsl import ( + AsyncDocument, + Completion, + Keyword, + Long, + Text, + analyzer, + async_connections, + mapped_field, + token_filter, +) + +# custom analyzer for names +ascii_fold = analyzer( + "ascii_fold", + # we don't want to split O'Brian or Toulouse-Lautrec + tokenizer="whitespace", + filter=["lowercase", token_filter("ascii_fold", "asciifolding")], +) + + +class Person(AsyncDocument): + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: Optional[int] = mapped_field(default=None) + + name: str = mapped_field(Text(fields={"keyword": Keyword()}), default="") + popularity: int = mapped_field(Long(), default=0) + + # completion field with a custom analyzer + suggest: Dict[str, Any] = mapped_field(Completion(analyzer=ascii_fold), init=False) + + def clean(self) -> None: + """ + Automatically construct the suggestion input and weight by taking all + possible permutations of Person's name as ``input`` and taking their + popularity as ``weight``. 
+ """ + self.suggest = { + "input": [" ".join(p) for p in permutations(self.name.split())], + "weight": self.popularity, + } + + class Index: + name = "test-suggest" + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +async def main() -> None: + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create the empty index + await Person.init() + + # index some sample data + for id, (name, popularity) in enumerate( + [("Henri de Toulouse-Lautrec", 42), ("Jára Cimrman", 124)] + ): + await Person(_id=id, name=name, popularity=popularity).save() + + # refresh index manually to make changes live + await Person._index.refresh() + + # run some suggestions + for text in ("já", "Jara Cimr", "tou", "de hen"): + s = Person.search() + s = s.suggest("auto_complete", text, completion={"field": "suggest"}) + response = await s.execute() + + # print out all the options we got + for option in response.suggest["auto_complete"][0].options: + print("%10s: %25s (%d)" % (text, option._source.name, option._score)) + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/composite_agg.py b/examples/dsl/async/composite_agg.py new file mode 100644 index 000000000..f9a7640a3 --- /dev/null +++ b/examples/dsl/async/composite_agg.py @@ -0,0 +1,94 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import asyncio +import os +from typing import Any, AsyncIterator, Dict, Mapping, Sequence, cast + +from elasticsearch.helpers import async_bulk + +from elasticsearch.dsl import Agg, AsyncSearch, Response, aggs, async_connections +from elasticsearch.dsl.types import CompositeAggregate +from test_elasticsearch.test_dsl.test_integration.test_data import DATA, GIT_INDEX + + +async def scan_aggs( + search: AsyncSearch, + source_aggs: Sequence[Mapping[str, Agg]], + inner_aggs: Dict[str, Agg] = {}, + size: int = 10, +) -> AsyncIterator[CompositeAggregate]: + """ + Helper function used to iterate over all possible bucket combinations of + ``source_aggs``, returning results of ``inner_aggs`` for each. Uses the + ``composite`` aggregation under the hood to perform this. 
+ """ + + async def run_search(**kwargs: Any) -> Response: + s = search[:0] + bucket = s.aggs.bucket( + "comp", + aggs.Composite( + sources=source_aggs, + size=size, + **kwargs, + ), + ) + for agg_name, agg in inner_aggs.items(): + bucket[agg_name] = agg + return await s.execute() + + response = await run_search() + while response.aggregations["comp"].buckets: + for b in response.aggregations["comp"].buckets: + yield cast(CompositeAggregate, b) + if "after_key" in response.aggregations["comp"]: + after = response.aggregations["comp"].after_key + else: + after = response.aggregations["comp"].buckets[-1].key + response = await run_search(after=after) + + +async def main() -> None: + # initiate the default connection to elasticsearch + client = async_connections.create_connection( + hosts=[os.environ["ELASTICSEARCH_URL"]] + ) + + # create the index and populate it with some data + # note that the dataset is imported from the library's test suite + await client.indices.delete(index="git", ignore_unavailable=True) + await client.indices.create(index="git", **GIT_INDEX) + await async_bulk(client, DATA, raise_on_error=True, refresh=True) + + # run some aggregations on the data + async for b in scan_aggs( + AsyncSearch(index="git"), + [{"files": aggs.Terms(field="files")}], + {"first_seen": aggs.Min(field="committed_date")}, + ): + print( + "File %s has been modified %d times, first seen at %s." + % (b.key.files, b.doc_count, b.first_seen.value_as_string) + ) + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/parent_child.py b/examples/dsl/async/parent_child.py new file mode 100644 index 000000000..16dc6ebc3 --- /dev/null +++ b/examples/dsl/async/parent_child.py @@ -0,0 +1,276 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Complex data model example modeling stackoverflow-like data. + +It is used to showcase several key features of elasticsearch-dsl: + + * Object and Nested fields: see User and Comment classes and fields they + are used in + + * method add_comment is used to add comments + + * Parent/Child relationship + + * See the Join field on Post creating the relationship between Question + and Answer + + * the _matches class methods allow the hits from the same index to be wrapped in the proper + classes + + * to see how child objects are created see Question.add_answer + + * Question.search_answers shows how to query for children of a + particular parent + +""" +import asyncio +import os +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncSearch, + Date, + InnerDoc, + Join, + Keyword, + Long, + Text, + async_connections, + mapped_field, +) + + +class User(InnerDoc): + """ + Class used to represent a denormalized user stored on other objects. 
+ """ + + id: int = mapped_field(Long()) + signed_up: Optional[datetime] = mapped_field(Date()) + username: str = mapped_field(Text(fields={"keyword": Keyword()})) + email: Optional[str] = mapped_field(Text(fields={"keyword": Keyword()})) + location: Optional[str] = mapped_field(Text(fields={"keyword": Keyword()})) + + +class Comment(InnerDoc): + """ + Class wrapper for nested comment objects. + """ + + author: User + created: datetime + content: str + + +class Post(AsyncDocument): + """ + Base class for Question and Answer containing the common fields. + """ + + author: User + + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _routing: str = mapped_field(default=None) + _id: Optional[int] = mapped_field(default=None) + + created: Optional[datetime] = mapped_field(default=None) + body: str = mapped_field(default="") + comments: List[Comment] = mapped_field(default_factory=list) + question_answer: Any = mapped_field( + Join(relations={"question": "answer"}), default_factory=dict + ) + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + # Post is an abstract class, make sure it never gets used for + # deserialization + return False + + class Index: + name = "test-qa-site" + settings = { + "number_of_shards": 1, + "number_of_replicas": 0, + } + + async def add_comment( + self, + user: User, + content: str, + created: Optional[datetime] = None, + commit: Optional[bool] = True, + ) -> Comment: + c = Comment(author=user, content=content, created=created or datetime.now()) + self.comments.append(c) + if commit: + await self.save() + return c + + async def save(self, **kwargs: Any) -> None: # type: ignore[override] + # if there is no date, use now + if self.created is None: + self.created = datetime.now() + await super().save(**kwargs) + + +class Question(Post): + tags: List[str] = mapped_field( + default_factory=list + ) # .tags will return empty list if not present + title: 
str = mapped_field(Text(fields={"keyword": Keyword()}), default="") + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + """Use Question class for parent documents""" + return bool(hit["_source"]["question_answer"] == "question") + + @classmethod + def search(cls, **kwargs: Any) -> AsyncSearch: # type: ignore[override] + return cls._index.search(**kwargs).filter("term", question_answer="question") + + async def add_answer( + self, + user: User, + body: str, + created: Optional[datetime] = None, + accepted: bool = False, + commit: Optional[bool] = True, + ) -> "Answer": + answer = Answer( + # required make sure the answer is stored in the same shard + _routing=self.meta.id, + # set up the parent/child mapping + question_answer={"name": "answer", "parent": self.meta.id}, + # pass in the field values + author=user, + created=created, + body=body, + is_accepted=accepted, + ) + if commit: + await answer.save() + return answer + + def search_answers(self) -> AsyncSearch: + # search only our index + s = Answer.search() + # filter for answers belonging to us + s = s.filter("parent_id", type="answer", id=self.meta.id) + # add routing to only go to specific shard + s = s.params(routing=self.meta.id) + return s + + async def get_answers(self) -> List[Any]: + """ + Get answers either from inner_hits already present or by searching + elasticsearch. 
+ """ + if "inner_hits" in self.meta and "answer" in self.meta.inner_hits: + return cast(List[Any], self.meta.inner_hits["answer"].hits) + return [a async for a in self.search_answers()] + + async def save(self, **kwargs: Any) -> None: # type: ignore[override] + self.question_answer = "question" + await super().save(**kwargs) + + +class Answer(Post): + is_accepted: bool = mapped_field(default=False) + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + """Use Answer class for child documents with child name 'answer'""" + return ( + isinstance(hit["_source"]["question_answer"], dict) + and hit["_source"]["question_answer"].get("name") == "answer" + ) + + @classmethod + def search(cls, **kwargs: Any) -> AsyncSearch: # type: ignore[override] + return cls._index.search(**kwargs).exclude("term", question_answer="question") + + async def get_question(self) -> Optional[Question]: + # cache question in self.meta + # any attributes set on self would be interpreted as fields + if "question" not in self.meta: + self.meta.question = await Question.get( + id=self.question_answer.parent, index=self.meta.index + ) + return cast(Optional[Question], self.meta.question) + + async def save(self, **kwargs: Any) -> None: # type: ignore[override] + # set routing to parents id automatically + self.meta.routing = self.question_answer.parent + await super().save(**kwargs) + + +async def setup() -> None: + """Create an IndexTemplate and save it into elasticsearch.""" + index_template = Post._index.as_composable_template("base", priority=100) + await index_template.save() + + +async def main() -> Answer: + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create index + await setup() + + # user objects to use + nick = User( + id=47, + signed_up=datetime(2017, 4, 3), + username="fxdgear", + email="nick.lang@elastic.co", + location="Colorado", + ) + honza = User( + id=42, + 
signed_up=datetime(2013, 4, 3), + username="honzakral", + email="honza@elastic.co", + location="Prague", + ) + + # create a question object + question = Question( + _id=1, + author=nick, + tags=["elasticsearch", "python"], + title="How do I use elasticsearch from Python?", + body=""" + I want to use elasticsearch, how do I do it from Python? + """, + ) + await question.save() + answer = await question.add_answer(honza, "Just use `elasticsearch-py`!") + + # close the connection + await async_connections.get_connection().close() + + return answer + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/percolate.py b/examples/dsl/async/percolate.py new file mode 100644 index 000000000..75350bbed --- /dev/null +++ b/examples/dsl/async/percolate.py @@ -0,0 +1,117 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import asyncio +import os +from typing import TYPE_CHECKING, Any, List, Optional + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncSearch, + Keyword, + Percolator, + Q, + Query, + async_connections, + mapped_field, +) + + +class BlogPost(AsyncDocument): + """ + Blog posts that will be automatically tagged based on percolation queries. 
+ """ + + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: int + + content: Optional[str] + tags: List[str] = mapped_field(Keyword(), default_factory=list) + + class Index: + name = "test-blogpost" + + async def add_tags(self) -> None: + # run a percolation to automatically tag the blog post. + s = AsyncSearch(index="test-percolator") + s = s.query( + "percolate", field="query", index=self._get_index(), document=self.to_dict() + ) + + # collect all the tags from matched percolators + async for percolator in s: + self.tags.extend(percolator.tags) + + # make sure tags are unique + self.tags = list(set(self.tags)) + + async def save(self, **kwargs: Any) -> None: # type: ignore[override] + await self.add_tags() + await super().save(**kwargs) + + +class PercolatorDoc(AsyncDocument): + """ + Document class used for storing the percolation queries. + """ + + if TYPE_CHECKING: + _id: str + + # relevant fields from BlogPost must be also present here for the queries + # to be able to use them. Another option would be to use document + # inheritance but save() would have to be reset to normal behavior. 
+ content: Optional[str] + + # the percolator query to be run against the doc + query: Query = mapped_field(Percolator()) + # list of tags to append to a document + tags: List[str] = mapped_field(Keyword(multi=True)) + + class Index: + name = "test-percolator" + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +async def setup() -> None: + # create the percolator index if it doesn't exist + if not await PercolatorDoc._index.exists(): + await PercolatorDoc.init() + + # register a percolation query looking for documents about python + await PercolatorDoc( + _id="python", + tags=["programming", "development", "python"], + content="", + query=Q("match", content="python"), + ).save(refresh=True) + + +async def main() -> None: + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + await setup() + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/search_as_you_type.py b/examples/dsl/async/search_as_you_type.py new file mode 100644 index 000000000..5919f3e3f --- /dev/null +++ b/examples/dsl/async/search_as_you_type.py @@ -0,0 +1,99 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Example ``Document`` with search_as_you_type field datatype and how to search it. + +When creating a field with search_as_you_type datatype Elasticsearch creates additional +subfields to enable efficient as-you-type completion, matching terms at any position +within the input. + +Adding a custom analyzer with ascii folding would allow search to work in different languages. +""" + +import asyncio +import os +from typing import TYPE_CHECKING, Optional + +from elasticsearch.dsl import ( + AsyncDocument, + SearchAsYouType, + async_connections, + mapped_field, +) +from elasticsearch.dsl.query import MultiMatch + + +class Person(AsyncDocument): + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: Optional[int] = mapped_field(default=None) + + name: str = mapped_field(SearchAsYouType(max_shingle_size=3), default="") + + class Index: + name = "test-search-as-you-type" + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +async def main() -> None: + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create the empty index + await Person.init() + + import pprint + + pprint.pprint(Person().to_dict(), indent=2) + + # index some sample data + names = [ + "Andy Warhol", + "Alphonse Mucha", + "Henri de Toulouse-Lautrec", + "Jára Cimrman", + ] + for id, name in enumerate(names): + await Person(_id=id, name=name).save() + + # refresh index manually to make changes live + await Person._index.refresh() + + # run some suggestions + for text in ("já", "Cimr", "toulouse", "Henri Tou", "a"): + s = Person.search() + + s.query = MultiMatch( # type: ignore[assignment] + query=text, + type="bool_prefix", + fields=["name", "name._2gram", "name._3gram"], + ) + + response = await s.execute() + + # print out all the 
options we got + for h in response: + print("%15s: %25s" % (text, h.name)) + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/semantic_text.py b/examples/dsl/async/semantic_text.py new file mode 100644 index 000000000..0c416067b --- /dev/null +++ b/examples/dsl/async/semantic_text.py @@ -0,0 +1,148 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +""" +# Semantic Text example + +Requirements: + +$ pip install "elasticsearch-dsl[async]" tqdm + +Before running this example, an ELSER inference endpoint must be created in the +Elasticsearch cluster. This can be done manually from Kibana, or with the +following curl command from a terminal: + +curl -X PUT \ + "$ELASTICSEARCH_URL/_inference/sparse_embedding/my-elser-endpoint" \ + -H "Content-Type: application/json" \ + -d '{"service":"elser","service_settings":{"num_allocations":1,"num_threads":1}}' + +To run the example: + +$ python semantic_text.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to the command to regenerate it. + +The example dataset includes a selection of workplace documents. 
The +following are good example queries to try out with this dataset: + +$ python semantic_text.py "work from home" +$ python semantic_text.py "vacation time" +$ python semantic_text.py "can I bring a bird to work?" + +When the index is created, the inference service will split the documents into +short passages, and for each passage a sparse embedding will be generated using +Elastic's ELSER v2 model. +""" + +import argparse +import asyncio +import json +import os +from datetime import datetime +from typing import Any, Optional +from urllib.request import urlopen + +from tqdm import tqdm + +from elasticsearch import dsl + +DATASET_URL = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json" + + +class WorkplaceDoc(dsl.AsyncDocument): + class Index: + name = "workplace_documents_semantic" + + name: str + summary: str + content: Any = dsl.mapped_field( + dsl.field.SemanticText(inference_id="my-elser-endpoint") + ) + created: datetime + updated: Optional[datetime] + url: str = dsl.mapped_field(dsl.Keyword()) + category: str = dsl.mapped_field(dsl.Keyword()) + + +async def create() -> None: + + # create the index + await WorkplaceDoc._index.delete(ignore_unavailable=True) + await WorkplaceDoc.init() + + # download the data + dataset = json.loads(urlopen(DATASET_URL).read()) + + # import the dataset + for data in tqdm(dataset, desc="Indexing documents..."): + doc = WorkplaceDoc( + name=data["name"], + summary=data["summary"], + content=data["content"], + created=data.get("created_on"), + updated=data.get("updated_at"), + url=data["url"], + category=data["category"], + ) + await doc.save() + + # refresh the index + await WorkplaceDoc._index.refresh() + + +async def search(query: str) -> dsl.AsyncSearch[WorkplaceDoc]: + search = WorkplaceDoc.search() + search = search[:5] + return search.query(dsl.query.Semantic(field=WorkplaceDoc.content, query=query)) + + +def parse_args() -> argparse.Namespace: + parser = 
argparse.ArgumentParser(description="Vector database with Elasticsearch") + parser.add_argument( + "--recreate-index", action="store_true", help="Recreate and populate the index" + ) + parser.add_argument("query", action="store", help="The search query") + return parser.parse_args() + + +async def main() -> None: + args = parse_args() + + # initiate the default connection to elasticsearch + dsl.async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + if args.recreate_index or not await WorkplaceDoc._index.exists(): + await create() + + results = await search(args.query) + + async for hit in results: + print( + f"Document: {hit.name} [Category: {hit.category}] [Score: {hit.meta.score}]" + ) + print(f"Content: {hit.content.text}") + print("--------------------\n") + + # close the connection + await dsl.async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/sparse_vectors.py b/examples/dsl/async/sparse_vectors.py new file mode 100644 index 000000000..86d99bfff --- /dev/null +++ b/examples/dsl/async/sparse_vectors.py @@ -0,0 +1,198 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +""" +# Sparse vector database example + +Requirements: + +$ pip install nltk tqdm elasticsearch-dsl[async] + +Before running this example, the ELSER v2 model must be downloaded and deployed +to the Elasticsearch cluster, and an ingest pipeline must be defined. This can +be done manually from Kibana, or with the following three curl commands from a +terminal, adjusting the endpoint as needed: + +curl -X PUT \ + "http://localhost:9200/_ml/trained_models/.elser_model_2?wait_for_completion" \ + -H "Content-Type: application/json" \ + -d '{"input":{"field_names":["text_field"]}}' +curl -X POST \ + "http://localhost:9200/_ml/trained_models/.elser_model_2/deployment/_start?wait_for=fully_allocated" +curl -X PUT \ + "http://localhost:9200/_ingest/pipeline/elser_ingest_pipeline" \ + -H "Content-Type: application/json" \ + -d '{"processors":[{"foreach":{"field":"passages","processor":{"inference":{"model_id":".elser_model_2","input_output":[{"input_field":"_ingest._value.content","output_field":"_ingest._value.embedding"}]}}}}]}' + +To run the example: + +$ python sparse_vectors.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to regenerate it. + +The example dataset includes a selection of workplace documents. The +following are good example queries to try out with this dataset: + +$ python sparse_vectors.py "work from home" +$ python sparse_vectors.py "vacation time" +$ python sparse_vectors.py "can I bring a bird to work?" + +When the index is created, the documents are split into short passages, and for +each passage a sparse embedding is generated using Elastic's ELSER v2 model. +The documents that are returned as search results are those that have the +highest scored passages. Add `--show-inner-hits` to the command to see +individual passage results as well. 
+""" + +import argparse +import asyncio +import json +import os +from datetime import datetime +from typing import Any, Dict, List, Optional +from urllib.request import urlopen + +import nltk # type: ignore +from tqdm import tqdm + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncSearch, + InnerDoc, + Keyword, + Q, + SparseVector, + async_connections, + mapped_field, +) + +DATASET_URL = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json" + +# initialize sentence tokenizer +nltk.download("punkt_tab", quiet=True) + + +class Passage(InnerDoc): + content: Optional[str] + embedding: Dict[str, float] = mapped_field(SparseVector(), init=False) + + +class WorkplaceDoc(AsyncDocument): + class Index: + name = "workplace_documents_sparse" + settings = {"default_pipeline": "elser_ingest_pipeline"} + + name: str + summary: str + content: str + created: datetime + updated: Optional[datetime] + url: str = mapped_field(Keyword()) + category: str = mapped_field(Keyword()) + passages: List[Passage] = mapped_field(default=[]) + + _model: Any = None + + def clean(self) -> None: + # split the content into sentences + passages = nltk.sent_tokenize(self.content) + + # generate an embedding for each passage and save it as a nested document + for passage in passages: + self.passages.append(Passage(content=passage)) + + +async def create() -> None: + + # create the index + await WorkplaceDoc._index.delete(ignore_unavailable=True) + await WorkplaceDoc.init() + + # download the data + dataset = json.loads(urlopen(DATASET_URL).read()) + + # import the dataset + for data in tqdm(dataset, desc="Indexing documents..."): + doc = WorkplaceDoc( + name=data["name"], + summary=data["summary"], + content=data["content"], + created=data.get("created_on"), + updated=data.get("updated_at"), + url=data["url"], + category=data["category"], + ) + await doc.save() + + +async def search(query: str) -> AsyncSearch[WorkplaceDoc]: + return 
WorkplaceDoc.search()[:5].query( + "nested", + path="passages", + query=Q( + "text_expansion", + passages__content={ + "model_id": ".elser_model_2", + "model_text": query, + }, + ), + inner_hits={"size": 2}, + ) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Vector database with Elasticsearch") + parser.add_argument( + "--recreate-index", action="store_true", help="Recreate and populate the index" + ) + parser.add_argument( + "--show-inner-hits", + action="store_true", + help="Show results for individual passages", + ) + parser.add_argument("query", action="store", help="The search query") + return parser.parse_args() + + +async def main() -> None: + args = parse_args() + + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + if args.recreate_index or not await WorkplaceDoc._index.exists(): + await create() + + results = await search(args.query) + + async for hit in results: + print( + f"Document: {hit.name} [Category: {hit.category}] [Score: {hit.meta.score}]" + ) + print(f"Summary: {hit.summary}") + if args.show_inner_hits: + for passage in hit.meta.inner_hits["passages"]: + print(f" - [Score: {passage.meta.score}] {passage.content!r}") + print("") + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/vectors.py b/examples/dsl/async/vectors.py new file mode 100644 index 000000000..62fbfe3f5 --- /dev/null +++ b/examples/dsl/async/vectors.py @@ -0,0 +1,187 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +# Vector database example + +Requirements: + +$ pip install nltk sentence_transformers tqdm elasticsearch-dsl[async] + +To run the example: + +$ python vectors.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to regenerate it. + +The example dataset includes a selection of workplace documents. The +following are good example queries to try out with this dataset: + +$ python vectors.py "work from home" +$ python vectors.py "vacation time" +$ python vectors.py "can I bring a bird to work?" + +When the index is created, the documents are split into short passages, and for +each passage an embedding is generated using the open source +"all-MiniLM-L6-v2" model. The documents that are returned as search results are +those that have the highest scored passages. Add `--show-inner-hits` to the +command to see individual passage results as well. 
class WorkplaceDoc(AsyncDocument):
    """Workplace document stored in the ``workplace_documents`` index.

    Besides the plain text fields, every document carries a list of
    ``Passage`` objects: one per sentence of ``content``, each with a dense
    embedding computed locally by ``get_embedding``.
    """

    class Index:
        name = "workplace_documents"

    name: str
    summary: str
    content: str
    created: datetime
    updated: Optional[datetime]
    # url and category are mapped as keywords and flagged as required
    url: str = mapped_field(Keyword(required=True))
    category: str = mapped_field(Keyword(required=True))
    # filled in by clean(); callers are not expected to pass passages
    # NOTE(review): a shared list literal is used as the default here;
    # confirm how mapped_field() treats it (other examples in this change use
    # default_factory=list).
    passages: M[List[Passage]] = mapped_field(default=[])

    @classmethod
    def get_embedding(cls, input: str) -> List[float]:
        """Return the embedding of ``input`` as a list of floats.

        The ``SentenceTransformer`` model named by ``MODEL_NAME`` is created
        on the first call and cached in the module-level ``embedding_model``
        variable, so later calls reuse the same instance.
        """
        global embedding_model
        if embedding_model is None:
            embedding_model = SentenceTransformer(MODEL_NAME)
        # encode() output is converted to a plain list before being returned
        return cast(List[float], list(embedding_model.encode(input)))

    def clean(self) -> None:
        """Split ``content`` into sentences and append one passage for each.

        Passages are appended to whatever ``self.passages`` already holds.
        NOTE(review): presumably invoked by the document machinery before the
        document is saved; the call site is not visible here, so confirm.
        """
        # split the content into sentences
        passages = cast(List[str], nltk.sent_tokenize(self.content))

        # generate an embedding for each passage and save it as a nested document
        for passage in passages:
            self.passages.append(
                Passage(content=passage, embedding=self.get_embedding(passage))
            )
async def search(query: str) -> AsyncSearch[WorkplaceDoc]:
    """Build a kNN search for the passages closest to ``query``.

    The query text is embedded with ``WorkplaceDoc.get_embedding`` and matched
    against the ``passages.embedding`` field with ``k=5`` and 50 candidates.
    Up to two matching passages per document are requested as inner hits.

    The search object is returned without being executed; the caller iterates
    over it to obtain the hits.
    """
    return WorkplaceDoc.search().knn(
        field=WorkplaceDoc.passages.embedding,
        k=5,
        num_candidates=50,
        # get_embedding() already returns a freshly built list, so it is
        # passed through as is instead of being copied a second time
        query_vector=WorkplaceDoc.get_embedding(query),
        inner_hits={"size": 2},
    )
under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Example ``Document`` with completion suggester. + +In the ``Person`` class we index the person's name to allow auto completing in +any order ("first last", "middle last first", ...). For the weight we use a +value from the ``popularity`` field which is a long. + +To make the suggestions work in different languages we added a custom analyzer +that does ascii folding. 
+""" + +import os +from itertools import permutations +from typing import TYPE_CHECKING, Any, Dict, Optional + +from elasticsearch.dsl import ( + Completion, + Document, + Keyword, + Long, + Text, + analyzer, + connections, + mapped_field, + token_filter, +) + +# custom analyzer for names +ascii_fold = analyzer( + "ascii_fold", + # we don't want to split O'Brian or Toulouse-Lautrec + tokenizer="whitespace", + filter=["lowercase", token_filter("ascii_fold", "asciifolding")], +) + + +class Person(Document): + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: Optional[int] = mapped_field(default=None) + + name: str = mapped_field(Text(fields={"keyword": Keyword()}), default="") + popularity: int = mapped_field(Long(), default=0) + + # completion field with a custom analyzer + suggest: Dict[str, Any] = mapped_field(Completion(analyzer=ascii_fold), init=False) + + def clean(self) -> None: + """ + Automatically construct the suggestion input and weight by taking all + possible permutations of Person's name as ``input`` and taking their + popularity as ``weight``. 
def main() -> None:
    """Run the completion suggester demo end to end.

    Connects to the cluster named by the ``ELASTICSEARCH_URL`` environment
    variable, creates the ``Person`` index, stores two sample people, runs a
    few suggestion queries and prints every option that comes back.
    """
    # initiate the default connection to elasticsearch
    es_url = os.environ["ELASTICSEARCH_URL"]
    connections.create_connection(hosts=[es_url])

    # create the empty index
    Person.init()

    # index some sample data, using the position in the list as document id
    sample_people = [("Henri de Toulouse-Lautrec", 42), ("Jára Cimrman", 124)]
    for doc_id, (full_name, score) in enumerate(sample_people):
        person = Person(_id=doc_id, name=full_name, popularity=score)
        person.save()

    # refresh index manually to make changes live
    Person._index.refresh()

    # run some suggestions
    prefixes = ("já", "Jara Cimr", "tou", "de hen")
    for text in prefixes:
        suggester = Person.search().suggest(
            "auto_complete", text, completion={"field": "suggest"}
        )
        response = suggester.execute()

        # print out all the options we got
        options = response.suggest["auto_complete"][0].options
        for option in options:
            print("%10s: %25s (%d)" % (text, option._source.name, option._score))

    # close the connection
    connections.get_connection().close()
def scan_aggs(
    search: Search,
    source_aggs: Sequence[Mapping[str, Agg]],
    inner_aggs: Dict[str, Agg] = {},
    size: int = 10,
) -> Iterator[CompositeAggregate]:
    """
    Lazily walk every bucket combination produced by ``source_aggs``.

    A ``composite`` aggregation named ``"comp"`` is attached to a copy of
    ``search`` and requested page by page (``size`` buckets per request). Each
    bucket is yielded as soon as its page arrives and carries the results of
    ``inner_aggs``. Iteration stops at the first page without buckets.

    ``inner_aggs`` is only read, never modified, so the shared default
    dictionary is not altered between calls.
    """

    def fetch_page(**composite_options: Any) -> Response:
        # ``search[:0]`` slices the search down to zero hits; presumably only
        # the aggregation output is of interest here
        paged = search[:0]
        composite = paged.aggs.bucket(
            "comp",
            aggs.Composite(sources=source_aggs, size=size, **composite_options),
        )
        # nest every requested inner aggregation under the composite one
        for label, inner in inner_aggs.items():
            composite[label] = inner
        return paged.execute()

    page = fetch_page()
    while True:
        comp = page.aggregations["comp"]
        page_buckets = comp.buckets
        if not page_buckets:
            return

        for bucket in page_buckets:
            yield cast(CompositeAggregate, bucket)

        # continue after the key reported by the response, falling back to
        # the key of the last bucket when no "after_key" is present
        resume_from = comp.after_key if "after_key" in comp else page_buckets[-1].key
        page = fetch_page(after=resume_from)
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Complex data model example modeling stackoverflow-like data. + +It is used to showcase several key features of elasticsearch-dsl: + + * Object and Nested fields: see User and Comment classes and fields they + are used in + + * method add_comment is used to add comments + + * Parent/Child relationship + + * See the Join field on Post creating the relationship between Question + and Answer + + * Meta.matches allows the hits from same index to be wrapped in proper + classes + + * to see how child objects are created see Question.add_answer + + * Question.search_answers shows how to query for children of a + particular parent + +""" +import os +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast + +from elasticsearch.dsl import ( + Date, + Document, + InnerDoc, + Join, + Keyword, + Long, + Search, + Text, + connections, + mapped_field, +) + + +class User(InnerDoc): + """ + Class used to represent a denormalized user stored on other objects. + """ + + id: int = mapped_field(Long()) + signed_up: Optional[datetime] = mapped_field(Date()) + username: str = mapped_field(Text(fields={"keyword": Keyword()})) + email: Optional[str] = mapped_field(Text(fields={"keyword": Keyword()})) + location: Optional[str] = mapped_field(Text(fields={"keyword": Keyword()})) + + +class Comment(InnerDoc): + """ + Class wrapper for nested comment objects. 
class Question(Post):
    """
    Parent side of the question/answer join: a ``Post`` with tags and a title.
    """

    # .tags will return empty list if not present
    tags: List[str] = mapped_field(default_factory=list)
    title: str = mapped_field(Text(fields={"keyword": Keyword()}), default="")

    @classmethod
    def _matches(cls, hit: Dict[str, Any]) -> bool:
        """Claim hits whose join field holds the plain value ``"question"``."""
        join_value = hit["_source"]["question_answer"]
        return bool(join_value == "question")

    @classmethod
    def search(cls, **kwargs: Any) -> Search:  # type: ignore[override]
        """Search the index, restricted to documents marked as questions."""
        index_search = cls._index.search(**kwargs)
        return index_search.filter("term", question_answer="question")

    def add_answer(
        self,
        user: User,
        body: str,
        created: Optional[datetime] = None,
        accepted: bool = False,
        commit: Optional[bool] = True,
    ) -> "Answer":
        """
        Create an ``Answer`` attached to this question and return it.

        The answer is saved right away unless ``commit`` is falsy.
        """
        question_id = self.meta.id
        reply = Answer(
            # routing by the question id keeps the answer on the same shard
            _routing=question_id,
            # the join field links the child to its parent
            question_answer={"name": "answer", "parent": question_id},
            # regular field values
            author=user,
            created=created,
            body=body,
            is_accepted=accepted,
        )
        if not commit:
            return reply
        reply.save()
        return reply

    def search_answers(self) -> Search:
        """Return a search for the answers that belong to this question."""
        # start from the answers-only search, keep the children of this
        # question and route the request to the shard holding them
        return (
            Answer.search()
            .filter("parent_id", type="answer", id=self.meta.id)
            .params(routing=self.meta.id)
        )

    def get_answers(self) -> List[Any]:
        """
        Return the answers of this question.

        Answers already delivered as ``inner_hits`` on this document are
        reused; otherwise elasticsearch is queried through ``search_answers``.
        """
        have_inner_hits = "inner_hits" in self.meta and "answer" in self.meta.inner_hits
        if have_inner_hits:
            return cast(List[Any], self.meta.inner_hits["answer"].hits)
        return list(self.search_answers())

    def save(self, **kwargs: Any) -> None:  # type: ignore[override]
        """Mark the document as a question in the join field, then save it."""
        self.question_answer = "question"
        super().save(**kwargs)
def setup() -> None:
    """Build a composable index template from the ``Post`` index and save it.

    The template is named ``"base"`` and is created with priority 100.
    """
    Post._index.as_composable_template("base", priority=100).save()
class BlogPost(Document):
    """
    Blog posts that will be automatically tagged based on percolation queries.
    """

    if TYPE_CHECKING:
        # definitions here help type checkers understand additional arguments
        # that are allowed in the constructor
        _id: int

    content: Optional[str]
    tags: List[str] = mapped_field(Keyword(), default_factory=list)

    class Index:
        name = "test-blogpost"

    def add_tags(self) -> None:
        """Extend ``self.tags`` with the tags of every matching percolator.

        The document is percolated against the queries stored in the
        ``test-percolator`` index. Tags of all matching queries are appended
        and duplicates are removed afterwards.
        """
        # run a percolation to automatically tag the blog post.
        s = Search(index="test-percolator")
        s = s.query(
            "percolate", field="query", index=self._get_index(), document=self.to_dict()
        )

        # collect all the tags from matched percolators
        for percolator in s:
            self.tags.extend(percolator.tags)

        # make sure tags are unique; dict.fromkeys() keeps the first
        # occurrence of each tag in its original position, so the stored
        # order is the same on every run (a set would give an arbitrary,
        # run-dependent order)
        self.tags = list(dict.fromkeys(self.tags))

    def save(self, **kwargs: Any) -> None:  # type: ignore[override]
        """Tag the post via ``add_tags`` and then save it.

        All keyword arguments are forwarded to the parent ``save``.
        """
        self.add_tags()
        super().save(**kwargs)
def setup() -> None:
    """Prepare the percolator index and register the sample query.

    The index is created only when it is missing. A percolator document with
    id ``"python"`` is then saved with ``refresh=True``; it matches documents
    whose ``content`` mentions python and carries the tags to apply to them.
    """
    # create the percolator index if it doesn't exist
    index_missing = not PercolatorDoc._index.exists()
    if index_missing:
        PercolatorDoc.init()

    # register a percolation query looking for documents about python
    python_query = Q("match", content="python")
    python_percolator = PercolatorDoc(
        _id="python",
        tags=["programming", "development", "python"],
        content="",
        query=python_query,
    )
    python_percolator.save(refresh=True)
+ +""" +Example ``Document`` with search_as_you_type field datatype and how to search it. + +When creating a field with search_as_you_type datatype Elasticsearch creates additional +subfields to enable efficient as-you-type completion, matching terms at any position +within the input. + +A custom analyzer with ascii folding allows search to work in different languages. +""" + +import os +from typing import TYPE_CHECKING, Optional + +from elasticsearch.dsl import Document, SearchAsYouType, connections, mapped_field +from elasticsearch.dsl.query import MultiMatch + + +class Person(Document): + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: Optional[int] = mapped_field(default=None) + + name: str = mapped_field(SearchAsYouType(max_shingle_size=3), default="") + + class Index: + name = "test-search-as-you-type" + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +def main() -> None: + # initiate the default connection to elasticsearch + connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create the empty index + Person.init() + + import pprint + + pprint.pprint(Person().to_dict(), indent=2) + + # index some sample data + names = [ + "Andy Warhol", + "Alphonse Mucha", + "Henri de Toulouse-Lautrec", + "Jára Cimrman", + ] + for id, name in enumerate(names): + Person(_id=id, name=name).save() + + # refresh index manually to make changes live + Person._index.refresh() + + # run some suggestions + for text in ("já", "Cimr", "toulouse", "Henri Tou", "a"): + s = Person.search() + + s.query = MultiMatch( # type: ignore[assignment] + query=text, + type="bool_prefix", + fields=["name", "name._2gram", "name._3gram"], + ) + + response = s.execute() + + # print out all the options we got + for h in response: + print("%15s: %25s" % (text, h.name)) + + # close the connection + connections.get_connection().close() + + +if __name__ == "__main__": + main() diff
--git a/examples/dsl/semantic_text.py b/examples/dsl/semantic_text.py new file mode 100644 index 000000000..aff2d8097 --- /dev/null +++ b/examples/dsl/semantic_text.py @@ -0,0 +1,147 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +""" +# Semantic Text example + +Requirements: + +$ pip install "elasticsearch-dsl" tqdm + +Before running this example, an ELSER inference endpoint must be created in the +Elasticsearch cluster. This can be done manually from Kibana, or with the +following curl command from a terminal: + +curl -X PUT \ + "$ELASTICSEARCH_URL/_inference/sparse_embedding/my-elser-endpoint" \ + -H "Content-Type: application/json" \ + -d '{"service":"elser","service_settings":{"num_allocations":1,"num_threads":1}}' + +To run the example: + +$ python semantic_text.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to the command to regenerate it. + +The example dataset includes a selection of workplace documents. The +following are good example queries to try out with this dataset: + +$ python semantic_text.py "work from home" +$ python semantic_text.py "vacation time" +$ python semantic_text.py "can I bring a bird to work?" 
def search(query: str) -> dsl.Search[WorkplaceDoc]:
    """Build a semantic search for ``query`` over the ``content`` field.

    The search is sliced to the first five results and returned without being
    executed; the caller iterates over it to obtain the hits.
    """
    top_five = WorkplaceDoc.search()[:5]
    semantic_clause = dsl.query.Semantic(field=WorkplaceDoc.content, query=query)
    return top_five.query(semantic_clause)
main() -> None: + args = parse_args() + + # initiate the default connection to elasticsearch + dsl.connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + if args.recreate_index or not WorkplaceDoc._index.exists(): + create() + + results = search(args.query) + + for hit in results: + print( + f"Document: {hit.name} [Category: {hit.category}] [Score: {hit.meta.score}]" + ) + print(f"Content: {hit.content.text}") + print("--------------------\n") + + # close the connection + dsl.connections.get_connection().close() + + +if __name__ == "__main__": + main() diff --git a/examples/dsl/sparse_vectors.py b/examples/dsl/sparse_vectors.py new file mode 100644 index 000000000..01bb99178 --- /dev/null +++ b/examples/dsl/sparse_vectors.py @@ -0,0 +1,197 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +# Sparse vector database example + +Requirements: + +$ pip install nltk tqdm elasticsearch-dsl + +Before running this example, the ELSER v2 model must be downloaded and deployed +to the Elasticsearch cluster, and an ingest pipeline must be defined. 
This can +be done manually from Kibana, or with the following three curl commands from a +terminal, adjusting the endpoint as needed: + +curl -X PUT \ + "http://localhost:9200/_ml/trained_models/.elser_model_2?wait_for_completion" \ + -H "Content-Type: application/json" \ + -d '{"input":{"field_names":["text_field"]}}' +curl -X POST \ + "http://localhost:9200/_ml/trained_models/.elser_model_2/deployment/_start?wait_for=fully_allocated" +curl -X PUT \ + "http://localhost:9200/_ingest/pipeline/elser_ingest_pipeline" \ + -H "Content-Type: application/json" \ + -d '{"processors":[{"foreach":{"field":"passages","processor":{"inference":{"model_id":".elser_model_2","input_output":[{"input_field":"_ingest._value.content","output_field":"_ingest._value.embedding"}]}}}}]}' + +To run the example: + +$ python sparse_vectors.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to regenerate it. + +The example dataset includes a selection of workplace documents. The +following are good example queries to try out with this dataset: + +$ python sparse_vectors.py "work from home" +$ python sparse_vectors.py "vacation time" +$ python sparse_vectors.py "can I bring a bird to work?" + +When the index is created, the documents are split into short passages, and for +each passage a sparse embedding is generated using Elastic's ELSER v2 model. +The documents that are returned as search results are those that have the +highest scored passages. Add `--show-inner-hits` to the command to see +individual passage results as well. 
def create() -> None:
    """Rebuild the example index and load the workplace dataset into it.

    Deletes the index behind ``WorkplaceDoc`` (a missing index is ignored),
    calls ``WorkplaceDoc.init()``, downloads the JSON dataset from
    ``DATASET_URL`` and saves one ``WorkplaceDoc`` per dataset entry.
    """
    # create the index
    WorkplaceDoc._index.delete(ignore_unavailable=True)
    WorkplaceDoc.init()

    # download the data; the context manager closes the HTTP response even
    # when reading or JSON decoding fails
    with urlopen(DATASET_URL) as response:
        dataset = json.loads(response.read())

    # import the dataset
    for data in tqdm(dataset, desc="Indexing documents..."):
        doc = WorkplaceDoc(
            name=data["name"],
            summary=data["summary"],
            content=data["content"],
            # the two timestamps are read with .get(), so they may be absent
            created=data.get("created_on"),
            updated=data.get("updated_at"),
            url=data["url"],
            category=data["category"],
        )
        doc.save()
def main() -> None:
    """Run the example end to end: connect, index if needed, search, print.

    The cluster address is read from the ``ELASTICSEARCH_URL`` environment
    variable (a ``KeyError`` is raised when it is not set). The index is
    (re)built when ``--recreate-index`` is given or the index does not exist.
    """
    args = parse_args()

    # initiate the default connection to elasticsearch
    connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])

    try:
        if args.recreate_index or not WorkplaceDoc._index.exists():
            create()

        results = search(args.query)

        for hit in results:
            print(
                f"Document: {hit.name} [Category: {hit.category}] [Score: {hit.meta.score}]"
            )
            print(f"Summary: {hit.summary}")
            if args.show_inner_hits:
                for passage in hit.meta.inner_hits["passages"]:
                    print(f" - [Score: {passage.meta.score}] {passage.content!r}")
            print("")
    finally:
        # close the connection even when indexing or searching fails
        connections.get_connection().close()
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +# Vector database example + +Requirements: + +$ pip install nltk sentence_transformers tqdm elasticsearch-dsl + +To run the example: + +$ python vectors.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to regenerate it. + +The example dataset includes a selection of workplace documents. The +following are good example queries to try out with this dataset: + +$ python vectors.py "work from home" +$ python vectors.py "vacation time" +$ python vectors.py "can I bring a bird to work?" + +When the index is created, the documents are split into short passages, and for +each passage an embedding is generated using the open source +"all-MiniLM-L6-v2" model. The documents that are returned as search results are +those that have the highest scored passages. Add `--show-inner-hits` to the +command to see individual passage results as well. 
class WorkplaceDoc(Document):
    """Workplace document that carries one embedded passage per sentence.

    ``clean()`` splits ``content`` into sentences and appends a ``Passage``
    (sentence text plus its embedding) to ``passages`` for each of them.
    """

    class Index:
        name = "workplace_documents"

    name: str
    summary: str
    content: str
    created: datetime
    updated: Optional[datetime]
    url: str = mapped_field(Keyword(required=True))
    category: str = mapped_field(Keyword(required=True))
    # NOTE(review): the default is a list literal; confirm the DSL does not
    # share it between instances.
    passages: M[List[Passage]] = mapped_field(default=[])

    @classmethod
    def get_embedding(cls, input: str) -> List[float]:
        """Return the embedding of ``input`` as a list.

        The module-level ``embedding_model`` is created from ``MODEL_NAME`` on
        the first call and reused afterwards.
        """
        global embedding_model
        if embedding_model is None:
            # lazy initialisation: nothing is loaded until an embedding is needed
            embedding_model = SentenceTransformer(MODEL_NAME)
        vector = embedding_model.encode(input)
        return cast(List[float], list(vector))

    def clean(self) -> None:
        """Populate ``passages`` from the sentences found in ``content``."""
        # split the content into sentences
        sentences = cast(List[str], nltk.sent_tokenize(self.content))

        # embed each sentence and store it as a nested passage document
        for sentence in sentences:
            vector = self.get_embedding(sentence)
            self.passages.append(Passage(content=sentence, embedding=vector))
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the command line of the example.

    :arg argv: argument list to parse, without the program name. When it is
        ``None`` (the default) argparse reads ``sys.argv[1:]``, so existing
        ``parse_args()`` callers behave exactly as before.
    :returns: namespace with ``recreate_index`` (bool), ``show_inner_hits``
        (bool) and ``query`` (str).
    """
    parser = argparse.ArgumentParser(description="Vector database with Elasticsearch")
    parser.add_argument(
        "--recreate-index", action="store_true", help="Recreate and populate the index"
    )
    parser.add_argument(
        "--show-inner-hits",
        action="store_true",
        help="Show results for individual passages",
    )
    parser.add_argument("query", action="store", help="The search query")
    return parser.parse_args(argv)
junit_family = "legacy" xfail_strict = true -markers = "otel" +markers = [ + "otel", + "sync: mark a test as performing I/O without asyncio.", +] +filterwarnings = [ + "error", + "ignore:Legacy index templates are deprecated in favor of composable templates.:elasticsearch.exceptions.ElasticsearchWarning", + "ignore:datetime.datetime.utcfromtimestamp() is deprecated and scheduled for removal in a future version..*:DeprecationWarning", + "default:enable_cleanup_closed ignored.*:DeprecationWarning", +] [tool.isort] profile = "black" diff --git a/test_elasticsearch/test_dsl/__init__.py b/test_elasticsearch/test_dsl/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/_async/__init__.py b/test_elasticsearch/test_dsl/_async/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. 
Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/_async/test_document.py b/test_elasticsearch/test_dsl/_async/test_document.py new file mode 100644 index 000000000..5fe2d326c --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_document.py @@ -0,0 +1,883 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# this file creates several documents using bad or no types because +# these are still supported and should be kept functional in spite +# of not having appropriate type hints. For that reason the comment +# below disables many mypy checks that fail as a result of this.
class SecretField(field.CustomField):
    """Custom field, mapped as ``text``, whose values are ``rot_13`` encoded.

    Values are encoded when serialized and come back as ``Secret`` instances
    when deserialized.
    """

    builtin_type = "text"

    def _serialize(self, data: Any) -> Any:
        """Return ``data`` encoded with the ``rot_13`` codec."""
        encoded = codecs.encode(data, "rot_13")
        return encoded

    def _deserialize(self, data: Any) -> Any:
        """Return ``data`` as a ``Secret``, decoding it unless it already is one."""
        if not isinstance(data, Secret):
            # raw value as stored: undo the rot_13 encoding and wrap it
            data = Secret(codecs.decode(data, "rot_13"))
        return data
def test_range_serializes_properly() -> None:
    """A ``LongRange`` field serializes ``Range`` objects and plain dicts alike."""

    class D(AsyncDocument):
        lr: Range[int] = field.LongRange()

    # value supplied as a Range instance
    from_range = D(lr=Range(lt=42))
    assert 40 in from_range.lr
    assert 47 not in from_range.lr
    serialized = from_range.to_dict()
    assert {"lr": {"lt": 42}} == serialized

    # value supplied as a raw dict
    from_dict = D(lr={"lt": 42})
    serialized = from_dict.to_dict()
    assert {"lr": {"lt": 42}} == serialized
def test_custom_field() -> None:
    """``SecretDoc.title`` is encoded by ``to_dict`` and decoded by ``from_es``."""
    plain, encoded = "Hello", "Uryyb"

    # outgoing: the document holds the plain value and serializes the encoded one
    outgoing = SecretDoc(title=Secret(plain))
    assert {"title": encoded} == outgoing.to_dict()
    assert outgoing.title == plain

    # incoming: the stored value is decoded and wrapped in Secret
    incoming = SecretDoc.from_es({"_source": {"title": encoded}})
    assert incoming.title == plain
    assert isinstance(incoming.title, Secret)
def test_to_dict_with_meta() -> None:
    """``to_dict(True)`` nests the body under ``_source`` next to the meta keys."""
    doc = MySubDoc(title="hello")
    doc.meta.routing = "some-parent"

    expected = {
        "_index": "default-index",
        "_routing": "some-parent",
        "_source": {"title": "hello"},
    }
    assert expected == doc.to_dict(True)
def test_docs_with_properties() -> None:
    """Python properties on a document work and are kept out of the field data."""

    def digest(raw: bytes) -> str:
        # hex MD5 digest, the format stored in ``pwd_hash``
        return md5(raw).hexdigest()

    class User(AsyncDocument):
        pwd_hash: str = field.Text()

        def check_password(self, pwd: bytes) -> bool:
            return digest(pwd) == self.pwd_hash

        @property
        def password(self) -> None:
            raise AttributeError("readonly")

        @password.setter
        def password(self, pwd: bytes) -> None:
            self.pwd_hash = digest(pwd)

    old_pwd, new_pwd = b"secret", b"not-secret"

    user = User(pwd_hash=digest(old_pwd))
    assert user.check_password(old_pwd)
    assert not user.check_password(new_pwd)

    # going through the property setter replaces the stored hash only
    user.password = new_pwd
    assert "password" not in user._d_
    assert not user.check_password(old_pwd)
    assert user.check_password(new_pwd)

    # the getter is deliberately write-only
    with raises(AttributeError):
        user.password
def test_to_dict_is_recursive_and_can_cope_with_multi_values() -> None:
    """``to_dict`` serializes list values, including lists of inner documents."""
    doc = MyDoc(name=["a", "b", "c"])
    doc.inner = [MyInner(old_field=value) for value in ("of1", "of2")]

    first_inner = doc.inner[0]
    assert isinstance(first_inner, MyInner)

    expected = {
        "name": ["a", "b", "c"],
        "inner": [{"old_field": "of1"}, {"old_field": "of2"}],
    }
    assert expected == doc.to_dict()
def test_meta_fields_are_stored_in_meta_and_ignored_by_to_dict() -> None:
    """Meta values live on ``doc.meta`` and do not appear in ``doc.to_dict()``."""
    doc = MySubDoc(meta={"id": 42}, name="My First doc!")
    doc.meta.index = "my-index"

    # both the constructor-supplied and the assigned meta values are readable
    assert doc.meta.index == "my-index"
    assert doc.meta.id == 42

    # the body and the meta are serialized separately
    body = doc.to_dict()
    assert {"name": "My First doc!"} == body
    meta = doc.meta.to_dict()
    assert {"id": 42, "index": "my-index"} == meta
def test_from_es_respects_underscored_non_meta_fields() -> None:
    """Underscore-prefixed keys in ``fields`` and ``_source`` survive ``from_es``."""

    class Company(AsyncDocument):
        class Index:
            name = "test-company"

    source = {
        "city": "Amsterdam",
        "name": "Elasticsearch",
        "_tagline": "You know, for search",
    }
    stored_fields = {"hello": "world", "_routing": "es", "_tags": ["search"]}
    raw_hit = {
        "_index": "test-index",
        "_id": "elasticsearch",
        "_score": 12.0,
        "fields": stored_fields,
        "_source": source,
    }

    company = Company.from_es(raw_hit)

    assert company.meta.fields._tags == ["search"]
    assert company.meta.fields._routing == "es"
    assert company._tagline == "You know, for search"
"properties": {"old_field": {"type": "text"}}, + "type": "nested", + }, + "title": {"type": "keyword"}, + } + + +def test_doc_with_type_hints() -> None: + class TypedInnerDoc(InnerDoc): + st: M[str] + dt: M[Optional[datetime]] + li: M[List[int]] + + class TypedDoc(AsyncDocument): + st: str + dt: Optional[datetime] + li: List[int] + ob: TypedInnerDoc + ns: List[TypedInnerDoc] + ip: Optional[str] = field.Ip() + k1: str = field.Keyword(required=True) + k2: M[str] = field.Keyword() + k3: str = mapped_field(field.Keyword(), default="foo") + k4: M[Optional[str]] = mapped_field(field.Keyword()) # type: ignore[misc] + s1: Secret = SecretField() + s2: M[Secret] = SecretField() + s3: Secret = mapped_field(SecretField()) # type: ignore[misc] + s4: M[Optional[Secret]] = mapped_field( + SecretField(), default_factory=lambda: "foo" + ) + i1: ClassVar + i2: ClassVar[int] + + props = TypedDoc._doc_type.mapping.to_dict()["properties"] + assert props == { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + "ob": { + "type": "object", + "properties": { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + }, + }, + "ns": { + "type": "nested", + "properties": { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + }, + }, + "ip": {"type": "ip"}, + "k1": {"type": "keyword"}, + "k2": {"type": "keyword"}, + "k3": {"type": "keyword"}, + "k4": {"type": "keyword"}, + "s1": {"type": "text"}, + "s2": {"type": "text"}, + "s3": {"type": "text"}, + "s4": {"type": "text"}, + } + + TypedDoc.i1 = "foo" + TypedDoc.i2 = 123 + + doc = TypedDoc() + assert doc.k3 == "foo" + assert doc.s4 == "foo" + with raises(ValidationException) as exc_info: + doc.full_clean() + assert set(exc_info.value.args[0].keys()) == { + "st", + "k1", + "k2", + "ob", + "s1", + "s2", + "s3", + } + + assert TypedDoc.i1 == "foo" + assert TypedDoc.i2 == 123 + + doc.st = "s" + doc.li = [1, 2, 3] + doc.k1 = "k1" + doc.k2 = "k2" + doc.ob.st = "s" + 
@pytest.mark.skipif(sys.version_info < (3, 10), reason="requires Python 3.10")
def test_doc_with_pipe_type_hints() -> None:
    """``X | None`` hints produce a mapping; ``str | int | None`` raises ``TypeError``."""
    # declaring a field with two concrete types in the union must fail
    with pytest.raises(TypeError):

        class BadlyTypedDoc(AsyncDocument):
            s: str
            f: str | int | None  # type: ignore[syntax]

    class TypedDoc(AsyncDocument):
        s: str
        f1: str | None  # type: ignore[syntax]
        f2: M[int | None]  # type: ignore[syntax]
        f3: M[datetime | None]  # type: ignore[syntax]

    expected_props = {
        "s": {"type": "text"},
        "f1": {"type": "text"},
        "f2": {"type": "integer"},
        "f3": {"type": "date"},
    }
    mapping = TypedDoc._doc_type.mapping.to_dict()
    assert mapping["properties"] == expected_props

    # only the non-optional field is reported as missing
    typed = TypedDoc()
    with raises(ValidationException) as exc_info:
        typed.full_clean()
    failed_fields = set(exc_info.value.args[0].keys())
    assert failed_fields == {"s"}

    # once it is set, validation passes
    typed.s = "s"
    typed.full_clean()
Doc(AsyncDocument): + st: str + ob: Child + ns: List[Child] + + doc = Doc( + st="foo", + ob=Child(st="bar"), + ns=[ + Child(st="baz"), + Child(st="qux"), + ], + ) + + assert type(doc.st) is str + assert doc.st == "foo" + + assert type(doc.ob) is Child + assert doc.ob.st == "bar" + + assert type(doc.ns) is utils.AttrList + assert doc.ns[0].st == "baz" + assert doc.ns[1].st == "qux" + assert type(doc.ns[0]) is Child + assert type(doc.ns[1]) is Child + + assert type(Doc.st) is InstrumentedField + assert str(Doc.st) == "st" + assert +Doc.st == "st" + assert -Doc.st == "-st" + assert Doc.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.st.something + + assert type(Doc.ob) is InstrumentedField + assert str(Doc.ob) == "ob" + assert str(Doc.ob.st) == "ob.st" + assert +Doc.ob.st == "ob.st" + assert -Doc.ob.st == "-ob.st" + assert Doc.ob.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.ob.something + with raises(AttributeError): + Doc.ob.st.something + + assert type(Doc.ns) is InstrumentedField + assert str(Doc.ns) == "ns" + assert str(Doc.ns.st) == "ns.st" + assert +Doc.ns.st == "ns.st" + assert -Doc.ns.st == "-ns.st" + assert Doc.ns.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.ns.something + with raises(AttributeError): + Doc.ns.st.something diff --git a/test_elasticsearch/test_dsl/_async/test_faceted_search.py b/test_elasticsearch/test_dsl/_async/test_faceted_search.py new file mode 100644 index 000000000..e3bd30850 --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_faceted_search.py @@ -0,0 +1,201 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +import pytest + +from elasticsearch.dsl.faceted_search import ( + AsyncFacetedSearch, + DateHistogramFacet, + TermsFacet, +) + + +class BlogSearch(AsyncFacetedSearch): + doc_types = ["user", "post"] + fields = [ + "title^5", + "body", + ] + + facets = { + "category": TermsFacet(field="category.raw"), + "tags": TermsFacet(field="tags"), + } + + +def test_query_is_created_properly() -> None: + bs = BlogSearch("python search") + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +def test_query_is_created_properly_with_sort_tuple() -> None: + bs = BlogSearch("python search", sort=("category", "-title")) + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + "sort": ["category", 
{"title": {"order": "desc"}}], + } == s.to_dict() + + +def test_filter_is_applied_to_search_but_not_relevant_facet() -> None: + bs = BlogSearch("python search", filters={"category": "elastic"}) + s = bs.build_search() + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["elastic"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "post_filter": {"terms": {"category.raw": ["elastic"]}}, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +def test_filters_are_applied_to_search_ant_relevant_facets() -> None: + bs = BlogSearch( + "python search", filters={"category": "elastic", "tags": ["python", "django"]} + ) + s = bs.build_search() + + d = s.to_dict() + + # we need to test post_filter without relying on order + f = d["post_filter"]["bool"].pop("must") + assert len(f) == 2 + assert {"terms": {"category.raw": ["elastic"]}} in f + assert {"terms": {"tags": ["python", "django"]}} in f + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["elastic"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"terms": {"tags": ["python", "django"]}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": "python search"} + }, + "post_filter": {"bool": {}}, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == d + + +def test_date_histogram_facet_with_1970_01_01_date() -> None: + dhf = DateHistogramFacet() + assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0) # type: ignore[arg-type] + assert dhf.get_value({"key": 0}) == datetime(1970, 1, 1, 0, 0) # type: ignore[arg-type] + + +@pytest.mark.parametrize( + 
["interval_type", "interval"], + [ + ("interval", "year"), + ("calendar_interval", "year"), + ("interval", "month"), + ("calendar_interval", "month"), + ("interval", "week"), + ("calendar_interval", "week"), + ("interval", "day"), + ("calendar_interval", "day"), + ("fixed_interval", "day"), + ("interval", "hour"), + ("fixed_interval", "hour"), + ("interval", "1Y"), + ("calendar_interval", "1Y"), + ("interval", "1M"), + ("calendar_interval", "1M"), + ("interval", "1w"), + ("calendar_interval", "1w"), + ("interval", "1d"), + ("calendar_interval", "1d"), + ("fixed_interval", "1d"), + ("interval", "1h"), + ("fixed_interval", "1h"), + ], +) +def test_date_histogram_interval_types(interval_type: str, interval: str) -> None: + dhf = DateHistogramFacet(field="@timestamp", **{interval_type: interval}) + assert dhf.get_aggregation().to_dict() == { + "date_histogram": { + "field": "@timestamp", + interval_type: interval, + "min_doc_count": 0, + } + } + dhf.get_value_filter(datetime.now()) + + +def test_date_histogram_no_interval_keyerror() -> None: + dhf = DateHistogramFacet(field="@timestamp") + with pytest.raises(KeyError) as e: + dhf.get_value_filter(datetime.now()) + assert str(e.value) == "'interval'" + + +def test_params_added_to_search() -> None: + bs = BlogSearch("python search") + assert bs._s._params == {} + bs.params(routing="42") + assert bs._s._params == {"routing": "42"} diff --git a/test_elasticsearch/test_dsl/_async/test_index.py b/test_elasticsearch/test_dsl/_async/test_index.py new file mode 100644 index 000000000..624bab79a --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_index.py @@ -0,0 +1,197 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. 
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import string +from random import choice +from typing import Any, Dict + +import pytest +from pytest import raises + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncIndex, + AsyncIndexTemplate, + Date, + Text, + analyzer, +) + + +class Post(AsyncDocument): + title = Text() + published_from = Date() + + +def test_multiple_doc_types_will_combine_mappings() -> None: + class User(AsyncDocument): + username = Text() + + i = AsyncIndex("i") + i.document(Post) + i.document(User) + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "username": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +def test_search_is_limited_to_index_name() -> None: + i = AsyncIndex("my-index") + s = i.search() + + assert s._index == ["my-index"] + + +def test_cloned_index_has_copied_settings_and_using() -> None: + client = object() + i = AsyncIndex("my-index", using=client) # type: ignore[arg-type] + i.settings(number_of_shards=1) + + i2 = i.clone("my-other-index") + + assert "my-other-index" == i2._name + assert client is i2._using + assert i._settings == i2._settings + assert i._settings is not i2._settings + + +def test_cloned_index_has_analysis_attribute() -> None: + """ + Regression test for Issue #582 in which `AsyncIndex.clone()` was not copying + over the `_analysis` attribute. 
+ """ + client = object() + i = AsyncIndex("my-index", using=client) # type: ignore[arg-type] + + random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100)) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + i.analyzer(random_analyzer) + + i2 = i.clone("my-clone-index") + + assert i.to_dict()["settings"]["analysis"] == i2.to_dict()["settings"]["analysis"] + + +def test_settings_are_saved() -> None: + i = AsyncIndex("i") + i.settings(number_of_replicas=0) + i.settings(number_of_shards=1) + + assert {"settings": {"number_of_shards": 1, "number_of_replicas": 0}} == i.to_dict() + + +def test_registered_doc_type_included_in_to_dict() -> None: + i = AsyncIndex("i", using="alias") + i.document(Post) + + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +def test_registered_doc_type_included_in_search() -> None: + i = AsyncIndex("i", using="alias") + i.document(Post) + + s = i.search() + + assert s._doc_type == [Post] + + +def test_aliases_add_to_object() -> None: + random_alias = "".join(choice(string.ascii_letters) for _ in range(100)) + alias_dict: Dict[str, Any] = {random_alias: {}} + + index = AsyncIndex("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == alias_dict + + +def test_aliases_returned_from_to_dict() -> None: + random_alias = "".join(choice(string.ascii_letters) for _ in range(100)) + alias_dict: Dict[str, Any] = {random_alias: {}} + + index = AsyncIndex("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == index.to_dict()["aliases"] == alias_dict + + +def test_analyzers_added_to_object() -> None: + random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100)) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + index = AsyncIndex("i", using="alias") + 
index.analyzer(random_analyzer) + + assert index._analysis["analyzer"][random_analyzer_name] == { + "filter": ["standard"], + "type": "custom", + "tokenizer": "standard", + } + + +def test_analyzers_returned_from_to_dict() -> None: + random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100)) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + index = AsyncIndex("i", using="alias") + index.analyzer(random_analyzer) + + assert index.to_dict()["settings"]["analysis"]["analyzer"][ + random_analyzer_name + ] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"} + + +def test_conflicting_analyzer_raises_error() -> None: + i = AsyncIndex("i") + i.analyzer("my_analyzer", tokenizer="whitespace", filter=["lowercase", "stop"]) + + with raises(ValueError): + i.analyzer("my_analyzer", tokenizer="keyword", filter=["lowercase", "stop"]) + + +def test_index_template_can_have_order() -> None: + i = AsyncIndex("i-*") + it = i.as_template("i", order=2) + + assert {"index_patterns": ["i-*"], "order": 2} == it.to_dict() + + +@pytest.mark.asyncio +async def test_index_template_save_result(async_mock_client: Any) -> None: + it = AsyncIndexTemplate("test-template", "test-*") + + assert await it.save(using="mock") == await async_mock_client.indices.put_template() diff --git a/test_elasticsearch/test_dsl/_async/test_mapping.py b/test_elasticsearch/test_dsl/_async/test_mapping.py new file mode 100644 index 000000000..93da49fae --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_mapping.py @@ -0,0 +1,222 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json + +from elasticsearch.dsl import AsyncMapping, Keyword, Nested, Text, analysis + + +def test_mapping_can_has_fields() -> None: + m = AsyncMapping() + m.field("name", "text").field("tags", "keyword") + + assert { + "properties": {"name": {"type": "text"}, "tags": {"type": "keyword"}} + } == m.to_dict() + + +def test_mapping_update_is_recursive() -> None: + m1 = AsyncMapping() + m1.field("title", "text") + m1.field("author", "object") + m1.field("author", "object", properties={"name": {"type": "text"}}) + m1.meta("_all", enabled=False) + m1.meta("dynamic", False) + + m2 = AsyncMapping() + m2.field("published_from", "date") + m2.field("author", "object", properties={"email": {"type": "text"}}) + m2.field("title", "text") + m2.field("lang", "keyword") + m2.meta("_analyzer", path="lang") + + m1.update(m2, update_only=True) + + assert { + "_all": {"enabled": False}, + "_analyzer": {"path": "lang"}, + "dynamic": False, + "properties": { + "published_from": {"type": "date"}, + "title": {"type": "text"}, + "lang": {"type": "keyword"}, + "author": { + "type": "object", + "properties": {"name": {"type": "text"}, "email": {"type": "text"}}, + }, + }, + } == m1.to_dict() + + +def test_properties_can_iterate_over_all_the_fields() -> None: + m = AsyncMapping() + m.field("f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")}) + m.field("f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")})) + + assert {"f1", "f2", "f3", "f4"} == { + f.test_attr for f in m.properties._collect_fields() + } + + +def 
test_mapping_can_collect_all_analyzers_and_normalizers() -> None: + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer("english") + a3 = analysis.analyzer("unknown_custom") + a4 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + a5 = analysis.analyzer("my_analyzer3", tokenizer="keyword") + n1 = analysis.normalizer("my_normalizer1", filter=["lowercase"]) + n2 = analysis.normalizer( + "my_normalizer2", + filter=[ + "my_filter1", + "my_filter2", + analysis.token_filter("my_filter3", "stop", stopwords=["e", "f"]), + ], + ) + n3 = analysis.normalizer("unknown_custom") + + m = AsyncMapping() + m.field( + "title", + "text", + analyzer=a1, + fields={"english": Text(analyzer=a2), "unknown": Keyword(search_analyzer=a3)}, + ) + m.field("comments", Nested(properties={"author": Text(analyzer=a4)})) + m.field("normalized_title", "keyword", normalizer=n1) + m.field("normalized_comment", "keyword", normalizer=n2) + m.field("unknown", "keyword", normalizer=n3) + m.meta("_all", analyzer=a5) + + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + "my_analyzer3": {"tokenizer": "keyword", "type": "custom"}, + }, + "normalizer": { + "my_normalizer1": {"filter": ["lowercase"], "type": "custom"}, + "my_normalizer2": { + "filter": ["my_filter1", "my_filter2", "my_filter3"], + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, + "my_filter3": {"stopwords": ["e", "f"], "type": "stop"}, + }, + "tokenizer": {"trigram": 
{"max_gram": 3, "min_gram": 3, "type": "nGram"}}, + } == m._collect_analysis() + + assert json.loads(json.dumps(m.to_dict())) == m.to_dict() + + +def test_mapping_can_collect_multiple_analyzers() -> None: + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + m = AsyncMapping() + m.field("title", "text", analyzer=a1, search_analyzer=a2) + m.field( + "text", + "text", + analyzer=a1, + fields={ + "english": Text(analyzer=a1), + "unknown": Keyword(analyzer=a1, search_analyzer=a2), + }, + ) + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, + }, + "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}}, + } == m._collect_analysis() + + +def test_even_non_custom_analyzers_can_have_params() -> None: + a1 = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+") + m = AsyncMapping() + m.field("title", "text", analyzer=a1) + + assert { + "analyzer": {"whitespace": {"type": "pattern", "pattern": r"\\s+"}} + } == m._collect_analysis() + + +def test_resolve_field_can_resolve_multifields() -> None: + m = AsyncMapping() + m.field("title", "text", fields={"keyword": Keyword()}) + + assert isinstance(m.resolve_field("title.keyword"), Keyword) + + +def test_resolve_nested() -> None: + m = AsyncMapping() + m.field("n1", "nested", properties={"n2": Nested(properties={"k1": Keyword()})}) + m.field("k2", "keyword") + + nested, 
field = m.resolve_nested("n1.n2.k1") + assert nested == ["n1", "n1.n2"] + assert isinstance(field, Keyword) + + nested, field = m.resolve_nested("k2") + assert nested == [] + assert isinstance(field, Keyword) diff --git a/test_elasticsearch/test_dsl/_async/test_search.py b/test_elasticsearch/test_dsl/_async/test_search.py new file mode 100644 index 000000000..a00ddf448 --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_search.py @@ -0,0 +1,841 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from copy import deepcopy +from typing import Any + +import pytest +from pytest import raises + +from elasticsearch.dsl import ( + AsyncEmptySearch, + AsyncSearch, + Document, + Q, + query, + types, + wrappers, +) +from elasticsearch.dsl.exceptions import IllegalOperation + + +def test_expand__to_dot_is_respected() -> None: + s = AsyncSearch().query("match", a__b=42, _expand__to_dot=False) + + assert {"query": {"match": {"a__b": 42}}} == s.to_dict() + + +@pytest.mark.asyncio +async def test_execute_uses_cache() -> None: + s = AsyncSearch() + r = object() + s._response = r # type: ignore[assignment] + + assert r is await s.execute() + + +@pytest.mark.asyncio +async def test_cache_can_be_ignored(async_mock_client: Any) -> None: + s = AsyncSearch(using="mock") + r = object() + s._response = r # type: ignore[assignment] + await s.execute(ignore_cache=True) + + async_mock_client.search.assert_awaited_once_with(index=None, body={}) + + +@pytest.mark.asyncio +async def test_iter_iterates_over_hits() -> None: + s = AsyncSearch() + s._response = [1, 2, 3] # type: ignore[assignment] + + assert [1, 2, 3] == [hit async for hit in s] + + +def test_cache_isnt_cloned() -> None: + s = AsyncSearch() + s._response = object() # type: ignore[assignment] + + assert not hasattr(s._clone(), "_response") + + +def test_search_starts_with_no_query() -> None: + s = AsyncSearch() + + assert s.query._proxied is None + + +def test_search_query_combines_query() -> None: + s = AsyncSearch() + + s2 = s.query("match", f=42) + assert s2.query._proxied == query.Match(f=42) + assert s.query._proxied is None + + s3 = s2.query("match", f=43) + assert s2.query._proxied == query.Match(f=42) + assert s3.query._proxied == query.Bool(must=[query.Match(f=42), query.Match(f=43)]) + + +def test_query_can_be_assigned_to() -> None: + s = AsyncSearch() + + q = Q("match", title="python") + s.query = q # type: ignore + + assert s.query._proxied is q + + +def test_query_can_be_wrapped() -> None: + s = 
AsyncSearch().query("match", title="python") + + s.query = Q("function_score", query=s.query, field_value_factor={"field": "rating"}) # type: ignore + + assert { + "query": { + "function_score": { + "functions": [{"field_value_factor": {"field": "rating"}}], + "query": {"match": {"title": "python"}}, + } + } + } == s.to_dict() + + +def test_using() -> None: + o = object() + o2 = object() + s = AsyncSearch(using=o) + assert s._using is o + s2 = s.using(o2) # type: ignore[arg-type] + assert s._using is o + assert s2._using is o2 + + +def test_methods_are_proxied_to_the_query() -> None: + s = AsyncSearch().query("match_all") + + assert s.query.to_dict() == {"match_all": {}} + + +def test_query_always_returns_search() -> None: + s = AsyncSearch() + + assert isinstance(s.query("match", f=42), AsyncSearch) + + +def test_source_copied_on_clone() -> None: + s = AsyncSearch().source(False) + assert s._clone()._source == s._source + assert s._clone()._source is False + + s2 = AsyncSearch().source([]) + assert s2._clone()._source == s2._source + assert s2._source == [] + + s3 = AsyncSearch().source(["some", "fields"]) + assert s3._clone()._source == s3._source + assert s3._clone()._source == ["some", "fields"] + + +def test_copy_clones() -> None: + from copy import copy + + s1 = AsyncSearch().source(["some", "fields"]) + s2 = copy(s1) + + assert s1 == s2 + assert s1 is not s2 + + +def test_aggs_allow_two_metric() -> None: + s = AsyncSearch() + + s.aggs.metric("a", "max", field="a").metric("b", "max", field="b") + + assert s.to_dict() == { + "aggs": {"a": {"max": {"field": "a"}}, "b": {"max": {"field": "b"}}} + } + + +def test_aggs_get_copied_on_change() -> None: + s = AsyncSearch().query("match_all") + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + + s2 = s.query("match_all") + s2.aggs.bucket("per_month", "date_histogram", field="date", interval="month") + s3 = s2.query("match_all") + s3.aggs["per_month"].metric("max_score", 
"max", field="score") + s4 = s3._clone() + s4.aggs.metric("max_score", "max", field="score") + + d: Any = { + "query": {"match_all": {}}, + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + } + + assert d == s.to_dict() + d["aggs"]["per_month"] = {"date_histogram": {"field": "date", "interval": "month"}} + assert d == s2.to_dict() + d["aggs"]["per_month"]["aggs"] = {"max_score": {"max": {"field": "score"}}} + assert d == s3.to_dict() + d["aggs"]["max_score"] = {"max": {"field": "score"}} + assert d == s4.to_dict() + + +def test_search_index() -> None: + s = AsyncSearch(index="i") + assert s._index == ["i"] + s = s.index("i2") + assert s._index == ["i", "i2"] + s = s.index("i3") + assert s._index == ["i", "i2", "i3"] + s = s.index() + assert s._index is None + s = AsyncSearch(index=("i", "i2")) + assert s._index == ["i", "i2"] + s = AsyncSearch(index=["i", "i2"]) + assert s._index == ["i", "i2"] + s = AsyncSearch() + s = s.index("i", "i2") + assert s._index == ["i", "i2"] + s2 = s.index("i3") + assert s._index == ["i", "i2"] + assert s2._index == ["i", "i2", "i3"] + s = AsyncSearch() + s = s.index(["i", "i2"], "i3") + assert s._index == ["i", "i2", "i3"] + s2 = s.index("i4") + assert s._index == ["i", "i2", "i3"] + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(["i4"]) + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(("i4", "i5")) + assert s2._index == ["i", "i2", "i3", "i4", "i5"] + + +def test_doc_type_document_class() -> None: + class MyDocument(Document): + pass + + s = AsyncSearch(doc_type=MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + s = AsyncSearch().doc_type(MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + +def test_knn() -> None: + s = AsyncSearch() + + with raises(TypeError): + s.knn() # type: ignore[call-arg] + with raises(TypeError): + s.knn("field") # type: ignore[call-arg] + with 
raises(TypeError): + s.knn("field", 5) # type: ignore[call-arg] + with raises(ValueError): + s.knn("field", 5, 100) + with raises(ValueError): + s.knn("field", 5, 100, query_vector=[1, 2, 3], query_vector_builder={}) + + s = s.knn("field", 5, 100, query_vector=[1, 2, 3]) + assert { + "knn": { + "field": "field", + "k": 5, + "num_candidates": 100, + "query_vector": [1, 2, 3], + } + } == s.to_dict() + + s = s.knn( + k=4, + num_candidates=40, + boost=0.8, + field="name", + query_vector_builder={ + "text_embedding": {"model_id": "foo", "model_text": "search text"} + }, + inner_hits={"size": 1}, + ) + assert { + "knn": [ + { + "field": "field", + "k": 5, + "num_candidates": 100, + "query_vector": [1, 2, 3], + }, + { + "field": "name", + "k": 4, + "num_candidates": 40, + "query_vector_builder": { + "text_embedding": {"model_id": "foo", "model_text": "search text"} + }, + "boost": 0.8, + "inner_hits": {"size": 1}, + }, + ] + } == s.to_dict() + + +def test_rank() -> None: + s = AsyncSearch() + s.rank(rrf=False) + assert {} == s.to_dict() + + s = s.rank(rrf=True) + assert {"rank": {"rrf": {}}} == s.to_dict() + + s = s.rank(rrf={"window_size": 50, "rank_constant": 20}) + assert {"rank": {"rrf": {"window_size": 50, "rank_constant": 20}}} == s.to_dict() + + +def test_sort() -> None: + s = AsyncSearch() + s = s.sort("fielda", "-fieldb") + + assert ["fielda", {"fieldb": {"order": "desc"}}] == s._sort + assert {"sort": ["fielda", {"fieldb": {"order": "desc"}}]} == s.to_dict() + + s = s.sort() + assert [] == s._sort + assert AsyncSearch().to_dict() == s.to_dict() + + +def test_sort_by_score() -> None: + s = AsyncSearch() + s = s.sort("_score") + assert {"sort": ["_score"]} == s.to_dict() + + s = AsyncSearch() + with raises(IllegalOperation): + s.sort("-_score") + + +def test_collapse() -> None: + s = AsyncSearch() + + inner_hits = {"name": "most_recent", "size": 5, "sort": [{"@timestamp": "desc"}]} + s = s.collapse("user.id", inner_hits=inner_hits, max_concurrent_group_searches=4) 
+ + assert { + "field": "user.id", + "inner_hits": { + "name": "most_recent", + "size": 5, + "sort": [{"@timestamp": "desc"}], + }, + "max_concurrent_group_searches": 4, + } == s._collapse + assert { + "collapse": { + "field": "user.id", + "inner_hits": { + "name": "most_recent", + "size": 5, + "sort": [{"@timestamp": "desc"}], + }, + "max_concurrent_group_searches": 4, + } + } == s.to_dict() + + s = s.collapse() + assert {} == s._collapse + assert AsyncSearch().to_dict() == s.to_dict() + + +def test_slice() -> None: + s = AsyncSearch() + assert {"from": 3, "size": 7} == s[3:10].to_dict() + assert {"size": 5} == s[:5].to_dict() + assert {"from": 3} == s[3:].to_dict() + assert {"from": 0, "size": 0} == s[0:0].to_dict() + assert {"from": 20, "size": 0} == s[20:0].to_dict() + assert {"from": 10, "size": 5} == s[10:][:5].to_dict() + assert {"from": 10, "size": 0} == s[:5][10:].to_dict() + assert {"size": 10} == s[:10][:40].to_dict() + assert {"size": 10} == s[:40][:10].to_dict() + assert {"size": 40} == s[:40][:80].to_dict() + assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() + assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() + assert {} == s[:].to_dict() + with raises(ValueError): + s[-1:] + with raises(ValueError): + s[4:-1] + with raises(ValueError): + s[-3:-2] + + +def test_index() -> None: + s = AsyncSearch() + assert {"from": 3, "size": 1} == s[3].to_dict() + assert {"from": 3, "size": 1} == s[3][0].to_dict() + assert {"from": 8, "size": 0} == s[3][5].to_dict() + assert {"from": 4, "size": 1} == s[3:10][1].to_dict() + with raises(ValueError): + s[-3] + + +def test_search_to_dict() -> None: + s = AsyncSearch() + assert {} == s.to_dict() + + s = s.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == s.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == s.to_dict(size=10) + + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + d = { + "aggs": { + "per_tag": { + "terms": 
{"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + "query": {"match": {"f": 42}}, + } + assert d == s.to_dict() + + s = AsyncSearch(extra={"size": 5}) + assert {"size": 5} == s.to_dict() + s = s.extra(from_=42) + assert {"size": 5, "from": 42} == s.to_dict() + + +def test_complex_example() -> None: + s = AsyncSearch() + s = ( + s.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .collapse("user_id") + .post_filter("terms", tags=["prague", "czech"]) + .script_fields(more_attendees="doc['attendees'].value + 42") + ) + + s.aggs.bucket("per_country", "terms", field="country").metric( + "avg_attendees", "avg", field="attendees" + ) + + s.query.minimum_should_match = 2 + + s = s.highlight_options(order="score").highlight("title", "body", fragment_size=50) + + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "post_filter": {"terms": {"tags": ["prague", "czech"]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "collapse": {"field": "user_id"}, + "highlight": { + "order": "score", + "fields": {"title": {"fragment_size": 50}, "body": {"fragment_size": 50}}, + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } == s.to_dict() + + +def test_reverse() -> None: + d = { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [ + { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + } + ], 
+ } + }, + "post_filter": {"bool": {"must": [{"terms": {"tags": ["prague", "czech"]}}]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "sort": ["title", {"category": {"order": "desc"}}, "_score"], + "size": 5, + "highlight": {"order": "score", "fields": {"title": {"fragment_size": 50}}}, + "suggest": { + "my-title-suggestions-1": { + "text": "devloping distibutd saerch engies", + "term": {"size": 3, "field": "title"}, + } + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } + + d2 = deepcopy(d) + + s = AsyncSearch.from_dict(d) + + # make sure we haven't modified anything in place + assert d == d2 + assert {"size": 5} == s._extra + assert d == s.to_dict() + + +def test_code_generated_classes() -> None: + s = AsyncSearch() + s = ( + s.query(query.Match("title", types.MatchQuery(query="python"))) + .query(~query.Match("title", types.MatchQuery(query="ruby"))) + .query( + query.Knn( + field="title", + query_vector=[1.0, 2.0, 3.0], + num_candidates=10, + k=3, + filter=query.Range("year", wrappers.Range(gt="2004")), + ) + ) + .filter( + query.Term("category", types.TermQuery(value="meetup")) + | query.Term("category", types.TermQuery(value="conference")) + ) + .collapse("user_id") + .post_filter(query.Terms(tags=["prague", "czech"])) + .script_fields(more_attendees="doc['attendees'].value + 42") + ) + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": {"value": "meetup"}}}, + {"term": {"category": {"value": "conference"}}}, + ] + } + } + ], + "must": [ + {"match": {"title": {"query": "python"}}}, + { + "knn": { + "field": "title", + "filter": [ + { + "range": { + "year": { + "gt": "2004", + }, + }, + }, + ], + "k": 3, + "num_candidates": 10, + "query_vector": [ + 1.0, + 2.0, + 3.0, + ], + }, + }, + ], + "must_not": [{"match": {"title": {"query": "ruby"}}}], + } + }, + "post_filter": {"terms": 
{"tags": ["prague", "czech"]}}, + "collapse": {"field": "user_id"}, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } == s.to_dict() + + +def test_from_dict_doesnt_need_query() -> None: + s = AsyncSearch.from_dict({"size": 5}) + + assert {"size": 5} == s.to_dict() + + +@pytest.mark.asyncio +async def test_params_being_passed_to_search(async_mock_client: Any) -> None: + s = AsyncSearch(using="mock") + s = s.params(routing="42") + await s.execute() + + async_mock_client.search.assert_awaited_once_with(index=None, body={}, routing="42") + + +def test_source() -> None: + assert {} == AsyncSearch().source().to_dict() + + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]} + } == AsyncSearch().source(includes=["foo.bar.*"], excludes=("foo.one",)).to_dict() + + assert {"_source": False} == AsyncSearch().source(False).to_dict() + + assert {"_source": ["f1", "f2"]} == AsyncSearch().source( + includes=["foo.bar.*"], excludes=["foo.one"] + ).source(["f1", "f2"]).to_dict() + + +def test_source_on_clone() -> None: + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]}, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == AsyncSearch().source(includes=["foo.bar.*"]).source( + excludes=["foo.one"] + ).filter( + "term", title="python" + ).to_dict() + assert { + "_source": False, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == AsyncSearch().source(False).filter("term", title="python").to_dict() + + +def test_source_on_clear() -> None: + assert ( + {} + == AsyncSearch() + .source(includes=["foo.bar.*"]) + .source(includes=None, excludes=None) + .to_dict() + ) + + +def test_suggest_accepts_global_text() -> None: + s = AsyncSearch.from_dict( + { + "suggest": { + "text": "the amsterdma meetpu", + "my-suggest-1": {"term": {"field": "title"}}, + "my-suggest-2": {"text": "other", "term": {"field": "body"}}, + } + } + ) + + assert { + "suggest": { + "my-suggest-1": 
{ + "term": {"field": "title"}, + "text": "the amsterdma meetpu", + }, + "my-suggest-2": {"term": {"field": "body"}, "text": "other"}, + } + } == s.to_dict() + + +def test_suggest() -> None: + s = AsyncSearch() + s = s.suggest("my_suggestion", "pyhton", term={"field": "title"}) + + assert { + "suggest": {"my_suggestion": {"term": {"field": "title"}, "text": "pyhton"}} + } == s.to_dict() + + +def test_exclude() -> None: + s = AsyncSearch() + s = s.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == s.to_dict() + + +@pytest.mark.asyncio +async def test_delete_by_query(async_mock_client: Any) -> None: + s = AsyncSearch(using="mock", index="i").query("match", lang="java") + await s.delete() + + async_mock_client.delete_by_query.assert_awaited_once_with( + index=["i"], body={"query": {"match": {"lang": "java"}}} + ) + + +def test_update_from_dict() -> None: + s = AsyncSearch() + s.update_from_dict({"indices_boost": [{"important-documents": 2}]}) + s.update_from_dict({"_source": ["id", "name"]}) + s.update_from_dict({"collapse": {"field": "user_id"}}) + + assert { + "indices_boost": [{"important-documents": 2}], + "_source": ["id", "name"], + "collapse": {"field": "user_id"}, + } == s.to_dict() + + +def test_rescore_query_to_dict() -> None: + s = AsyncSearch(index="index-name") + + positive_query = Q( + "function_score", + query=Q("term", tags="a"), + script_score={"script": "_score * 1"}, + ) + + negative_query = Q( + "function_score", + query=Q("term", tags="b"), + script_score={"script": "_score * -100"}, + ) + + s = s.query(positive_query) + s = s.extra( + rescore={"window_size": 100, "query": {"rescore_query": negative_query}} + ) + assert s.to_dict() == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 100, + "query": { + "rescore_query": 
{ + "function_score": { + "query": {"term": {"tags": "b"}}, + "functions": [{"script_score": {"script": "_score * -100"}}], + } + } + }, + }, + } + + assert s.to_dict( + rescore={"window_size": 10, "query": {"rescore_query": positive_query}} + ) == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 10, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + } + }, + }, + } + + +@pytest.mark.asyncio +async def test_empty_search() -> None: + s = AsyncEmptySearch(index="index-name") + s = s.query("match", lang="java") + s.aggs.bucket("versions", "terms", field="version") + + assert await s.count() == 0 + assert [hit async for hit in s] == [] + assert [hit async for hit in s.scan()] == [] + await s.delete() # should not error + + +def test_suggest_completion() -> None: + s = AsyncSearch() + s = s.suggest("my_suggestion", "pyhton", completion={"field": "title"}) + + assert { + "suggest": { + "my_suggestion": {"completion": {"field": "title"}, "prefix": "pyhton"} + } + } == s.to_dict() + + +def test_suggest_regex_query() -> None: + s = AsyncSearch() + s = s.suggest("my_suggestion", regex="py[thon|py]", completion={"field": "title"}) + + assert { + "suggest": { + "my_suggestion": {"completion": {"field": "title"}, "regex": "py[thon|py]"} + } + } == s.to_dict() + + +def test_suggest_must_pass_text_or_regex() -> None: + s = AsyncSearch() + with raises(ValueError): + s.suggest("my_suggestion") + + +def test_suggest_can_only_pass_text_or_regex() -> None: + s = AsyncSearch() + with raises(ValueError): + s.suggest("my_suggestion", text="python", regex="py[hton|py]") + + +def test_suggest_regex_must_be_wtih_completion() -> None: + s = AsyncSearch() + with raises(ValueError): + s.suggest("my_suggestion", regex="py[thon|py]") diff --git 
a/test_elasticsearch/test_dsl/_async/test_update_by_query.py b/test_elasticsearch/test_dsl/_async/test_update_by_query.py new file mode 100644 index 000000000..9253623dc --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_update_by_query.py @@ -0,0 +1,180 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from copy import deepcopy +from typing import Any + +import pytest + +from elasticsearch.dsl import AsyncUpdateByQuery, Q +from elasticsearch.dsl.response import UpdateByQueryResponse +from elasticsearch.dsl.search_base import SearchBase + + +def test_ubq_starts_with_no_query() -> None: + ubq = AsyncUpdateByQuery() + + assert ubq.query._proxied is None + + +def test_ubq_to_dict() -> None: + ubq = AsyncUpdateByQuery() + assert {} == ubq.to_dict() + + ubq = ubq.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == ubq.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == ubq.to_dict(size=10) + + ubq = AsyncUpdateByQuery(extra={"size": 5}) + assert {"size": 5} == ubq.to_dict() + + ubq = AsyncUpdateByQuery(extra={"extra_q": Q("term", category="conference")}) + assert {"extra_q": {"term": {"category": "conference"}}} == ubq.to_dict() + + +def test_complex_example() -> None: + ubq = AsyncUpdateByQuery() + ubq = ( + ubq.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + ) + + ubq.query.minimum_should_match = 2 + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } == ubq.to_dict() + + +def test_exclude() -> None: + ubq = AsyncUpdateByQuery() + ubq = ubq.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == ubq.to_dict() + + +def test_reverse() -> None: + d = { + "query": { + "bool": { + "filter": [ + { + "bool": 
{ + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [ + { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + } + ], + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } + + d2 = deepcopy(d) + + ubq = AsyncUpdateByQuery.from_dict(d) + + assert d == d2 + assert d == ubq.to_dict() + + +def test_from_dict_doesnt_need_query() -> None: + ubq = AsyncUpdateByQuery.from_dict({"script": {"source": "test"}}) + + assert {"script": {"source": "test"}} == ubq.to_dict() + + +@pytest.mark.asyncio +async def test_params_being_passed_to_search(async_mock_client: Any) -> None: + ubq = AsyncUpdateByQuery(using="mock", index="i") + ubq = ubq.params(routing="42") + await ubq.execute() + + async_mock_client.update_by_query.assert_called_once_with(index=["i"], routing="42") + + +def test_overwrite_script() -> None: + ubq = AsyncUpdateByQuery() + ubq = ubq.script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + assert { + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + } + } == ubq.to_dict() + ubq = ubq.script(source="ctx._source.likes++") + assert {"script": {"source": "ctx._source.likes++"}} == ubq.to_dict() + + +def test_update_by_query_response_success() -> None: + ubqr = UpdateByQueryResponse(SearchBase(), {"timed_out": False, "failures": []}) + assert ubqr.success() + + ubqr = UpdateByQueryResponse(SearchBase(), {"timed_out": True, "failures": []}) + assert not ubqr.success() + + ubqr = UpdateByQueryResponse(SearchBase(), {"timed_out": False, "failures": [{}]}) + assert not ubqr.success() diff --git a/test_elasticsearch/test_dsl/_sync/__init__.py b/test_elasticsearch/test_dsl/_sync/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ 
b/test_elasticsearch/test_dsl/_sync/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/_sync/test_document.py b/test_elasticsearch/test_dsl/_sync/test_document.py new file mode 100644 index 000000000..05ad9d623 --- /dev/null +++ b/test_elasticsearch/test_dsl/_sync/test_document.py @@ -0,0 +1,883 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# this file creates several documents using bad or no types because +# these are still supported and should be kept functional in spite +# of not having appropriate type hints. For that reason the comment +# below disables many mypy checks that fail as a result of this. +# mypy: disable-error-code="assignment, index, arg-type, call-arg, operator, comparison-overlap, attr-defined" + +import codecs +import ipaddress +import pickle +import sys +from datetime import datetime +from hashlib import md5 +from typing import Any, ClassVar, Dict, List, Optional + +import pytest +from pytest import raises + +from elasticsearch.dsl import ( + Document, + Index, + InnerDoc, + M, + Mapping, + MetaField, + Range, + analyzer, + field, + mapped_field, + utils, +) +from elasticsearch.dsl.document_base import InstrumentedField +from elasticsearch.dsl.exceptions import IllegalOperation, ValidationException + + +class MyInner(InnerDoc): + old_field = field.Text() + + +class MyDoc(Document): + title = field.Keyword() + name = field.Text() + created_at = field.Date() + inner = field.Object(MyInner) + + +class MySubDoc(MyDoc): + name = field.Keyword() + + class Index: + name = "default-index" + + +class MyDoc2(Document): + extra = field.Long() + + +class MyMultiSubDoc(MyDoc2, MySubDoc): + pass + + +class Comment(InnerDoc): + title = field.Text() + tags = field.Keyword(multi=True) + + +class DocWithNested(Document): + comments = field.Nested(Comment) + + class Index: + name = "test-doc-with-nested" + + +class SimpleCommit(Document): + files = field.Text(multi=True) + + class Index: + name = "test-git" + + +class Secret(str): + pass + + +class SecretField(field.CustomField): + builtin_type = "text" + + def _serialize(self, data: Any) -> Any: + return codecs.encode(data, "rot_13") + + def _deserialize(self, data: Any) -> Any: + if isinstance(data, Secret): + return data + return Secret(codecs.decode(data, "rot_13")) + + +class SecretDoc(Document): + title = SecretField(index="no") + + 
class Index: + name = "test-secret-doc" + + +class NestedSecret(Document): + secrets = field.Nested(SecretDoc) + + class Index: + name = "test-nested-secret" + + +class OptionalObjectWithRequiredField(Document): + comments = field.Nested(properties={"title": field.Keyword(required=True)}) + + class Index: + name = "test-required" + + +class Host(Document): + ip = field.Ip() + + class Index: + name = "test-host" + + +def test_range_serializes_properly() -> None: + class D(Document): + lr: Range[int] = field.LongRange() + + d = D(lr=Range(lt=42)) + assert 40 in d.lr + assert 47 not in d.lr + assert {"lr": {"lt": 42}} == d.to_dict() + + d = D(lr={"lt": 42}) + assert {"lr": {"lt": 42}} == d.to_dict() + + +def test_range_deserializes_properly() -> None: + class D(InnerDoc): + lr = field.LongRange() + + d = D.from_es({"lr": {"lt": 42}}, True) + assert isinstance(d.lr, Range) + assert 40 in d.lr + assert 47 not in d.lr + + +def test_resolve_nested() -> None: + nested, field = NestedSecret._index.resolve_nested("secrets.title") + assert nested == ["secrets"] + assert field is NestedSecret._doc_type.mapping["secrets"]["title"] + + +def test_conflicting_mapping_raises_error_in_index_to_dict() -> None: + class A(Document): + name = field.Text() + + class B(Document): + name = field.Keyword() + + i = Index("i") + i.document(A) + i.document(B) + + with raises(ValueError): + i.to_dict() + + +def test_ip_address_serializes_properly() -> None: + host = Host(ip=ipaddress.IPv4Address("10.0.0.1")) + + assert {"ip": "10.0.0.1"} == host.to_dict() + + +def test_matches_uses_index() -> None: + assert SimpleCommit._matches({"_index": "test-git"}) + assert not SimpleCommit._matches({"_index": "not-test-git"}) + + +def test_matches_with_no_name_always_matches() -> None: + class D(Document): + pass + + assert D._matches({}) + assert D._matches({"_index": "whatever"}) + + +def test_matches_accepts_wildcards() -> None: + class MyDoc(Document): + class Index: + name = "my-*" + + assert 
MyDoc._matches({"_index": "my-index"}) + assert not MyDoc._matches({"_index": "not-my-index"}) + + +def test_assigning_attrlist_to_field() -> None: + sc = SimpleCommit() + l = ["README", "README.rst"] + sc.files = utils.AttrList(l) + + assert sc.to_dict()["files"] is l + + +def test_optional_inner_objects_are_not_validated_if_missing() -> None: + d = OptionalObjectWithRequiredField() + + d.full_clean() + + +def test_custom_field() -> None: + s = SecretDoc(title=Secret("Hello")) + + assert {"title": "Uryyb"} == s.to_dict() + assert s.title == "Hello" + + s = SecretDoc.from_es({"_source": {"title": "Uryyb"}}) + assert s.title == "Hello" + assert isinstance(s.title, Secret) + + +def test_custom_field_mapping() -> None: + assert { + "properties": {"title": {"index": "no", "type": "text"}} + } == SecretDoc._doc_type.mapping.to_dict() + + +def test_custom_field_in_nested() -> None: + s = NestedSecret() + s.secrets.append(SecretDoc(title=Secret("Hello"))) + + assert {"secrets": [{"title": "Uryyb"}]} == s.to_dict() + assert s.secrets[0].title == "Hello" + + +def test_multi_works_after_doc_has_been_saved() -> None: + c = SimpleCommit() + c.full_clean() + c.files.append("setup.py") + + assert c.to_dict() == {"files": ["setup.py"]} + + +def test_multi_works_in_nested_after_doc_has_been_serialized() -> None: + # Issue #359 + c = DocWithNested(comments=[Comment(title="First!")]) + + assert [] == c.comments[0].tags + assert {"comments": [{"title": "First!"}]} == c.to_dict() + assert [] == c.comments[0].tags + + +def test_null_value_for_object() -> None: + d = MyDoc(inner=None) + + assert d.inner is None + + +def test_inherited_doc_types_can_override_index() -> None: + class MyDocDifferentIndex(MySubDoc): + class Index: + name = "not-default-index" + settings = {"number_of_replicas": 0} + aliases: Dict[str, Any] = {"a": {}} + analyzers = [analyzer("my_analizer", tokenizer="keyword")] + + assert MyDocDifferentIndex._index._name == "not-default-index" + assert 
MyDocDifferentIndex()._get_index() == "not-default-index" + assert MyDocDifferentIndex._index.to_dict() == { + "aliases": {"a": {}}, + "mappings": { + "properties": { + "created_at": {"type": "date"}, + "inner": { + "type": "object", + "properties": {"old_field": {"type": "text"}}, + }, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + } + }, + "settings": { + "analysis": { + "analyzer": {"my_analizer": {"tokenizer": "keyword", "type": "custom"}} + }, + "number_of_replicas": 0, + }, + } + + +def test_to_dict_with_meta() -> None: + d = MySubDoc(title="hello") + d.meta.routing = "some-parent" + + assert { + "_index": "default-index", + "_routing": "some-parent", + "_source": {"title": "hello"}, + } == d.to_dict(True) + + +def test_to_dict_with_meta_includes_custom_index() -> None: + d = MySubDoc(title="hello") + d.meta.index = "other-index" + + assert {"_index": "other-index", "_source": {"title": "hello"}} == d.to_dict(True) + + +def test_to_dict_without_skip_empty_will_include_empty_fields() -> None: + d = MySubDoc(tags=[], title=None, inner={}) + + assert {} == d.to_dict() + assert {"tags": [], "title": None, "inner": {}} == d.to_dict(skip_empty=False) + + +def test_attribute_can_be_removed() -> None: + d = MyDoc(title="hello") + + del d.title + assert "title" not in d._d_ + + +def test_doc_type_can_be_correctly_pickled() -> None: + d = DocWithNested( + title="Hello World!", comments=[Comment(title="hellp")], meta={"id": 42} + ) + s = pickle.dumps(d) + + d2 = pickle.loads(s) + + assert d2 == d + assert 42 == d2.meta.id + assert "Hello World!" 
== d2.title + assert [{"title": "hellp"}] == d2.comments + assert isinstance(d2.comments[0], Comment) + + +def test_meta_is_accessible_even_on_empty_doc() -> None: + d = MyDoc() + d.meta + + d = MyDoc(title="aaa") + d.meta + + +def test_meta_field_mapping() -> None: + class User(Document): + username = field.Text() + + class Meta: + all = MetaField(enabled=False) + _index = MetaField(enabled=True) + dynamic = MetaField("strict") + dynamic_templates = MetaField([42]) + + assert { + "properties": {"username": {"type": "text"}}, + "_all": {"enabled": False}, + "_index": {"enabled": True}, + "dynamic": "strict", + "dynamic_templates": [42], + } == User._doc_type.mapping.to_dict() + + +def test_multi_value_fields() -> None: + class Blog(Document): + tags = field.Keyword(multi=True) + + b = Blog() + assert [] == b.tags + b.tags.append("search") + b.tags.append("python") + assert ["search", "python"] == b.tags + + +def test_docs_with_properties() -> None: + class User(Document): + pwd_hash: str = field.Text() + + def check_password(self, pwd: bytes) -> bool: + return md5(pwd).hexdigest() == self.pwd_hash + + @property + def password(self) -> None: + raise AttributeError("readonly") + + @password.setter + def password(self, pwd: bytes) -> None: + self.pwd_hash = md5(pwd).hexdigest() + + u = User(pwd_hash=md5(b"secret").hexdigest()) + assert u.check_password(b"secret") + assert not u.check_password(b"not-secret") + + u.password = b"not-secret" + assert "password" not in u._d_ + assert not u.check_password(b"secret") + assert u.check_password(b"not-secret") + + with raises(AttributeError): + u.password + + +def test_nested_can_be_assigned_to() -> None: + d1 = DocWithNested(comments=[Comment(title="First!")]) + d2 = DocWithNested() + + d2.comments = d1.comments + assert isinstance(d1.comments[0], Comment) + assert d2.comments == [{"title": "First!"}] + assert {"comments": [{"title": "First!"}]} == d2.to_dict() + assert isinstance(d2.comments[0], Comment) + + +def 
test_nested_can_be_none() -> None: + d = DocWithNested(comments=None, title="Hello World!") + + assert {"title": "Hello World!"} == d.to_dict() + + +def test_nested_defaults_to_list_and_can_be_updated() -> None: + md = DocWithNested() + + assert [] == md.comments + + md.comments.append({"title": "hello World!"}) + assert {"comments": [{"title": "hello World!"}]} == md.to_dict() + + +def test_to_dict_is_recursive_and_can_cope_with_multi_values() -> None: + md = MyDoc(name=["a", "b", "c"]) + md.inner = [MyInner(old_field="of1"), MyInner(old_field="of2")] + + assert isinstance(md.inner[0], MyInner) + + assert { + "name": ["a", "b", "c"], + "inner": [{"old_field": "of1"}, {"old_field": "of2"}], + } == md.to_dict() + + +def test_to_dict_ignores_empty_collections() -> None: + md = MySubDoc(name="", address={}, count=0, valid=False, tags=[]) + + assert {"name": "", "count": 0, "valid": False} == md.to_dict() + + +def test_declarative_mapping_definition() -> None: + assert issubclass(MyDoc, Document) + assert hasattr(MyDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "text"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == MyDoc._doc_type.mapping.to_dict() + + +def test_you_can_supply_own_mapping_instance() -> None: + class MyD(Document): + title = field.Text() + + class Meta: + mapping = Mapping() + mapping.meta("_all", enabled=False) + + assert { + "_all": {"enabled": False}, + "properties": {"title": {"type": "text"}}, + } == MyD._doc_type.mapping.to_dict() + + +def test_document_can_be_created_dynamically() -> None: + n = datetime.now() + md = MyDoc(title="hello") + md.name = "My Fancy Document!" + md.created_at = n + + inner = md.inner + # consistent returns + assert inner is md.inner + inner.old_field = "Already defined." 
+ + md.inner.new_field = ["undefined", "field"] + + assert { + "title": "hello", + "name": "My Fancy Document!", + "created_at": n, + "inner": {"old_field": "Already defined.", "new_field": ["undefined", "field"]}, + } == md.to_dict() + + +def test_invalid_date_will_raise_exception() -> None: + md = MyDoc() + md.created_at = "not-a-date" + with raises(ValidationException): + md.full_clean() + + +def test_document_inheritance() -> None: + assert issubclass(MySubDoc, MyDoc) + assert issubclass(MySubDoc, Document) + assert hasattr(MySubDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == MySubDoc._doc_type.mapping.to_dict() + + +def test_child_class_can_override_parent() -> None: + class A(Document): + o = field.Object(dynamic=False, properties={"a": field.Text()}) + + class B(A): + o = field.Object(dynamic="strict", properties={"b": field.Text()}) + + assert { + "properties": { + "o": { + "dynamic": "strict", + "properties": {"a": {"type": "text"}, "b": {"type": "text"}}, + "type": "object", + } + } + } == B._doc_type.mapping.to_dict() + + +def test_meta_fields_are_stored_in_meta_and_ignored_by_to_dict() -> None: + md = MySubDoc(meta={"id": 42}, name="My First doc!") + + md.meta.index = "my-index" + assert md.meta.index == "my-index" + assert md.meta.id == 42 + assert {"name": "My First doc!"} == md.to_dict() + assert {"id": 42, "index": "my-index"} == md.meta.to_dict() + + +def test_index_inheritance() -> None: + assert issubclass(MyMultiSubDoc, MySubDoc) + assert issubclass(MyMultiSubDoc, MyDoc2) + assert issubclass(MyMultiSubDoc, Document) + assert hasattr(MyMultiSubDoc, "_doc_type") + assert hasattr(MyMultiSubDoc, "_index") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", 
"properties": {"old_field": {"type": "text"}}}, + "extra": {"type": "long"}, + } + } == MyMultiSubDoc._doc_type.mapping.to_dict() + + +def test_meta_fields_can_be_set_directly_in_init() -> None: + p = object() + md = MyDoc(_id=p, title="Hello World!") + + assert md.meta.id is p + + +@pytest.mark.sync +def test_save_no_index(mock_client: Any) -> None: + md = MyDoc() + with raises(ValidationException): + md.save(using="mock") + + +@pytest.mark.sync +def test_delete_no_index(mock_client: Any) -> None: + md = MyDoc() + with raises(ValidationException): + md.delete(using="mock") + + +@pytest.mark.sync +def test_update_no_fields() -> None: + md = MyDoc() + with raises(IllegalOperation): + md.update() + + +def test_search_with_custom_alias_and_index() -> None: + search_object = MyDoc.search( + using="staging", index=["custom_index1", "custom_index2"] + ) + + assert search_object._using == "staging" + assert search_object._index == ["custom_index1", "custom_index2"] + + +def test_from_es_respects_underscored_non_meta_fields() -> None: + doc = { + "_index": "test-index", + "_id": "elasticsearch", + "_score": 12.0, + "fields": {"hello": "world", "_routing": "es", "_tags": ["search"]}, + "_source": { + "city": "Amsterdam", + "name": "Elasticsearch", + "_tagline": "You know, for search", + }, + } + + class Company(Document): + class Index: + name = "test-company" + + c = Company.from_es(doc) + + assert c.meta.fields._tags == ["search"] + assert c.meta.fields._routing == "es" + assert c._tagline == "You know, for search" + + +def test_nested_and_object_inner_doc() -> None: + class MySubDocWithNested(MyDoc): + nested_inner = field.Nested(MyInner) + + props = MySubDocWithNested._doc_type.mapping.to_dict()["properties"] + assert props == { + "created_at": {"type": "date"}, + "inner": {"properties": {"old_field": {"type": "text"}}, "type": "object"}, + "name": {"type": "text"}, + "nested_inner": { + "properties": {"old_field": {"type": "text"}}, + "type": "nested", + }, + "title": 
{"type": "keyword"}, + } + + +def test_doc_with_type_hints() -> None: + class TypedInnerDoc(InnerDoc): + st: M[str] + dt: M[Optional[datetime]] + li: M[List[int]] + + class TypedDoc(Document): + st: str + dt: Optional[datetime] + li: List[int] + ob: TypedInnerDoc + ns: List[TypedInnerDoc] + ip: Optional[str] = field.Ip() + k1: str = field.Keyword(required=True) + k2: M[str] = field.Keyword() + k3: str = mapped_field(field.Keyword(), default="foo") + k4: M[Optional[str]] = mapped_field(field.Keyword()) # type: ignore[misc] + s1: Secret = SecretField() + s2: M[Secret] = SecretField() + s3: Secret = mapped_field(SecretField()) # type: ignore[misc] + s4: M[Optional[Secret]] = mapped_field( + SecretField(), default_factory=lambda: "foo" + ) + i1: ClassVar + i2: ClassVar[int] + + props = TypedDoc._doc_type.mapping.to_dict()["properties"] + assert props == { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + "ob": { + "type": "object", + "properties": { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + }, + }, + "ns": { + "type": "nested", + "properties": { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + }, + }, + "ip": {"type": "ip"}, + "k1": {"type": "keyword"}, + "k2": {"type": "keyword"}, + "k3": {"type": "keyword"}, + "k4": {"type": "keyword"}, + "s1": {"type": "text"}, + "s2": {"type": "text"}, + "s3": {"type": "text"}, + "s4": {"type": "text"}, + } + + TypedDoc.i1 = "foo" + TypedDoc.i2 = 123 + + doc = TypedDoc() + assert doc.k3 == "foo" + assert doc.s4 == "foo" + with raises(ValidationException) as exc_info: + doc.full_clean() + assert set(exc_info.value.args[0].keys()) == { + "st", + "k1", + "k2", + "ob", + "s1", + "s2", + "s3", + } + + assert TypedDoc.i1 == "foo" + assert TypedDoc.i2 == 123 + + doc.st = "s" + doc.li = [1, 2, 3] + doc.k1 = "k1" + doc.k2 = "k2" + doc.ob.st = "s" + doc.ob.li = [1] + doc.s1 = "s1" + doc.s2 = "s2" + doc.s3 = "s3" + doc.full_clean() + + 
doc.ob = TypedInnerDoc(li=[1]) + with raises(ValidationException) as exc_info: + doc.full_clean() + assert set(exc_info.value.args[0].keys()) == {"ob"} + assert set(exc_info.value.args[0]["ob"][0].args[0].keys()) == {"st"} + + doc.ob.st = "s" + doc.ns.append(TypedInnerDoc(li=[1, 2])) + with raises(ValidationException) as exc_info: + doc.full_clean() + + doc.ns[0].st = "s" + doc.full_clean() + + doc.ip = "1.2.3.4" + n = datetime.now() + doc.dt = n + assert doc.to_dict() == { + "st": "s", + "li": [1, 2, 3], + "dt": n, + "ob": { + "st": "s", + "li": [1], + }, + "ns": [ + { + "st": "s", + "li": [1, 2], + } + ], + "ip": "1.2.3.4", + "k1": "k1", + "k2": "k2", + "k3": "foo", + "s1": "s1", + "s2": "s2", + "s3": "s3", + "s4": "foo", + } + + s = TypedDoc.search().sort(TypedDoc.st, -TypedDoc.dt, +TypedDoc.ob.st) + s.aggs.bucket("terms_agg", "terms", field=TypedDoc.k1) + assert s.to_dict() == { + "aggs": {"terms_agg": {"terms": {"field": "k1"}}}, + "sort": ["st", {"dt": {"order": "desc"}}, "ob.st"], + } + + +@pytest.mark.skipif(sys.version_info < (3, 10), reason="requires Python 3.10") +def test_doc_with_pipe_type_hints() -> None: + with pytest.raises(TypeError): + + class BadlyTypedDoc(Document): + s: str + f: str | int | None # type: ignore[syntax] + + class TypedDoc(Document): + s: str + f1: str | None # type: ignore[syntax] + f2: M[int | None] # type: ignore[syntax] + f3: M[datetime | None] # type: ignore[syntax] + + props = TypedDoc._doc_type.mapping.to_dict()["properties"] + assert props == { + "s": {"type": "text"}, + "f1": {"type": "text"}, + "f2": {"type": "integer"}, + "f3": {"type": "date"}, + } + + doc = TypedDoc() + with raises(ValidationException) as exc_info: + doc.full_clean() + assert set(exc_info.value.args[0].keys()) == {"s"} + doc.s = "s" + doc.full_clean() + + +def test_instrumented_field() -> None: + class Child(InnerDoc): + st: M[str] + + class Doc(Document): + st: str + ob: Child + ns: List[Child] + + doc = Doc( + st="foo", + ob=Child(st="bar"), + ns=[ 
+ Child(st="baz"), + Child(st="qux"), + ], + ) + + assert type(doc.st) is str + assert doc.st == "foo" + + assert type(doc.ob) is Child + assert doc.ob.st == "bar" + + assert type(doc.ns) is utils.AttrList + assert doc.ns[0].st == "baz" + assert doc.ns[1].st == "qux" + assert type(doc.ns[0]) is Child + assert type(doc.ns[1]) is Child + + assert type(Doc.st) is InstrumentedField + assert str(Doc.st) == "st" + assert +Doc.st == "st" + assert -Doc.st == "-st" + assert Doc.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.st.something + + assert type(Doc.ob) is InstrumentedField + assert str(Doc.ob) == "ob" + assert str(Doc.ob.st) == "ob.st" + assert +Doc.ob.st == "ob.st" + assert -Doc.ob.st == "-ob.st" + assert Doc.ob.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.ob.something + with raises(AttributeError): + Doc.ob.st.something + + assert type(Doc.ns) is InstrumentedField + assert str(Doc.ns) == "ns" + assert str(Doc.ns.st) == "ns.st" + assert +Doc.ns.st == "ns.st" + assert -Doc.ns.st == "-ns.st" + assert Doc.ns.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.ns.something + with raises(AttributeError): + Doc.ns.st.something diff --git a/test_elasticsearch/test_dsl/_sync/test_faceted_search.py b/test_elasticsearch/test_dsl/_sync/test_faceted_search.py new file mode 100644 index 000000000..33b17bd1e --- /dev/null +++ b/test_elasticsearch/test_dsl/_sync/test_faceted_search.py @@ -0,0 +1,201 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from datetime import datetime
+
+import pytest
+
+from elasticsearch.dsl.faceted_search import (
+    DateHistogramFacet,
+    FacetedSearch,
+    TermsFacet,
+)
+
+
+class BlogSearch(FacetedSearch):  # shared FacetedSearch subclass used by the tests below
+    doc_types = ["user", "post"]
+    fields = [
+        "title^5",
+        "body",
+    ]
+
+    facets = {
+        "category": TermsFacet(field="category.raw"),
+        "tags": TermsFacet(field="tags"),
+    }
+
+
+def test_query_is_created_properly() -> None:
+    bs = BlogSearch("python search")
+    s = bs.build_search()
+
+    assert s._doc_type == ["user", "post"]
+    assert {  # no filters given: each facet agg sits under a match_all filter
+        "aggs": {
+            "_filter_tags": {
+                "filter": {"match_all": {}},
+                "aggs": {"tags": {"terms": {"field": "tags"}}},
+            },
+            "_filter_category": {
+                "filter": {"match_all": {}},
+                "aggs": {"category": {"terms": {"field": "category.raw"}}},
+            },
+        },
+        "query": {
+            "multi_match": {"fields": ["title^5", "body"], "query": "python search"}
+        },
+        "highlight": {"fields": {"body": {}, "title": {}}},
+    } == s.to_dict()
+
+
+def test_query_is_created_properly_with_sort_tuple() -> None:  # "-title" -> desc
+    bs = BlogSearch("python search", sort=("category", "-title"))
+    s = bs.build_search()
+
+    assert s._doc_type == ["user", "post"]
+    assert {  # same body as the unsorted case plus the "sort" entry
+        "aggs": {
+            "_filter_tags": {
+                "filter": {"match_all": {}},
+                "aggs": {"tags": {"terms": {"field": "tags"}}},
+            },
+            "_filter_category": {
+                "filter": {"match_all": {}},
+                "aggs": {"category": {"terms": {"field": "category.raw"}}},
+            },
+        },
+        "query": {
+            "multi_match": {"fields": ["title^5", "body"], "query": "python search"}
+        },
+        "highlight": {"fields": {"body": {}, "title": {}}},
+        "sort": ["category", {"title": {"order": "desc"}}],
+    } == s.to_dict()
+
+
+def test_filter_is_applied_to_search_but_not_relevant_facet() -> None:
+    bs = BlogSearch("python search", filters={"category": "elastic"})
+    s = bs.build_search()
+
+    assert {  # "category" filter: used as post_filter and on the *tags* facet only
+        "aggs": {
+            "_filter_tags": {
+                "filter": {"terms": {"category.raw": ["elastic"]}},
+                "aggs": {"tags": {"terms": {"field": "tags"}}},
+            },
+            "_filter_category": {
+                "filter": {"match_all": {}},
+                "aggs": {"category": {"terms": {"field": "category.raw"}}},
+            },
+        },
+        "post_filter": {"terms": {"category.raw": ["elastic"]}},
+        "query": {
+            "multi_match": {"fields": ["title^5", "body"], "query": "python search"}
+        },
+        "highlight": {"fields": {"body": {}, "title": {}}},
+    } == s.to_dict()
+
+
+def test_filters_are_applied_to_search_ant_relevant_facets() -> None:
+    bs = BlogSearch(
+        "python search", filters={"category": "elastic", "tags": ["python", "django"]}
+    )
+    s = bs.build_search()
+
+    d = s.to_dict()  # checked in two steps: post_filter first, then the rest
+
+    # we need to test post_filter without relying on order
+    f = d["post_filter"]["bool"].pop("must")
+    assert len(f) == 2
+    assert {"terms": {"category.raw": ["elastic"]}} in f
+    assert {"terms": {"tags": ["python", "django"]}} in f
+
+    assert {  # each facet agg is filtered by the other facet's selection
+        "aggs": {
+            "_filter_tags": {
+                "filter": {"terms": {"category.raw": ["elastic"]}},
+                "aggs": {"tags": {"terms": {"field": "tags"}}},
+            },
+            "_filter_category": {
+                "filter": {"terms": {"tags": ["python", "django"]}},
+                "aggs": {"category": {"terms": {"field": "category.raw"}}},
+            },
+        },
+        "query": {
+            "multi_match": {"fields": ["title^5", "body"], "query": "python search"}
+        },
+        "post_filter": {"bool": {}},
+        "highlight": {"fields": {"body": {}, "title": {}}},
+    } == d  # "must" was popped above, hence the empty bool
+
+
+def test_date_histogram_facet_with_1970_01_01_date() -> None:
+    dhf = DateHistogramFacet()  # a None key and a 0 key must both map to the epoch
+    assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0)  # type: ignore[arg-type]
+    assert dhf.get_value({"key": 0}) == datetime(1970, 1, 1, 0, 0)  # type: ignore[arg-type]
+
+
+@pytest.mark.parametrize(  # (interval option name, value) pairs exercised below
+    ["interval_type", "interval"],
+    [
+        ("interval", "year"),
+        ("calendar_interval", "year"),
+        ("interval", "month"),
+        ("calendar_interval", "month"),
+        ("interval", "week"),
+        ("calendar_interval", "week"),
+        ("interval", "day"),
+        ("calendar_interval", "day"),
+        ("fixed_interval", "day"),
+        ("interval", "hour"),
+        ("fixed_interval", "hour"),
+        ("interval", "1Y"),
+        ("calendar_interval", "1Y"),
+        ("interval", "1M"),
+        ("calendar_interval", "1M"),
+        ("interval", "1w"),
+        ("calendar_interval", "1w"),
+        ("interval", "1d"),
+        ("calendar_interval", "1d"),
+        ("fixed_interval", "1d"),
+        ("interval", "1h"),
+        ("fixed_interval", "1h"),
+    ],
+)
+def test_date_histogram_interval_types(interval_type: str, interval: str) -> None:
+    dhf = DateHistogramFacet(field="@timestamp", **{interval_type: interval})
+    assert dhf.get_aggregation().to_dict() == {
+        "date_histogram": {
+            "field": "@timestamp",
+            interval_type: interval,  # option is emitted under the name it was given
+            "min_doc_count": 0,
+        }
+    }
+    dhf.get_value_filter(datetime.now())  # no assertion: only has to run without raising
+
+
+def test_date_histogram_no_interval_keyerror() -> None:
+    dhf = DateHistogramFacet(field="@timestamp")
+    with pytest.raises(KeyError) as e:  # no interval option was given at all
+        dhf.get_value_filter(datetime.now())
+    assert str(e.value) == "'interval'"
+
+
+def test_params_added_to_search() -> None:
+    bs = BlogSearch("python search")
+    assert bs._s._params == {}
+    bs.params(routing="42")  # recorded on bs._s, which is what is checked below
+    assert bs._s._params == {"routing": "42"}
diff --git a/test_elasticsearch/test_dsl/_sync/test_index.py b/test_elasticsearch/test_dsl/_sync/test_index.py
new file mode 100644
index 000000000..c6d1b7904
--- /dev/null
+++ b/test_elasticsearch/test_dsl/_sync/test_index.py
@@ -0,0 +1,190 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import string
+from random import choice
+from typing import Any, Dict
+
+import pytest
+from pytest import raises
+
+from elasticsearch.dsl import Date, Document, Index, IndexTemplate, Text, analyzer
+
+
+class Post(Document):  # doc type registered on indices in the tests below
+    title = Text()
+    published_from = Date()
+
+
+def test_multiple_doc_types_will_combine_mappings() -> None:
+    class User(Document):  # second doc type, local to this test
+        username = Text()
+
+    i = Index("i")
+    i.document(Post)
+    i.document(User)
+    assert {  # fields of both registered doc types end up in one mapping
+        "mappings": {
+            "properties": {
+                "title": {"type": "text"},
+                "username": {"type": "text"},
+                "published_from": {"type": "date"},
+            }
+        }
+    } == i.to_dict()
+
+
+def test_search_is_limited_to_index_name() -> None:
+    i = Index("my-index")
+    s = i.search()  # a search created from the index carries the index name
+
+    assert s._index == ["my-index"]
+
+
+def test_cloned_index_has_copied_settings_and_using() -> None:
+    client = object()  # opaque stand-in: only its identity is checked
+    i = Index("my-index", using=client)  # type: ignore[arg-type]
+    i.settings(number_of_shards=1)
+
+    i2 = i.clone("my-other-index")
+
+    assert "my-other-index" == i2._name
+    assert client is i2._using
+    assert i._settings == i2._settings
+    assert i._settings is not i2._settings  # equal content, separate object
+
+
+def test_cloned_index_has_analysis_attribute() -> None:
+    """
+    Regression test for Issue #582 in which `Index.clone()` was not copying
+    over the `_analysis` attribute.
+    """
+    client = object()
+    i = Index("my-index", using=client)  # type: ignore[arg-type]
+
+    random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100))
+    random_analyzer = analyzer(
+        random_analyzer_name, tokenizer="standard", filter="standard"
+    )
+
+    i.analyzer(random_analyzer)
+
+    i2 = i.clone("my-clone-index")
+
+    assert i.to_dict()["settings"]["analysis"] == i2.to_dict()["settings"]["analysis"]
+
+
+def test_settings_are_saved() -> None:
+    i = Index("i")
+    i.settings(number_of_replicas=0)
+    i.settings(number_of_shards=1)  # second call must not drop the first setting
+
+    assert {"settings": {"number_of_shards": 1, "number_of_replicas": 0}} == i.to_dict()
+
+
+def test_registered_doc_type_included_in_to_dict() -> None:
+    i = Index("i", using="alias")
+    i.document(Post)
+
+    assert {  # Post's fields show up as the index mappings
+        "mappings": {
+            "properties": {
+                "title": {"type": "text"},
+                "published_from": {"type": "date"},
+            }
+        }
+    } == i.to_dict()
+
+
+def test_registered_doc_type_included_in_search() -> None:
+    i = Index("i", using="alias")
+    i.document(Post)
+
+    s = i.search()  # the registered doc type is handed to the search
+
+    assert s._doc_type == [Post]
+
+
+def test_aliases_add_to_object() -> None:
+    random_alias = "".join(choice(string.ascii_letters) for _ in range(100))
+    alias_dict: Dict[str, Any] = {random_alias: {}}
+
+    index = Index("i", using="alias")
+    index.aliases(**alias_dict)
+
+    assert index._aliases == alias_dict  # stored as given
+
+
+def test_aliases_returned_from_to_dict() -> None:
+    random_alias = "".join(choice(string.ascii_letters) for _ in range(100))
+    alias_dict: Dict[str, Any] = {random_alias: {}}
+
+    index = Index("i", using="alias")
+    index.aliases(**alias_dict)
+
+    assert index._aliases == index.to_dict()["aliases"] == alias_dict
+
+
+def test_analyzers_added_to_object() -> None:
+    random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100))
+    random_analyzer = analyzer(
+        random_analyzer_name, tokenizer="standard", filter="standard"
+    )
+
+    index = Index("i", using="alias")
+    index.analyzer(random_analyzer)
+
+    assert index._analysis["analyzer"][random_analyzer_name] == {
+        "filter": ["standard"],  # passed above as the plain string "standard"
+        "type": "custom",
+        "tokenizer": "standard",
+    }
+
+
+def test_analyzers_returned_from_to_dict() -> None:
+    random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100))
+    random_analyzer = analyzer(
+        random_analyzer_name, tokenizer="standard", filter="standard"
+    )
+    index = Index("i", using="alias")
+    index.analyzer(random_analyzer)
+
+    assert index.to_dict()["settings"]["analysis"]["analyzer"][
+        random_analyzer_name
+    ] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"}
+
+
+def test_conflicting_analyzer_raises_error() -> None:
+    i = Index("i")
+    i.analyzer("my_analyzer", tokenizer="whitespace", filter=["lowercase", "stop"])
+
+    with raises(ValueError):  # same analyzer name, different tokenizer
+        i.analyzer("my_analyzer", tokenizer="keyword", filter=["lowercase", "stop"])
+
+
+def test_index_template_can_have_order() -> None:
+    i = Index("i-*")
+    it = i.as_template("i", order=2)  # index name becomes the template's pattern
+
+    assert {"index_patterns": ["i-*"], "order": 2} == it.to_dict()
+
+
+@pytest.mark.sync
+def test_index_template_save_result(mock_client: Any) -> None:
+    it = IndexTemplate("test-template", "test-*")  # mock_client is not defined in this file
+
+    assert it.save(using="mock") == mock_client.indices.put_template()
diff --git a/test_elasticsearch/test_dsl/_sync/test_mapping.py b/test_elasticsearch/test_dsl/_sync/test_mapping.py
new file mode 100644
index 000000000..0e63d2e05
--- /dev/null
+++ b/test_elasticsearch/test_dsl/_sync/test_mapping.py
@@ -0,0 +1,222 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+
+from elasticsearch.dsl import Keyword, Mapping, Nested, Text, analysis
+
+
+def test_mapping_can_has_fields() -> None:
+    m = Mapping()
+    m.field("name", "text").field("tags", "keyword")  # field() calls can be chained
+
+    assert {
+        "properties": {"name": {"type": "text"}, "tags": {"type": "keyword"}}
+    } == m.to_dict()
+
+
+def test_mapping_update_is_recursive() -> None:
+    m1 = Mapping()
+    m1.field("title", "text")
+    m1.field("author", "object")
+    m1.field("author", "object", properties={"name": {"type": "text"}})
+    m1.meta("_all", enabled=False)
+    m1.meta("dynamic", False)
+
+    m2 = Mapping()
+    m2.field("published_from", "date")
+    m2.field("author", "object", properties={"email": {"type": "text"}})
+    m2.field("title", "text")
+    m2.field("lang", "keyword")
+    m2.meta("_analyzer", path="lang")
+
+    m1.update(m2, update_only=True)  # merge m2 into m1; the result is read from m1
+
+    assert {  # m1 keeps its own meta, gains m2's, and "author" has both sub-fields
+        "_all": {"enabled": False},
+        "_analyzer": {"path": "lang"},
+        "dynamic": False,
+        "properties": {
+            "published_from": {"type": "date"},
+            "title": {"type": "text"},
+            "lang": {"type": "keyword"},
+            "author": {
+                "type": "object",
+                "properties": {"name": {"type": "text"}, "email": {"type": "text"}},
+            },
+        },
+    } == m1.to_dict()
+
+
+def test_properties_can_iterate_over_all_the_fields() -> None:
+    m = Mapping()
+    m.field("f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")})
+    m.field("f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")}))
+
+    assert {"f1", "f2", "f3", "f4"} == {  # multi-fields and nested properties included
+        f.test_attr for f in m.properties._collect_fields()
+    }
+
+
+def test_mapping_can_collect_all_analyzers_and_normalizers() -> None:
+    a1 = analysis.analyzer(
+        "my_analyzer1",
+        tokenizer="keyword",
+        filter=[
+            "lowercase",
+            analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]),
+        ],
+    )
+    a2 = analysis.analyzer("english")  # name only: absent from the expected result
+    a3 = analysis.analyzer("unknown_custom")  # name only: absent from the expected result
+    a4 = analysis.analyzer(
+        "my_analyzer2",
+        tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3),
+        filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])],
+    )
+    a5 = analysis.analyzer("my_analyzer3", tokenizer="keyword")
+    n1 = analysis.normalizer("my_normalizer1", filter=["lowercase"])
+    n2 = analysis.normalizer(
+        "my_normalizer2",
+        filter=[
+            "my_filter1",
+            "my_filter2",
+            analysis.token_filter("my_filter3", "stop", stopwords=["e", "f"]),
+        ],
+    )
+    n3 = analysis.normalizer("unknown_custom")  # name only: absent from the expected result
+
+    m = Mapping()
+    m.field(
+        "title",
+        "text",
+        analyzer=a1,
+        fields={"english": Text(analyzer=a2), "unknown": Keyword(search_analyzer=a3)},
+    )
+    m.field("comments", Nested(properties={"author": Text(analyzer=a4)}))
+    m.field("normalized_title", "keyword", normalizer=n1)
+    m.field("normalized_comment", "keyword", normalizer=n2)
+    m.field("unknown", "keyword", normalizer=n3)
+    m.meta("_all", analyzer=a5)  # analyzers attached to meta are collected as well
+
+    assert {  # definitions gathered from fields, multi-fields, nested fields and meta
+        "analyzer": {
+            "my_analyzer1": {
+                "filter": ["lowercase", "my_filter1"],
+                "tokenizer": "keyword",
+                "type": "custom",
+            },
+            "my_analyzer2": {
+                "filter": ["my_filter2"],
+                "tokenizer": "trigram",
+                "type": "custom",
+            },
+            "my_analyzer3": {"tokenizer": "keyword", "type": "custom"},
+        },
+        "normalizer": {
+            "my_normalizer1": {"filter": ["lowercase"], "type": "custom"},
+            "my_normalizer2": {
+                "filter": ["my_filter1", "my_filter2", "my_filter3"],
+                "type": "custom",
+            },
+        },
+        "filter": {
+            "my_filter1": {"stopwords": ["a", "b"], "type": "stop"},
+            "my_filter2": {"stopwords": ["c", "d"], "type": "stop"},
+            "my_filter3": {"stopwords": ["e", "f"], "type": "stop"},
+        },
+        "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}},
+    } == m._collect_analysis()
+
+    assert json.loads(json.dumps(m.to_dict())) == m.to_dict()  # survives a JSON round trip
+
+
+def test_mapping_can_collect_multiple_analyzers() -> None:
+    a1 = analysis.analyzer(
+        "my_analyzer1",
+        tokenizer="keyword",
+        filter=[
+            "lowercase",
+            analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]),
+        ],
+    )
+    a2 = analysis.analyzer(
+        "my_analyzer2",
+        tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3),
+        filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])],
+    )
+    m = Mapping()
+    m.field("title", "text", analyzer=a1, search_analyzer=a2)  # two analyzers on one field
+    m.field(
+        "text",
+        "text",
+        analyzer=a1,
+        fields={
+            "english": Text(analyzer=a1),
+            "unknown": Keyword(analyzer=a1, search_analyzer=a2),
+        },
+    )
+    assert {  # both analyzers are collected, with their filters and tokenizer
+        "analyzer": {
+            "my_analyzer1": {
+                "filter": ["lowercase", "my_filter1"],
+                "tokenizer": "keyword",
+                "type": "custom",
+            },
+            "my_analyzer2": {
+                "filter": ["my_filter2"],
+                "tokenizer": "trigram",
+                "type": "custom",
+            },
+        },
+        "filter": {
+            "my_filter1": {"stopwords": ["a", "b"], "type": "stop"},
+            "my_filter2": {"stopwords": ["c", "d"], "type": "stop"},
+        },
+        "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}},
+    } == m._collect_analysis()
+
+
+def test_even_non_custom_analyzers_can_have_params() -> None:
+    a1 = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+")
+    m = Mapping()
+    m.field("title", "text", analyzer=a1)
+
+    assert {  # the explicit type and pattern are kept in the collected definition
+        "analyzer": {"whitespace": {"type": "pattern", "pattern": r"\\s+"}}
+    } == m._collect_analysis()
+
+
+def test_resolve_field_can_resolve_multifields() -> None:
+    m = Mapping()
+    m.field("title", "text", fields={"keyword": Keyword()})
+
+    assert isinstance(m.resolve_field("title.keyword"), Keyword)  # dotted path -> sub-field
+
+
+def test_resolve_nested() -> None:
+    m = Mapping()
+    m.field("n1", "nested", properties={"n2": Nested(properties={"k1": Keyword()})})
+    m.field("k2", "keyword")
+
+    nested, field = m.resolve_nested("n1.n2.k1")
+    assert nested == ["n1", "n1.n2"]  # paths of the enclosing nested fields, outermost first
+    assert isinstance(field, Keyword)
+
+    nested, field = m.resolve_nested("k2")
+    assert nested == []  # top-level field: no nested parents
+    assert isinstance(field, Keyword)
diff --git a/test_elasticsearch/test_dsl/_sync/test_search.py b/test_elasticsearch/test_dsl/_sync/test_search.py
new file mode 100644
index 000000000..04b0ad53e
--- /dev/null
+++ b/test_elasticsearch/test_dsl/_sync/test_search.py
@@ -0,0 +1,831 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +from copy import deepcopy +from typing import Any + +import pytest +from pytest import raises + +from elasticsearch.dsl import Document, EmptySearch, Q, Search, query, types, wrappers +from elasticsearch.dsl.exceptions import IllegalOperation + + +def test_expand__to_dot_is_respected() -> None: + s = Search().query("match", a__b=42, _expand__to_dot=False) + + assert {"query": {"match": {"a__b": 42}}} == s.to_dict() + + +@pytest.mark.sync +def test_execute_uses_cache() -> None: + s = Search() + r = object() + s._response = r # type: ignore[assignment] + + assert r is s.execute() + + +@pytest.mark.sync +def test_cache_can_be_ignored(mock_client: Any) -> None: + s = Search(using="mock") + r = object() + s._response = r # type: ignore[assignment] + s.execute(ignore_cache=True) + + mock_client.search.assert_called_once_with(index=None, body={}) + + +@pytest.mark.sync +def test_iter_iterates_over_hits() -> None: + s = Search() + s._response = [1, 2, 3] # type: ignore[assignment] + + assert [1, 2, 3] == [hit for hit in s] + + +def test_cache_isnt_cloned() -> None: + s = Search() + s._response = object() # type: ignore[assignment] + + assert not hasattr(s._clone(), "_response") + + +def test_search_starts_with_no_query() -> None: + s = Search() + + assert s.query._proxied is None + + +def test_search_query_combines_query() -> None: + s = Search() + + s2 = s.query("match", f=42) + assert s2.query._proxied == query.Match(f=42) + assert s.query._proxied is None + + s3 = s2.query("match", f=43) + assert s2.query._proxied == query.Match(f=42) + assert s3.query._proxied == query.Bool(must=[query.Match(f=42), query.Match(f=43)]) + + +def test_query_can_be_assigned_to() -> None: + s = Search() + + q = Q("match", title="python") + s.query = q # type: ignore + + assert s.query._proxied is q + + +def test_query_can_be_wrapped() -> None: + s = Search().query("match", title="python") + + s.query = Q("function_score", query=s.query, field_value_factor={"field": "rating"}) # type: 
ignore + + assert { + "query": { + "function_score": { + "functions": [{"field_value_factor": {"field": "rating"}}], + "query": {"match": {"title": "python"}}, + } + } + } == s.to_dict() + + +def test_using() -> None: + o = object() + o2 = object() + s = Search(using=o) + assert s._using is o + s2 = s.using(o2) # type: ignore[arg-type] + assert s._using is o + assert s2._using is o2 + + +def test_methods_are_proxied_to_the_query() -> None: + s = Search().query("match_all") + + assert s.query.to_dict() == {"match_all": {}} + + +def test_query_always_returns_search() -> None: + s = Search() + + assert isinstance(s.query("match", f=42), Search) + + +def test_source_copied_on_clone() -> None: + s = Search().source(False) + assert s._clone()._source == s._source + assert s._clone()._source is False + + s2 = Search().source([]) + assert s2._clone()._source == s2._source + assert s2._source == [] + + s3 = Search().source(["some", "fields"]) + assert s3._clone()._source == s3._source + assert s3._clone()._source == ["some", "fields"] + + +def test_copy_clones() -> None: + from copy import copy + + s1 = Search().source(["some", "fields"]) + s2 = copy(s1) + + assert s1 == s2 + assert s1 is not s2 + + +def test_aggs_allow_two_metric() -> None: + s = Search() + + s.aggs.metric("a", "max", field="a").metric("b", "max", field="b") + + assert s.to_dict() == { + "aggs": {"a": {"max": {"field": "a"}}, "b": {"max": {"field": "b"}}} + } + + +def test_aggs_get_copied_on_change() -> None: + s = Search().query("match_all") + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + + s2 = s.query("match_all") + s2.aggs.bucket("per_month", "date_histogram", field="date", interval="month") + s3 = s2.query("match_all") + s3.aggs["per_month"].metric("max_score", "max", field="score") + s4 = s3._clone() + s4.aggs.metric("max_score", "max", field="score") + + d: Any = { + "query": {"match_all": {}}, + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + 
"aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + } + + assert d == s.to_dict() + d["aggs"]["per_month"] = {"date_histogram": {"field": "date", "interval": "month"}} + assert d == s2.to_dict() + d["aggs"]["per_month"]["aggs"] = {"max_score": {"max": {"field": "score"}}} + assert d == s3.to_dict() + d["aggs"]["max_score"] = {"max": {"field": "score"}} + assert d == s4.to_dict() + + +def test_search_index() -> None: + s = Search(index="i") + assert s._index == ["i"] + s = s.index("i2") + assert s._index == ["i", "i2"] + s = s.index("i3") + assert s._index == ["i", "i2", "i3"] + s = s.index() + assert s._index is None + s = Search(index=("i", "i2")) + assert s._index == ["i", "i2"] + s = Search(index=["i", "i2"]) + assert s._index == ["i", "i2"] + s = Search() + s = s.index("i", "i2") + assert s._index == ["i", "i2"] + s2 = s.index("i3") + assert s._index == ["i", "i2"] + assert s2._index == ["i", "i2", "i3"] + s = Search() + s = s.index(["i", "i2"], "i3") + assert s._index == ["i", "i2", "i3"] + s2 = s.index("i4") + assert s._index == ["i", "i2", "i3"] + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(["i4"]) + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(("i4", "i5")) + assert s2._index == ["i", "i2", "i3", "i4", "i5"] + + +def test_doc_type_document_class() -> None: + class MyDocument(Document): + pass + + s = Search(doc_type=MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + s = Search().doc_type(MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + +def test_knn() -> None: + s = Search() + + with raises(TypeError): + s.knn() # type: ignore[call-arg] + with raises(TypeError): + s.knn("field") # type: ignore[call-arg] + with raises(TypeError): + s.knn("field", 5) # type: ignore[call-arg] + with raises(ValueError): + s.knn("field", 5, 100) + with raises(ValueError): + s.knn("field", 5, 100, query_vector=[1, 2, 3], query_vector_builder={}) + + s = s.knn("field", 5, 
100, query_vector=[1, 2, 3]) + assert { + "knn": { + "field": "field", + "k": 5, + "num_candidates": 100, + "query_vector": [1, 2, 3], + } + } == s.to_dict() + + s = s.knn( + k=4, + num_candidates=40, + boost=0.8, + field="name", + query_vector_builder={ + "text_embedding": {"model_id": "foo", "model_text": "search text"} + }, + inner_hits={"size": 1}, + ) + assert { + "knn": [ + { + "field": "field", + "k": 5, + "num_candidates": 100, + "query_vector": [1, 2, 3], + }, + { + "field": "name", + "k": 4, + "num_candidates": 40, + "query_vector_builder": { + "text_embedding": {"model_id": "foo", "model_text": "search text"} + }, + "boost": 0.8, + "inner_hits": {"size": 1}, + }, + ] + } == s.to_dict() + + +def test_rank() -> None: + s = Search() + s.rank(rrf=False) + assert {} == s.to_dict() + + s = s.rank(rrf=True) + assert {"rank": {"rrf": {}}} == s.to_dict() + + s = s.rank(rrf={"window_size": 50, "rank_constant": 20}) + assert {"rank": {"rrf": {"window_size": 50, "rank_constant": 20}}} == s.to_dict() + + +def test_sort() -> None: + s = Search() + s = s.sort("fielda", "-fieldb") + + assert ["fielda", {"fieldb": {"order": "desc"}}] == s._sort + assert {"sort": ["fielda", {"fieldb": {"order": "desc"}}]} == s.to_dict() + + s = s.sort() + assert [] == s._sort + assert Search().to_dict() == s.to_dict() + + +def test_sort_by_score() -> None: + s = Search() + s = s.sort("_score") + assert {"sort": ["_score"]} == s.to_dict() + + s = Search() + with raises(IllegalOperation): + s.sort("-_score") + + +def test_collapse() -> None: + s = Search() + + inner_hits = {"name": "most_recent", "size": 5, "sort": [{"@timestamp": "desc"}]} + s = s.collapse("user.id", inner_hits=inner_hits, max_concurrent_group_searches=4) + + assert { + "field": "user.id", + "inner_hits": { + "name": "most_recent", + "size": 5, + "sort": [{"@timestamp": "desc"}], + }, + "max_concurrent_group_searches": 4, + } == s._collapse + assert { + "collapse": { + "field": "user.id", + "inner_hits": { + "name": 
"most_recent", + "size": 5, + "sort": [{"@timestamp": "desc"}], + }, + "max_concurrent_group_searches": 4, + } + } == s.to_dict() + + s = s.collapse() + assert {} == s._collapse + assert Search().to_dict() == s.to_dict() + + +def test_slice() -> None: + s = Search() + assert {"from": 3, "size": 7} == s[3:10].to_dict() + assert {"size": 5} == s[:5].to_dict() + assert {"from": 3} == s[3:].to_dict() + assert {"from": 0, "size": 0} == s[0:0].to_dict() + assert {"from": 20, "size": 0} == s[20:0].to_dict() + assert {"from": 10, "size": 5} == s[10:][:5].to_dict() + assert {"from": 10, "size": 0} == s[:5][10:].to_dict() + assert {"size": 10} == s[:10][:40].to_dict() + assert {"size": 10} == s[:40][:10].to_dict() + assert {"size": 40} == s[:40][:80].to_dict() + assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() + assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() + assert {} == s[:].to_dict() + with raises(ValueError): + s[-1:] + with raises(ValueError): + s[4:-1] + with raises(ValueError): + s[-3:-2] + + +def test_index() -> None: + s = Search() + assert {"from": 3, "size": 1} == s[3].to_dict() + assert {"from": 3, "size": 1} == s[3][0].to_dict() + assert {"from": 8, "size": 0} == s[3][5].to_dict() + assert {"from": 4, "size": 1} == s[3:10][1].to_dict() + with raises(ValueError): + s[-3] + + +def test_search_to_dict() -> None: + s = Search() + assert {} == s.to_dict() + + s = s.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == s.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == s.to_dict(size=10) + + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + d = { + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + "query": {"match": {"f": 42}}, + } + assert d == s.to_dict() + + s = Search(extra={"size": 5}) + assert {"size": 5} == s.to_dict() + s = s.extra(from_=42) + assert {"size": 5, "from": 42} == s.to_dict() + + +def 
test_complex_example() -> None: + s = Search() + s = ( + s.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .collapse("user_id") + .post_filter("terms", tags=["prague", "czech"]) + .script_fields(more_attendees="doc['attendees'].value + 42") + ) + + s.aggs.bucket("per_country", "terms", field="country").metric( + "avg_attendees", "avg", field="attendees" + ) + + s.query.minimum_should_match = 2 + + s = s.highlight_options(order="score").highlight("title", "body", fragment_size=50) + + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "post_filter": {"terms": {"tags": ["prague", "czech"]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "collapse": {"field": "user_id"}, + "highlight": { + "order": "score", + "fields": {"title": {"fragment_size": 50}, "body": {"fragment_size": 50}}, + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } == s.to_dict() + + +def test_reverse() -> None: + d = { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [ + { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + } + ], + } + }, + "post_filter": {"bool": {"must": [{"terms": {"tags": ["prague", "czech"]}}]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "sort": ["title", {"category": {"order": "desc"}}, "_score"], + "size": 5, 
+ "highlight": {"order": "score", "fields": {"title": {"fragment_size": 50}}}, + "suggest": { + "my-title-suggestions-1": { + "text": "devloping distibutd saerch engies", + "term": {"size": 3, "field": "title"}, + } + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } + + d2 = deepcopy(d) + + s = Search.from_dict(d) + + # make sure we haven't modified anything in place + assert d == d2 + assert {"size": 5} == s._extra + assert d == s.to_dict() + + +def test_code_generated_classes() -> None: + s = Search() + s = ( + s.query(query.Match("title", types.MatchQuery(query="python"))) + .query(~query.Match("title", types.MatchQuery(query="ruby"))) + .query( + query.Knn( + field="title", + query_vector=[1.0, 2.0, 3.0], + num_candidates=10, + k=3, + filter=query.Range("year", wrappers.Range(gt="2004")), + ) + ) + .filter( + query.Term("category", types.TermQuery(value="meetup")) + | query.Term("category", types.TermQuery(value="conference")) + ) + .collapse("user_id") + .post_filter(query.Terms(tags=["prague", "czech"])) + .script_fields(more_attendees="doc['attendees'].value + 42") + ) + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": {"value": "meetup"}}}, + {"term": {"category": {"value": "conference"}}}, + ] + } + } + ], + "must": [ + {"match": {"title": {"query": "python"}}}, + { + "knn": { + "field": "title", + "filter": [ + { + "range": { + "year": { + "gt": "2004", + }, + }, + }, + ], + "k": 3, + "num_candidates": 10, + "query_vector": [ + 1.0, + 2.0, + 3.0, + ], + }, + }, + ], + "must_not": [{"match": {"title": {"query": "ruby"}}}], + } + }, + "post_filter": {"terms": {"tags": ["prague", "czech"]}}, + "collapse": {"field": "user_id"}, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } == s.to_dict() + + +def test_from_dict_doesnt_need_query() -> None: + s = Search.from_dict({"size": 5}) + + assert {"size": 5} == s.to_dict() + + 
@pytest.mark.sync
def test_params_being_passed_to_search(mock_client: Any) -> None:
    """Check that values set via ``params()`` reach the client's ``search`` call."""
    Search(using="mock").params(routing="42").execute()

    mock_client.search.assert_called_once_with(index=None, body={}, routing="42")


def test_source() -> None:
    """Check how ``source()`` is serialized for its different argument forms."""
    # No arguments: nothing is added to the request body.
    untouched = Search().source()
    assert {} == untouched.to_dict()

    # includes/excludes keywords end up as a ``_source`` object.
    filtered = Search().source(includes=["foo.bar.*"], excludes=("foo.one",))
    expected_filtered = {
        "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]}
    }
    assert expected_filtered == filtered.to_dict()

    # A positional ``False`` is emitted verbatim.
    disabled = Search().source(False)
    assert {"_source": False} == disabled.to_dict()

    # A later positional field list replaces earlier includes/excludes.
    replaced = Search().source(includes=["foo.bar.*"], excludes=["foo.one"])
    replaced = replaced.source(["f1", "f2"])
    assert {"_source": ["f1", "f2"]} == replaced.to_dict()


def test_source_on_clone() -> None:
    """Check that ``_source`` settings survive the chained calls that follow."""
    title_filter = {"bool": {"filter": [{"term": {"title": "python"}}]}}

    # includes and excludes supplied in two separate calls are both kept.
    combined = Search().source(includes=["foo.bar.*"]).source(excludes=["foo.one"])
    combined = combined.filter("term", title="python")
    expected_combined = {
        "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]},
        "query": title_filter,
    }
    assert expected_combined == combined.to_dict()

    # ``_source: False`` is kept when a filter is added afterwards.
    disabled = Search().source(False).filter("term", title="python")
    expected_disabled = {
        "_source": False,
        "query": {"bool": {"filter": [{"term": {"title": "python"}}]}},
    }
    assert expected_disabled == disabled.to_dict()


def test_source_on_clear() -> None:
    """Check that passing ``None`` for includes and excludes clears ``_source``."""
    cleared = Search().source(includes=["foo.bar.*"])
    cleared = cleared.source(includes=None, excludes=None)

    assert {} == cleared.to_dict()


def test_suggest_accepts_global_text() -> None:
    """Check that a suggest-level ``text`` is pushed down into each suggester.

    The suggester that already carries its own ``text`` keeps it.
    """
    body = {
        "suggest": {
            "text": "the amsterdma meetpu",
            "my-suggest-1": {"term": {"field": "title"}},
            "my-suggest-2": {"text": "other", "term": {"field": "body"}},
        }
    }
    search = Search.from_dict(body)

    expected = {
        "suggest": {
            "my-suggest-1": {
                "term": {"field": "title"},
                "text": "the amsterdma meetpu",
            },
            "my-suggest-2": {"term": {"field": "body"}, "text": "other"},
        }
    }
    assert expected == search.to_dict()


def test_suggest() -> None:
    """Check the serialized form of a single term suggester."""
    search = Search().suggest("my_suggestion", "pyhton", term={"field": "title"})

    expected = {
        "suggest": {"my_suggestion": {"term": {"field": "title"}, "text": "pyhton"}}
    }
    assert expected == search.to_dict()
+ + +def test_exclude() -> None: + s = Search() + s = s.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == s.to_dict() + + +@pytest.mark.sync +def test_delete_by_query(mock_client: Any) -> None: + s = Search(using="mock", index="i").query("match", lang="java") + s.delete() + + mock_client.delete_by_query.assert_called_once_with( + index=["i"], body={"query": {"match": {"lang": "java"}}} + ) + + +def test_update_from_dict() -> None: + s = Search() + s.update_from_dict({"indices_boost": [{"important-documents": 2}]}) + s.update_from_dict({"_source": ["id", "name"]}) + s.update_from_dict({"collapse": {"field": "user_id"}}) + + assert { + "indices_boost": [{"important-documents": 2}], + "_source": ["id", "name"], + "collapse": {"field": "user_id"}, + } == s.to_dict() + + +def test_rescore_query_to_dict() -> None: + s = Search(index="index-name") + + positive_query = Q( + "function_score", + query=Q("term", tags="a"), + script_score={"script": "_score * 1"}, + ) + + negative_query = Q( + "function_score", + query=Q("term", tags="b"), + script_score={"script": "_score * -100"}, + ) + + s = s.query(positive_query) + s = s.extra( + rescore={"window_size": 100, "query": {"rescore_query": negative_query}} + ) + assert s.to_dict() == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 100, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "b"}}, + "functions": [{"script_score": {"script": "_score * -100"}}], + } + } + }, + }, + } + + assert s.to_dict( + rescore={"window_size": 10, "query": {"rescore_query": positive_query}} + ) == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 10, + "query": 
{ + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + } + }, + }, + } + + +@pytest.mark.sync +def test_empty_search() -> None: + s = EmptySearch(index="index-name") + s = s.query("match", lang="java") + s.aggs.bucket("versions", "terms", field="version") + + assert s.count() == 0 + assert [hit for hit in s] == [] + assert [hit for hit in s.scan()] == [] + s.delete() # should not error + + +def test_suggest_completion() -> None: + s = Search() + s = s.suggest("my_suggestion", "pyhton", completion={"field": "title"}) + + assert { + "suggest": { + "my_suggestion": {"completion": {"field": "title"}, "prefix": "pyhton"} + } + } == s.to_dict() + + +def test_suggest_regex_query() -> None: + s = Search() + s = s.suggest("my_suggestion", regex="py[thon|py]", completion={"field": "title"}) + + assert { + "suggest": { + "my_suggestion": {"completion": {"field": "title"}, "regex": "py[thon|py]"} + } + } == s.to_dict() + + +def test_suggest_must_pass_text_or_regex() -> None: + s = Search() + with raises(ValueError): + s.suggest("my_suggestion") + + +def test_suggest_can_only_pass_text_or_regex() -> None: + s = Search() + with raises(ValueError): + s.suggest("my_suggestion", text="python", regex="py[hton|py]") + + +def test_suggest_regex_must_be_wtih_completion() -> None: + s = Search() + with raises(ValueError): + s.suggest("my_suggestion", regex="py[thon|py]") diff --git a/test_elasticsearch/test_dsl/_sync/test_update_by_query.py b/test_elasticsearch/test_dsl/_sync/test_update_by_query.py new file mode 100644 index 000000000..390257ffb --- /dev/null +++ b/test_elasticsearch/test_dsl/_sync/test_update_by_query.py @@ -0,0 +1,180 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. 
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from copy import deepcopy +from typing import Any + +import pytest + +from elasticsearch.dsl import Q, UpdateByQuery +from elasticsearch.dsl.response import UpdateByQueryResponse +from elasticsearch.dsl.search_base import SearchBase + + +def test_ubq_starts_with_no_query() -> None: + ubq = UpdateByQuery() + + assert ubq.query._proxied is None + + +def test_ubq_to_dict() -> None: + ubq = UpdateByQuery() + assert {} == ubq.to_dict() + + ubq = ubq.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == ubq.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == ubq.to_dict(size=10) + + ubq = UpdateByQuery(extra={"size": 5}) + assert {"size": 5} == ubq.to_dict() + + ubq = UpdateByQuery(extra={"extra_q": Q("term", category="conference")}) + assert {"extra_q": {"term": {"category": "conference"}}} == ubq.to_dict() + + +def test_complex_example() -> None: + ubq = UpdateByQuery() + ubq = ( + ubq.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + ) + + ubq.query.minimum_should_match = 2 + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + 
"must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } == ubq.to_dict() + + +def test_exclude() -> None: + ubq = UpdateByQuery() + ubq = ubq.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == ubq.to_dict() + + +def test_reverse() -> None: + d = { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [ + { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + } + ], + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } + + d2 = deepcopy(d) + + ubq = UpdateByQuery.from_dict(d) + + assert d == d2 + assert d == ubq.to_dict() + + +def test_from_dict_doesnt_need_query() -> None: + ubq = UpdateByQuery.from_dict({"script": {"source": "test"}}) + + assert {"script": {"source": "test"}} == ubq.to_dict() + + +@pytest.mark.sync +def test_params_being_passed_to_search(mock_client: Any) -> None: + ubq = UpdateByQuery(using="mock", index="i") + ubq = ubq.params(routing="42") + ubq.execute() + + mock_client.update_by_query.assert_called_once_with(index=["i"], routing="42") + + +def test_overwrite_script() -> None: + ubq = UpdateByQuery() + ubq = ubq.script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + assert { + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + } + } == ubq.to_dict() + ubq = ubq.script(source="ctx._source.likes++") + assert {"script": {"source": "ctx._source.likes++"}} == ubq.to_dict() + + +def test_update_by_query_response_success() -> None: + ubqr = 
UpdateByQueryResponse(SearchBase(), {"timed_out": False, "failures": []}) + assert ubqr.success() + + ubqr = UpdateByQueryResponse(SearchBase(), {"timed_out": True, "failures": []}) + assert not ubqr.success() + + ubqr = UpdateByQueryResponse(SearchBase(), {"timed_out": False, "failures": [{}]}) + assert not ubqr.success() diff --git a/test_elasticsearch/test_dsl/async_sleep.py b/test_elasticsearch/test_dsl/async_sleep.py new file mode 100644 index 000000000..ce5ced1c5 --- /dev/null +++ b/test_elasticsearch/test_dsl/async_sleep.py @@ -0,0 +1,24 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import asyncio +from typing import Union + + +async def sleep(secs: Union[int, float]) -> None: + """Tests can use this function to sleep.""" + await asyncio.sleep(secs) diff --git a/test_elasticsearch/test_dsl/conftest.py b/test_elasticsearch/test_dsl/conftest.py new file mode 100644 index 000000000..f1d865761 --- /dev/null +++ b/test_elasticsearch/test_dsl/conftest.py @@ -0,0 +1,486 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. 
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +import asyncio +import os +import re +import time +from datetime import datetime +from typing import Any, AsyncGenerator, Dict, Generator, Tuple, cast +from unittest import SkipTest, TestCase +from unittest.mock import AsyncMock, Mock + +import pytest_asyncio +from elastic_transport import ObjectApiResponse +from elasticsearch import AsyncElasticsearch, Elasticsearch +from elasticsearch.exceptions import ConnectionError +from elasticsearch.helpers import bulk +from pytest import fixture, skip + +from elasticsearch.dsl import Search +from elasticsearch.dsl.async_connections import add_connection as add_async_connection +from elasticsearch.dsl.async_connections import connections as async_connections +from elasticsearch.dsl.connections import add_connection, connections + +from .test_integration._async import test_document as async_document +from .test_integration._sync import test_document as sync_document +from .test_integration.test_data import ( + DATA, + FLAT_DATA, + TEST_GIT_DATA, + create_flat_git_index, + create_git_index, +) + +if "ELASTICSEARCH_URL" in os.environ: + ELASTICSEARCH_URL = os.environ["ELASTICSEARCH_URL"] +else: + ELASTICSEARCH_URL = "http://localhost:9200" + + +def get_test_client(wait: bool = True, **kwargs: Any) -> Elasticsearch: + # construct kwargs from the environment + kw: Dict[str, Any] = {"request_timeout": 30} + + if "PYTHON_CONNECTION_CLASS" in os.environ: + 
kw["node_class"] = os.environ["PYTHON_CONNECTION_CLASS"] + + kw.update(kwargs) + client = Elasticsearch(ELASTICSEARCH_URL, **kw) + + # wait for yellow status + for tries_left in range(100 if wait else 1, 0, -1): + try: + client.cluster.health(wait_for_status="yellow") + return client + except ConnectionError: + if wait and tries_left == 1: + raise + time.sleep(0.1) + + raise SkipTest("Elasticsearch failed to start.") + + +async def get_async_test_client(wait: bool = True, **kwargs: Any) -> AsyncElasticsearch: + # construct kwargs from the environment + kw: Dict[str, Any] = {"request_timeout": 30} + + if "PYTHON_CONNECTION_CLASS" in os.environ: + kw["node_class"] = os.environ["PYTHON_CONNECTION_CLASS"] + + kw.update(kwargs) + client = AsyncElasticsearch(ELASTICSEARCH_URL, **kw) + + # wait for yellow status + for tries_left in range(100 if wait else 1, 0, -1): + try: + await client.cluster.health(wait_for_status="yellow") + return client + except ConnectionError: + if wait and tries_left == 1: + raise + await asyncio.sleep(0.1) + + await client.close() + raise SkipTest("Elasticsearch failed to start.") + + +class ElasticsearchTestCase(TestCase): + client: Elasticsearch + + @staticmethod + def _get_client() -> Elasticsearch: + return get_test_client() + + @classmethod + def setup_class(cls) -> None: + cls.client = cls._get_client() + + def teardown_method(self, _: Any) -> None: + # Hidden indices expanded in wildcards in ES 7.7 + expand_wildcards = ["open", "closed"] + if self.es_version() >= (7, 7): + expand_wildcards.append("hidden") + + self.client.indices.delete_data_stream( + name="*", expand_wildcards=expand_wildcards + ) + self.client.indices.delete(index="*", expand_wildcards=expand_wildcards) + self.client.indices.delete_template(name="*") + self.client.indices.delete_index_template(name="*") + + def es_version(self) -> Tuple[int, ...]: + if not hasattr(self, "_es_version"): + self._es_version = _get_version(self.client.info()["version"]["number"]) + return 
self._es_version + + +def _get_version(version_string: str) -> Tuple[int, ...]: + if "." not in version_string: + return () + version = version_string.strip().split(".") + return tuple(int(v) if v.isdigit() else 999 for v in version) + + +@fixture(scope="session") +def client() -> Elasticsearch: + try: + connection = get_test_client(wait="WAIT_FOR_ES" in os.environ) + add_connection("default", connection) + return connection + except SkipTest: + skip() + + +@pytest_asyncio.fixture +async def async_client() -> AsyncGenerator[AsyncElasticsearch, None]: + try: + connection = await get_async_test_client(wait="WAIT_FOR_ES" in os.environ) + add_async_connection("default", connection) + yield connection + await connection.close() + except SkipTest: + skip() + + +@fixture(scope="session") +def es_version(client: Elasticsearch) -> Generator[Tuple[int, ...], None, None]: + info = client.info() + yield tuple( + int(x) + for x in re.match(r"^([0-9.]+)", info["version"]["number"]).group(1).split(".") # type: ignore + ) + + +@fixture +def write_client(client: Elasticsearch) -> Generator[Elasticsearch, None, None]: + yield client + for index_name in client.indices.get(index="test-*", expand_wildcards="all"): + client.indices.delete(index=index_name) + client.options(ignore_status=404).indices.delete_template(name="test-template") + client.options(ignore_status=404).indices.delete_index_template( + name="test-template" + ) + + +@pytest_asyncio.fixture +async def async_write_client( + write_client: Elasticsearch, async_client: AsyncElasticsearch +) -> AsyncGenerator[AsyncElasticsearch, None]: + yield async_client + + +@fixture +def mock_client( + dummy_response: ObjectApiResponse[Any], +) -> Generator[Elasticsearch, None, None]: + client = Mock() + client.search.return_value = dummy_response + client.update_by_query.return_value = dummy_response + add_connection("mock", client) + + yield client + connections._conns = {} + connections._kwargs = {} + + +@fixture +def 
async_mock_client( + dummy_response: ObjectApiResponse[Any], +) -> Generator[Elasticsearch, None, None]: + client = Mock() + client.search = AsyncMock(return_value=dummy_response) + client.indices = AsyncMock() + client.update_by_query = AsyncMock() + client.delete_by_query = AsyncMock() + add_async_connection("mock", client) + + yield client + async_connections._conns = {} + async_connections._kwargs = {} + + +@fixture(scope="session") +def data_client(client: Elasticsearch) -> Generator[Elasticsearch, None, None]: + # create mappings + create_git_index(client, "git") + create_flat_git_index(client, "flat-git") + # load data + bulk(client, DATA, raise_on_error=True, refresh=True) + bulk(client, FLAT_DATA, raise_on_error=True, refresh=True) + yield client + client.indices.delete(index="git") + client.indices.delete(index="flat-git") + + +@pytest_asyncio.fixture +async def async_data_client( + data_client: Elasticsearch, async_client: AsyncElasticsearch +) -> AsyncGenerator[AsyncElasticsearch, None]: + yield async_client + + +@fixture +def dummy_response() -> ObjectApiResponse[Any]: + return ObjectApiResponse( + meta=None, + body={ + "_shards": {"failed": 0, "successful": 10, "total": 10}, + "hits": { + "hits": [ + { + "_index": "test-index", + "_type": "company", + "_id": "elasticsearch", + "_score": 12.0, + "_source": {"city": "Amsterdam", "name": "Elasticsearch"}, + }, + { + "_index": "test-index", + "_type": "employee", + "_id": "42", + "_score": 11.123, + "_routing": "elasticsearch", + "_source": { + "name": {"first": "Shay", "last": "Bannon"}, + "lang": "java", + "twitter": "kimchy", + }, + }, + { + "_index": "test-index", + "_type": "employee", + "_id": "47", + "_score": 1, + "_routing": "elasticsearch", + "_source": { + "name": {"first": "Honza", "last": "Král"}, + "lang": "python", + "twitter": "honzakral", + }, + }, + { + "_index": "test-index", + "_type": "employee", + "_id": "53", + "_score": 16.0, + "_routing": "elasticsearch", + }, + ], + "max_score": 
12.0, + "total": 123, + }, + "timed_out": False, + "took": 123, + }, + ) + + +@fixture +def aggs_search() -> Search: + s = Search(index="flat-git") + s.aggs.bucket("popular_files", "terms", field="files", size=2).metric( + "line_stats", "stats", field="stats.lines" + ).metric("top_commits", "top_hits", size=2, _source=["stats.*", "committed_date"]) + s.aggs.bucket( + "per_month", "date_histogram", interval="month", field="info.committed_date" + ) + s.aggs.metric("sum_lines", "sum", field="stats.lines") + return s + + +@fixture +def aggs_data() -> Dict[str, Any]: + return { + "took": 4, + "timed_out": False, + "_shards": {"total": 1, "successful": 1, "failed": 0}, + "hits": {"total": 52, "hits": [], "max_score": 0.0}, + "aggregations": { + "sum_lines": {"value": 25052.0}, + "per_month": { + "buckets": [ + { + "doc_count": 38, + "key": 1393632000000, + "key_as_string": "2014-03-01T00:00:00.000Z", + }, + { + "doc_count": 11, + "key": 1396310400000, + "key_as_string": "2014-04-01T00:00:00.000Z", + }, + { + "doc_count": 3, + "key": 1398902400000, + "key_as_string": "2014-05-01T00:00:00.000Z", + }, + ] + }, + "popular_files": { + "buckets": [ + { + "key": "elasticsearch_dsl", + "line_stats": { + "count": 40, + "max": 228.0, + "min": 2.0, + "sum": 2151.0, + "avg": 53.775, + }, + "doc_count": 40, + "top_commits": { + "hits": { + "total": 40, + "hits": [ + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "_type": "doc", + "_source": { + "stats": { + "files": 4, + "deletions": 7, + "lines": 30, + "insertions": 23, + }, + "committed_date": "2014-05-02T13:47:19", + }, + "_score": 1.0, + "_index": "flat-git", + }, + { + "_id": "eb3e543323f189fd7b698e66295427204fff5755", + "_type": "doc", + "_source": { + "stats": { + "files": 1, + "deletions": 0, + "lines": 18, + "insertions": 18, + }, + "committed_date": "2014-05-01T13:32:14", + }, + "_score": 1.0, + "_index": "flat-git", + }, + ], + "max_score": 1.0, + } + }, + }, + { + "key": "test_elasticsearch_dsl", + "line_stats": 
{ + "count": 35, + "max": 228.0, + "min": 2.0, + "sum": 1939.0, + "avg": 55.4, + }, + "doc_count": 35, + "top_commits": { + "hits": { + "total": 35, + "hits": [ + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "_type": "doc", + "_source": { + "stats": { + "files": 4, + "deletions": 7, + "lines": 30, + "insertions": 23, + }, + "committed_date": "2014-05-02T13:47:19", + }, + "_score": 1.0, + "_index": "flat-git", + }, + { + "_id": "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157", + "_type": "doc", + "_source": { + "stats": { + "files": 3, + "deletions": 18, + "lines": 62, + "insertions": 44, + }, + "committed_date": "2014-05-01T13:30:44", + }, + "_score": 1.0, + "_index": "flat-git", + }, + ], + "max_score": 1.0, + } + }, + }, + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 120, + }, + }, + } + + +def make_pr(pr_module: Any) -> Any: + return pr_module.PullRequest( + _id=42, + comments=[ + pr_module.Comment( + content="Hello World!", + author=pr_module.User(name="honzakral"), + created_at=datetime(2018, 1, 9, 10, 17, 3, 21184), + history=[ + pr_module.History( + timestamp=datetime(2012, 1, 1), + diff="-Ahoj Svete!\n+Hello World!", + ) + ], + ), + ], + created_at=datetime(2018, 1, 9, 9, 17, 3, 21184), + ) + + +@fixture +def pull_request(write_client: Elasticsearch) -> sync_document.PullRequest: + sync_document.PullRequest.init() + pr = cast(sync_document.PullRequest, make_pr(sync_document)) + pr.save(refresh=True) + return pr + + +@pytest_asyncio.fixture +async def async_pull_request( + async_write_client: AsyncElasticsearch, +) -> async_document.PullRequest: + await async_document.PullRequest.init() + pr = cast(async_document.PullRequest, make_pr(async_document)) + await pr.save(refresh=True) + return pr + + +@fixture +def setup_ubq_tests(client: Elasticsearch) -> str: + index = "test-git" + create_git_index(client, index) + bulk(client, TEST_GIT_DATA, raise_on_error=True, refresh=True) + return index diff --git 
a/test_elasticsearch/test_dsl/sleep.py b/test_elasticsearch/test_dsl/sleep.py new file mode 100644 index 000000000..83009566e --- /dev/null +++ b/test_elasticsearch/test_dsl/sleep.py @@ -0,0 +1,24 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import time +from typing import Union + + +def sleep(secs: Union[int, float]) -> None: + """Tests can use this function to sleep.""" + time.sleep(secs) diff --git a/test_elasticsearch/test_dsl/test_aggs.py b/test_elasticsearch/test_dsl/test_aggs.py new file mode 100644 index 000000000..f1dc10aa5 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_aggs.py @@ -0,0 +1,530 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import raises + +from elasticsearch.dsl import aggs, query, types + + +def test_repr() -> None: + max_score = aggs.Max(field="score") + a = aggs.A("terms", field="tags", aggs={"max_score": max_score}) + + assert "Terms(aggs={'max_score': Max(field='score')}, field='tags')" == repr(a) + + +def test_meta() -> None: + max_score = aggs.Max(field="score") + a = aggs.A( + "terms", field="tags", aggs={"max_score": max_score}, meta={"some": "metadata"} + ) + + assert { + "terms": {"field": "tags"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + "meta": {"some": "metadata"}, + } == a.to_dict() + + +def test_meta_from_dict() -> None: + max_score = aggs.Max(field="score") + a = aggs.A( + "terms", field="tags", aggs={"max_score": max_score}, meta={"some": "metadata"} + ) + + assert aggs.A(a.to_dict()) == a + + +def test_A_creates_proper_agg() -> None: + a = aggs.A("terms", field="tags") + + assert isinstance(a, aggs.Terms) + assert a._params == {"field": "tags"} + + +def test_A_handles_nested_aggs_properly() -> None: + max_score = aggs.Max(field="score") + a = aggs.A("terms", field="tags", aggs={"max_score": max_score}) + + assert isinstance(a, aggs.Terms) + assert a._params == {"field": "tags", "aggs": {"max_score": max_score}} + + +def test_A_passes_aggs_through() -> None: + a = aggs.A("terms", field="tags") + assert aggs.A(a) is a + + +def test_A_from_dict() -> None: + d = { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } + a = aggs.A(d) + + assert isinstance(a, aggs.Terms) + assert a._params 
== { + "field": "tags", + "aggs": {"per_author": aggs.A("terms", field="author.raw")}, + } + assert a["per_author"] == aggs.A("terms", field="author.raw") + assert a.aggs.per_author == aggs.A("terms", field="author.raw") # type: ignore[attr-defined] + + +def test_A_fails_with_incorrect_dict() -> None: + correct_d = { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } + + with raises(Exception): + aggs.A(correct_d, field="f") + + d = correct_d.copy() + del d["terms"] + with raises(Exception): + aggs.A(d) + + d = correct_d.copy() + d["xx"] = {} + with raises(Exception): + aggs.A(d) + + +def test_A_fails_with_agg_and_params() -> None: + a = aggs.A("terms", field="tags") + + with raises(Exception): + aggs.A(a, field="score") + + +def test_buckets_are_nestable() -> None: + a = aggs.Terms(field="tags") + b = a.bucket("per_author", "terms", field="author.raw") + + assert isinstance(b, aggs.Terms) + assert b._params == {"field": "author.raw"} + assert a.aggs == {"per_author": b} + + +def test_metric_inside_buckets() -> None: + a = aggs.Terms(field="tags") + b = a.metric("max_score", "max", field="score") + + # returns bucket so it's chainable + assert a is b + assert a.aggs["max_score"] == aggs.Max(field="score") + + +def test_buckets_equals_counts_subaggs() -> None: + a = aggs.Terms(field="tags") + a.bucket("per_author", "terms", field="author.raw") + b = aggs.Terms(field="tags") + + assert a != b + + +def test_buckets_to_dict() -> None: + a = aggs.Terms(field="tags") + a.bucket("per_author", "terms", field="author.raw") + + assert { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } == a.to_dict() + + a = aggs.Terms(field="tags") + a.metric("max_score", "max", field="score") + + assert { + "terms": {"field": "tags"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } == a.to_dict() + + +def test_nested_buckets_are_reachable_as_getitem() -> None: + a = aggs.Terms(field="tags") 
+ b = a.bucket("per_author", "terms", field="author.raw") + + assert a["per_author"] is not b + assert a["per_author"] == b + + +def test_nested_buckets_are_settable_as_getitem() -> None: + a = aggs.Terms(field="tags") + b = a["per_author"] = aggs.A("terms", field="author.raw") + + assert a.aggs["per_author"] is b + + +def test_filter_can_be_instantiated_using_positional_args() -> None: + a = aggs.Filter(query.Q("term", f=42)) + + assert {"filter": {"term": {"f": 42}}} == a.to_dict() + + assert a == aggs.A("filter", query.Q("term", f=42)) + + +def test_filter_aggregation_as_nested_agg() -> None: + a = aggs.Terms(field="tags") + a.bucket("filtered", "filter", query.Q("term", f=42)) + + assert { + "terms": {"field": "tags"}, + "aggs": {"filtered": {"filter": {"term": {"f": 42}}}}, + } == a.to_dict() + + +def test_filter_aggregation_with_nested_aggs() -> None: + a = aggs.Filter(query.Q("term", f=42)) + a.bucket("testing", "terms", field="tags") + + assert { + "filter": {"term": {"f": 42}}, + "aggs": {"testing": {"terms": {"field": "tags"}}}, + } == a.to_dict() + + +def test_filters_correctly_identifies_the_hash() -> None: + a = aggs.A( + "filters", + filters={ + "group_a": {"term": {"group": "a"}}, + "group_b": {"term": {"group": "b"}}, + }, + ) + + assert { + "filters": { + "filters": { + "group_a": {"term": {"group": "a"}}, + "group_b": {"term": {"group": "b"}}, + } + } + } == a.to_dict() + assert a.filters.group_a == query.Q("term", group="a") + + +def test_bucket_sort_agg() -> None: + # test the dictionary (type ignored) and fully typed alterantives + bucket_sort_agg = aggs.BucketSort(sort=[{"total_sales": {"order": "desc"}}], size=3) # type: ignore + assert bucket_sort_agg.to_dict() == { + "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3} + } + bucket_sort_agg = aggs.BucketSort( + sort=[types.SortOptions("total_sales", types.FieldSort(order="desc"))], size=3 + ) + assert bucket_sort_agg.to_dict() == { + "bucket_sort": {"sort": 
[{"total_sales": {"order": "desc"}}], "size": 3} + } + + a = aggs.DateHistogram(field="date", interval="month") + a.bucket("total_sales", "sum", field="price") + a.bucket( + "sales_bucket_sort", + "bucket_sort", + sort=[{"total_sales": {"order": "desc"}}], + size=3, + ) + assert { + "date_histogram": {"field": "date", "interval": "month"}, + "aggs": { + "total_sales": {"sum": {"field": "price"}}, + "sales_bucket_sort": { + "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3} + }, + }, + } == a.to_dict() + + +def test_bucket_sort_agg_only_trnunc() -> None: + # test the dictionary (type ignored) and fully typed alterantives + bucket_sort_agg = aggs.BucketSort(**{"from": 1, "size": 1, "_expand__to_dot": False}) # type: ignore + assert bucket_sort_agg.to_dict() == {"bucket_sort": {"from": 1, "size": 1}} + bucket_sort_agg = aggs.BucketSort(from_=1, size=1, _expand__to_dot=False) + assert bucket_sort_agg.to_dict() == {"bucket_sort": {"from": 1, "size": 1}} + + a = aggs.DateHistogram(field="date", interval="month") + a.bucket("bucket_truncate", "bucket_sort", **{"from": 1, "size": 1}) + assert { + "date_histogram": {"field": "date", "interval": "month"}, + "aggs": {"bucket_truncate": {"bucket_sort": {"from": 1, "size": 1}}}, + } == a.to_dict() + + +def test_geohash_grid_aggregation() -> None: + # test the dictionary (type ignored) and fully typed alterantives + a = aggs.GeohashGrid(**{"field": "centroid", "precision": 3}) # type: ignore + assert {"geohash_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + a = aggs.GeohashGrid(field="centroid", precision=3) + assert {"geohash_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + + +def test_geohex_grid_aggregation() -> None: + # test the dictionary (type ignored) and fully typed alterantives + a = aggs.GeohexGrid(**{"field": "centroid", "precision": 3}) # type: ignore + assert {"geohex_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + a = 
aggs.GeohexGrid(field="centroid", precision=3) + assert {"geohex_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + + +def test_geotile_grid_aggregation() -> None: + # test the dictionary (type ignored) and fully typed alterantives + a = aggs.GeotileGrid(**{"field": "centroid", "precision": 3}) # type: ignore + assert {"geotile_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + a = aggs.GeotileGrid(field="centroid", precision=3) + assert {"geotile_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + + +def test_boxplot_aggregation() -> None: + a = aggs.Boxplot(field="load_time") + + assert {"boxplot": {"field": "load_time"}} == a.to_dict() + + +def test_rare_terms_aggregation() -> None: + a = aggs.RareTerms(field="the-field") + a.bucket("total_sales", "sum", field="price") + a.bucket( + "sales_bucket_sort", + "bucket_sort", + sort=[{"total_sales": {"order": "desc"}}], + size=3, + ) + + assert { + "aggs": { + "sales_bucket_sort": { + "bucket_sort": {"size": 3, "sort": [{"total_sales": {"order": "desc"}}]} + }, + "total_sales": {"sum": {"field": "price"}}, + }, + "rare_terms": {"field": "the-field"}, + } == a.to_dict() + + +def test_variable_width_histogram_aggregation() -> None: + a = aggs.VariableWidthHistogram(field="price", buckets=2) + assert {"variable_width_histogram": {"buckets": 2, "field": "price"}} == a.to_dict() + + +def test_ip_prefix_aggregation() -> None: + # test the dictionary (type ignored) and fully typed alterantives + a = aggs.IPPrefix(**{"field": "ipv4", "prefix_length": 24}) # type: ignore + assert {"ip_prefix": {"field": "ipv4", "prefix_length": 24}} == a.to_dict() + a = aggs.IPPrefix(field="ipv4", prefix_length=24) + assert {"ip_prefix": {"field": "ipv4", "prefix_length": 24}} == a.to_dict() + + +def test_ip_prefix_aggregation_extra() -> None: + a = aggs.IPPrefix(field="ipv6", prefix_length=64, is_ipv6=True) + + assert { + "ip_prefix": { + "field": "ipv6", + "prefix_length": 64, + "is_ipv6": True, + }, + } == 
a.to_dict() + + +def test_multi_terms_aggregation() -> None: + a = aggs.MultiTerms(terms=[{"field": "tags"}, {"field": "author.row"}]) + assert { + "multi_terms": { + "terms": [ + {"field": "tags"}, + {"field": "author.row"}, + ] + } + } == a.to_dict() + a = aggs.MultiTerms( + terms=[ + types.MultiTermLookup(field="tags"), + types.MultiTermLookup(field="author.row"), + ] + ) + assert { + "multi_terms": { + "terms": [ + {"field": "tags"}, + {"field": "author.row"}, + ] + } + } == a.to_dict() + + +def test_categorize_text_aggregation() -> None: + a = aggs.CategorizeText( + field="tags", + categorization_filters=["\\w+\\_\\d{3}"], + max_matched_tokens=2, + similarity_threshold=30, + ) + assert { + "categorize_text": { + "field": "tags", + "categorization_filters": ["\\w+\\_\\d{3}"], + "max_matched_tokens": 2, + "similarity_threshold": 30, + } + } == a.to_dict() + + +def test_median_absolute_deviation_aggregation() -> None: + a = aggs.MedianAbsoluteDeviation(field="rating") + + assert {"median_absolute_deviation": {"field": "rating"}} == a.to_dict() + + +def test_t_test_aggregation() -> None: + a = aggs.TTest( + a={"field": "startup_time_before"}, + b={"field": "startup_time_after"}, + type="paired", + ) + + assert { + "t_test": { + "a": {"field": "startup_time_before"}, + "b": {"field": "startup_time_after"}, + "type": "paired", + } + } == a.to_dict() + + +def test_geo_line_aggregation() -> None: + a = aggs.GeoLine(point={"field": "centroid"}, sort={"field": "date"}) + + assert { + "geo_line": { + "point": {"field": "centroid"}, + "sort": {"field": "date"}, + }, + } == a.to_dict() + + +def test_inference_aggregation() -> None: + a = aggs.Inference(model_id="model-id", buckets_path={"agg_name": "agg_name"}) + assert { + "inference": {"buckets_path": {"agg_name": "agg_name"}, "model_id": "model-id"} + } == a.to_dict() + + +def test_matrix_stats_aggregation() -> None: + a = aggs.MatrixStats(fields=["poverty", "income"]) + + assert {"matrix_stats": {"fields": ["poverty", 
"income"]}} == a.to_dict() + + +def test_moving_percentiles_aggregation() -> None: + a = aggs.DateHistogram() + a.bucket("the_percentile", "percentiles", field="price", percents=[1.0, 99.0]) + a.pipeline( + "the_movperc", "moving_percentiles", buckets_path="the_percentile", window=10 + ) + + assert { + "aggs": { + "the_movperc": { + "moving_percentiles": {"buckets_path": "the_percentile", "window": 10} + }, + "the_percentile": { + "percentiles": {"field": "price", "percents": [1.0, 99.0]} + }, + }, + "date_histogram": {}, + } == a.to_dict() + + +def test_normalize_aggregation() -> None: + a = aggs.Normalize(buckets_path="normalized", method="percent_of_sum") + assert { + "normalize": {"buckets_path": "normalized", "method": "percent_of_sum"} + } == a.to_dict() + + +def test_random_sampler_aggregation() -> None: + a = aggs.RandomSampler(probability=0.1).metric( + "price_percentiles", + "percentiles", + field="price", + ) + + assert { + "random_sampler": { + "probability": 0.1, + }, + "aggs": { + "price_percentiles": { + "percentiles": {"field": "price"}, + }, + }, + } == a.to_dict() + + +def test_adjancecy_matrix_aggregation() -> None: + a = aggs.AdjacencyMatrix(filters={"grpA": {"terms": {"accounts": ["hillary", "sidney"]}}, "grpB": {"terms": {"accounts": ["donald", "mitt"]}}, "grpC": {"terms": {"accounts": ["vladimir", "nigel"]}}}) # type: ignore + assert { + "adjacency_matrix": { + "filters": { + "grpA": {"terms": {"accounts": ["hillary", "sidney"]}}, + "grpB": {"terms": {"accounts": ["donald", "mitt"]}}, + "grpC": {"terms": {"accounts": ["vladimir", "nigel"]}}, + } + } + } == a.to_dict() + a = aggs.AdjacencyMatrix( + filters={ + "grpA": query.Terms(accounts=["hillary", "sidney"]), + "grpB": query.Terms(accounts=["donald", "mitt"]), + "grpC": query.Terms(accounts=["vladimir", "nigel"]), + } + ) + assert { + "adjacency_matrix": { + "filters": { + "grpA": {"terms": {"accounts": ["hillary", "sidney"]}}, + "grpB": {"terms": {"accounts": ["donald", "mitt"]}}, + 
"grpC": {"terms": {"accounts": ["vladimir", "nigel"]}}, + } + } + } == a.to_dict() + + +def test_top_metrics_aggregation() -> None: + # test the dictionary (type ignored) and fully typed alterantives + a = aggs.TopMetrics(metrics={"field": "m"}, sort={"s": "desc"}) # type: ignore + assert { + "top_metrics": {"metrics": {"field": "m"}, "sort": {"s": "desc"}} + } == a.to_dict() + a = aggs.TopMetrics( + metrics=types.TopMetricsValue(field="m"), + sort=types.SortOptions("s", types.FieldSort(order="desc")), + ) + assert { + "top_metrics": {"metrics": {"field": "m"}, "sort": {"s": {"order": "desc"}}} + } == a.to_dict() + + +def test_bucket_agg_with_filter() -> None: + b = aggs.Filter(query.Terms(something=[1, 2, 3])) + + a = aggs.Terms(field="some_field", size=100) + a.bucket("b", b) + + assert a.aggs["b"] == a["b"] # a['b'] threw exception before patch #1902 diff --git a/test_elasticsearch/test_dsl/test_analysis.py b/test_elasticsearch/test_dsl/test_analysis.py new file mode 100644 index 000000000..47a08672d --- /dev/null +++ b/test_elasticsearch/test_dsl/test_analysis.py @@ -0,0 +1,216 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pytest import raises + +from elasticsearch.dsl import analysis + + +def test_analyzer_serializes_as_name() -> None: + a = analysis.analyzer("my_analyzer") + + assert "my_analyzer" == a.to_dict() # type: ignore + + +def test_analyzer_has_definition() -> None: + a = analysis.CustomAnalyzer( + "my_analyzer", tokenizer="keyword", filter=["lowercase"] + ) + + assert { + "type": "custom", + "tokenizer": "keyword", + "filter": ["lowercase"], + } == a.get_definition() + + +def test_simple_multiplexer_filter() -> None: + a = analysis.analyzer( + "my_analyzer", + tokenizer="keyword", + filter=[ + analysis.token_filter( + "my_multi", "multiplexer", filters=["lowercase", "lowercase, stop"] + ) + ], + ) + + assert { + "analyzer": { + "my_analyzer": { + "filter": ["my_multi"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "my_multi": { + "filters": ["lowercase", "lowercase, stop"], + "type": "multiplexer", + } + }, + } == a.get_analysis_definition() + + +def test_multiplexer_with_custom_filter() -> None: + a = analysis.analyzer( + "my_analyzer", + tokenizer="keyword", + filter=[ + analysis.token_filter( + "my_multi", + "multiplexer", + filters=[ + [analysis.token_filter("en", "snowball", language="English")], + "lowercase, stop", + ], + ) + ], + ) + + assert { + "analyzer": { + "my_analyzer": { + "filter": ["my_multi"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "en": {"type": "snowball", "language": "English"}, + "my_multi": {"filters": ["en", "lowercase, stop"], "type": "multiplexer"}, + }, + } == a.get_analysis_definition() + + +def test_conditional_token_filter() -> None: + a = analysis.analyzer( + "my_cond", + tokenizer=analysis.tokenizer("keyword"), + filter=[ + analysis.token_filter( + "testing", + "condition", + script={"source": "return true"}, + filter=[ + "lowercase", + analysis.token_filter("en", "snowball", language="English"), + ], + ), + "stop", + ], + ) + + assert { + "analyzer": { + "my_cond": { + 
"filter": ["testing", "stop"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "en": {"language": "English", "type": "snowball"}, + "testing": { + "script": {"source": "return true"}, + "filter": ["lowercase", "en"], + "type": "condition", + }, + }, + } == a.get_analysis_definition() + + +def test_conflicting_nested_filters_cause_error() -> None: + a = analysis.analyzer( + "my_cond", + tokenizer=analysis.tokenizer("keyword"), + filter=[ + analysis.token_filter("en", "stemmer", language="english"), + analysis.token_filter( + "testing", + "condition", + script={"source": "return true"}, + filter=[ + "lowercase", + analysis.token_filter("en", "snowball", language="English"), + ], + ), + ], + ) + + with raises(ValueError): + a.get_analysis_definition() + + +def test_normalizer_serializes_as_name() -> None: + n = analysis.normalizer("my_normalizer") + + assert "my_normalizer" == n.to_dict() # type: ignore + + +def test_normalizer_has_definition() -> None: + n = analysis.CustomNormalizer( + "my_normalizer", filter=["lowercase", "asciifolding"], char_filter=["quote"] + ) + + assert { + "type": "custom", + "filter": ["lowercase", "asciifolding"], + "char_filter": ["quote"], + } == n.get_definition() + + +def test_tokenizer() -> None: + t = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3) + + assert t.to_dict() == "trigram" # type: ignore + assert {"type": "nGram", "min_gram": 3, "max_gram": 3} == t.get_definition() + + +def test_custom_analyzer_can_collect_custom_items() -> None: + trigram = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3) + my_stop = analysis.token_filter("my_stop", "stop", stopwords=["a", "b"]) + umlauts = analysis.char_filter("umlauts", "pattern_replace", mappings=["ü=>ue"]) + a = analysis.analyzer( + "my_analyzer", + tokenizer=trigram, + filter=["lowercase", my_stop], + char_filter=["html_strip", umlauts], + ) + + assert a.to_dict() == "my_analyzer" # type: ignore + assert { + "analyzer": { + 
"my_analyzer": { + "type": "custom", + "tokenizer": "trigram", + "filter": ["lowercase", "my_stop"], + "char_filter": ["html_strip", "umlauts"], + } + }, + "tokenizer": {"trigram": trigram.get_definition()}, + "filter": {"my_stop": my_stop.get_definition()}, + "char_filter": {"umlauts": umlauts.get_definition()}, + } == a.get_analysis_definition() + + +def test_stemmer_analyzer_can_pass_name() -> None: + t = analysis.token_filter( + "my_english_filter", name="minimal_english", type="stemmer" + ) + assert t.to_dict() == "my_english_filter" # type: ignore + assert {"type": "stemmer", "name": "minimal_english"} == t.get_definition() diff --git a/test_elasticsearch/test_dsl/test_connections.py b/test_elasticsearch/test_dsl/test_connections.py new file mode 100644 index 000000000..96706d298 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_connections.py @@ -0,0 +1,143 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+from typing import Any, List
+
+from elasticsearch import Elasticsearch
+from pytest import raises
+
+from elasticsearch.dsl import connections, serializer
+
+
+class DummyElasticsearch:
+    def __init__(self, *args: Any, hosts: List[str], **kwargs: Any):
+        self.hosts = hosts
+
+
+def test_default_connection_is_returned_by_default() -> None:
+    c = connections.Connections[object](elasticsearch_class=object)
+
+    con, con2 = object(), object()
+    c.add_connection("default", con)
+
+    c.add_connection("not-default", con2)
+
+    assert c.get_connection() is con
+
+
+def test_get_connection_created_connection_if_needed() -> None:
+    c = connections.Connections[DummyElasticsearch](
+        elasticsearch_class=DummyElasticsearch
+    )
+    c.configure(
+        default={"hosts": ["https://es.com:9200"]},
+        local={"hosts": ["https://localhost:9200"]},
+    )
+
+    default = c.get_connection()
+    local = c.get_connection("local")
+
+    assert isinstance(default, DummyElasticsearch)
+    assert isinstance(local, DummyElasticsearch)
+
+    assert default.hosts == ["https://es.com:9200"]
+    assert local.hosts == ["https://localhost:9200"]
+
+
+def test_configure_preserves_unchanged_connections() -> None:
+    c = connections.Connections[DummyElasticsearch](
+        elasticsearch_class=DummyElasticsearch
+    )
+
+    c.configure(
+        default={"hosts": ["https://es.com:9200"]},
+        local={"hosts": ["https://localhost:9200"]},
+    )
+    default = c.get_connection()
+    local = c.get_connection("local")
+
+    c.configure(
+        default={"hosts": ["https://not-es.com:9200"]},
+        local={"hosts": ["https://localhost:9200"]},
+    )
+    new_default = c.get_connection()
+    new_local = c.get_connection("local")
+
+    assert new_local is local
+    assert new_default is not default
+
+
+def test_remove_connection_removes_both_conn_and_conf() -> None:
+    c = connections.Connections[object](elasticsearch_class=DummyElasticsearch)
+
+    c.configure(
+        default={"hosts": ["https://es.com:9200"]},
+        local={"hosts": ["https://localhost:9200"]},
+    )
+    c.add_connection("local2", object())
+
+    c.remove_connection("default")  # configured only, never opened: exercises the "conf" half
+    c.get_connection("local2")  # still resolvable until it is removed on the next line
+    c.remove_connection("local2")  # added as a live object: exercises the "conn" half
+
+    for alias in ("local2", "default"):  # one raises() per alias so neither lookup is skipped
+        with raises(Exception):
+            c.get_connection(alias)
+
+
+def test_create_connection_constructs_client() -> None:
+    c = connections.Connections[DummyElasticsearch](
+        elasticsearch_class=DummyElasticsearch
+    )
+    c.create_connection("testing", hosts=["https://es.com:9200"])
+
+    con = c.get_connection("testing")
+    assert con.hosts == ["https://es.com:9200"]
+
+
+def test_create_connection_adds_our_serializer() -> None:
+    c = connections.Connections[Elasticsearch](elasticsearch_class=Elasticsearch)
+    c.create_connection("testing", hosts=["https://es.com:9200"])
+
+    c_serializers = c.get_connection("testing").transport.serializers
+    assert c_serializers.serializers["application/json"] is serializer.serializer
+
+
+def test_connection_has_correct_user_agent() -> None:
+    c = connections.Connections[Elasticsearch](elasticsearch_class=Elasticsearch)
+
+    c.create_connection("testing", hosts=["https://es.com:9200"])
+    assert (
+        c.get_connection("testing")
+        ._headers["user-agent"]
+        .startswith("elasticsearch-dsl-py/")
+    )
+
+    my_client = Elasticsearch(hosts=["http://localhost:9200"])
+    my_client = my_client.options(headers={"user-agent": "my-user-agent/1.0"})
+    c.add_connection("default", my_client)
+    assert c.get_connection()._headers["user-agent"].startswith("elasticsearch-dsl-py/")
+
+    my_client = Elasticsearch(hosts=["http://localhost:9200"])
+    assert (
+        c.get_connection(my_client)
+        ._headers["user-agent"]
+        .startswith("elasticsearch-dsl-py/")
+    )
+
+    not_a_client = object()
+    assert c.get_connection(not_a_client) == not_a_client  # type: ignore[arg-type]
diff --git a/test_elasticsearch/test_dsl/test_field.py b/test_elasticsearch/test_dsl/test_field.py
new file mode 100644
index 000000000..423936ae3
--- /dev/null
+++ b/test_elasticsearch/test_dsl/test_field.py
@@ -0,0 +1,234 @@
+# Licensed to
Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import base64 +import ipaddress +from datetime import date, datetime, time +from typing import cast + +import pytest +from dateutil import tz + +from elasticsearch.dsl import InnerDoc, Range, ValidationException, field + + +def test_date_range_deserialization() -> None: + data = {"lt": "2018-01-01T00:30:10"} + + r = field.DateRange().deserialize(data) + + assert isinstance(r, Range) + assert r.lt == datetime(2018, 1, 1, 0, 30, 10) + + +def test_boolean_deserialization() -> None: + bf = field.Boolean() + + assert not bf.deserialize("false") + assert not bf.deserialize(False) + assert not bf.deserialize("") + assert not bf.deserialize(0) + + assert bf.deserialize(True) + assert bf.deserialize("true") + assert bf.deserialize(1) + + +def test_datetime_deserialization() -> None: + f = field.Date() + dt = datetime.now() + assert dt == f._deserialize(dt.isoformat()) + + d = date.today() + assert datetime.combine(d, time()) == f._deserialize(d.isoformat()) + + +def test_date_deserialization() -> None: + f = field.Date(format="yyyy-MM-dd") + d = date.today() + assert d == f._deserialize(d.isoformat()) + + dt = datetime.now() + assert dt.date() == f._deserialize(dt.isoformat()) + + +def 
test_date_field_can_have_default_tz() -> None: + f = field.Date(default_timezone="UTC") + now = datetime.now() + + now_with_tz = cast(datetime, f._deserialize(now)) + + assert now_with_tz.tzinfo == tz.gettz("UTC") + assert now.isoformat() + "+00:00" == now_with_tz.isoformat() + + now_with_tz = cast(datetime, f._deserialize(now.isoformat())) + + assert now_with_tz.tzinfo == tz.gettz("UTC") + assert now.isoformat() + "+00:00" == now_with_tz.isoformat() + + +def test_custom_field_car_wrap_other_field() -> None: + class MyField(field.CustomField): + @property + def builtin_type(self) -> field.Text: + return field.Text(**self._params) + + assert {"type": "text", "index": "not_analyzed"} == MyField( + index="not_analyzed" + ).to_dict() + + +def test_field_from_dict() -> None: + f = field.construct_field({"type": "text", "index": "not_analyzed"}) + + assert isinstance(f, field.Text) + assert {"type": "text", "index": "not_analyzed"} == f.to_dict() + + +def test_multi_fields_are_accepted_and_parsed() -> None: + f = field.construct_field( + "text", + fields={"raw": {"type": "keyword"}, "eng": field.Text(analyzer="english")}, + ) + + assert isinstance(f, field.Text) + assert { + "type": "text", + "fields": { + "raw": {"type": "keyword"}, + "eng": {"type": "text", "analyzer": "english"}, + }, + } == f.to_dict() + + +def test_nested_provides_direct_access_to_its_fields() -> None: + f = field.Nested(properties={"name": {"type": "text", "index": "not_analyzed"}}) + + assert "name" in f + assert f["name"] == field.Text(index="not_analyzed") + + +def test_field_supports_multiple_analyzers() -> None: + f = field.Text(analyzer="snowball", search_analyzer="keyword") + assert { + "analyzer": "snowball", + "search_analyzer": "keyword", + "type": "text", + } == f.to_dict() + + +def test_multifield_supports_multiple_analyzers() -> None: + f = field.Text( + fields={ + "f1": field.Text(search_analyzer="keyword", analyzer="snowball"), + "f2": field.Text(analyzer="keyword"), + } + ) + assert 
{
+        "fields": {
+            "f1": {
+                "analyzer": "snowball",
+                "search_analyzer": "keyword",
+                "type": "text",
+            },
+            "f2": {"analyzer": "keyword", "type": "text"},
+        },
+        "type": "text",
+    } == f.to_dict()
+
+
+def test_scaled_float() -> None:
+    with pytest.raises(TypeError):
+        field.ScaledFloat()  # type: ignore
+    f = field.ScaledFloat(123)
+    assert f.to_dict() == {"scaling_factor": 123, "type": "scaled_float"}
+
+
+def test_ipaddress() -> None:
+    f = field.Ip()
+    assert f.deserialize("127.0.0.1") == ipaddress.ip_address("127.0.0.1")
+    assert f.deserialize("::1") == ipaddress.ip_address("::1")
+    assert f.serialize(f.deserialize("::1")) == "::1"
+    assert f.deserialize(None) is None
+    with pytest.raises(ValueError):
+        f.deserialize("not_an_ipaddress")  # must raise; there is no return value to assert on
+
+
+def test_float() -> None:
+    f = field.Float()
+    assert f.deserialize("42") == 42.0
+    assert f.deserialize(None) is None
+    with pytest.raises(ValueError):
+        f.deserialize("not_a_float")  # must raise; there is no return value to assert on
+
+
+def test_integer() -> None:
+    f = field.Integer()
+    assert f.deserialize("42") == 42
+    assert f.deserialize(None) is None
+    with pytest.raises(ValueError):
+        f.deserialize("not_an_integer")  # must raise; there is no return value to assert on
+
+
+def test_binary() -> None:
+    f = field.Binary()
+    assert f.deserialize(base64.b64encode(b"42")) == b"42"
+    assert f.deserialize(f.serialize(b"42")) == b"42"
+    assert f.deserialize(None) is None
+
+
+def test_constant_keyword() -> None:
+    f = field.ConstantKeyword()
+    assert f.to_dict() == {"type": "constant_keyword"}
+
+
+def test_rank_features() -> None:
+    f = field.RankFeatures()
+    assert f.to_dict() == {"type": "rank_features"}
+
+
+def test_object_dynamic_values() -> None:
+    f = field.Object(dynamic=True)
+    assert f.to_dict()["dynamic"] is True
+    f = field.Object(dynamic=False)
+    assert f.to_dict()["dynamic"] is False
+    f = field.Object(dynamic="strict")
+    assert f.to_dict()["dynamic"] == "strict"
+
+
+def test_object_disabled() -> None:
+    f = field.Object(enabled=False)
+    assert f.to_dict() == {"type": "object",
"enabled": False} + + +def test_object_constructor() -> None: + expected = {"type": "object", "properties": {"inner_int": {"type": "integer"}}} + + class Inner(InnerDoc): + inner_int = field.Integer() + + obj_from_doc = field.Object(doc_class=Inner) + assert obj_from_doc.to_dict() == expected + + obj_from_props = field.Object(properties={"inner_int": field.Integer()}) + assert obj_from_props.to_dict() == expected + + with pytest.raises(ValidationException): + field.Object(doc_class=Inner, properties={"inner_int": field.Integer()}) + + with pytest.raises(ValidationException): + field.Object(doc_class=Inner, dynamic=False) diff --git a/test_elasticsearch/test_dsl/test_integration/__init__.py b/test_elasticsearch/test_dsl/test_integration/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/test_elasticsearch/test_dsl/test_integration/_async/__init__.py b/test_elasticsearch/test_dsl/test_integration/_async/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_analysis.py b/test_elasticsearch/test_dsl/test_integration/_async/test_analysis.py new file mode 100644 index 000000000..1feae56cf --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_analysis.py @@ -0,0 +1,54 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import analyzer, token_filter, tokenizer + + +@pytest.mark.asyncio +async def test_simulate_with_just__builtin_tokenizer( + async_client: AsyncElasticsearch, +) -> None: + a = analyzer("my-analyzer", tokenizer="keyword") + tokens = (await a.async_simulate("Hello World!", using=async_client)).tokens + + assert len(tokens) == 1 + assert tokens[0].token == "Hello World!" + + +@pytest.mark.asyncio +async def test_simulate_complex(async_client: AsyncElasticsearch) -> None: + a = analyzer( + "my-analyzer", + tokenizer=tokenizer("split_words", "simple_pattern_split", pattern=":"), + filter=["lowercase", token_filter("no-ifs", "stop", stopwords=["if"])], + ) + + tokens = (await a.async_simulate("if:this:works", using=async_client)).tokens + + assert len(tokens) == 2 + assert ["this", "works"] == [t.token for t in tokens] + + +@pytest.mark.asyncio +async def test_simulate_builtin(async_client: AsyncElasticsearch) -> None: + a = analyzer("my-analyzer", "english") + tokens = (await a.async_simulate("fixes running")).tokens + + assert ["fix", "run"] == [t.token for t in tokens] diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_document.py b/test_elasticsearch/test_dsl/test_integration/_async/test_document.py new file mode 100644 index 000000000..83b683e1e --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_document.py @@ -0,0 +1,852 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# this file creates several documents using bad or no types because +# these are still supported and should be kept functional in spite +# of not having appropriate type hints. For that reason the comment +# below disables many mypy checks that fail as a result of this. +# mypy: disable-error-code="assignment, index, arg-type, call-arg, operator, comparison-overlap, attr-defined" + +from datetime import datetime +from ipaddress import ip_address +from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, List, Tuple, Union + +import pytest +from elasticsearch import AsyncElasticsearch, ConflictError, NotFoundError +from elasticsearch.helpers.errors import BulkIndexError +from pytest import raises +from pytz import timezone + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncSearch, + Binary, + Boolean, + Date, + DenseVector, + Double, + InnerDoc, + Ip, + Keyword, + Long, + Mapping, + MetaField, + Nested, + Object, + Q, + RankFeatures, + Text, + analyzer, + mapped_field, +) +from elasticsearch.dsl.utils import AttrList + +snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"]) + + +class User(InnerDoc): + name = Text(fields={"raw": Keyword()}) + + +class Wiki(AsyncDocument): + owner = Object(User) + views = Long() + ranked =
RankFeatures() + + class Index: + name = "test-wiki" + + +class Repository(AsyncDocument): + owner = Object(User) + created_at = Date() + description = Text(analyzer=snowball) + tags = Keyword() + + @classmethod + def search(cls) -> AsyncSearch["Repository"]: # type: ignore[override] + return super().search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(AsyncDocument): + committed_date = Date() + authored_date = Date() + description = Text(analyzer=snowball) + + class Index: + name = "flat-git" + + class Meta: + mapping = Mapping() + + +class History(InnerDoc): + timestamp = Date() + diff = Text() + + +class Comment(InnerDoc): + content = Text() + created_at = Date() + author = Object(User) + history = Nested(History) + + class Meta: + dynamic = MetaField(False) + + +class PullRequest(AsyncDocument): + comments = Nested(Comment) + created_at = Date() + + class Index: + name = "test-prs" + + +class SerializationDoc(AsyncDocument): + i = Long() + b = Boolean() + d = Double() + bin = Binary() + ip = Ip() + + class Index: + name = "test-serialization" + + +class Tags(AsyncDocument): + tags = Keyword(multi=True) + + class Index: + name = "tags" + + +@pytest.mark.asyncio +async def test_serialization(async_write_client: AsyncElasticsearch) -> None: + await SerializationDoc.init() + await async_write_client.index( + index="test-serialization", + id=42, + body={ + "i": [1, 2, "3", None], + "b": [True, False, "true", "false", None], + "d": [0.1, "-0.1", None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "ip": ["::1", "127.0.0.1", None], + }, + ) + sd = await SerializationDoc.get(id=42) + assert sd is not None + + assert sd.i == [1, 2, 3, None] + assert sd.b == [True, False, True, False, None] + assert sd.d == [0.1, -0.1, None] + assert sd.bin == [b"Hello World", None] + assert sd.ip == [ip_address("::1"), ip_address("127.0.0.1"), None] + + assert sd.to_dict() == { + "b": [True, False, True, False, None], + "bin": ["SGVsbG8gV29ybGQ=", 
None], + "d": [0.1, -0.1, None], + "i": [1, 2, 3, None], + "ip": ["::1", "127.0.0.1", None], + } + + +@pytest.mark.asyncio +async def test_nested_inner_hits_are_wrapped_properly(async_pull_request: Any) -> None: + history_query = Q( + "nested", + path="comments.history", + inner_hits={}, + query=Q("match", comments__history__diff="ahoj"), + ) + s = PullRequest.search().query( + "nested", inner_hits={}, path="comments", query=history_query + ) + + response = await s.execute() + pr = response.hits[0] + assert isinstance(pr, PullRequest) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].history[0], History) + + comment = pr.meta.inner_hits.comments.hits[0] + assert isinstance(comment, Comment) + assert comment.author.name == "honzakral" + assert isinstance(comment.history[0], History) + + history = comment.meta.inner_hits["comments.history"].hits[0] + assert isinstance(history, History) + assert history.timestamp == datetime(2012, 1, 1) + assert "score" in history.meta + + +@pytest.mark.asyncio +async def test_nested_inner_hits_are_deserialized_properly( + async_pull_request: Any, +) -> None: + s = PullRequest.search().query( + "nested", + inner_hits={}, + path="comments", + query=Q("match", comments__content="hello"), + ) + + response = await s.execute() + pr = response.hits[0] + assert isinstance(pr.created_at, datetime) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].created_at, datetime) + + +@pytest.mark.asyncio +async def test_nested_top_hits_are_wrapped_properly(async_pull_request: Any) -> None: + s = PullRequest.search() + s.aggs.bucket("comments", "nested", path="comments").metric( + "hits", "top_hits", size=1 + ) + + r = await s.execute() + + print(r._d_) + assert isinstance(r.aggregations.comments.hits.hits[0], Comment) + + +@pytest.mark.asyncio +async def test_update_object_field(async_write_client: AsyncElasticsearch) -> None: + await Wiki.init() + w = Wiki( + owner=User(name="Honza Kral"), + 
_id="elasticsearch-py", + ranked={"test1": 0.1, "topic2": 0.2}, + ) + await w.save() + + assert "updated" == await w.update(owner=[{"name": "Honza"}, User(name="Nick")]) + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + w = await Wiki.get(id="elasticsearch-py") + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + assert w.ranked == {"test1": 0.1, "topic2": 0.2} + + +@pytest.mark.asyncio +async def test_update_script(async_write_client: AsyncElasticsearch) -> None: + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + await w.save() + + await w.update(script="ctx._source.views += params.inc", inc=5) + w = await Wiki.get(id="elasticsearch-py") + assert w.views == 47 + + +@pytest.mark.asyncio +async def test_update_script_with_dict(async_write_client: AsyncElasticsearch) -> None: + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + await w.save() + + await w.update( + script={ + "source": "ctx._source.views += params.inc1 + params.inc2", + "params": {"inc1": 2}, + "lang": "painless", + }, + inc2=3, + ) + w = await Wiki.get(id="elasticsearch-py") + assert w.views == 47 + + +@pytest.mark.asyncio +async def test_update_retry_on_conflict(async_write_client: AsyncElasticsearch) -> None: + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + await w.save() + + w1 = await Wiki.get(id="elasticsearch-py") + w2 = await Wiki.get(id="elasticsearch-py") + assert w1 is not None + assert w2 is not None + + await w1.update( + script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1 + ) + await w2.update( + script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1 + ) + + w = await Wiki.get(id="elasticsearch-py") + assert w.views == 52 + + +@pytest.mark.asyncio +@pytest.mark.parametrize("retry_on_conflict", [None, 0]) +async def test_update_conflicting_version( + async_write_client: 
AsyncElasticsearch, retry_on_conflict: bool +) -> None: + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + await w.save() + + w1 = await Wiki.get(id="elasticsearch-py") + w2 = await Wiki.get(id="elasticsearch-py") + assert w1 is not None + assert w2 is not None + + await w1.update(script="ctx._source.views += params.inc", inc=5) + + with raises(ConflictError): + await w2.update( + script="ctx._source.views += params.inc", + inc=5, + retry_on_conflict=retry_on_conflict, + ) + + +@pytest.mark.asyncio +async def test_save_and_update_return_doc_meta( + async_write_client: AsyncElasticsearch, +) -> None: + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + resp = await w.save(return_doc_meta=True) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "created" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_version", + "result", + } + + resp = await w.update( + script="ctx._source.views += params.inc", inc=5, return_doc_meta=True + ) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "updated" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_version", + "result", + } + + +@pytest.mark.asyncio +async def test_init(async_write_client: AsyncElasticsearch) -> None: + await Repository.init(index="test-git") + + assert await async_write_client.indices.exists(index="test-git") + + +@pytest.mark.asyncio +async def test_get_raises_404_on_index_missing( + async_data_client: AsyncElasticsearch, +) -> None: + with raises(NotFoundError): + await Repository.get("elasticsearch-dsl-php", index="not-there") + + +@pytest.mark.asyncio +async def test_get_raises_404_on_non_existent_id( + async_data_client: AsyncElasticsearch, +) -> None: + with raises(NotFoundError): + await Repository.get("elasticsearch-dsl-php") + + +@pytest.mark.asyncio +async def 
test_get_returns_none_if_404_ignored( + async_data_client: AsyncElasticsearch, +) -> None: + assert None is await Repository.get( + "elasticsearch-dsl-php", using=async_data_client.options(ignore_status=404) + ) + + +@pytest.mark.asyncio +async def test_get_returns_none_if_404_ignored_and_index_doesnt_exist( + async_data_client: AsyncElasticsearch, +) -> None: + assert None is await Repository.get( + "42", index="not-there", using=async_data_client.options(ignore_status=404) + ) + + +@pytest.mark.asyncio +async def test_get(async_data_client: AsyncElasticsearch) -> None: + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + assert datetime(2014, 3, 3) == elasticsearch_repo.created_at + + +@pytest.mark.asyncio +async def test_exists_return_true(async_data_client: AsyncElasticsearch) -> None: + assert await Repository.exists("elasticsearch-dsl-py") + + +@pytest.mark.asyncio +async def test_exists_false(async_data_client: AsyncElasticsearch) -> None: + assert not await Repository.exists("elasticsearch-dsl-php") + + +@pytest.mark.asyncio +async def test_get_with_tz_date(async_data_client: AsyncElasticsearch) -> None: + first_commit = await Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + tzinfo = timezone("Europe/Prague") + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123000)) + == first_commit.authored_date + ) + + +@pytest.mark.asyncio +async def test_save_with_tz_date(async_data_client: AsyncElasticsearch) -> None: + tzinfo = timezone("Europe/Prague") + first_commit = await Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + first_commit.committed_date = tzinfo.localize( + datetime(2014, 5, 2, 13, 47, 19, 123456) + ) + await first_commit.save() + + first_commit 
= await Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123456)) + == first_commit.committed_date + ) + + +COMMIT_DOCS_WITH_MISSING = [ + {"_id": "0"}, # Missing + {"_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"}, # Existing + {"_id": "f"}, # Missing + {"_id": "eb3e543323f189fd7b698e66295427204fff5755"}, # Existing +] + + +@pytest.mark.asyncio +async def test_mget(async_data_client: AsyncElasticsearch) -> None: + commits = await Commit.mget(COMMIT_DOCS_WITH_MISSING) + assert commits[0] is None + assert commits[1] is not None + assert commits[1].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[2] is None + assert commits[3] is not None + assert commits[3].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +@pytest.mark.asyncio +async def test_mget_raises_exception_when_missing_param_is_invalid( + async_data_client: AsyncElasticsearch, +) -> None: + with raises(ValueError): + await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raj") + + +@pytest.mark.asyncio +async def test_mget_raises_404_when_missing_param_is_raise( + async_data_client: AsyncElasticsearch, +) -> None: + with raises(NotFoundError): + await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raise") + + +@pytest.mark.asyncio +async def test_mget_ignores_missing_docs_when_missing_param_is_skip( + async_data_client: AsyncElasticsearch, +) -> None: + commits = await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="skip") + assert commits[0] is not None + assert commits[0].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[1] is not None + assert commits[1].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +@pytest.mark.asyncio +async def test_update_works_from_search_response( + async_data_client: AsyncElasticsearch, +) -> None: + elasticsearch_repo = (await Repository.search().execute())[0] + + await 
elasticsearch_repo.update(owner={"other_name": "elastic"}) + assert "elastic" == elasticsearch_repo.owner.other_name + + new_version = await Repository.get("elasticsearch-dsl-py") + assert new_version is not None + assert "elastic" == new_version.owner.other_name + assert "elasticsearch" == new_version.owner.name + + +@pytest.mark.asyncio +async def test_update(async_data_client: AsyncElasticsearch) -> None: + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + v = elasticsearch_repo.meta.version + + old_seq_no = elasticsearch_repo.meta.seq_no + await elasticsearch_repo.update( + owner={"new_name": "elastic"}, new_field="testing-update" + ) + + assert "elastic" == elasticsearch_repo.owner.new_name + assert "testing-update" == elasticsearch_repo.new_field + + # assert version has been updated + assert elasticsearch_repo.meta.version == v + 1 + + new_version = await Repository.get("elasticsearch-dsl-py") + assert new_version is not None + assert "testing-update" == new_version.new_field + assert "elastic" == new_version.owner.new_name + assert "elasticsearch" == new_version.owner.name + assert "seq_no" in new_version.meta + assert new_version.meta.seq_no != old_seq_no + assert "primary_term" in new_version.meta + + +@pytest.mark.asyncio +async def test_save_updates_existing_doc(async_data_client: AsyncElasticsearch) -> None: + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + + elasticsearch_repo.new_field = "testing-save" + old_seq_no = elasticsearch_repo.meta.seq_no + assert "updated" == await elasticsearch_repo.save() + + new_repo = await async_data_client.get(index="git", id="elasticsearch-dsl-py") + assert "testing-save" == new_repo["_source"]["new_field"] + assert new_repo["_seq_no"] != old_seq_no + assert new_repo["_seq_no"] == elasticsearch_repo.meta.seq_no + + +@pytest.mark.asyncio +async def test_update_empty_field(async_client: 
AsyncElasticsearch) -> None: + await Tags._index.delete(ignore_unavailable=True) + await Tags.init() + d = Tags(id="123", tags=["a", "b"]) + await d.save(refresh=True) + await d.update(tags=[], refresh=True) + assert d.tags == [] + + r = await Tags.search().execute() + assert r.hits[0].tags == [] + + +@pytest.mark.asyncio +async def test_save_automatically_uses_seq_no_and_primary_term( + async_data_client: AsyncElasticsearch, +) -> None: + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + await elasticsearch_repo.save() + + +@pytest.mark.asyncio +async def test_delete_automatically_uses_seq_no_and_primary_term( + async_data_client: AsyncElasticsearch, +) -> None: + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + await elasticsearch_repo.delete() + + +def assert_doc_equals(expected: Any, actual: Any) -> None: + for f in expected: + assert f in actual + assert actual[f] == expected[f] + + +@pytest.mark.asyncio +async def test_can_save_to_different_index( + async_write_client: AsyncElasticsearch, +) -> None: + test_repo = Repository(description="testing", meta={"id": 42}) + assert await test_repo.save(index="test-document") + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"description": "testing"}, + }, + await async_write_client.get(index="test-document", id=42), + ) + + +@pytest.mark.asyncio +async def test_save_without_skip_empty_will_include_empty_fields( + async_write_client: AsyncElasticsearch, +) -> None: + test_repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42}) + assert await test_repo.save(index="test-document", skip_empty=False) + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": 
{"field_1": [], "field_2": None, "field_3": {}}, + }, + await async_write_client.get(index="test-document", id=42), + ) + + +@pytest.mark.asyncio +async def test_delete(async_write_client: AsyncElasticsearch) -> None: + await async_write_client.create( + index="test-document", + id="elasticsearch-dsl-py", + body={ + "organization": "elasticsearch", + "created_at": "2014-03-03", + "owner": {"name": "elasticsearch"}, + }, + ) + + test_repo = Repository(meta={"id": "elasticsearch-dsl-py"}) + test_repo.meta.index = "test-document" + await test_repo.delete() + + assert not await async_write_client.exists( + index="test-document", + id="elasticsearch-dsl-py", + ) + + +@pytest.mark.asyncio +async def test_search(async_data_client: AsyncElasticsearch) -> None: + assert await Repository.search().count() == 1 + + +@pytest.mark.asyncio +async def test_search_returns_proper_doc_classes( + async_data_client: AsyncElasticsearch, +) -> None: + result = await Repository.search().execute() + + elasticsearch_repo = result.hits[0] + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + + +@pytest.mark.asyncio +async def test_refresh_mapping(async_data_client: AsyncElasticsearch) -> None: + class Commit(AsyncDocument): + class Index: + name = "git" + + await Commit._index.load_mappings() + + assert "stats" in Commit._index._mapping + assert "committer" in Commit._index._mapping + assert "description" in Commit._index._mapping + assert "committed_date" in Commit._index._mapping + assert isinstance(Commit._index._mapping["committed_date"], Date) + + +@pytest.mark.asyncio +async def test_highlight_in_meta(async_data_client: AsyncElasticsearch) -> None: + commit = ( + await Commit.search() + .query("match", description="inverting") + .highlight("description") + .execute() + )[0] + + assert isinstance(commit, Commit) + assert "description" in commit.meta.highlight + assert isinstance(commit.meta.highlight["description"], AttrList) + 
assert len(commit.meta.highlight["description"]) > 0 + + +@pytest.mark.asyncio +async def test_bulk(async_data_client: AsyncElasticsearch) -> None: + class Address(InnerDoc): + street: str + active: bool + + class Doc(AsyncDocument): + if TYPE_CHECKING: + _id: int + name: str + age: int + languages: List[str] = mapped_field(Keyword()) + addresses: List[Address] + + class Index: + name = "bulk-index" + + await Doc._index.delete(ignore_unavailable=True) + await Doc.init() + + async def gen1() -> AsyncIterator[Union[Doc, Dict[str, Any]]]: + yield Doc( + name="Joe", + age=33, + languages=["en", "fr"], + addresses=[ + Address(street="123 Main St", active=True), + Address(street="321 Park Dr.", active=False), + ], + ) + yield Doc(name="Susan", age=20, languages=["en"]) + yield {"_op_type": "create", "_id": "45", "_source": Doc(name="Sarah", age=45)} + + await Doc.bulk(gen1(), refresh=True) + docs = list(await Doc.search().execute()) + assert len(docs) == 3 + assert docs[0].to_dict() == { + "name": "Joe", + "age": 33, + "languages": [ + "en", + "fr", + ], + "addresses": [ + { + "active": True, + "street": "123 Main St", + }, + { + "active": False, + "street": "321 Park Dr.", + }, + ], + } + assert docs[1].to_dict() == { + "name": "Susan", + "age": 20, + "languages": ["en"], + } + assert docs[2].to_dict() == { + "name": "Sarah", + "age": 45, + } + assert docs[2].meta.id == "45" + + async def gen2() -> AsyncIterator[Union[Doc, Dict[str, Any]]]: + yield {"_op_type": "create", "_id": "45", "_source": Doc(name="Sarah", age=45)} + + # a "create" action with an existing id should fail + with raises(BulkIndexError): + await Doc.bulk(gen2(), refresh=True) + + async def gen3() -> AsyncIterator[Union[Doc, Dict[str, Any]]]: + yield Doc(_id="45", name="Sarah", age=45, languages=["es"]) + yield {"_op_type": "delete", "_id": docs[1].meta.id} + + await Doc.bulk(gen3(), refresh=True) + with raises(NotFoundError): + await Doc.get(docs[1].meta.id) + doc = await Doc.get("45") + assert doc is 
not None + assert (doc).to_dict() == { + "name": "Sarah", + "age": 45, + "languages": ["es"], + } + + +@pytest.mark.asyncio +async def test_legacy_dense_vector( + async_client: AsyncElasticsearch, es_version: Tuple[int, ...] +) -> None: + if es_version >= (8, 16): + pytest.skip("this test is a legacy version for Elasticsearch 8.15 or older") + + class Doc(AsyncDocument): + float_vector: List[float] = mapped_field(DenseVector(dims=3)) + + class Index: + name = "vectors" + + await Doc._index.delete(ignore_unavailable=True) + await Doc.init() + + doc = Doc(float_vector=[1.0, 1.2, 2.3]) + await doc.save(refresh=True) + + docs = await Doc.search().execute() + assert len(docs) == 1 + assert docs[0].float_vector == doc.float_vector + + +@pytest.mark.asyncio +async def test_dense_vector( + async_client: AsyncElasticsearch, es_version: Tuple[int, ...] +) -> None: + if es_version < (8, 16): + pytest.skip("this test requires Elasticsearch 8.16 or newer") + + class Doc(AsyncDocument): + float_vector: List[float] = mapped_field(DenseVector()) + byte_vector: List[int] = mapped_field(DenseVector(element_type="byte")) + bit_vector: str = mapped_field(DenseVector(element_type="bit")) + + class Index: + name = "vectors" + + await Doc._index.delete(ignore_unavailable=True) + await Doc.init() + + doc = Doc( + float_vector=[1.0, 1.2, 2.3], byte_vector=[12, 23, 34, 45], bit_vector="12abf0" + ) + await doc.save(refresh=True) + + docs = await Doc.search().execute() + assert len(docs) == 1 + assert docs[0].float_vector == doc.float_vector + assert docs[0].byte_vector == doc.byte_vector + assert docs[0].bit_vector == doc.bit_vector diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py b/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py new file mode 100644 index 000000000..5efc7033e --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py @@ -0,0 +1,305 @@ +# Licensed to Elasticsearch B.V. 
under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime +from typing import Tuple, Type + +import pytest +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import A, AsyncDocument, AsyncSearch, Boolean, Date, Keyword +from elasticsearch.dsl.faceted_search import ( + AsyncFacetedSearch, + DateHistogramFacet, + NestedFacet, + RangeFacet, + TermsFacet, +) + +from .test_document import PullRequest + + +class Repos(AsyncDocument): + is_public = Boolean() + created_at = Date() + + class Index: + name = "git" + + +class Commit(AsyncDocument): + files = Keyword() + committed_date = Date() + + class Index: + name = "git" + + +class MetricSearch(AsyncFacetedSearch): + index = "git" + doc_types = [Commit] + + facets = { + "files": TermsFacet(field="files", metric=A("max", field="committed_date")), + } + + +@pytest.fixture(scope="session") +def commit_search_cls(es_version: Tuple[int, ...]) -> Type[AsyncFacetedSearch]: + if es_version >= (7, 2): + interval_kwargs = {"fixed_interval": "1d"} + else: + interval_kwargs = {"interval": "day"} + + class CommitSearch(AsyncFacetedSearch): + index = "flat-git" + fields = ( + "description", + "files", + ) + + facets = { + "files": TermsFacet(field="files"), + "frequency": DateHistogramFacet( + 
field="authored_date", min_doc_count=1, **interval_kwargs + ), + "deletions": RangeFacet( + field="stats.deletions", + ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))], + ), + } + + return CommitSearch + + +@pytest.fixture(scope="session") +def repo_search_cls(es_version: Tuple[int, ...]) -> Type[AsyncFacetedSearch]: + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class RepoSearch(AsyncFacetedSearch): + index = "git" + doc_types = [Repos] + facets = { + "public": TermsFacet(field="is_public"), + "created": DateHistogramFacet( + field="created_at", **{interval_type: "month"} + ), + } + + def search(self) -> AsyncSearch: + s = super().search() + return s.filter("term", commit_repo="repo") + + return RepoSearch + + +@pytest.fixture(scope="session") +def pr_search_cls(es_version: Tuple[int, ...]) -> Type[AsyncFacetedSearch]: + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class PRSearch(AsyncFacetedSearch): + index = "test-prs" + doc_types = [PullRequest] + facets = { + "comments": NestedFacet( + "comments", + DateHistogramFacet( + field="comments.created_at", **{interval_type: "month"} + ), + ) + } + + return PRSearch + + +@pytest.mark.asyncio +async def test_facet_with_custom_metric(async_data_client: AsyncElasticsearch) -> None: + ms = MetricSearch() + r = await ms.execute() + + dates = [f[1] for f in r.facets.files] + assert dates == list(sorted(dates, reverse=True)) + assert dates[0] == 1399038439000 + + +@pytest.mark.asyncio +async def test_nested_facet( + async_pull_request: PullRequest, pr_search_cls: Type[AsyncFacetedSearch] +) -> None: + prs = pr_search_cls() + r = await prs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments + + +@pytest.mark.asyncio +async def test_nested_facet_with_filter( + async_pull_request: PullRequest, pr_search_cls: Type[AsyncFacetedSearch] +) -> None: + 
prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)}) + r = await prs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments + + prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)}) + r = await prs.execute() + assert not r.hits + + +@pytest.mark.asyncio +async def test_datehistogram_facet( + async_data_client: AsyncElasticsearch, repo_search_cls: Type[AsyncFacetedSearch] +) -> None: + rs = repo_search_cls() + r = await rs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created + + +@pytest.mark.asyncio +async def test_boolean_facet( + async_data_client: AsyncElasticsearch, repo_search_cls: Type[AsyncFacetedSearch] +) -> None: + rs = repo_search_cls() + r = await rs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(True, 1, False)] == r.facets.public + value, count, selected = r.facets.public[0] + assert value is True + + +@pytest.mark.asyncio +async def test_empty_search_finds_everything( + async_data_client: AsyncElasticsearch, + es_version: Tuple[int, ...], + commit_search_cls: Type[AsyncFacetedSearch], +) -> None: + cs = commit_search_cls() + r = await cs.execute() + + assert r.hits.total.value == 52 # type: ignore[attr-defined] + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, False), + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + ("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 2, False), + (datetime(2014, 3, 4, 0, 0), 1, False), + 
(datetime(2014, 3, 5, 0, 0), 3, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 9, False), + (datetime(2014, 3, 10, 0, 0), 2, False), + (datetime(2014, 3, 15, 0, 0), 4, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 2, False), + (datetime(2014, 3, 24, 0, 0), 10, False), + (datetime(2014, 4, 20, 0, 0), 2, False), + (datetime(2014, 4, 22, 0, 0), 2, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 2, False), + (datetime(2014, 5, 1, 0, 0), 2, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 19, False), + ("good", 14, False), + ("better", 19, False), + ] == r.facets.deletions + + +@pytest.mark.asyncio +async def test_term_filters_are_shown_as_selected_and_data_is_filtered( + async_data_client: AsyncElasticsearch, commit_search_cls: Type[AsyncFacetedSearch] +) -> None: + cs = commit_search_cls(filters={"files": "test_elasticsearch_dsl"}) + + r = await cs.execute() + + assert 35 == r.hits.total.value # type: ignore[attr-defined] + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, True), # selected + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + ("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 2, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 6, False), + (datetime(2014, 3, 10, 0, 0), 1, False), + (datetime(2014, 3, 15, 0, 0), 3, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 1, False), + 
(datetime(2014, 3, 24, 0, 0), 7, False), + (datetime(2014, 4, 20, 0, 0), 1, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 1, False), + (datetime(2014, 5, 1, 0, 0), 1, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 12, False), + ("good", 10, False), + ("better", 13, False), + ] == r.facets.deletions + + +@pytest.mark.asyncio +async def test_range_filters_are_shown_as_selected_and_data_is_filtered( + async_data_client: AsyncElasticsearch, commit_search_cls: Type[AsyncFacetedSearch] +) -> None: + cs = commit_search_cls(filters={"deletions": "better"}) + + r = await cs.execute() + + assert 19 == r.hits.total.value # type: ignore[attr-defined] + + +@pytest.mark.asyncio +async def test_pagination( + async_data_client: AsyncElasticsearch, commit_search_cls: Type[AsyncFacetedSearch] +) -> None: + cs = commit_search_cls() + cs = cs[0:20] + + assert 52 == await cs.count() + assert 20 == len(await cs.execute()) diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_index.py b/test_elasticsearch/test_dsl/test_integration/_async/test_index.py new file mode 100644 index 000000000..10c426e5d --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_index.py @@ -0,0 +1,162 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import ( + AsyncComposableIndexTemplate, + AsyncDocument, + AsyncIndex, + AsyncIndexTemplate, + Date, + Text, + analysis, +) + + +class Post(AsyncDocument): + title = Text(analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword")) + published_from = Date() + + +@pytest.mark.asyncio +async def test_index_template_works(async_write_client: AsyncElasticsearch) -> None: + it = AsyncIndexTemplate("test-template", "test-legacy-*") + it.document(Post) + it.settings(number_of_replicas=0, number_of_shards=1) + await it.save() + + i = AsyncIndex("test-legacy-blog") + await i.create() + + assert { + "test-legacy-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == await async_write_client.indices.get_mapping(index="test-legacy-blog") + + +@pytest.mark.asyncio +async def test_composable_index_template_works( + async_write_client: AsyncElasticsearch, +) -> None: + it = AsyncComposableIndexTemplate("test-template", "test-*") + it.document(Post) + it.settings(number_of_replicas=0, number_of_shards=1) + await it.save() + + i = AsyncIndex("test-blog") + await i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == await async_write_client.indices.get_mapping(index="test-blog") + + +@pytest.mark.asyncio +async def 
test_index_can_be_saved_even_with_settings( + async_write_client: AsyncElasticsearch, +) -> None: + i = AsyncIndex("test-blog", using=async_write_client) + i.settings(number_of_shards=3, number_of_replicas=0) + await i.save() + i.settings(number_of_replicas=1) + await i.save() + + assert ( + "1" + == (await i.get_settings())["test-blog"]["settings"]["index"][ + "number_of_replicas" + ] + ) + + +@pytest.mark.asyncio +async def test_index_exists(async_data_client: AsyncElasticsearch) -> None: + assert await AsyncIndex("git").exists() + assert not await AsyncIndex("not-there").exists() + + +@pytest.mark.asyncio +async def test_index_can_be_created_with_settings_and_mappings( + async_write_client: AsyncElasticsearch, +) -> None: + i = AsyncIndex("test-blog", using=async_write_client) + i.document(Post) + i.settings(number_of_replicas=0, number_of_shards=1) + await i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == await async_write_client.indices.get_mapping(index="test-blog") + + settings = await async_write_client.indices.get_settings(index="test-blog") + assert settings["test-blog"]["settings"]["index"]["number_of_replicas"] == "0" + assert settings["test-blog"]["settings"]["index"]["number_of_shards"] == "1" + assert settings["test-blog"]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } + + +@pytest.mark.asyncio +async def test_delete(async_write_client: AsyncElasticsearch) -> None: + await async_write_client.indices.create( + index="test-index", + body={"settings": {"number_of_replicas": 0, "number_of_shards": 1}}, + ) + + i = AsyncIndex("test-index", using=async_write_client) + await i.delete() + assert not await async_write_client.indices.exists(index="test-index") + + +@pytest.mark.asyncio +async def test_multiple_indices_with_same_doc_type_work( + 
async_write_client: AsyncElasticsearch, +) -> None: + i1 = AsyncIndex("test-index-1", using=async_write_client) + i2 = AsyncIndex("test-index-2", using=async_write_client) + + for i in (i1, i2): + i.document(Post) + await i.create() + + for j in ("test-index-1", "test-index-2"): + settings = await async_write_client.indices.get_settings(index=j) + assert settings[j]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_mapping.py b/test_elasticsearch/test_dsl/test_integration/_async/test_mapping.py new file mode 100644 index 000000000..3f860ba59 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_mapping.py @@ -0,0 +1,171 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest +from elasticsearch import AsyncElasticsearch +from pytest import raises + +from elasticsearch.dsl import AsyncMapping, analysis, exceptions + + +@pytest.mark.asyncio +async def test_mapping_saved_into_es(async_write_client: AsyncElasticsearch) -> None: + m = AsyncMapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + m.field("tags", "keyword") + await m.save("test-mapping", using=async_write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "tags": {"type": "keyword"}, + } + } + } + } == await async_write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.asyncio +async def test_mapping_saved_into_es_when_index_already_exists_closed( + async_write_client: AsyncElasticsearch, +) -> None: + m = AsyncMapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + await async_write_client.indices.create(index="test-mapping") + + with raises(exceptions.IllegalOperation): + await m.save("test-mapping", using=async_write_client) + + await async_write_client.cluster.health( + index="test-mapping", wait_for_status="yellow" + ) + await async_write_client.indices.close(index="test-mapping") + await m.save("test-mapping", using=async_write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": {"name": {"type": "text", "analyzer": "my_analyzer"}} + } + } + } == await async_write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.asyncio +async def test_mapping_saved_into_es_when_index_already_exists_with_analysis( + async_write_client: AsyncElasticsearch, +) -> None: + m = AsyncMapping() + analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword") + m.field("name", "text", analyzer=analyzer) + + new_analysis = analyzer.get_analysis_definition() + new_analysis["analyzer"]["other_analyzer"] = { + "type": "custom", + "tokenizer": 
"whitespace", + } + await async_write_client.indices.create( + index="test-mapping", body={"settings": {"analysis": new_analysis}} + ) + + m.field("title", "text", analyzer=analyzer) + await m.save("test-mapping", using=async_write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "title": {"type": "text", "analyzer": "my_analyzer"}, + } + } + } + } == await async_write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.asyncio +async def test_mapping_gets_updated_from_es( + async_write_client: AsyncElasticsearch, +) -> None: + await async_write_client.indices.create( + index="test-mapping", + body={ + "settings": {"number_of_shards": 1, "number_of_replicas": 0}, + "mappings": { + "date_detection": False, + "properties": { + "title": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + "created_at": {"type": "date"}, + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + }, + }, + }, + }, + }, + ) + + m = await AsyncMapping.from_es("test-mapping", using=async_write_client) + + assert ["comments", "created_at", "title"] == list( + sorted(m.properties.properties._d_.keys()) # type: ignore[attr-defined] + ) + assert { + "date_detection": False, + "properties": { + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + }, + "created_at": {"type": "date"}, + "title": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + } == m.to_dict() + + # test same with alias + await async_write_client.indices.put_alias(index="test-mapping", name="test-alias") + + m2 = await AsyncMapping.from_es("test-alias", 
using=async_write_client) + assert m2.to_dict() == m.to_dict() diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_search.py b/test_elasticsearch/test_dsl/test_integration/_async/test_search.py new file mode 100644 index 000000000..627656dfd --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_search.py @@ -0,0 +1,304 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +import pytest +from elasticsearch import ApiError, AsyncElasticsearch +from pytest import raises + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncMultiSearch, + AsyncSearch, + Date, + Keyword, + Q, + Text, +) +from elasticsearch.dsl.response import aggs + +from ..test_data import FLAT_DATA + + +class Repository(AsyncDocument): + created_at = Date() + description = Text(analyzer="snowball") + tags = Keyword() + + @classmethod + def search(cls) -> AsyncSearch["Repository"]: # type: ignore[override] + return super().search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(AsyncDocument): + class Index: + name = "flat-git" + + +@pytest.mark.asyncio +async def test_filters_aggregation_buckets_are_accessible( + async_data_client: AsyncElasticsearch, +) -> None: + has_tests_query = Q("term", files="test_elasticsearch_dsl") + s = Commit.search()[0:0] + s.aggs.bucket("top_authors", "terms", field="author.name.raw").bucket( + "has_tests", "filters", filters={"yes": has_tests_query, "no": ~has_tests_query} + ).metric("lines", "stats", field="stats.lines") + + response = await s.execute() + + assert isinstance( + response.aggregations.top_authors.buckets[0].has_tests.buckets.yes, aggs.Bucket + ) + assert ( + 35 + == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.doc_count + ) + assert ( + 228 + == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.lines.max + ) + + +@pytest.mark.asyncio +async def test_top_hits_are_wrapped_in_response( + async_data_client: AsyncElasticsearch, +) -> None: + s = Commit.search()[0:0] + s.aggs.bucket("top_authors", "terms", field="author.name.raw").metric( + "top_commits", "top_hits", size=5 + ) + response = await s.execute() + + top_commits = response.aggregations.top_authors.buckets[0].top_commits + assert isinstance(top_commits, aggs.TopHitsData) + assert 5 == len(top_commits) + + hits = [h for h in top_commits] + assert 5 == len(hits) + assert 
isinstance(hits[0], Commit) + + +@pytest.mark.asyncio +async def test_inner_hits_are_wrapped_in_response( + async_data_client: AsyncElasticsearch, +) -> None: + s = AsyncSearch(index="git")[0:1].query( + "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all") + ) + response = await s.execute() + + commit = response.hits[0] + assert isinstance(commit.meta.inner_hits.repo, response.__class__) + assert repr(commit.meta.inner_hits.repo[0]).startswith( + "<Hit(git/elasticsearch-dsl-py): " + ) + + +@pytest.mark.asyncio +async def test_inner_hits_are_serialized_to_dict( + async_data_client: AsyncElasticsearch, +) -> None: + s = AsyncSearch(index="git")[0:1].query( + "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all") + ) + response = await s.execute() + d = response.to_dict(recursive=True) + assert isinstance(d, dict) + assert isinstance(d["hits"]["hits"][0]["inner_hits"]["repo"], dict) + + # iterating over the results changes the format of the internal AttrDict + for hit in response: + pass + + d = response.to_dict(recursive=True) + assert isinstance(d, dict) + assert isinstance(d["hits"]["hits"][0]["inner_hits"]["repo"], dict) + + +@pytest.mark.asyncio +async def test_scan_respects_doc_types(async_data_client: AsyncElasticsearch) -> None: + repos = [repo async for repo in Repository.search().scan()] + + assert 1 == len(repos) + assert isinstance(repos[0], Repository) + assert repos[0].organization == "elasticsearch" + + +@pytest.mark.asyncio +async def test_scan_iterates_through_all_docs( + async_data_client: AsyncElasticsearch, +) -> None: + s = AsyncSearch(index="flat-git") + + commits = [commit async for commit in s.scan()] + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.asyncio +async def test_search_after(async_data_client: AsyncElasticsearch) -> None: + page_size = 7 + s = AsyncSearch(index="flat-git")[:page_size].sort("authored_date") + commits = [] + while True: + r = await s.execute() + commits += r.hits + if len(r.hits) < page_size: + break + s = s.search_after() + + assert 52 == len(commits) + assert {d["_id"] for d
in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.asyncio +async def test_search_after_no_search(async_data_client: AsyncElasticsearch) -> None: + s = AsyncSearch(index="flat-git") + with raises( + ValueError, match="A search must be executed before using search_after" + ): + s.search_after() + await s.count() + with raises( + ValueError, match="A search must be executed before using search_after" + ): + s.search_after() + + +@pytest.mark.asyncio +async def test_search_after_no_sort(async_data_client: AsyncElasticsearch) -> None: + s = AsyncSearch(index="flat-git") + r = await s.execute() + with raises( + ValueError, match="Cannot use search_after when results are not sorted" + ): + r.search_after() + + +@pytest.mark.asyncio +async def test_search_after_no_results(async_data_client: AsyncElasticsearch) -> None: + s = AsyncSearch(index="flat-git")[:100].sort("authored_date") + r = await s.execute() + assert 52 == len(r.hits) + s = s.search_after() + r = await s.execute() + assert 0 == len(r.hits) + with raises( + ValueError, match="Cannot use search_after when there are no search results" + ): + r.search_after() + + +@pytest.mark.asyncio +async def test_point_in_time(async_data_client: AsyncElasticsearch) -> None: + page_size = 7 + commits = [] + async with AsyncSearch(index="flat-git")[:page_size].point_in_time( + keep_alive="30s" + ) as s: + pit_id = s._extra["pit"]["id"] + while True: + r = await s.execute() + commits += r.hits + if len(r.hits) < page_size: + break + s = s.search_after() + assert pit_id == s._extra["pit"]["id"] + assert "30s" == s._extra["pit"]["keep_alive"] + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.asyncio +async def test_iterate(async_data_client: AsyncElasticsearch) -> None: + s = AsyncSearch(index="flat-git") + + commits = [commit async for commit in s.iterate()] + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in 
commits} + + +@pytest.mark.asyncio +async def test_response_is_cached(async_data_client: AsyncElasticsearch) -> None: + s = Repository.search() + repos = [repo async for repo in s] + + assert hasattr(s, "_response") + assert s._response.hits == repos + + +@pytest.mark.asyncio +async def test_multi_search(async_data_client: AsyncElasticsearch) -> None: + s1 = Repository.search() + s2 = AsyncSearch[Repository](index="flat-git") + + ms = AsyncMultiSearch[Repository]() + ms = ms.add(s1).add(s2) + + r1, r2 = await ms.execute() + + assert 1 == len(r1) + assert isinstance(r1[0], Repository) + assert r1._search is s1 + + assert 52 == r2.hits.total.value # type: ignore[attr-defined] + assert r2._search is s2 + + +@pytest.mark.asyncio +async def test_multi_missing(async_data_client: AsyncElasticsearch) -> None: + s1 = Repository.search() + s2 = AsyncSearch[Repository](index="flat-git") + s3 = AsyncSearch[Repository](index="does_not_exist") + + ms = AsyncMultiSearch[Repository]() + ms = ms.add(s1).add(s2).add(s3) + + with raises(ApiError): + await ms.execute() + + r1, r2, r3 = await ms.execute(raise_on_error=False) + + assert 1 == len(r1) + assert isinstance(r1[0], Repository) + assert r1._search is s1 + + assert 52 == r2.hits.total.value # type: ignore[attr-defined] + assert r2._search is s2 + + assert r3 is None + + +@pytest.mark.asyncio +async def test_raw_subfield_can_be_used_in_aggs( + async_data_client: AsyncElasticsearch, +) -> None: + s = AsyncSearch(index="git")[0:0] + s.aggs.bucket("authors", "terms", field="author.name.raw", size=1) + + r = await s.execute() + + authors = r.aggregations.authors + assert 1 == len(authors) + assert {"key": "Honza Král", "doc_count": 52} == authors[0] diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_update_by_query.py b/test_elasticsearch/test_dsl/test_integration/_async/test_update_by_query.py new file mode 100644 index 000000000..1fbf9d0e9 --- /dev/null +++ 
b/test_elasticsearch/test_dsl/test_integration/_async/test_update_by_query.py @@ -0,0 +1,85 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import AsyncUpdateByQuery +from elasticsearch.dsl.search import Q + + +@pytest.mark.asyncio +async def test_update_by_query_no_script( + async_write_client: AsyncElasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + AsyncUpdateByQuery(using=async_write_client) + .index(index) + .filter(~Q("exists", field="is_public")) + ) + response = await ubq.execute() + + assert response.total == 52 + assert response["took"] > 0 + assert not response.timed_out + assert response.updated == 52 + assert response.deleted == 0 + assert response.took > 0 + assert response.success() + + +@pytest.mark.asyncio +async def test_update_by_query_with_script( + async_write_client: AsyncElasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + AsyncUpdateByQuery(using=async_write_client) + .index(index) + .filter(~Q("exists", field="parent_shas")) + .script(source="ctx._source.is_public = false") + ) + ubq = ubq.params(conflicts="proceed") + + response = await ubq.execute() + 
assert response.total == 2 + assert response.updated == 2 + assert response.version_conflicts == 0 + + +@pytest.mark.asyncio +async def test_delete_by_query_with_script( + async_write_client: AsyncElasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + AsyncUpdateByQuery(using=async_write_client) + .index(index) + .filter(Q("match", parent_shas="1dd19210b5be92b960f7db6f66ae526288edccc3")) + .script(source='ctx.op = "delete"') + ) + ubq = ubq.params(conflicts="proceed") + + response = await ubq.execute() + + assert response.total == 1 + assert response.deleted == 1 + assert response.success() diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/__init__.py b/test_elasticsearch/test_dsl/test_integration/_sync/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_analysis.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_analysis.py new file mode 100644 index 000000000..a12756c62 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_analysis.py @@ -0,0 +1,54 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import analyzer, token_filter, tokenizer + + +@pytest.mark.sync +def test_simulate_with_just__builtin_tokenizer( + client: Elasticsearch, +) -> None: + a = analyzer("my-analyzer", tokenizer="keyword") + tokens = (a.simulate("Hello World!", using=client)).tokens + + assert len(tokens) == 1 + assert tokens[0].token == "Hello World!" 
+ + +@pytest.mark.sync +def test_simulate_complex(client: Elasticsearch) -> None: + a = analyzer( + "my-analyzer", + tokenizer=tokenizer("split_words", "simple_pattern_split", pattern=":"), + filter=["lowercase", token_filter("no-ifs", "stop", stopwords=["if"])], + ) + + tokens = (a.simulate("if:this:works", using=client)).tokens + + assert len(tokens) == 2 + assert ["this", "works"] == [t.token for t in tokens] + + +@pytest.mark.sync +def test_simulate_builtin(client: Elasticsearch) -> None: + a = analyzer("my-analyzer", "english") + tokens = (a.simulate("fixes running")).tokens + + assert ["fix", "run"] == [t.token for t in tokens] diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py new file mode 100644 index 000000000..08f983b6e --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py @@ -0,0 +1,844 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# this file creates several documents using bad or no types because +# these are still supported and should be kept functional in spite +# of not having appropriate type hints. 
For that reason the comment +# below disables many mypy checks that fails as a result of this. +# mypy: disable-error-code="assignment, index, arg-type, call-arg, operator, comparison-overlap, attr-defined" + +from datetime import datetime +from ipaddress import ip_address +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Tuple, Union + +import pytest +from elasticsearch import ConflictError, Elasticsearch, NotFoundError +from elasticsearch.helpers.errors import BulkIndexError +from pytest import raises +from pytz import timezone + +from elasticsearch.dsl import ( + Binary, + Boolean, + Date, + DenseVector, + Document, + Double, + InnerDoc, + Ip, + Keyword, + Long, + Mapping, + MetaField, + Nested, + Object, + Q, + RankFeatures, + Search, + Text, + analyzer, + mapped_field, +) +from elasticsearch.dsl.utils import AttrList + +snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"]) + + +class User(InnerDoc): + name = Text(fields={"raw": Keyword()}) + + +class Wiki(Document): + owner = Object(User) + views = Long() + ranked = RankFeatures() + + class Index: + name = "test-wiki" + + +class Repository(Document): + owner = Object(User) + created_at = Date() + description = Text(analyzer=snowball) + tags = Keyword() + + @classmethod + def search(cls) -> Search["Repository"]: # type: ignore[override] + return super().search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(Document): + committed_date = Date() + authored_date = Date() + description = Text(analyzer=snowball) + + class Index: + name = "flat-git" + + class Meta: + mapping = Mapping() + + +class History(InnerDoc): + timestamp = Date() + diff = Text() + + +class Comment(InnerDoc): + content = Text() + created_at = Date() + author = Object(User) + history = Nested(History) + + class Meta: + dynamic = MetaField(False) + + +class PullRequest(Document): + comments = Nested(Comment) + created_at = Date() + + class Index: + name = 
"test-prs" + + +class SerializationDoc(Document): + i = Long() + b = Boolean() + d = Double() + bin = Binary() + ip = Ip() + + class Index: + name = "test-serialization" + + +class Tags(Document): + tags = Keyword(multi=True) + + class Index: + name = "tags" + + +@pytest.mark.sync +def test_serialization(write_client: Elasticsearch) -> None: + SerializationDoc.init() + write_client.index( + index="test-serialization", + id=42, + body={ + "i": [1, 2, "3", None], + "b": [True, False, "true", "false", None], + "d": [0.1, "-0.1", None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "ip": ["::1", "127.0.0.1", None], + }, + ) + sd = SerializationDoc.get(id=42) + assert sd is not None + + assert sd.i == [1, 2, 3, None] + assert sd.b == [True, False, True, False, None] + assert sd.d == [0.1, -0.1, None] + assert sd.bin == [b"Hello World", None] + assert sd.ip == [ip_address("::1"), ip_address("127.0.0.1"), None] + + assert sd.to_dict() == { + "b": [True, False, True, False, None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "d": [0.1, -0.1, None], + "i": [1, 2, 3, None], + "ip": ["::1", "127.0.0.1", None], + } + + +@pytest.mark.sync +def test_nested_inner_hits_are_wrapped_properly(pull_request: Any) -> None: + history_query = Q( + "nested", + path="comments.history", + inner_hits={}, + query=Q("match", comments__history__diff="ahoj"), + ) + s = PullRequest.search().query( + "nested", inner_hits={}, path="comments", query=history_query + ) + + response = s.execute() + pr = response.hits[0] + assert isinstance(pr, PullRequest) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].history[0], History) + + comment = pr.meta.inner_hits.comments.hits[0] + assert isinstance(comment, Comment) + assert comment.author.name == "honzakral" + assert isinstance(comment.history[0], History) + + history = comment.meta.inner_hits["comments.history"].hits[0] + assert isinstance(history, History) + assert history.timestamp == datetime(2012, 1, 1) + assert "score" in 
history.meta + + +@pytest.mark.sync +def test_nested_inner_hits_are_deserialized_properly( + pull_request: Any, +) -> None: + s = PullRequest.search().query( + "nested", + inner_hits={}, + path="comments", + query=Q("match", comments__content="hello"), + ) + + response = s.execute() + pr = response.hits[0] + assert isinstance(pr.created_at, datetime) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].created_at, datetime) + + +@pytest.mark.sync +def test_nested_top_hits_are_wrapped_properly(pull_request: Any) -> None: + s = PullRequest.search() + s.aggs.bucket("comments", "nested", path="comments").metric( + "hits", "top_hits", size=1 + ) + + r = s.execute() + + print(r._d_) + assert isinstance(r.aggregations.comments.hits.hits[0], Comment) + + +@pytest.mark.sync +def test_update_object_field(write_client: Elasticsearch) -> None: + Wiki.init() + w = Wiki( + owner=User(name="Honza Kral"), + _id="elasticsearch-py", + ranked={"test1": 0.1, "topic2": 0.2}, + ) + w.save() + + assert "updated" == w.update(owner=[{"name": "Honza"}, User(name="Nick")]) + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + w = Wiki.get(id="elasticsearch-py") + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + assert w.ranked == {"test1": 0.1, "topic2": 0.2} + + +@pytest.mark.sync +def test_update_script(write_client: Elasticsearch) -> None: + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + w.save() + + w.update(script="ctx._source.views += params.inc", inc=5) + w = Wiki.get(id="elasticsearch-py") + assert w.views == 47 + + +@pytest.mark.sync +def test_update_script_with_dict(write_client: Elasticsearch) -> None: + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + w.save() + + w.update( + script={ + "source": "ctx._source.views += params.inc1 + params.inc2", + "params": {"inc1": 2}, + "lang": "painless", + }, + inc2=3, + ) + w = 
Wiki.get(id="elasticsearch-py") + assert w.views == 47 + + +@pytest.mark.sync +def test_update_retry_on_conflict(write_client: Elasticsearch) -> None: + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + w.save() + + w1 = Wiki.get(id="elasticsearch-py") + w2 = Wiki.get(id="elasticsearch-py") + assert w1 is not None + assert w2 is not None + + w1.update(script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1) + w2.update(script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1) + + w = Wiki.get(id="elasticsearch-py") + assert w.views == 52 + + +@pytest.mark.sync +@pytest.mark.parametrize("retry_on_conflict", [None, 0]) +def test_update_conflicting_version( + write_client: Elasticsearch, retry_on_conflict: bool +) -> None: + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + w.save() + + w1 = Wiki.get(id="elasticsearch-py") + w2 = Wiki.get(id="elasticsearch-py") + assert w1 is not None + assert w2 is not None + + w1.update(script="ctx._source.views += params.inc", inc=5) + + with raises(ConflictError): + w2.update( + script="ctx._source.views += params.inc", + inc=5, + retry_on_conflict=retry_on_conflict, + ) + + +@pytest.mark.sync +def test_save_and_update_return_doc_meta( + write_client: Elasticsearch, +) -> None: + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + resp = w.save(return_doc_meta=True) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "created" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_version", + "result", + } + + resp = w.update( + script="ctx._source.views += params.inc", inc=5, return_doc_meta=True + ) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "updated" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_version", + "result", + } + + +@pytest.mark.sync +def 
test_init(write_client: Elasticsearch) -> None: + Repository.init(index="test-git") + + assert write_client.indices.exists(index="test-git") + + +@pytest.mark.sync +def test_get_raises_404_on_index_missing( + data_client: Elasticsearch, +) -> None: + with raises(NotFoundError): + Repository.get("elasticsearch-dsl-php", index="not-there") + + +@pytest.mark.sync +def test_get_raises_404_on_non_existent_id( + data_client: Elasticsearch, +) -> None: + with raises(NotFoundError): + Repository.get("elasticsearch-dsl-php") + + +@pytest.mark.sync +def test_get_returns_none_if_404_ignored( + data_client: Elasticsearch, +) -> None: + assert None is Repository.get( + "elasticsearch-dsl-php", using=data_client.options(ignore_status=404) + ) + + +@pytest.mark.sync +def test_get_returns_none_if_404_ignored_and_index_doesnt_exist( + data_client: Elasticsearch, +) -> None: + assert None is Repository.get( + "42", index="not-there", using=data_client.options(ignore_status=404) + ) + + +@pytest.mark.sync +def test_get(data_client: Elasticsearch) -> None: + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + assert datetime(2014, 3, 3) == elasticsearch_repo.created_at + + +@pytest.mark.sync +def test_exists_return_true(data_client: Elasticsearch) -> None: + assert Repository.exists("elasticsearch-dsl-py") + + +@pytest.mark.sync +def test_exists_false(data_client: Elasticsearch) -> None: + assert not Repository.exists("elasticsearch-dsl-php") + + +@pytest.mark.sync +def test_get_with_tz_date(data_client: Elasticsearch) -> None: + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + tzinfo = timezone("Europe/Prague") + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123000)) + == first_commit.authored_date + ) + + +@pytest.mark.sync +def 
test_save_with_tz_date(data_client: Elasticsearch) -> None: + tzinfo = timezone("Europe/Prague") + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + first_commit.committed_date = tzinfo.localize( + datetime(2014, 5, 2, 13, 47, 19, 123456) + ) + first_commit.save() + + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123456)) + == first_commit.committed_date + ) + + +COMMIT_DOCS_WITH_MISSING = [ + {"_id": "0"}, # Missing + {"_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"}, # Existing + {"_id": "f"}, # Missing + {"_id": "eb3e543323f189fd7b698e66295427204fff5755"}, # Existing +] + + +@pytest.mark.sync +def test_mget(data_client: Elasticsearch) -> None: + commits = Commit.mget(COMMIT_DOCS_WITH_MISSING) + assert commits[0] is None + assert commits[1] is not None + assert commits[1].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[2] is None + assert commits[3] is not None + assert commits[3].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +@pytest.mark.sync +def test_mget_raises_exception_when_missing_param_is_invalid( + data_client: Elasticsearch, +) -> None: + with raises(ValueError): + Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raj") + + +@pytest.mark.sync +def test_mget_raises_404_when_missing_param_is_raise( + data_client: Elasticsearch, +) -> None: + with raises(NotFoundError): + Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raise") + + +@pytest.mark.sync +def test_mget_ignores_missing_docs_when_missing_param_is_skip( + data_client: Elasticsearch, +) -> None: + commits = Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="skip") + assert commits[0] is not None + assert commits[0].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[1] is not None + assert 
commits[1].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +@pytest.mark.sync +def test_update_works_from_search_response( + data_client: Elasticsearch, +) -> None: + elasticsearch_repo = (Repository.search().execute())[0] + + elasticsearch_repo.update(owner={"other_name": "elastic"}) + assert "elastic" == elasticsearch_repo.owner.other_name + + new_version = Repository.get("elasticsearch-dsl-py") + assert new_version is not None + assert "elastic" == new_version.owner.other_name + assert "elasticsearch" == new_version.owner.name + + +@pytest.mark.sync +def test_update(data_client: Elasticsearch) -> None: + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + v = elasticsearch_repo.meta.version + + old_seq_no = elasticsearch_repo.meta.seq_no + elasticsearch_repo.update(owner={"new_name": "elastic"}, new_field="testing-update") + + assert "elastic" == elasticsearch_repo.owner.new_name + assert "testing-update" == elasticsearch_repo.new_field + + # assert version has been updated + assert elasticsearch_repo.meta.version == v + 1 + + new_version = Repository.get("elasticsearch-dsl-py") + assert new_version is not None + assert "testing-update" == new_version.new_field + assert "elastic" == new_version.owner.new_name + assert "elasticsearch" == new_version.owner.name + assert "seq_no" in new_version.meta + assert new_version.meta.seq_no != old_seq_no + assert "primary_term" in new_version.meta + + +@pytest.mark.sync +def test_save_updates_existing_doc(data_client: Elasticsearch) -> None: + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + + elasticsearch_repo.new_field = "testing-save" + old_seq_no = elasticsearch_repo.meta.seq_no + assert "updated" == elasticsearch_repo.save() + + new_repo = data_client.get(index="git", id="elasticsearch-dsl-py") + assert "testing-save" == new_repo["_source"]["new_field"] + assert new_repo["_seq_no"] != old_seq_no + assert 
new_repo["_seq_no"] == elasticsearch_repo.meta.seq_no + + +@pytest.mark.sync +def test_update_empty_field(client: Elasticsearch) -> None: + Tags._index.delete(ignore_unavailable=True) + Tags.init() + d = Tags(id="123", tags=["a", "b"]) + d.save(refresh=True) + d.update(tags=[], refresh=True) + assert d.tags == [] + + r = Tags.search().execute() + assert r.hits[0].tags == [] + + +@pytest.mark.sync +def test_save_automatically_uses_seq_no_and_primary_term( + data_client: Elasticsearch, +) -> None: + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + elasticsearch_repo.save() + + +@pytest.mark.sync +def test_delete_automatically_uses_seq_no_and_primary_term( + data_client: Elasticsearch, +) -> None: + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + elasticsearch_repo.delete() + + +def assert_doc_equals(expected: Any, actual: Any) -> None: + for f in expected: + assert f in actual + assert actual[f] == expected[f] + + +@pytest.mark.sync +def test_can_save_to_different_index( + write_client: Elasticsearch, +) -> None: + test_repo = Repository(description="testing", meta={"id": 42}) + assert test_repo.save(index="test-document") + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"description": "testing"}, + }, + write_client.get(index="test-document", id=42), + ) + + +@pytest.mark.sync +def test_save_without_skip_empty_will_include_empty_fields( + write_client: Elasticsearch, +) -> None: + test_repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42}) + assert test_repo.save(index="test-document", skip_empty=False) + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"field_1": [], "field_2": None, "field_3": {}}, + 
}, + write_client.get(index="test-document", id=42), + ) + + +@pytest.mark.sync +def test_delete(write_client: Elasticsearch) -> None: + write_client.create( + index="test-document", + id="elasticsearch-dsl-py", + body={ + "organization": "elasticsearch", + "created_at": "2014-03-03", + "owner": {"name": "elasticsearch"}, + }, + ) + + test_repo = Repository(meta={"id": "elasticsearch-dsl-py"}) + test_repo.meta.index = "test-document" + test_repo.delete() + + assert not write_client.exists( + index="test-document", + id="elasticsearch-dsl-py", + ) + + +@pytest.mark.sync +def test_search(data_client: Elasticsearch) -> None: + assert Repository.search().count() == 1 + + +@pytest.mark.sync +def test_search_returns_proper_doc_classes( + data_client: Elasticsearch, +) -> None: + result = Repository.search().execute() + + elasticsearch_repo = result.hits[0] + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + + +@pytest.mark.sync +def test_refresh_mapping(data_client: Elasticsearch) -> None: + class Commit(Document): + class Index: + name = "git" + + Commit._index.load_mappings() + + assert "stats" in Commit._index._mapping + assert "committer" in Commit._index._mapping + assert "description" in Commit._index._mapping + assert "committed_date" in Commit._index._mapping + assert isinstance(Commit._index._mapping["committed_date"], Date) + + +@pytest.mark.sync +def test_highlight_in_meta(data_client: Elasticsearch) -> None: + commit = ( + Commit.search() + .query("match", description="inverting") + .highlight("description") + .execute() + )[0] + + assert isinstance(commit, Commit) + assert "description" in commit.meta.highlight + assert isinstance(commit.meta.highlight["description"], AttrList) + assert len(commit.meta.highlight["description"]) > 0 + + +@pytest.mark.sync +def test_bulk(data_client: Elasticsearch) -> None: + class Address(InnerDoc): + street: str + active: bool + + class Doc(Document): + if 
TYPE_CHECKING: + _id: int + name: str + age: int + languages: List[str] = mapped_field(Keyword()) + addresses: List[Address] + + class Index: + name = "bulk-index" + + Doc._index.delete(ignore_unavailable=True) + Doc.init() + + def gen1() -> Iterator[Union[Doc, Dict[str, Any]]]: + yield Doc( + name="Joe", + age=33, + languages=["en", "fr"], + addresses=[ + Address(street="123 Main St", active=True), + Address(street="321 Park Dr.", active=False), + ], + ) + yield Doc(name="Susan", age=20, languages=["en"]) + yield {"_op_type": "create", "_id": "45", "_source": Doc(name="Sarah", age=45)} + + Doc.bulk(gen1(), refresh=True) + docs = list(Doc.search().execute()) + assert len(docs) == 3 + assert docs[0].to_dict() == { + "name": "Joe", + "age": 33, + "languages": [ + "en", + "fr", + ], + "addresses": [ + { + "active": True, + "street": "123 Main St", + }, + { + "active": False, + "street": "321 Park Dr.", + }, + ], + } + assert docs[1].to_dict() == { + "name": "Susan", + "age": 20, + "languages": ["en"], + } + assert docs[2].to_dict() == { + "name": "Sarah", + "age": 45, + } + assert docs[2].meta.id == "45" + + def gen2() -> Iterator[Union[Doc, Dict[str, Any]]]: + yield {"_op_type": "create", "_id": "45", "_source": Doc(name="Sarah", age=45)} + + # a "create" action with an existing id should fail + with raises(BulkIndexError): + Doc.bulk(gen2(), refresh=True) + + def gen3() -> Iterator[Union[Doc, Dict[str, Any]]]: + yield Doc(_id="45", name="Sarah", age=45, languages=["es"]) + yield {"_op_type": "delete", "_id": docs[1].meta.id} + + Doc.bulk(gen3(), refresh=True) + with raises(NotFoundError): + Doc.get(docs[1].meta.id) + doc = Doc.get("45") + assert doc is not None + assert (doc).to_dict() == { + "name": "Sarah", + "age": 45, + "languages": ["es"], + } + + +@pytest.mark.sync +def test_legacy_dense_vector( + client: Elasticsearch, es_version: Tuple[int, ...] 
+) -> None: + if es_version >= (8, 16): + pytest.skip("this test is a legacy version for Elasticsearch 8.15 or older") + + class Doc(Document): + float_vector: List[float] = mapped_field(DenseVector(dims=3)) + + class Index: + name = "vectors" + + Doc._index.delete(ignore_unavailable=True) + Doc.init() + + doc = Doc(float_vector=[1.0, 1.2, 2.3]) + doc.save(refresh=True) + + docs = Doc.search().execute() + assert len(docs) == 1 + assert docs[0].float_vector == doc.float_vector + + +@pytest.mark.sync +def test_dense_vector(client: Elasticsearch, es_version: Tuple[int, ...]) -> None: + if es_version < (8, 16): + pytest.skip("this test requires Elasticsearch 8.16 or newer") + + class Doc(Document): + float_vector: List[float] = mapped_field(DenseVector()) + byte_vector: List[int] = mapped_field(DenseVector(element_type="byte")) + bit_vector: str = mapped_field(DenseVector(element_type="bit")) + + class Index: + name = "vectors" + + Doc._index.delete(ignore_unavailable=True) + Doc.init() + + doc = Doc( + float_vector=[1.0, 1.2, 2.3], byte_vector=[12, 23, 34, 45], bit_vector="12abf0" + ) + doc.save(refresh=True) + + docs = Doc.search().execute() + assert len(docs) == 1 + assert docs[0].float_vector == doc.float_vector + assert docs[0].byte_vector == doc.byte_vector + assert docs[0].bit_vector == doc.bit_vector diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py new file mode 100644 index 000000000..114800644 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py @@ -0,0 +1,305 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. 
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime +from typing import Tuple, Type + +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import A, Boolean, Date, Document, Keyword, Search +from elasticsearch.dsl.faceted_search import ( + DateHistogramFacet, + FacetedSearch, + NestedFacet, + RangeFacet, + TermsFacet, +) + +from .test_document import PullRequest + + +class Repos(Document): + is_public = Boolean() + created_at = Date() + + class Index: + name = "git" + + +class Commit(Document): + files = Keyword() + committed_date = Date() + + class Index: + name = "git" + + +class MetricSearch(FacetedSearch): + index = "git" + doc_types = [Commit] + + facets = { + "files": TermsFacet(field="files", metric=A("max", field="committed_date")), + } + + +@pytest.fixture(scope="session") +def commit_search_cls(es_version: Tuple[int, ...]) -> Type[FacetedSearch]: + if es_version >= (7, 2): + interval_kwargs = {"fixed_interval": "1d"} + else: + interval_kwargs = {"interval": "day"} + + class CommitSearch(FacetedSearch): + index = "flat-git" + fields = ( + "description", + "files", + ) + + facets = { + "files": TermsFacet(field="files"), + "frequency": DateHistogramFacet( + field="authored_date", min_doc_count=1, **interval_kwargs + ), + "deletions": RangeFacet( + field="stats.deletions", + ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))], + ), + } + + return CommitSearch + + 
+@pytest.fixture(scope="session") +def repo_search_cls(es_version: Tuple[int, ...]) -> Type[FacetedSearch]: + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class RepoSearch(FacetedSearch): + index = "git" + doc_types = [Repos] + facets = { + "public": TermsFacet(field="is_public"), + "created": DateHistogramFacet( + field="created_at", **{interval_type: "month"} + ), + } + + def search(self) -> Search: + s = super().search() + return s.filter("term", commit_repo="repo") + + return RepoSearch + + +@pytest.fixture(scope="session") +def pr_search_cls(es_version: Tuple[int, ...]) -> Type[FacetedSearch]: + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class PRSearch(FacetedSearch): + index = "test-prs" + doc_types = [PullRequest] + facets = { + "comments": NestedFacet( + "comments", + DateHistogramFacet( + field="comments.created_at", **{interval_type: "month"} + ), + ) + } + + return PRSearch + + +@pytest.mark.sync +def test_facet_with_custom_metric(data_client: Elasticsearch) -> None: + ms = MetricSearch() + r = ms.execute() + + dates = [f[1] for f in r.facets.files] + assert dates == list(sorted(dates, reverse=True)) + assert dates[0] == 1399038439000 + + +@pytest.mark.sync +def test_nested_facet( + pull_request: PullRequest, pr_search_cls: Type[FacetedSearch] +) -> None: + prs = pr_search_cls() + r = prs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments + + +@pytest.mark.sync +def test_nested_facet_with_filter( + pull_request: PullRequest, pr_search_cls: Type[FacetedSearch] +) -> None: + prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)}) + r = prs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments + + prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)}) + r = prs.execute() + 
assert not r.hits + + +@pytest.mark.sync +def test_datehistogram_facet( + data_client: Elasticsearch, repo_search_cls: Type[FacetedSearch] +) -> None: + rs = repo_search_cls() + r = rs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created + + +@pytest.mark.sync +def test_boolean_facet( + data_client: Elasticsearch, repo_search_cls: Type[FacetedSearch] +) -> None: + rs = repo_search_cls() + r = rs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(True, 1, False)] == r.facets.public + value, count, selected = r.facets.public[0] + assert value is True + + +@pytest.mark.sync +def test_empty_search_finds_everything( + data_client: Elasticsearch, + es_version: Tuple[int, ...], + commit_search_cls: Type[FacetedSearch], +) -> None: + cs = commit_search_cls() + r = cs.execute() + + assert r.hits.total.value == 52 # type: ignore[attr-defined] + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, False), + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + ("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 2, False), + (datetime(2014, 3, 4, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 3, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 9, False), + (datetime(2014, 3, 10, 0, 0), 2, False), + (datetime(2014, 3, 15, 0, 0), 4, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 2, False), + (datetime(2014, 3, 24, 0, 0), 10, False), + (datetime(2014, 4, 20, 0, 0), 2, False), + (datetime(2014, 4, 22, 0, 0), 2, False), + 
(datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 2, False), + (datetime(2014, 5, 1, 0, 0), 2, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 19, False), + ("good", 14, False), + ("better", 19, False), + ] == r.facets.deletions + + +@pytest.mark.sync +def test_term_filters_are_shown_as_selected_and_data_is_filtered( + data_client: Elasticsearch, commit_search_cls: Type[FacetedSearch] +) -> None: + cs = commit_search_cls(filters={"files": "test_elasticsearch_dsl"}) + + r = cs.execute() + + assert 35 == r.hits.total.value # type: ignore[attr-defined] + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, True), # selected + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + ("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 2, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 6, False), + (datetime(2014, 3, 10, 0, 0), 1, False), + (datetime(2014, 3, 15, 0, 0), 3, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 1, False), + (datetime(2014, 3, 24, 0, 0), 7, False), + (datetime(2014, 4, 20, 0, 0), 1, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 1, False), + (datetime(2014, 5, 1, 0, 0), 1, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 12, False), + ("good", 10, False), + ("better", 13, False), + ] == r.facets.deletions + + +@pytest.mark.sync +def 
test_range_filters_are_shown_as_selected_and_data_is_filtered( + data_client: Elasticsearch, commit_search_cls: Type[FacetedSearch] +) -> None: + cs = commit_search_cls(filters={"deletions": "better"}) + + r = cs.execute() + + assert 19 == r.hits.total.value # type: ignore[attr-defined] + + +@pytest.mark.sync +def test_pagination( + data_client: Elasticsearch, commit_search_cls: Type[FacetedSearch] +) -> None: + cs = commit_search_cls() + cs = cs[0:20] + + assert 52 == cs.count() + assert 20 == len(cs.execute()) diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_index.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_index.py new file mode 100644 index 000000000..7509f0b0f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_index.py @@ -0,0 +1,160 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import ( + ComposableIndexTemplate, + Date, + Document, + Index, + IndexTemplate, + Text, + analysis, +) + + +class Post(Document): + title = Text(analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword")) + published_from = Date() + + +@pytest.mark.sync +def test_index_template_works(write_client: Elasticsearch) -> None: + it = IndexTemplate("test-template", "test-legacy-*") + it.document(Post) + it.settings(number_of_replicas=0, number_of_shards=1) + it.save() + + i = Index("test-legacy-blog") + i.create() + + assert { + "test-legacy-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == write_client.indices.get_mapping(index="test-legacy-blog") + + +@pytest.mark.sync +def test_composable_index_template_works( + write_client: Elasticsearch, +) -> None: + it = ComposableIndexTemplate("test-template", "test-*") + it.document(Post) + it.settings(number_of_replicas=0, number_of_shards=1) + it.save() + + i = Index("test-blog") + i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == write_client.indices.get_mapping(index="test-blog") + + +@pytest.mark.sync +def test_index_can_be_saved_even_with_settings( + write_client: Elasticsearch, +) -> None: + i = Index("test-blog", using=write_client) + i.settings(number_of_shards=3, number_of_replicas=0) + i.save() + i.settings(number_of_replicas=1) + i.save() + + assert ( + "1" + == (i.get_settings())["test-blog"]["settings"]["index"]["number_of_replicas"] + ) + + +@pytest.mark.sync +def test_index_exists(data_client: Elasticsearch) -> None: + assert Index("git").exists() + assert not Index("not-there").exists() + + +@pytest.mark.sync +def test_index_can_be_created_with_settings_and_mappings( + write_client: 
Elasticsearch, +) -> None: + i = Index("test-blog", using=write_client) + i.document(Post) + i.settings(number_of_replicas=0, number_of_shards=1) + i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == write_client.indices.get_mapping(index="test-blog") + + settings = write_client.indices.get_settings(index="test-blog") + assert settings["test-blog"]["settings"]["index"]["number_of_replicas"] == "0" + assert settings["test-blog"]["settings"]["index"]["number_of_shards"] == "1" + assert settings["test-blog"]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } + + +@pytest.mark.sync +def test_delete(write_client: Elasticsearch) -> None: + write_client.indices.create( + index="test-index", + body={"settings": {"number_of_replicas": 0, "number_of_shards": 1}}, + ) + + i = Index("test-index", using=write_client) + i.delete() + assert not write_client.indices.exists(index="test-index") + + +@pytest.mark.sync +def test_multiple_indices_with_same_doc_type_work( + write_client: Elasticsearch, +) -> None: + i1 = Index("test-index-1", using=write_client) + i2 = Index("test-index-2", using=write_client) + + for i in (i1, i2): + i.document(Post) + i.create() + + for j in ("test-index-1", "test-index-2"): + settings = write_client.indices.get_settings(index=j) + assert settings[j]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_mapping.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_mapping.py new file mode 100644 index 000000000..270e79a5e --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_mapping.py @@ -0,0 +1,169 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch +from pytest import raises + +from elasticsearch.dsl import Mapping, analysis, exceptions + + +@pytest.mark.sync +def test_mapping_saved_into_es(write_client: Elasticsearch) -> None: + m = Mapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + m.field("tags", "keyword") + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "tags": {"type": "keyword"}, + } + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.sync +def test_mapping_saved_into_es_when_index_already_exists_closed( + write_client: Elasticsearch, +) -> None: + m = Mapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + write_client.indices.create(index="test-mapping") + + with raises(exceptions.IllegalOperation): + m.save("test-mapping", using=write_client) + + write_client.cluster.health(index="test-mapping", wait_for_status="yellow") + write_client.indices.close(index="test-mapping") + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": {"name": 
{"type": "text", "analyzer": "my_analyzer"}} + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.sync +def test_mapping_saved_into_es_when_index_already_exists_with_analysis( + write_client: Elasticsearch, +) -> None: + m = Mapping() + analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword") + m.field("name", "text", analyzer=analyzer) + + new_analysis = analyzer.get_analysis_definition() + new_analysis["analyzer"]["other_analyzer"] = { + "type": "custom", + "tokenizer": "whitespace", + } + write_client.indices.create( + index="test-mapping", body={"settings": {"analysis": new_analysis}} + ) + + m.field("title", "text", analyzer=analyzer) + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "title": {"type": "text", "analyzer": "my_analyzer"}, + } + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.sync +def test_mapping_gets_updated_from_es( + write_client: Elasticsearch, +) -> None: + write_client.indices.create( + index="test-mapping", + body={ + "settings": {"number_of_shards": 1, "number_of_replicas": 0}, + "mappings": { + "date_detection": False, + "properties": { + "title": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + "created_at": {"type": "date"}, + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + }, + }, + }, + }, + }, + ) + + m = Mapping.from_es("test-mapping", using=write_client) + + assert ["comments", "created_at", "title"] == list( + sorted(m.properties.properties._d_.keys()) # type: ignore[attr-defined] + ) + assert { + "date_detection": False, + "properties": { + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + 
"analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + }, + "created_at": {"type": "date"}, + "title": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + } == m.to_dict() + + # test same with alias + write_client.indices.put_alias(index="test-mapping", name="test-alias") + + m2 = Mapping.from_es("test-alias", using=write_client) + assert m2.to_dict() == m.to_dict() diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_search.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_search.py new file mode 100644 index 000000000..1ce578fa5 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_search.py @@ -0,0 +1,294 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +import pytest +from elasticsearch import ApiError, Elasticsearch +from pytest import raises + +from elasticsearch.dsl import Date, Document, Keyword, MultiSearch, Q, Search, Text +from elasticsearch.dsl.response import aggs + +from ..test_data import FLAT_DATA + + +class Repository(Document): + created_at = Date() + description = Text(analyzer="snowball") + tags = Keyword() + + @classmethod + def search(cls) -> Search["Repository"]: # type: ignore[override] + return super().search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(Document): + class Index: + name = "flat-git" + + +@pytest.mark.sync +def test_filters_aggregation_buckets_are_accessible( + data_client: Elasticsearch, +) -> None: + has_tests_query = Q("term", files="test_elasticsearch_dsl") + s = Commit.search()[0:0] + s.aggs.bucket("top_authors", "terms", field="author.name.raw").bucket( + "has_tests", "filters", filters={"yes": has_tests_query, "no": ~has_tests_query} + ).metric("lines", "stats", field="stats.lines") + + response = s.execute() + + assert isinstance( + response.aggregations.top_authors.buckets[0].has_tests.buckets.yes, aggs.Bucket + ) + assert ( + 35 + == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.doc_count + ) + assert ( + 228 + == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.lines.max + ) + + +@pytest.mark.sync +def test_top_hits_are_wrapped_in_response( + data_client: Elasticsearch, +) -> None: + s = Commit.search()[0:0] + s.aggs.bucket("top_authors", "terms", field="author.name.raw").metric( + "top_commits", "top_hits", size=5 + ) + response = s.execute() + + top_commits = response.aggregations.top_authors.buckets[0].top_commits + assert isinstance(top_commits, aggs.TopHitsData) + assert 5 == len(top_commits) + + hits = [h for h in top_commits] + assert 5 == len(hits) + assert isinstance(hits[0], Commit) + + +@pytest.mark.sync +def test_inner_hits_are_wrapped_in_response( + data_client: 
Elasticsearch, +) -> None: + s = Search(index="git")[0:1].query( + "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all") + ) + response = s.execute() + + commit = response.hits[0] + assert isinstance(commit.meta.inner_hits.repo, response.__class__) + assert repr(commit.meta.inner_hits.repo[0]).startswith( + " None: + s = Search(index="git")[0:1].query( + "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all") + ) + response = s.execute() + d = response.to_dict(recursive=True) + assert isinstance(d, dict) + assert isinstance(d["hits"]["hits"][0]["inner_hits"]["repo"], dict) + + # iterating over the results changes the format of the internal AttrDict + for hit in response: + pass + + d = response.to_dict(recursive=True) + assert isinstance(d, dict) + assert isinstance(d["hits"]["hits"][0]["inner_hits"]["repo"], dict) + + +@pytest.mark.sync +def test_scan_respects_doc_types(data_client: Elasticsearch) -> None: + repos = [repo for repo in Repository.search().scan()] + + assert 1 == len(repos) + assert isinstance(repos[0], Repository) + assert repos[0].organization == "elasticsearch" + + +@pytest.mark.sync +def test_scan_iterates_through_all_docs( + data_client: Elasticsearch, +) -> None: + s = Search(index="flat-git") + + commits = [commit for commit in s.scan()] + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.sync +def test_search_after(data_client: Elasticsearch) -> None: + page_size = 7 + s = Search(index="flat-git")[:page_size].sort("authored_date") + commits = [] + while True: + r = s.execute() + commits += r.hits + if len(r.hits) < page_size: + break + s = s.search_after() + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.sync +def test_search_after_no_search(data_client: Elasticsearch) -> None: + s = Search(index="flat-git") + with raises( + ValueError, match="A search must be executed before 
using search_after" + ): + s.search_after() + s.count() + with raises( + ValueError, match="A search must be executed before using search_after" + ): + s.search_after() + + +@pytest.mark.sync +def test_search_after_no_sort(data_client: Elasticsearch) -> None: + s = Search(index="flat-git") + r = s.execute() + with raises( + ValueError, match="Cannot use search_after when results are not sorted" + ): + r.search_after() + + +@pytest.mark.sync +def test_search_after_no_results(data_client: Elasticsearch) -> None: + s = Search(index="flat-git")[:100].sort("authored_date") + r = s.execute() + assert 52 == len(r.hits) + s = s.search_after() + r = s.execute() + assert 0 == len(r.hits) + with raises( + ValueError, match="Cannot use search_after when there are no search results" + ): + r.search_after() + + +@pytest.mark.sync +def test_point_in_time(data_client: Elasticsearch) -> None: + page_size = 7 + commits = [] + with Search(index="flat-git")[:page_size].point_in_time(keep_alive="30s") as s: + pit_id = s._extra["pit"]["id"] + while True: + r = s.execute() + commits += r.hits + if len(r.hits) < page_size: + break + s = s.search_after() + assert pit_id == s._extra["pit"]["id"] + assert "30s" == s._extra["pit"]["keep_alive"] + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.sync +def test_iterate(data_client: Elasticsearch) -> None: + s = Search(index="flat-git") + + commits = [commit for commit in s.iterate()] + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.sync +def test_response_is_cached(data_client: Elasticsearch) -> None: + s = Repository.search() + repos = [repo for repo in s] + + assert hasattr(s, "_response") + assert s._response.hits == repos + + +@pytest.mark.sync +def test_multi_search(data_client: Elasticsearch) -> None: + s1 = Repository.search() + s2 = Search[Repository](index="flat-git") + + ms = 
MultiSearch[Repository]() + ms = ms.add(s1).add(s2) + + r1, r2 = ms.execute() + + assert 1 == len(r1) + assert isinstance(r1[0], Repository) + assert r1._search is s1 + + assert 52 == r2.hits.total.value # type: ignore[attr-defined] + assert r2._search is s2 + + +@pytest.mark.sync +def test_multi_missing(data_client: Elasticsearch) -> None: + s1 = Repository.search() + s2 = Search[Repository](index="flat-git") + s3 = Search[Repository](index="does_not_exist") + + ms = MultiSearch[Repository]() + ms = ms.add(s1).add(s2).add(s3) + + with raises(ApiError): + ms.execute() + + r1, r2, r3 = ms.execute(raise_on_error=False) + + assert 1 == len(r1) + assert isinstance(r1[0], Repository) + assert r1._search is s1 + + assert 52 == r2.hits.total.value # type: ignore[attr-defined] + assert r2._search is s2 + + assert r3 is None + + +@pytest.mark.sync +def test_raw_subfield_can_be_used_in_aggs( + data_client: Elasticsearch, +) -> None: + s = Search(index="git")[0:0] + s.aggs.bucket("authors", "terms", field="author.name.raw", size=1) + + r = s.execute() + + authors = r.aggregations.authors + assert 1 == len(authors) + assert {"key": "Honza Král", "doc_count": 52} == authors[0] diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_update_by_query.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_update_by_query.py new file mode 100644 index 000000000..f16505d49 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_update_by_query.py @@ -0,0 +1,85 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import UpdateByQuery +from elasticsearch.dsl.search import Q + + +@pytest.mark.sync +def test_update_by_query_no_script( + write_client: Elasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + UpdateByQuery(using=write_client) + .index(index) + .filter(~Q("exists", field="is_public")) + ) + response = ubq.execute() + + assert response.total == 52 + assert response["took"] > 0 + assert not response.timed_out + assert response.updated == 52 + assert response.deleted == 0 + assert response.took > 0 + assert response.success() + + +@pytest.mark.sync +def test_update_by_query_with_script( + write_client: Elasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + UpdateByQuery(using=write_client) + .index(index) + .filter(~Q("exists", field="parent_shas")) + .script(source="ctx._source.is_public = false") + ) + ubq = ubq.params(conflicts="proceed") + + response = ubq.execute() + assert response.total == 2 + assert response.updated == 2 + assert response.version_conflicts == 0 + + +@pytest.mark.sync +def test_delete_by_query_with_script( + write_client: Elasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + UpdateByQuery(using=write_client) + .index(index) + .filter(Q("match", parent_shas="1dd19210b5be92b960f7db6f66ae526288edccc3")) + .script(source='ctx.op = "delete"') + ) + ubq = ubq.params(conflicts="proceed") + + response = ubq.execute() + + assert response.total == 1 + assert 
response.deleted == 1 + assert response.success() diff --git a/test_elasticsearch/test_dsl/test_integration/test_count.py b/test_elasticsearch/test_dsl/test_integration/test_count.py new file mode 100644 index 000000000..5d52607bc --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_count.py @@ -0,0 +1,46 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any + +from elasticsearch import Elasticsearch + +from elasticsearch.dsl.search import Q, Search + + +def test_count_all(data_client: Elasticsearch) -> None: + s = Search(using=data_client).index("git") + assert 53 == s.count() + + +def test_count_prefetch(data_client: Elasticsearch, mocker: Any) -> None: + mocker.spy(data_client, "count") + + search = Search(using=data_client).index("git") + search.execute() + assert search.count() == 53 + assert data_client.count.call_count == 0 # type: ignore[attr-defined] + + search._response.hits.total.relation = "gte" # type: ignore[attr-defined] + assert search.count() == 53 + assert data_client.count.call_count == 1 # type: ignore[attr-defined] + + +def test_count_filter(data_client: Elasticsearch) -> None: + s = Search(using=data_client).index("git").filter(~Q("exists", field="parent_shas")) + # initial commit + repo document + assert 2 == s.count() diff --git a/test_elasticsearch/test_dsl/test_integration/test_data.py b/test_elasticsearch/test_dsl/test_integration/test_data.py new file mode 100644 index 000000000..1e80896ab --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_data.py @@ -0,0 +1,1093 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any, Dict + +from elasticsearch import Elasticsearch + +user_mapping = { + "properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}} +} + +FLAT_GIT_INDEX: Dict[str, Any] = { + "settings": { + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], + } + } + }, + }, + "mappings": { + "properties": { + "description": {"type": "text", "analyzer": "snowball"}, + "author": user_mapping, + "authored_date": {"type": "date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, + }, + } + }, +} + +GIT_INDEX: Dict[str, Any] = { + "settings": { + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], + } + } + }, + }, + "mappings": { + "properties": { + # common fields + "description": {"type": "text", "analyzer": "snowball"}, + "commit_repo": {"type": "join", "relations": {"repo": "commit"}}, + # COMMIT mappings + "author": user_mapping, + "authored_date": {"type": "date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, + }, + # REPO mappings + "is_public": {"type": "boolean"}, + "owner": user_mapping, + "created_at": {"type": "date"}, + "tags": {"type": "keyword"}, + } + }, +} + + +def create_flat_git_index(client: Elasticsearch, index: str) -> None: + client.indices.create(index=index, body=FLAT_GIT_INDEX) + + +def create_git_index(client: Elasticsearch, index: str) -> None: + client.indices.create(index=index, body=GIT_INDEX) + + +DATA = [ + # repository + { + "_id": "elasticsearch-dsl-py", + "_source": { + "commit_repo": "repo", + 
"organization": "elasticsearch", + "created_at": "2014-03-03", + "owner": {"name": "elasticsearch"}, + "is_public": True, + }, + "_index": "git", + }, + # documents + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_aggs.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 7, "insertions": 23, "lines": 30, "files": 4}, + "description": "Make sure buckets aren't modified in-place", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["eb3e543323f189fd7b698e66295427204fff5755"], + "committed_date": "2014-05-02T13:47:19", + "authored_date": "2014-05-02T13:47:19.123+02:00", + }, + "_index": "git", + }, + { + "_id": "eb3e543323f189fd7b698e66295427204fff5755", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 18, "lines": 18, "files": 1}, + "description": "Add communication with ES server", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["dd15b6ba17dd9ba16363a51f85b31f66f1fb1157"], + "committed_date": "2014-05-01T13:32:14", + "authored_date": "2014-05-01T13:32:14", + }, + "_index": "git", + }, + { + "_id": "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "test_elasticsearch_dsl/test_result.py", + "elasticsearch_dsl/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", 
"email": "honza.kral@gmail.com"}, + "stats": {"deletions": 18, "insertions": 44, "lines": 62, "files": 3}, + "description": "Minor cleanup and adding helpers for interactive python", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["ed19caf25abd25300e707fadf3f81b05c5673446"], + "committed_date": "2014-05-01T13:30:44", + "authored_date": "2014-05-01T13:30:44", + }, + "_index": "git", + }, + { + "_id": "ed19caf25abd25300e707fadf3f81b05c5673446", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 28, "lines": 28, "files": 3}, + "description": "Make sure aggs do copy-on-write", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["583e52c71e9a72c1b291ec5843683d8fa8f1ce2d"], + "committed_date": "2014-04-27T16:28:09", + "authored_date": "2014-04-27T16:28:09", + }, + "_index": "git", + }, + { + "_id": "583e52c71e9a72c1b291ec5843683d8fa8f1ce2d", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/aggs.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 1, "lines": 2, "files": 1}, + "description": "Use __setitem__ from DslBase in AggsBase", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1dd19210b5be92b960f7db6f66ae526288edccc3"], + "committed_date": "2014-04-27T15:51:53", + "authored_date": "2014-04-27T15:51:53", + }, + "_index": "git", + }, + { + "_id": "1dd19210b5be92b960f7db6f66ae526288edccc3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": 
"commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/search.py", + "elasticsearch_dsl/filter.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 21, "insertions": 98, "lines": 119, "files": 5}, + "description": "Have Search clone itself on any change besides aggs", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b4c9e29376af2e42a4e6dc153f0f293b1a18bac3"], + "committed_date": "2014-04-26T14:49:43", + "authored_date": "2014-04-26T14:49:43", + }, + "_index": "git", + }, + { + "_id": "b4c9e29376af2e42a4e6dc153f0f293b1a18bac3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_result.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 5, "lines": 5, "files": 1}, + "description": "Add tests for [] on response", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a64a54181b232bb5943bd16960be9416e402f5f5"], + "committed_date": "2014-04-26T13:56:52", + "authored_date": "2014-04-26T13:56:52", + }, + "_index": "git", + }, + { + "_id": "a64a54181b232bb5943bd16960be9416e402f5f5", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_result.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 7, "lines": 8, "files": 1}, + "description": "Test access to missing fields raises appropriate exceptions", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["df3f778a3d37b170bde6979a4ef2d9e3e6400778"], + "committed_date": 
"2014-04-25T16:01:07", + "authored_date": "2014-04-25T16:01:07", + }, + "_index": "git", + }, + { + "_id": "df3f778a3d37b170bde6979a4ef2d9e3e6400778", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "test_elasticsearch_dsl/test_result.py", + "elasticsearch_dsl/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 8, "insertions": 31, "lines": 39, "files": 3}, + "description": "Support attribute access even for inner/nested objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925"], + "committed_date": "2014-04-25T15:59:02", + "authored_date": "2014-04-25T15:59:02", + }, + "_index": "git", + }, + { + "_id": "7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "test_elasticsearch_dsl/test_result.py", + "elasticsearch_dsl/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 149, "lines": 149, "files": 2}, + "description": "Added a prototype of a Respose and Result classes", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e2882d28cb8077eaa3e5d8ae76543482d4d90f7e"], + "committed_date": "2014-04-25T15:12:15", + "authored_date": "2014-04-25T15:12:15", + }, + "_index": "git", + }, + { + "_id": "e2882d28cb8077eaa3e5d8ae76543482d4d90f7e", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["docs/index.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 6, "lines": 6, "files": 1}, + "description": "add warning 
to the docs", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["51f94d83d1c47d3b81207736ca97a1ec6302678f"], + "committed_date": "2014-04-22T19:16:21", + "authored_date": "2014-04-22T19:16:21", + }, + "_index": "git", + }, + { + "_id": "51f94d83d1c47d3b81207736ca97a1ec6302678f", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 3, "insertions": 29, "lines": 32, "files": 1}, + "description": "Add some comments to the code", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["0950f6c600b49e2bf012d03b02250fb71c848555"], + "committed_date": "2014-04-22T19:12:06", + "authored_date": "2014-04-22T19:12:06", + }, + "_index": "git", + }, + { + "_id": "0950f6c600b49e2bf012d03b02250fb71c848555", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["README.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 6, "lines": 6, "files": 1}, + "description": "Added a WIP warning", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["54d058f5ac6be8225ef61d5529772aada42ec6c8"], + "committed_date": "2014-04-20T00:19:25", + "authored_date": "2014-04-20T00:19:25", + }, + "_index": "git", + }, + { + "_id": "54d058f5ac6be8225ef61d5529772aada42ec6c8", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/__init__.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 36, "insertions": 
7, "lines": 43, "files": 3}, + "description": "Remove the operator kwarg from .query", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["4cb07845e45787abc1f850c0b561e487e0034424"], + "committed_date": "2014-04-20T00:17:25", + "authored_date": "2014-04-20T00:17:25", + }, + "_index": "git", + }, + { + "_id": "4cb07845e45787abc1f850c0b561e487e0034424", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 35, "insertions": 49, "lines": 84, "files": 2}, + "description": "Complex example", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["578abe80f76aafd7e81fe46a44403e601733a938"], + "committed_date": "2014-03-24T20:48:45", + "authored_date": "2014-03-24T20:48:45", + }, + "_index": "git", + }, + { + "_id": "578abe80f76aafd7e81fe46a44403e601733a938", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 0, "lines": 2, "files": 1}, + "description": "removing extra whitespace", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["ecb84f03565940c7d294dbc80723420dcfbab340"], + "committed_date": "2014-03-24T20:42:23", + "authored_date": "2014-03-24T20:42:23", + }, + "_index": "git", + }, + { + "_id": "ecb84f03565940c7d294dbc80723420dcfbab340", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": 
"honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 3, "lines": 4, "files": 1}, + "description": "Make sure attribute access works for .query on Search", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["9a247c876ab66e2bca56b25f392d054e613b1b2a"], + "committed_date": "2014-03-24T20:35:02", + "authored_date": "2014-03-24T20:34:46", + }, + "_index": "git", + }, + { + "_id": "9a247c876ab66e2bca56b25f392d054e613b1b2a", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 2, "lines": 2, "files": 1}, + "description": "Make sure .index and .doc_type methods are chainable", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["cee5e46947d510a49edd3609ff91aab7b1f3ac89"], + "committed_date": "2014-03-24T20:27:46", + "authored_date": "2014-03-24T20:27:46", + }, + "_index": "git", + }, + { + "_id": "cee5e46947d510a49edd3609ff91aab7b1f3ac89", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/filter.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 13, "insertions": 128, "lines": 141, "files": 3}, + "description": "Added .filter and .post_filter to Search", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1d6857182b09a556d58c6bc5bdcb243092812ba3"], + "committed_date": "2014-03-24T20:26:57", + "authored_date": "2014-03-24T20:26:57", + }, + "_index": "git", + }, + { + "_id": "1d6857182b09a556d58c6bc5bdcb243092812ba3", + "routing": "elasticsearch-dsl-py", + "_source": { + 
"commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 24, "insertions": 29, "lines": 53, "files": 2}, + "description": "Extracted combination logic into DslBase", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["4ad92f15a1955846c01642318303a821e8435b75"], + "committed_date": "2014-03-24T20:03:51", + "authored_date": "2014-03-24T20:03:51", + }, + "_index": "git", + }, + { + "_id": "4ad92f15a1955846c01642318303a821e8435b75", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 43, "insertions": 45, "lines": 88, "files": 2}, + "description": "Extracted bool-related logic to a mixin to be reused by filters", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["6eb39dc2825605543ac1ed0b45b9b6baeecc44c2"], + "committed_date": "2014-03-24T19:16:16", + "authored_date": "2014-03-24T19:16:16", + }, + "_index": "git", + }, + { + "_id": "6eb39dc2825605543ac1ed0b45b9b6baeecc44c2", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 32, "lines": 33, "files": 2}, + "description": "Enable otheroperators when querying on Search object", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["be094c7b307332cb6039bf9a7c984d2c7593ddff"], + "committed_date": 
"2014-03-24T18:25:10", + "authored_date": "2014-03-24T18:25:10", + }, + "_index": "git", + }, + { + "_id": "be094c7b307332cb6039bf9a7c984d2c7593ddff", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 23, "insertions": 35, "lines": 58, "files": 3}, + "description": "make sure query operations always return copies", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b2576e3b6437e2cb9d8971fee4ead60df91fd75b"], + "committed_date": "2014-03-24T18:10:37", + "authored_date": "2014-03-24T18:03:13", + }, + "_index": "git", + }, + { + "_id": "b2576e3b6437e2cb9d8971fee4ead60df91fd75b", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 53, "lines": 54, "files": 2}, + "description": "Adding or operator for queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1be002170ac3cd59d2e97824b83b88bb3c9c60ed"], + "committed_date": "2014-03-24T17:53:38", + "authored_date": "2014-03-24T17:53:38", + }, + "_index": "git", + }, + { + "_id": "1be002170ac3cd59d2e97824b83b88bb3c9c60ed", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 35, "lines": 35, "files": 
2}, + "description": "Added inverting of queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["24e1e38b2f704f65440d96c290b7c6cd54c2e00e"], + "committed_date": "2014-03-23T17:44:36", + "authored_date": "2014-03-23T17:44:36", + }, + "_index": "git", + }, + { + "_id": "24e1e38b2f704f65440d96c290b7c6cd54c2e00e", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/aggs.py", "elasticsearch_dsl/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 5, "insertions": 1, "lines": 6, "files": 2}, + "description": "Change equality checks to use .to_dict()", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["277cfaedbaf3705ed74ad6296227e1172c97a63f"], + "committed_date": "2014-03-23T17:43:01", + "authored_date": "2014-03-23T17:43:01", + }, + "_index": "git", + }, + { + "_id": "277cfaedbaf3705ed74ad6296227e1172c97a63f", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 11, "lines": 12, "files": 2}, + "description": "Test combining of bool queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["6aa3868a6a9f35f71553ce96f9d3d63c74d054fd"], + "committed_date": "2014-03-21T15:15:06", + "authored_date": "2014-03-21T15:15:06", + }, + "_index": "git", + }, + { + "_id": "6aa3868a6a9f35f71553ce96f9d3d63c74d054fd", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + 
"committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 23, "lines": 24, "files": 2}, + "description": "Adding & operator for queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["bb311eb35e7eb53fb5ae01e3f80336866c7e3e37"], + "committed_date": "2014-03-21T15:10:08", + "authored_date": "2014-03-21T15:10:08", + }, + "_index": "git", + }, + { + "_id": "bb311eb35e7eb53fb5ae01e3f80336866c7e3e37", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 4, "lines": 5, "files": 2}, + "description": "Don't serialize empty typed fields into dict", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["aea8ea9e421bd53a5b058495e68c3fd57bb1dacc"], + "committed_date": "2014-03-15T16:29:37", + "authored_date": "2014-03-15T16:29:37", + }, + "_index": "git", + }, + { + "_id": "aea8ea9e421bd53a5b058495e68c3fd57bb1dacc", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 3, "insertions": 37, "lines": 40, "files": 3}, + "description": "Bool queries, when combining just adds their params together", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a8819a510b919be43ff3011b904f257798fb8916"], + "committed_date": "2014-03-15T16:16:40", + "authored_date": "2014-03-15T16:16:40", + }, + "_index": "git", + }, + { + "_id": 
"a8819a510b919be43ff3011b904f257798fb8916", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/run_tests.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 6, "insertions": 2, "lines": 8, "files": 1}, + "description": "Simpler run_tests.py", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e35792a725be2325fc54d3fcb95a7d38d8075a99"], + "committed_date": "2014-03-15T16:02:21", + "authored_date": "2014-03-15T16:02:21", + }, + "_index": "git", + }, + { + "_id": "e35792a725be2325fc54d3fcb95a7d38d8075a99", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/aggs.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 2, "lines": 4, "files": 2}, + "description": "Maku we don't treat shortcuts as methods.", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc"], + "committed_date": "2014-03-15T15:59:21", + "authored_date": "2014-03-15T15:59:21", + }, + "_index": "git", + }, + { + "_id": "3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/query.py", + "elasticsearch_dsl/utils.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 9, "insertions": 5, "lines": 14, "files": 3}, + "description": "Centralize == of Dsl objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b5e7d0c4b284211df8f7b464fcece93a27a802fb"], + 
"committed_date": "2014-03-10T21:37:24", + "authored_date": "2014-03-10T21:37:24", + }, + "_index": "git", + }, + { + "_id": "b5e7d0c4b284211df8f7b464fcece93a27a802fb", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_aggs.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 75, "insertions": 115, "lines": 190, "files": 6}, + "description": "Experimental draft with more declarative DSL", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["0fe741b43adee5ca1424584ddd3f35fa33f8733c"], + "committed_date": "2014-03-10T21:34:39", + "authored_date": "2014-03-10T21:34:39", + }, + "_index": "git", + }, + { + "_id": "0fe741b43adee5ca1424584ddd3f35fa33f8733c", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 2, "lines": 4, "files": 1}, + "description": "Make sure .query is chainable", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a22be5933d4b022cbacee867b1aece120208edf3"], + "committed_date": "2014-03-07T17:41:59", + "authored_date": "2014-03-07T17:41:59", + }, + "_index": "git", + }, + { + "_id": "a22be5933d4b022cbacee867b1aece120208edf3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza 
Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 14, "insertions": 44, "lines": 58, "files": 3}, + "description": "Search now does aggregations", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e823686aacfc4bdcb34ffdab337a26fa09659a9a"], + "committed_date": "2014-03-07T17:29:55", + "authored_date": "2014-03-07T17:29:55", + }, + "_index": "git", + }, + { + "_id": "e823686aacfc4bdcb34ffdab337a26fa09659a9a", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [".gitignore"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 1, "lines": 1, "files": 1}, + "description": "Ignore html coverage report", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e0aedb3011c71d704deec03a8f32b2b360d6e364"], + "committed_date": "2014-03-07T17:03:23", + "authored_date": "2014-03-07T17:03:23", + }, + "_index": "git", + }, + { + "_id": "e0aedb3011c71d704deec03a8f32b2b360d6e364", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "test_elasticsearch_dsl/test_aggs.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 228, "lines": 228, "files": 2}, + "description": "Added aggregation DSL objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd"], + "committed_date": "2014-03-07T16:25:55", + "authored_date": "2014-03-07T16:25:55", + }, + "_index": "git", + }, + { + "_id": "61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": 
["elasticsearch_dsl/utils.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 12, "insertions": 7, "lines": 19, "files": 2}, + "description": "Only retrieve DslClass, leave the instantiation to the caller", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["647f1017a7b17a913e07af70a3b03202f6adbdfd"], + "committed_date": "2014-03-07T15:27:43", + "authored_date": "2014-03-07T15:27:43", + }, + "_index": "git", + }, + { + "_id": "647f1017a7b17a913e07af70a3b03202f6adbdfd", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 19, "insertions": 19, "lines": 38, "files": 3}, + "description": "No need to replicate Query suffix when in query namespace", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d"], + "committed_date": "2014-03-07T15:19:01", + "authored_date": "2014-03-07T15:19:01", + }, + "_index": "git", + }, + { + "_id": "7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 3, "lines": 5, "files": 1}, + "description": "Ask forgiveness, not permission", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["c10793c2ca43688195e415b25b674ff34d58eaff"], + "committed_date": "2014-03-07T15:13:22", + "authored_date": "2014-03-07T15:13:22", + }, + "_index": "git", + }, 
+ { + "_id": "c10793c2ca43688195e415b25b674ff34d58eaff", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 24, "insertions": 27, "lines": 51, "files": 3}, + "description": "Extract DSL object registration to DslMeta", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["d8867fdb17fcf4c696657740fa08d29c36adc6ec"], + "committed_date": "2014-03-07T15:12:13", + "authored_date": "2014-03-07T15:10:31", + }, + "_index": "git", + }, + { + "_id": "d8867fdb17fcf4c696657740fa08d29c36adc6ec", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 13, "lines": 13, "files": 2}, + "description": "Search.to_dict", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["2eb7cd980d917ed6f4a4dd8e246804f710ec5082"], + "committed_date": "2014-03-07T02:58:33", + "authored_date": "2014-03-07T02:58:33", + }, + "_index": "git", + }, + { + "_id": "2eb7cd980d917ed6f4a4dd8e246804f710ec5082", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 113, "lines": 113, "files": 2}, + "description": "Basic Search object", + "author": {"name": "Honza Kr\xe1l", "email": 
"honza.kral@gmail.com"}, + "parent_shas": ["11708576f9118e0dbf27ae1f8a7b799cf281b511"], + "committed_date": "2014-03-06T21:02:03", + "authored_date": "2014-03-06T21:01:05", + }, + "_index": "git", + }, + { + "_id": "11708576f9118e0dbf27ae1f8a7b799cf281b511", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 13, "lines": 13, "files": 2}, + "description": "MatchAll query + anything is anything", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1dc496e5c7c1b2caf290df477fca2db61ebe37e0"], + "committed_date": "2014-03-06T20:40:39", + "authored_date": "2014-03-06T20:39:52", + }, + "_index": "git", + }, + { + "_id": "1dc496e5c7c1b2caf290df477fca2db61ebe37e0", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 53, "lines": 53, "files": 2}, + "description": "From_dict, Q(dict) and bool query parses it's subqueries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["d407f99d1959b7b862a541c066d9fd737ce913f3"], + "committed_date": "2014-03-06T20:24:30", + "authored_date": "2014-03-06T20:24:30", + }, + "_index": "git", + }, + { + "_id": "d407f99d1959b7b862a541c066d9fd737ce913f3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["CONTRIBUTING.md", "README.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 
6, "insertions": 21, "lines": 27, "files": 2}, + "description": "Housekeeping - licence and updated generic CONTRIBUTING.md", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["277e8ecc7395754d1ba1f2411ec32337a3e9d73f"], + "committed_date": "2014-03-05T16:21:44", + "authored_date": "2014-03-05T16:21:44", + }, + "_index": "git", + }, + { + "_id": "277e8ecc7395754d1ba1f2411ec32337a3e9d73f", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "setup.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 59, "lines": 59, "files": 3}, + "description": "Automatic query registration and Q function", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["8f1e34bd8f462fec50bcc10971df2d57e2986604"], + "committed_date": "2014-03-05T16:18:52", + "authored_date": "2014-03-05T16:18:52", + }, + "_index": "git", + }, + { + "_id": "8f1e34bd8f462fec50bcc10971df2d57e2986604", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 54, "lines": 54, "files": 2}, + "description": "Initial implementation of match and bool queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["fcff47ddcc6d08be5739d03dd30f504fb9db2608"], + "committed_date": "2014-03-05T15:55:06", + "authored_date": "2014-03-05T15:55:06", + }, + "_index": "git", + }, + { + "_id": "fcff47ddcc6d08be5739d03dd30f504fb9db2608", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": 
"commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "docs/Makefile", + "CONTRIBUTING.md", + "docs/conf.py", + "LICENSE", + "Changelog.rst", + "docs/index.rst", + "docs/Changelog.rst", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 692, "lines": 692, "files": 7}, + "description": "Docs template", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["febe8127ae48fcc81778c0fb2d628f1bcc0a0350"], + "committed_date": "2014-03-04T01:42:31", + "authored_date": "2014-03-04T01:42:31", + }, + "_index": "git", + }, + { + "_id": "febe8127ae48fcc81778c0fb2d628f1bcc0a0350", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/__init__.py", + "test_elasticsearch_dsl/run_tests.py", + "setup.py", + "README.rst", + "test_elasticsearch_dsl/__init__.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 82, "lines": 82, "files": 5}, + "description": "Empty project structure", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["2a8f1ce89760bfc72808f3945b539eae650acac9"], + "committed_date": "2014-03-04T01:37:49", + "authored_date": "2014-03-03T18:23:55", + }, + "_index": "git", + }, + { + "_id": "2a8f1ce89760bfc72808f3945b539eae650acac9", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [".gitignore"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 9, "lines": 9, "files": 1}, + "description": "Initial commit, .gitignore", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": [], + "committed_date": "2014-03-03T18:15:05", + "authored_date": "2014-03-03T18:15:05", + }, + 
"_index": "git", + }, +] + + +def flatten_doc(d: Dict[str, Any]) -> Dict[str, Any]: + src = d["_source"].copy() + del src["commit_repo"] + return {"_index": "flat-git", "_id": d["_id"], "_source": src} + + +FLAT_DATA = [flatten_doc(d) for d in DATA if "routing" in d] + + +def create_test_git_data(d: Dict[str, Any]) -> Dict[str, Any]: + src = d["_source"].copy() + return { + "_index": "test-git", + "routing": "elasticsearch-dsl-py", + "_id": d["_id"], + "_source": src, + } + + +TEST_GIT_DATA = [create_test_git_data(d) for d in DATA] diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/__init__.py b/test_elasticsearch/test_dsl/test_integration/test_examples/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/__init__.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_alias_migration.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_alias_migration.py new file mode 100644 index 000000000..dae4c973f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_alias_migration.py @@ -0,0 +1,73 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import AsyncElasticsearch + +from ..async_examples import alias_migration +from ..async_examples.alias_migration import ALIAS, PATTERN, BlogPost, migrate + + +@pytest.mark.asyncio +async def test_alias_migration(async_write_client: AsyncElasticsearch) -> None: + # create the index + await alias_migration.setup() + + # verify that template, index, and alias has been set up + assert await async_write_client.indices.exists_index_template(name=ALIAS) + assert await async_write_client.indices.exists(index=PATTERN) + assert await async_write_client.indices.exists_alias(name=ALIAS) + + indices = await async_write_client.indices.get(index=PATTERN) + assert len(indices) == 1 + index_name, _ = indices.popitem() + + # which means we can now save a document + with open(__file__) as f: + bp = BlogPost( + _id=0, + title="Hello World!", + tags=["testing", "dummy"], + content=f.read(), + published=None, + ) + await bp.save(refresh=True) + + assert await BlogPost.search().count() == 1 + + # _matches work which means we get BlogPost instance + bp = (await BlogPost.search().execute())[0] + assert isinstance(bp, BlogPost) + assert not bp.is_published() + assert "0" == bp.meta.id + + # create new index + await migrate() + + indices = await async_write_client.indices.get(index=PATTERN) + assert 2 == len(indices) + alias = await async_write_client.indices.get(index=ALIAS) + assert 1 == len(alias) + assert index_name not in alias + + # data has been moved properly + assert await BlogPost.search().count() == 1 + + # _matches work which means 
we get BlogPost instance + bp = (await BlogPost.search().execute())[0] + assert isinstance(bp, BlogPost) + assert "0" == bp.meta.id diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_completion.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_completion.py new file mode 100644 index 000000000..e9716c1d2 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_completion.py @@ -0,0 +1,39 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest +from elasticsearch import AsyncElasticsearch + +from ..async_examples.completion import Person + + +@pytest.mark.asyncio +async def test_person_suggests_on_all_variants_of_name( + async_write_client: AsyncElasticsearch, +) -> None: + await Person.init(using=async_write_client) + + await Person(_id=None, name="Honza Král", popularity=42).save(refresh=True) + + s = Person.search().suggest("t", "kra", completion={"field": "suggest"}) + response = await s.execute() + + opts = response.suggest["t"][0].options + + assert 1 == len(opts) + assert opts[0]._score == 42 + assert opts[0]._source.name == "Honza Král" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_composite_aggs.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_composite_aggs.py new file mode 100644 index 000000000..4bb4e68a3 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_composite_aggs.py @@ -0,0 +1,57 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import A, AsyncSearch + +from ..async_examples.composite_agg import scan_aggs + + +@pytest.mark.asyncio +async def test_scan_aggs_exhausts_all_files( + async_data_client: AsyncElasticsearch, +) -> None: + s = AsyncSearch(index="flat-git") + key_aggs = [{"files": A("terms", field="files")}] + file_list = [f async for f in scan_aggs(s, key_aggs)] + + assert len(file_list) == 26 + + +@pytest.mark.asyncio +async def test_scan_aggs_with_multiple_aggs( + async_data_client: AsyncElasticsearch, +) -> None: + s = AsyncSearch(index="flat-git") + key_aggs = [ + {"files": A("terms", field="files")}, + { + "months": A( + "date_histogram", field="committed_date", calendar_interval="month" + ) + }, + ] + file_list = [ + f + async for f in scan_aggs( + s, key_aggs, {"first_seen": A("min", field="committed_date")} + ) + ] + + assert len(file_list) == 47 diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_parent_child.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_parent_child.py new file mode 100644 index 000000000..4d8527081 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_parent_child.py @@ -0,0 +1,116 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +import pytest +import pytest_asyncio +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import Q + +from ..async_examples.parent_child import Answer, Comment, Question, User, setup + +honza = User( + id=42, + signed_up=datetime(2013, 4, 3), + username="honzakral", + email="honza@elastic.co", + location="Prague", +) + +nick = User( + id=47, + signed_up=datetime(2017, 4, 3), + username="fxdgear", + email="nick.lang@elastic.co", + location="Colorado", +) + + +@pytest_asyncio.fixture +async def question(async_write_client: AsyncElasticsearch) -> Question: + await setup() + assert await async_write_client.indices.exists_index_template(name="base") + + # create a question object + q = Question( + _id=1, + author=nick, + tags=["elasticsearch", "python"], + title="How do I use elasticsearch from Python?", + body=""" + I want to use elasticsearch, how do I do it from Python? 
+ """, + created=None, + question_answer=None, + comments=[], + ) + await q.save() + return q + + +@pytest.mark.asyncio +async def test_comment( + async_write_client: AsyncElasticsearch, question: Question +) -> None: + await question.add_comment(nick, "Just use elasticsearch-py") + + q = await Question.get(1) # type: ignore[arg-type] + assert isinstance(q, Question) + assert 1 == len(q.comments) + + c = q.comments[0] + assert isinstance(c, Comment) + assert c.author.username == "fxdgear" + + +@pytest.mark.asyncio +async def test_question_answer( + async_write_client: AsyncElasticsearch, question: Question +) -> None: + a = await question.add_answer(honza, "Just use `elasticsearch-py`!") + + assert isinstance(a, Answer) + + # refresh the index so we can search right away + await Question._index.refresh() + + # we can now fetch answers from elasticsearch + answers = await question.get_answers() + assert 1 == len(answers) + assert isinstance(answers[0], Answer) + + search = Question.search().query( + "has_child", + type="answer", + inner_hits={}, + query=Q("term", author__username__keyword="honzakral"), + ) + response = await search.execute() + + assert 1 == len(response.hits) + + q = response.hits[0] + assert isinstance(q, Question) + assert 1 == len(q.meta.inner_hits.answer.hits) + assert q.meta.inner_hits.answer.hits is await q.get_answers() + + a = q.meta.inner_hits.answer.hits[0] + assert isinstance(a, Answer) + assert isinstance(await a.get_question(), Question) + assert (await a.get_question()).meta.id == "1" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_percolate.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_percolate.py new file mode 100644 index 000000000..d1564d94b --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_percolate.py @@ -0,0 +1,37 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import AsyncElasticsearch + +from ..async_examples.percolate import BlogPost, setup + + +@pytest.mark.asyncio +async def test_post_gets_tagged_automatically( + async_write_client: AsyncElasticsearch, +) -> None: + await setup() + + bp = BlogPost(_id=47, content="nothing about snakes here!") + bp_py = BlogPost(_id=42, content="something about Python here!") + + await bp.save() + await bp_py.save() + + assert [] == bp.tags + assert {"programming", "development", "python"} == set(bp_py.tags) diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py new file mode 100644 index 000000000..7d3acdd34 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py @@ -0,0 +1,56 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from hashlib import md5 +from typing import Any, List, Tuple +from unittest import SkipTest + +import pytest +from elasticsearch import AsyncElasticsearch + +from test_elasticsearch.test_dsl.async_sleep import sleep + +from ..async_examples import vectors + + +@pytest.mark.asyncio +async def test_vector_search( + async_write_client: AsyncElasticsearch, es_version: Tuple[int, ...], mocker: Any +) -> None: + # this test only runs on Elasticsearch >= 8.11 because the example uses + # a dense vector without specifying an explicit size + if es_version < (8, 11): + raise SkipTest("This test requires Elasticsearch 8.11 or newer") + + class MockModel: + def __init__(self, model: Any): + pass + + def encode(self, text: str) -> List[float]: + vector = [int(ch) for ch in md5(text.encode()).digest()] + total = sum(vector) + return [float(v) / total for v in vector] + + mocker.patch.object(vectors, "SentenceTransformer", new=MockModel) + + await vectors.create() + for i in range(10): + results = await (await vectors.search("Welcome to our team!")).execute() + if len(results.hits) > 0: + break + await sleep(0.1) + assert results[0].name == "New Employee Onboarding Guide" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/__init__.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. 
under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_alias_migration.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_alias_migration.py new file mode 100644 index 000000000..9a74b699b --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_alias_migration.py @@ -0,0 +1,73 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest +from elasticsearch import Elasticsearch + +from ..examples import alias_migration +from ..examples.alias_migration import ALIAS, PATTERN, BlogPost, migrate + + +@pytest.mark.sync +def test_alias_migration(write_client: Elasticsearch) -> None: + # create the index + alias_migration.setup() + + # verify that template, index, and alias have been set up + assert write_client.indices.exists_index_template(name=ALIAS) + assert write_client.indices.exists(index=PATTERN) + assert write_client.indices.exists_alias(name=ALIAS) + + indices = write_client.indices.get(index=PATTERN) + assert len(indices) == 1 + index_name, _ = indices.popitem() + + # which means we can now save a document + with open(__file__) as f: + bp = BlogPost( + _id=0, + title="Hello World!", + tags=["testing", "dummy"], + content=f.read(), + published=None, + ) + bp.save(refresh=True) + + assert BlogPost.search().count() == 1 + + # _matches works, which means we get a BlogPost instance + bp = (BlogPost.search().execute())[0] + assert isinstance(bp, BlogPost) + assert not bp.is_published() + assert "0" == bp.meta.id + + # create new index + migrate() + + indices = write_client.indices.get(index=PATTERN) + assert 2 == len(indices) + alias = write_client.indices.get(index=ALIAS) + assert 1 == len(alias) + assert index_name not in alias + + # data has been moved properly + assert BlogPost.search().count() == 1 + + # _matches works, which means we get a BlogPost instance + bp = (BlogPost.search().execute())[0] + assert isinstance(bp, BlogPost) + assert "0" == bp.meta.id diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_completion.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_completion.py new file mode 100644 index 000000000..6dec13e20 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_completion.py @@ -0,0 +1,39 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch + +from ..examples.completion import Person + + +@pytest.mark.sync +def test_person_suggests_on_all_variants_of_name( + write_client: Elasticsearch, +) -> None: + Person.init(using=write_client) + + Person(_id=None, name="Honza Král", popularity=42).save(refresh=True) + + s = Person.search().suggest("t", "kra", completion={"field": "suggest"}) + response = s.execute() + + opts = response.suggest["t"][0].options + + assert 1 == len(opts) + assert opts[0]._score == 42 + assert opts[0]._source.name == "Honza Král" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_composite_aggs.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_composite_aggs.py new file mode 100644 index 000000000..f7d519f92 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_composite_aggs.py @@ -0,0 +1,57 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. 
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import A, Search + +from ..examples.composite_agg import scan_aggs + + +@pytest.mark.sync +def test_scan_aggs_exhausts_all_files( + data_client: Elasticsearch, +) -> None: + s = Search(index="flat-git") + key_aggs = [{"files": A("terms", field="files")}] + file_list = [f for f in scan_aggs(s, key_aggs)] + + assert len(file_list) == 26 + + +@pytest.mark.sync +def test_scan_aggs_with_multiple_aggs( + data_client: Elasticsearch, +) -> None: + s = Search(index="flat-git") + key_aggs = [ + {"files": A("terms", field="files")}, + { + "months": A( + "date_histogram", field="committed_date", calendar_interval="month" + ) + }, + ] + file_list = [ + f + for f in scan_aggs( + s, key_aggs, {"first_seen": A("min", field="committed_date")} + ) + ] + + assert len(file_list) == 47 diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_parent_child.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_parent_child.py new file mode 100644 index 000000000..514f03686 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_parent_child.py @@ -0,0 +1,111 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. 
Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import Q + +from ..examples.parent_child import Answer, Comment, Question, User, setup + +honza = User( + id=42, + signed_up=datetime(2013, 4, 3), + username="honzakral", + email="honza@elastic.co", + location="Prague", +) + +nick = User( + id=47, + signed_up=datetime(2017, 4, 3), + username="fxdgear", + email="nick.lang@elastic.co", + location="Colorado", +) + + +@pytest.fixture +def question(write_client: Elasticsearch) -> Question: + setup() + assert write_client.indices.exists_index_template(name="base") + + # create a question object + q = Question( + _id=1, + author=nick, + tags=["elasticsearch", "python"], + title="How do I use elasticsearch from Python?", + body=""" + I want to use elasticsearch, how do I do it from Python? 
+ """, + created=None, + question_answer=None, + comments=[], + ) + q.save() + return q + + +@pytest.mark.sync +def test_comment(write_client: Elasticsearch, question: Question) -> None: + question.add_comment(nick, "Just use elasticsearch-py") + + q = Question.get(1) # type: ignore[arg-type] + assert isinstance(q, Question) + assert 1 == len(q.comments) + + c = q.comments[0] + assert isinstance(c, Comment) + assert c.author.username == "fxdgear" + + +@pytest.mark.sync +def test_question_answer(write_client: Elasticsearch, question: Question) -> None: + a = question.add_answer(honza, "Just use `elasticsearch-py`!") + + assert isinstance(a, Answer) + + # refresh the index so we can search right away + Question._index.refresh() + + # we can now fetch answers from elasticsearch + answers = question.get_answers() + assert 1 == len(answers) + assert isinstance(answers[0], Answer) + + search = Question.search().query( + "has_child", + type="answer", + inner_hits={}, + query=Q("term", author__username__keyword="honzakral"), + ) + response = search.execute() + + assert 1 == len(response.hits) + + q = response.hits[0] + assert isinstance(q, Question) + assert 1 == len(q.meta.inner_hits.answer.hits) + assert q.meta.inner_hits.answer.hits is q.get_answers() + + a = q.meta.inner_hits.answer.hits[0] + assert isinstance(a, Answer) + assert isinstance(a.get_question(), Question) + assert (a.get_question()).meta.id == "1" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_percolate.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_percolate.py new file mode 100644 index 000000000..925d362c2 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_percolate.py @@ -0,0 +1,37 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. 
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch + +from ..examples.percolate import BlogPost, setup + + +@pytest.mark.sync +def test_post_gets_tagged_automatically( + write_client: Elasticsearch, +) -> None: + setup() + + bp = BlogPost(_id=47, content="nothing about snakes here!") + bp_py = BlogPost(_id=42, content="something about Python here!") + + bp.save() + bp_py.save() + + assert [] == bp.tags + assert {"programming", "development", "python"} == set(bp_py.tags) diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py new file mode 100644 index 000000000..ff0d0e759 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py @@ -0,0 +1,56 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from hashlib import md5 +from typing import Any, List, Tuple +from unittest import SkipTest + +import pytest +from elasticsearch import Elasticsearch + +from test_elasticsearch.test_dsl.sleep import sleep + +from ..examples import vectors + + +@pytest.mark.sync +def test_vector_search( + write_client: Elasticsearch, es_version: Tuple[int, ...], mocker: Any +) -> None: + # this test only runs on Elasticsearch >= 8.11 because the example uses + # a dense vector without specifying an explicit size + if es_version < (8, 11): + raise SkipTest("This test requires Elasticsearch 8.11 or newer") + + class MockModel: + def __init__(self, model: Any): + pass + + def encode(self, text: str) -> List[float]: + vector = [int(ch) for ch in md5(text.encode()).digest()] + total = sum(vector) + return [float(v) / total for v in vector] + + mocker.patch.object(vectors, "SentenceTransformer", new=MockModel) + + vectors.create() + for i in range(10): + results = (vectors.search("Welcome to our team!")).execute() + if len(results.hits) > 0: + break + sleep(0.1) + assert results[0].name == "New Employee Onboarding Guide" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/async_examples b/test_elasticsearch/test_dsl/test_integration/test_examples/async_examples new file mode 120000 index 000000000..96158259a --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/async_examples @@ -0,0 +1 @@ +../../../../examples/dsl/async \ No newline at end of file diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/examples 
b/test_elasticsearch/test_dsl/test_integration/test_examples/examples new file mode 120000 index 000000000..ff15b4ebc --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/examples @@ -0,0 +1 @@ +../../../../examples/dsl \ No newline at end of file diff --git a/test_elasticsearch/test_dsl/test_package.py b/test_elasticsearch/test_dsl/test_package.py new file mode 100644 index 000000000..2e989baa1 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_package.py @@ -0,0 +1,22 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import elasticsearch.dsl + + +def test__all__is_sorted() -> None: + assert elasticsearch.dsl.__all__ == sorted(elasticsearch.dsl.__all__) diff --git a/test_elasticsearch/test_dsl/test_query.py b/test_elasticsearch/test_dsl/test_query.py new file mode 100644 index 000000000..c09f26b1a --- /dev/null +++ b/test_elasticsearch/test_dsl/test_query.py @@ -0,0 +1,671 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. 
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import raises + +from elasticsearch.dsl import function, query, utils + + +def test_empty_Q_is_match_all() -> None: + q = query.Q() + + assert isinstance(q, query.MatchAll) + assert query.MatchAll() == q + + +def test_combined_fields_to_dict() -> None: + assert { + "combined_fields": { + "query": "this is a test", + "fields": ["name", "body", "description"], + "operator": "and", + }, + } == query.CombinedFields( + query="this is a test", + fields=["name", "body", "description"], + operator="and", + ).to_dict() + + +def test_combined_fields_to_dict_extra() -> None: + assert { + "combined_fields": { + "query": "this is a test", + "fields": ["name", "body^2"], + "operator": "or", + }, + } == query.CombinedFields( + query="this is a test", + fields=["name", "body^2"], + operator="or", + ).to_dict() + + +def test_match_to_dict() -> None: + assert {"match": {"f": "value"}} == query.Match(f="value").to_dict() + + +def test_match_to_dict_extra() -> None: + assert {"match": {"f": "value", "boost": 2}} == query.Match( + f="value", boost=2 + ).to_dict() + + +def test_fuzzy_to_dict() -> None: + assert {"fuzzy": {"f": "value"}} == query.Fuzzy(f="value").to_dict() + + +def test_prefix_to_dict() -> None: + assert {"prefix": {"f": "value"}} == query.Prefix(f="value").to_dict() + + +def test_term_to_dict() -> None: + assert {"term": {"_type": "article"}} == query.Term(_type="article").to_dict() + + +def 
test_terms_to_dict() -> None: + assert {"terms": {"_type": ["article", "section"]}} == query.Terms( + _type=["article", "section"] + ).to_dict() + assert {"terms": {"_type": ["article", "section"], "boost": 1.1}} == query.Terms( + _type=("article", "section"), boost=1.1 + ).to_dict() + assert {"terms": {"_type": "article", "boost": 1.1}} == query.Terms( + _type="article", boost=1.1 + ).to_dict() + assert { + "terms": {"_id": {"index": "my-other-index", "id": "my-id"}, "boost": 1.1} + } == query.Terms( + _id={"index": "my-other-index", "id": "my-id"}, boost=1.1 + ).to_dict() + + +def test_bool_to_dict() -> None: + bool = query.Bool(must=[query.Match(f="value")], should=[]) + + assert {"bool": {"must": [{"match": {"f": "value"}}]}} == bool.to_dict() + + +def test_dismax_to_dict() -> None: + assert {"dis_max": {"queries": [{"term": {"_type": "article"}}]}} == query.DisMax( + queries=[query.Term(_type="article")] + ).to_dict() + + +def test_bool_from_dict_issue_318() -> None: + d = {"bool": {"must_not": {"match": {"field": "value"}}}} + q = query.Q(d) + + assert q == ~query.Match(field="value") + + +def test_repr() -> None: + bool = query.Bool(must=[query.Match(f="value")], should=[]) + + assert "Bool(must=[Match(f='value')])" == repr(bool) + + +def test_query_clone() -> None: + bool = query.Bool( + must=[query.Match(x=42)], + should=[query.Match(g="v2")], + must_not=[query.Match(title="value")], + ) + bool_clone = bool._clone() + + assert bool == bool_clone + assert bool is not bool_clone + + +def test_bool_converts_its_init_args_to_queries() -> None: + q = query.Bool(must=[{"match": {"f": "value"}}]) # type: ignore + + assert len(q.must) == 1 + assert q.must[0] == query.Match(f="value") + + +def test_two_queries_make_a_bool() -> None: + q1 = query.Match(f="value1") + q2 = query.Match(message={"query": "this is a test", "opeartor": "and"}) + q = q1 & q2 + + assert isinstance(q, query.Bool) + assert [q1, q2] == q.must + + +def 
test_other_and_bool_appends_other_to_must() -> None: + q1 = query.Match(f="value1") + qb = query.Bool() + + q = q1 & qb + assert q is not qb + assert q.must[0] == q1 + + +def test_bool_and_other_appends_other_to_must() -> None: + q1 = query.Match(f="value1") + qb = query.Bool() + + q = qb & q1 + assert q is not qb + assert q.must[0] == q1 + + +def test_bool_and_other_sets_min_should_match_if_needed() -> None: + q1 = query.Q("term", category=1) + q2 = query.Q( + "bool", should=[query.Q("term", name="aaa"), query.Q("term", name="bbb")] + ) + + q = q1 & q2 + assert q == query.Bool( + must=[q1], + should=[query.Q("term", name="aaa"), query.Q("term", name="bbb")], + minimum_should_match=1, + ) + + +def test_bool_with_different_minimum_should_match_should_not_be_combined() -> None: + q1 = query.Q( + "bool", + minimum_should_match=2, + should=[ + query.Q("term", field="aa1"), + query.Q("term", field="aa2"), + query.Q("term", field="aa3"), + query.Q("term", field="aa4"), + ], + ) + q2 = query.Q( + "bool", + minimum_should_match=3, + should=[ + query.Q("term", field="bb1"), + query.Q("term", field="bb2"), + query.Q("term", field="bb3"), + query.Q("term", field="bb4"), + ], + ) + q3 = query.Q( + "bool", + minimum_should_match=4, + should=[ + query.Q("term", field="cc1"), + query.Q("term", field="cc2"), + query.Q("term", field="cc3"), + query.Q("term", field="cc4"), + ], + ) + + q4 = q1 | q2 + assert q4 == query.Bool(should=[q1, q2]) + + q5 = q1 | q2 | q3 + assert q5 == query.Bool(should=[q1, q2, q3]) + + +def test_empty_bool_has_min_should_match_0() -> None: + assert 0 == query.Bool()._min_should_match + + +def test_query_and_query_creates_bool() -> None: + q1 = query.Match(f=42) + q2 = query.Match(g=47) + + q = q1 & q2 + assert isinstance(q, query.Bool) + assert q.must == [q1, q2] + + +def test_match_all_and_query_equals_other() -> None: + q1 = query.Match(f=42) + q2 = query.MatchAll() + + q = q1 & q2 + assert q1 == q + + +def test_not_match_all_is_match_none() -> None: + q 
= query.MatchAll() + + assert ~q == query.MatchNone() + + +def test_not_match_none_is_match_all() -> None: + q = query.MatchNone() + + assert ~q == query.MatchAll() + + +def test_invert_empty_bool_is_match_none() -> None: + q = query.Bool() + + assert ~q == query.MatchNone() + + +def test_match_none_or_query_equals_query() -> None: + q1 = query.Match(f=42) + q2 = query.MatchNone() + + assert q1 | q2 == query.Match(f=42) + + +def test_match_none_and_query_equals_match_none() -> None: + q1 = query.Match(f=42) + q2 = query.MatchNone() + + assert q1 & q2 == query.MatchNone() + + +def test_bool_and_bool() -> None: + qt1, qt2, qt3 = query.Match(f=1), query.Match(f=2), query.Match(f=3) + + q1 = query.Bool(must=[qt1], should=[qt2]) + q2 = query.Bool(must_not=[qt3]) + assert q1 & q2 == query.Bool( + must=[qt1], must_not=[qt3], should=[qt2], minimum_should_match=0 + ) + + q1 = query.Bool(must=[qt1], should=[qt1, qt2]) + q2 = query.Bool(should=[qt3]) + assert q1 & q2 == query.Bool( + must=[qt1, qt3], should=[qt1, qt2], minimum_should_match=0 + ) + + +def test_bool_and_bool_with_min_should_match() -> None: + qt1, qt2 = query.Match(f=1), query.Match(f=2) + q1 = query.Q("bool", minimum_should_match=1, should=[qt1]) + q2 = query.Q("bool", minimum_should_match=1, should=[qt2]) + + assert query.Q("bool", must=[qt1, qt2]) == q1 & q2 + + +def test_negative_min_should_match() -> None: + qt1, qt2 = query.Match(f=1), query.Match(f=2) + q1 = query.Q("bool", minimum_should_match=-2, should=[qt1]) + q2 = query.Q("bool", minimum_should_match=1, should=[qt2]) + + with raises(ValueError): + q1 & q2 + with raises(ValueError): + q2 & q1 + + +def test_percentage_min_should_match() -> None: + qt1, qt2 = query.Match(f=1), query.Match(f=2) + q1 = query.Q("bool", minimum_should_match="50%", should=[qt1]) + q2 = query.Q("bool", minimum_should_match=1, should=[qt2]) + + with raises(ValueError): + q1 & q2 + with raises(ValueError): + q2 & q1 + + +def test_inverted_query_becomes_bool_with_must_not() -> 
None: + q = query.Match(f=42) + + assert ~q == query.Bool(must_not=[query.Match(f=42)]) + + +def test_inverted_query_with_must_not_become_should() -> None: + q = query.Q("bool", must_not=[query.Q("match", f=1), query.Q("match", f=2)]) + + assert ~q == query.Q("bool", should=[query.Q("match", f=1), query.Q("match", f=2)]) + + +def test_inverted_query_with_must_and_must_not() -> None: + q = query.Q( + "bool", + must=[query.Q("match", f=3), query.Q("match", f=4)], + must_not=[query.Q("match", f=1), query.Q("match", f=2)], + ) + print((~q).to_dict()) + assert ~q == query.Q( + "bool", + should=[ + # negation of must + query.Q("bool", must_not=[query.Q("match", f=3)]), + query.Q("bool", must_not=[query.Q("match", f=4)]), + # negation of must_not + query.Q("match", f=1), + query.Q("match", f=2), + ], + ) + + +def test_double_invert_returns_original_query() -> None: + q = query.Match(f=42) + + assert q == ~~q + + +def test_bool_query_gets_inverted_internally() -> None: + q = query.Bool(must_not=[query.Match(f=42)], must=[query.Match(g="v")]) + + assert ~q == query.Bool( + should=[ + # negating must + query.Bool(must_not=[query.Match(g="v")]), + # negating must_not + query.Match(f=42), + ] + ) + + +def test_match_all_or_something_is_match_all() -> None: + q1 = query.MatchAll() + q2 = query.Match(f=42) + + assert (q1 | q2) == query.MatchAll() + assert (q2 | q1) == query.MatchAll() + + +def test_or_produces_bool_with_should() -> None: + q1 = query.Match(f=42) + q2 = query.Match(g="v") + + q = q1 | q2 + assert q == query.Bool(should=[q1, q2]) + + +def test_or_bool_doesnt_loop_infinitely_issue_37() -> None: + q = query.Match(f=42) | ~query.Match(f=47) + + assert q == query.Bool( + should=[query.Bool(must_not=[query.Match(f=47)]), query.Match(f=42)] + ) + + +def test_or_bool_doesnt_loop_infinitely_issue_96() -> None: + q = ~query.Match(f=42) | ~query.Match(f=47) + + assert q == query.Bool( + should=[ + query.Bool(must_not=[query.Match(f=42)]), + 
query.Bool(must_not=[query.Match(f=47)]), + ] + ) + + +def test_bool_will_append_another_query_with_or() -> None: + qb = query.Bool(should=[query.Match(f="v"), query.Match(f="v2")]) + q = query.Match(g=42) + + assert (q | qb) == query.Bool(should=[query.Match(f="v"), query.Match(f="v2"), q]) + + +def test_bool_queries_with_only_should_get_concatenated() -> None: + q1 = query.Bool(should=[query.Match(f=1), query.Match(f=2)]) + q2 = query.Bool(should=[query.Match(f=3), query.Match(f=4)]) + + assert (q1 | q2) == query.Bool( + should=[query.Match(f=1), query.Match(f=2), query.Match(f=3), query.Match(f=4)] + ) + + +def test_two_bool_queries_append_one_to_should_if_possible() -> None: + q1 = query.Bool(should=[query.Match(f="v")]) + q2 = query.Bool(must=[query.Match(f="v")]) + + assert (q1 | q2) == query.Bool( + should=[query.Match(f="v"), query.Bool(must=[query.Match(f="v")])] + ) + assert (q2 | q1) == query.Bool( + should=[query.Match(f="v"), query.Bool(must=[query.Match(f="v")])] + ) + + +def test_queries_are_registered() -> None: + assert "match" in query.Query._classes + assert query.Query._classes["match"] is query.Match + + +def test_defining_query_registers_it() -> None: + class MyQuery(query.Query): + name = "my_query" + + assert "my_query" in query.Query._classes + assert query.Query._classes["my_query"] is MyQuery + + +def test_Q_passes_query_through() -> None: + q = query.Match(f="value1") + + assert query.Q(q) is q + + +def test_Q_constructs_query_by_name() -> None: + q = query.Q("match", f="value") + + assert isinstance(q, query.Match) + assert {"f": "value"} == q._params + + +def test_Q_translates_double_underscore_to_dots_in_param_names() -> None: + q = query.Q("match", comment__author="honza") + + assert {"comment.author": "honza"} == q._params + + +def test_Q_doesn_translate_double_underscore_to_dots_in_param_names() -> None: + q = query.Q("match", comment__author="honza", _expand__to_dot=False) + + assert {"comment__author": "honza"} == q._params + + 
def test_Q_constructs_simple_query_from_dict() -> None:
    """Q() given a single-key dict builds the query class named by that key."""
    q = query.Q({"match": {"f": "value"}})

    assert isinstance(q, query.Match)
    assert {"f": "value"} == q._params


def test_Q_constructs_compound_query_from_dict() -> None:
    """Nested query dicts inside a bool dict are converted to query objects."""
    q = query.Q({"bool": {"must": [{"match": {"f": "value"}}]}})

    assert q == query.Bool(must=[query.Match(f="value")])


def test_Q_raises_error_when_passed_in_dict_and_params() -> None:
    """Passing both a dict and keyword params to Q() is rejected."""
    with raises(Exception):
        # Ignore types as it's not a valid call
        query.Q({"match": {"f": "value"}}, f="value")  # type: ignore[call-overload]


def test_Q_raises_error_when_passed_in_query_and_params() -> None:
    """Passing both a query object and keyword params to Q() is rejected."""
    q = query.Match(f="value1")

    with raises(Exception):
        # Ignore types as it's not a valid call signature
        query.Q(q, f="value")  # type: ignore[call-overload]


def test_Q_raises_error_on_unknown_query() -> None:
    """An unregistered query name makes Q() raise."""
    with raises(Exception):
        query.Q("not a query", f="value")


def test_match_all_and_anything_is_anything() -> None:
    """AND-ing MatchAll with another query yields that other query."""
    q = query.MatchAll()

    s = query.Match(f=42)
    assert q & s == s
    assert s & q == s


def test_function_score_with_functions() -> None:
    """SF objects in `functions` serialize to their dict form."""
    q = query.Q(
        "function_score",
        functions=[query.SF("script_score", script="doc['comment_count'] * _score")],
    )

    assert {
        "function_score": {
            "functions": [{"script_score": {"script": "doc['comment_count'] * _score"}}]
        }
    } == q.to_dict()


def test_function_score_with_no_function_is_boost_factor() -> None:
    """An SF built from a weight/filter dict serializes filter and weight."""
    q = query.Q(
        "function_score",
        functions=[query.SF({"weight": 20, "filter": query.Q("term", f=42)})],
    )

    assert {
        "function_score": {"functions": [{"filter": {"term": {"f": 42}}, "weight": 20}]}
    } == q.to_dict()


def test_function_score_to_dict() -> None:
    """A function_score built through Q()/SF() serializes query and functions."""
    q = query.Q(
        "function_score",
        query=query.Q("match", title="python"),
        functions=[
            query.SF("random_score"),
            query.SF(
                "field_value_factor",
                field="comment_count",
                filter=query.Q("term", tags="python"),
            ),
        ],
    )

    d = {
        "function_score": {
            "query": {"match": {"title": "python"}},
            "functions": [
                {"random_score": {}},
                {
                    "filter": {"term": {"tags": "python"}},
                    "field_value_factor": {"field": "comment_count"},
                },
            ],
        }
    }
    assert d == q.to_dict()


def test_function_score_class_based_to_dict() -> None:
    """The class-based spelling serializes to the same dict as the Q()/SF() one."""
    q = query.FunctionScore(
        query=query.Match(title="python"),
        functions=[
            function.RandomScore(),
            function.FieldValueFactor(
                field="comment_count",
                filter=query.Term(tags="python"),
            ),
        ],
    )

    d = {
        "function_score": {
            "query": {"match": {"title": "python"}},
            "functions": [
                {"random_score": {}},
                {
                    "filter": {"term": {"tags": "python"}},
                    "field_value_factor": {"field": "comment_count"},
                },
            ],
        }
    }
    assert d == q.to_dict()


def test_function_score_with_single_function() -> None:
    """A top-level score function in the dict becomes the single `functions` entry."""
    d = {
        "function_score": {
            "filter": {"term": {"tags": "python"}},
            "script_score": {"script": "doc['comment_count'] * _score"},
        }
    }

    q = query.Q(d)
    assert isinstance(q, query.FunctionScore)
    assert isinstance(q.filter, query.Term)
    assert len(q.functions) == 1

    sf = q.functions[0]
    assert isinstance(sf, function.ScriptScore)
    assert "doc['comment_count'] * _score" == sf.script


def test_function_score_from_dict() -> None:
    """Each dict in `functions` is converted to its score-function class."""
    d = {
        "function_score": {
            "filter": {"term": {"tags": "python"}},
            "functions": [
                {
                    "filter": {"terms": {"tags": "python"}},
                    "script_score": {"script": "doc['comment_count'] * _score"},
                },
                {"boost_factor": 6},
            ],
        }
    }

    q = query.Q(d)
    assert isinstance(q, query.FunctionScore)
    assert isinstance(q.filter, query.Term)
    assert len(q.functions) == 2

    sf = q.functions[0]
    assert isinstance(sf, function.ScriptScore)
    assert isinstance(sf.filter, query.Terms)

    sf = q.functions[1]
    assert isinstance(sf, function.BoostFactor)
    assert 6 == sf.value
    assert {"boost_factor": 6} == sf.to_dict()


def test_script_score() -> None:
    """A script_score dict round-trips through Q() and to_dict()."""
    d = {
        "script_score": {
            "query": {"match_all": {}},
            "script": {"source": "...", "params": {}},
        }
    }
    q = query.Q(d)

    assert isinstance(q, query.ScriptScore)
    assert isinstance(q.query, query.MatchAll)
    assert q.script == {"source": "...", "params": {}}
    assert q.to_dict() == d


def test_expand_double_underscore_to_dot_setting() -> None:
    """`utils.EXPAND__TO_DOT` controls whether `__` in param names becomes `.`."""
    q = query.Term(comment__count=2)
    assert q.to_dict() == {"term": {"comment.count": 2}}

    utils.EXPAND__TO_DOT = False
    try:
        q = query.Term(comment__count=2)
        assert q.to_dict() == {"term": {"comment__count": 2}}
    finally:
        # The flag is module-global: restore it even when the assertion above
        # fails, otherwise every later test would run with expansion disabled.
        utils.EXPAND__TO_DOT = True


def test_knn_query() -> None:
    """Knn serializes its field, query vector and candidate count unchanged."""
    q = query.Knn(field="image-vector", query_vector=[-5, 9, -12], num_candidates=10)
    assert q.to_dict() == {
        "knn": {
            "field": "image-vector",
            "query_vector": [-5, 9, -12],
            "num_candidates": 10,
        }
    }


# --- patch metadata retained from the flattened diff (next file starts here) ---
# diff --git a/test_elasticsearch/test_dsl/test_result.py b/test_elasticsearch/test_dsl/test_result.py
# new file mode 100644
# index 000000000..46707c715
# --- /dev/null
# +++ b/test_elasticsearch/test_dsl/test_result.py
# @@ -0,0 +1,215 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pickle
from datetime import date
from typing import Any, Dict

from pytest import fixture, raises

from elasticsearch.dsl import Date, Document, Object, Search, response
from elasticsearch.dsl.aggs import Terms
from elasticsearch.dsl.response.aggs import AggResponse, Bucket, BucketData
from elasticsearch.dsl.utils import AttrDict


@fixture
def agg_response(aggs_search: Search, aggs_data: Dict[str, Any]) -> response.Response:
    """Response built from the `aggs_search` and `aggs_data` fixtures."""
    return response.Response(aggs_search, aggs_data)


def test_agg_response_is_pickleable(agg_response: response.Response) -> None:
    """A response with aggregations survives a pickle round trip."""
    # Touch .hits before pickling; presumably this populates lazily built
    # state so that it is part of what gets pickled -- TODO confirm.
    agg_response.hits
    r = pickle.loads(pickle.dumps(agg_response))

    assert r == agg_response
    assert r._search == agg_response._search
    assert r.hits == agg_response.hits


def test_response_is_pickleable(dummy_response: Dict[str, Any]) -> None:
    """A plain response survives a pickle round trip."""
    # NOTE(review): the fixture is annotated as a dict yet `.body` is read
    # (hence the type-ignore); confirm what object the fixture really yields.
    res = response.Response(Search(), dummy_response.body)  # type: ignore[attr-defined]
    res.hits
    r = pickle.loads(pickle.dumps(res))

    assert r == res
    assert r._search == res._search
    assert r.hits == res.hits


def test_hit_is_pickleable(dummy_response: Dict[str, Any]) -> None:
    """The hits container, including per-hit meta, survives pickling."""
    res = response.Response(Search(), dummy_response)
    hits = pickle.loads(pickle.dumps(res.hits))

    assert hits == res.hits
    assert hits[0].meta == res.hits[0].meta


def test_response_stores_search(dummy_response: Dict[str, Any]) -> None:
    """The response keeps a reference to the very Search object it was given."""
    s = Search()
    r = response.Response(s, dummy_response)

    assert r._search is s


def test_attribute_error_in_hits_is_not_hidden(dummy_response: Dict[str, Any]) -> None:
    """An AttributeError raised by a doc_type callback surfaces as TypeError."""

    def f(hit: AttrDict[Any]) -> Any:
        raise AttributeError()

    s = Search().doc_type(employee=f)
    r = response.Response(s, dummy_response)
    with raises(TypeError):
        r.hits


def test_interactive_helpers(dummy_response: Dict[str, Any]) -> None:
    """repr() and dir() of the response, the hits list and a single hit."""
    res = response.Response(Search(), dummy_response)
    hits = res.hits
    h = hits[0]

    # NOTE(review): the expected-repr literals in this test had lost their
    # angle-bracketed text (they read "[, , " / ", ]", f"" and "" % ...), which
    # left .format() with three arguments and no placeholders. They are
    # reconstructed here from the library's "<Hit(index/id): ...>" and
    # "<Response: ...>" repr format; confirm the ids and the 60-character
    # truncation suffix against the dummy_response fixture before merging.
    rhits = (
        "[<Hit(test-index/elasticsearch): {}>, <Hit(test-index/42): {}...}}>, "
        "<Hit(test-index/47): {}...}}>, <Hit(test-index/53): {{}}>]"
    ).format(
        repr(dummy_response["hits"]["hits"][0]["_source"]),
        repr(dummy_response["hits"]["hits"][1]["_source"])[:60],
        repr(dummy_response["hits"]["hits"][2]["_source"])[:60],
    )

    assert res
    assert f"<Response: {rhits}>" == repr(res)
    assert rhits == repr(hits)
    assert {"meta", "city", "name"} == set(dir(h))
    assert "<Hit(test-index/elasticsearch): %r>" % dummy_response["hits"]["hits"][0][
        "_source"
    ] == repr(h)


def test_empty_response_is_false(dummy_response: Dict[str, Any]) -> None:
    """A response without hits is falsy."""
    dummy_response["hits"]["hits"] = []
    res = response.Response(Search(), dummy_response)

    assert not res


def test_len_response(dummy_response: Dict[str, Any]) -> None:
    """len() of the response is the number of hits in the fixture (4)."""
    res = response.Response(Search(), dummy_response)
    assert len(res) == 4


def test_iterating_over_response_gives_you_hits(dummy_response: Dict[str, Any]) -> None:
    """Iterating the response yields Hit objects carrying their meta data."""
    res = response.Response(Search(), dummy_response)
    hits = [h for h in res]

    assert res.success()
    assert 123 == res.took
    assert 4 == len(hits)
    assert all(isinstance(h, response.Hit) for h in hits)
    h = hits[0]

    assert "test-index" == h.meta.index
    assert "company" == h.meta.doc_type
    assert "elasticsearch" == h.meta.id
    assert 12 == h.meta.score

    assert hits[1].meta.routing == "elasticsearch"


def test_hits_get_wrapped_to_contain_additional_attrs(
    dummy_response: Dict[str, Any]
) -> None:
    """The hits container exposes `total` and `max_score`."""
    res = response.Response(Search(), dummy_response)
    hits = res.hits

    assert 123 == hits.total  # type: ignore[attr-defined]
    assert 12.0 == hits.max_score  # type: ignore[attr-defined]


def test_hits_provide_dot_and_bracket_access_to_attrs(
    dummy_response: Dict[str, Any]
) -> None:
    """Hit fields are reachable by attribute and by key; misses raise."""
    res = response.Response(Search(), dummy_response)
    h = res.hits[0]

    assert "Elasticsearch" == h.name
    assert "Elasticsearch" == h["name"]

    assert "Honza" == res.hits[2].name.first

    with raises(KeyError):
        h["not_there"]

    with raises(AttributeError):
        h.not_there


def test_slicing_on_response_slices_on_hits(dummy_response: Dict[str, Any]) -> None:
    """Indexing and slicing the response delegate to its hits."""
    res = response.Response(Search(), dummy_response)

    assert res[0] is res.hits[0]
    assert res[::-1] == res.hits[::-1]


def test_aggregation_base(agg_response: response.Response) -> None:
    """`aggs` and `aggregations` are the same AggResponse object."""
    assert agg_response.aggs is agg_response.aggregations
    assert isinstance(agg_response.aggs, response.AggResponse)


def test_metric_agg_works(agg_response: response.Response) -> None:
    """A metric aggregation exposes its `value`."""
    assert 25052.0 == agg_response.aggs.sum_lines.value


def test_aggregations_can_be_iterated_over(agg_response: response.Response) -> None:
    """Iterating `aggs` yields three AggResponse items for the fixture data."""
    aggs = [a for a in agg_response.aggs]

    assert len(aggs) == 3
    assert all(isinstance(a, AggResponse) for a in aggs)


def test_aggregations_can_be_retrieved_by_name(
    agg_response: response.Response, aggs_search: Search
) -> None:
    """Key access returns bucket data linked to the aggregation that made it."""
    a = agg_response.aggs["popular_files"]

    assert isinstance(a, BucketData)
    assert isinstance(a._meta["aggs"], Terms)
    assert a._meta["aggs"] is aggs_search.aggs.aggs["popular_files"]


def test_bucket_response_can_be_iterated_over(agg_response: response.Response) -> None:
    """Iterating a bucket aggregation yields the entries of `.buckets`."""
    popular_files = agg_response.aggregations.popular_files

    buckets = [b for b in popular_files]
    assert all(isinstance(b, Bucket) for b in buckets)
    assert buckets == popular_files.buckets


def test_bucket_keys_get_deserialized(
    aggs_data: Dict[str, Any], aggs_search: Search
) -> None:
    """With a document type attached, `per_month` bucket keys are `date` objects."""

    class Commit(Document):
        info = Object(properties={"committed_date": Date()})

        class Index:
            name = "test-commit"

    # Distinct local names: the original reused `agg_response`, which shadows
    # the module-level fixture of the same name.
    search = aggs_search.doc_type(Commit)
    resp = response.Response(search, aggs_data)

    per_month = resp.aggregations.per_month
    for b in per_month:
        assert isinstance(b.key, date)


# --- patch metadata retained from the flattened diff (next file starts here) ---
# diff --git a/test_elasticsearch/test_dsl/test_utils.py b/test_elasticsearch/test_dsl/test_utils.py
# new file mode 100644
# index 000000000..ac4d6df6e
# --- /dev/null
# +++ b/test_elasticsearch/test_dsl/test_utils.py
# @@ -0,0 +1,136 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements.
# See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pickle
from typing import Any, Dict, Tuple

from pytest import raises

from elasticsearch.dsl import Q, serializer, utils


def test_attrdict_pickle() -> None:
    """An empty AttrDict equals its pickle round trip."""
    original: utils.AttrDict[str] = utils.AttrDict({})

    payload = pickle.dumps(original)
    assert original == pickle.loads(payload)


def test_attrlist_pickle() -> None:
    """An empty AttrList equals its pickle round trip."""
    original = utils.AttrList[Any]([])

    payload = pickle.dumps(original)
    assert original == pickle.loads(payload)


def test_attrlist_slice() -> None:
    """Items of a sliced AttrList are still wrapped with the custom obj_wrapper."""

    class MyAttrDict(utils.AttrDict[str]):
        pass

    wrapped = utils.AttrList[Any]([{}, {}], obj_wrapper=MyAttrDict)
    assert isinstance(wrapped[:][0], MyAttrDict)


def test_attrlist_with_type_argument() -> None:
    """A subscripted AttrList type can be instantiated and iterated."""
    letters = utils.AttrList[str](["a", "b"])
    assert list(letters) == ["a", "b"]


def test_attrdict_keys_items() -> None:
    """keys() and items() expose the underlying data in order."""
    mapping = utils.AttrDict({"a": {"b": 42, "c": 47}, "d": "e"})
    assert list(mapping.keys()) == ["a", "d"]
    assert list(mapping.items()) == [("a", {"b": 42, "c": 47}), ("d", "e")]


def test_attrdict_with_type_argument() -> None:
    """A subscripted AttrDict type can be instantiated and inspected."""
    mapping = utils.AttrDict[str]({"a": "b"})
    assert list(mapping.keys()) == ["a"]
    assert list(mapping.items()) == [("a", "b")]


def test_merge() -> None:
    """merge() updates nested dicts key by key and adds new top-level keys."""
    target: utils.AttrDict[Any] = utils.AttrDict({"a": {"b": 42, "c": 47}})
    update = {"a": {"b": 123, "d": -12}, "e": [1, 2, 3]}

    utils.merge(target, update)

    assert target == {"a": {"b": 123, "c": 47, "d": -12}, "e": [1, 2, 3]}


def test_merge_conflict() -> None:
    """Conflicting values merge silently by default but raise with the flag set."""
    conflicting: Tuple[Dict[str, Any], ...] = (
        {"a": 42},
        {"a": {"b": 47}},
    )
    for candidate in conflicting:
        utils.merge({"a": {"b": 42}}, candidate)
        with raises(ValueError):
            # Third positional argument is presumably the raise-on-conflict
            # switch -- TODO confirm against utils.merge's signature.
            utils.merge({"a": {"b": 42}}, candidate, True)


def test_attrdict_bool() -> None:
    """An AttrDict is falsy while empty and truthy once an attribute is set."""
    doc: utils.AttrDict[str] = utils.AttrDict({})

    assert not doc
    doc.title = "Title"
    assert doc


def test_attrlist_items_get_wrapped_during_iteration() -> None:
    """Iteration wraps nested lists as AttrList and nested dicts as AttrDict."""
    source = utils.AttrList([1, object(), [1], {}])

    items = list(iter(source))

    assert isinstance(items[2], utils.AttrList)
    assert isinstance(items[3], utils.AttrDict)


def test_serializer_deals_with_Attr_versions() -> None:
    """AttrDict/AttrList serialize exactly like the plain dict/list they wrap."""
    wrapped = utils.AttrDict({"key": utils.AttrList([1, 2, 3])})
    plain = {"key": [1, 2, 3]}

    assert serializer.serializer.dumps(wrapped) == serializer.serializer.dumps(plain)


def test_serializer_deals_with_objects_with_to_dict() -> None:
    """Objects exposing to_dict() are serialized through its return value."""

    class MyClass:
        def to_dict(self) -> int:
            return 42

    assert serializer.serializer.dumps(MyClass()) == b"42"


def test_recursive_to_dict() -> None:
    """Query objects nested inside lists, tuples and dicts are converted."""
    assert utils.recursive_to_dict({"k": [1, (1.0, {"v": Q("match", key="val")})]}) == {
        "k": [1, (1.0, {"v": {"match": {"key": "val"}}})]
    }


def test_attrlist_to_list() -> None:
    """to_list() returns a real list equal to the wrapped data."""
    plain = utils.AttrList[Any]([{}, {}]).to_list()
    assert isinstance(plain, list)
    assert plain == [{}, {}]


def test_attrdict_with_reserved_keyword() -> None:
    """The key "from" is read and written through the attribute `from_`."""
    paging = utils.AttrDict({"from": 10, "size": 20})
    assert paging.from_ == 10
    assert paging.size == 20
    paging = utils.AttrDict({})
    paging.from_ = 10
    assert {"from": 10} == paging.to_dict()


# --- patch metadata retained from the flattened diff (next file starts here) ---
# diff --git a/test_elasticsearch/test_dsl/test_validation.py b/test_elasticsearch/test_dsl/test_validation.py
# new file mode 100644
# index 000000000..e14550eba
# --- /dev/null
# +++ b/test_elasticsearch/test_dsl/test_validation.py
# @@ -0,0 +1,162 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements.
# See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import datetime
from typing import Any

from pytest import raises

from elasticsearch.dsl import (
    Date,
    Document,
    InnerDoc,
    Integer,
    Nested,
    Object,
    Text,
    mapped_field,
)
from elasticsearch.dsl.exceptions import ValidationException


class Author(InnerDoc):
    """Inner document whose clean() enforces name, email, and name-in-email."""

    name: str
    email: str

    def clean(self) -> None:
        """Raise ValidationException for a missing name/email or a mismatched email."""
        if not self.name:
            raise ValidationException("name is missing")
        if not self.email:
            raise ValidationException("email is missing")
        # Reached only when both fields are set: the lower-cased name must
        # occur somewhere in the email address.
        if self.name.lower() not in self.email:
            raise ValidationException("Invalid email!")


class BlogPost(Document):
    """Document with a required nested author list, a date and a free-form object."""

    authors = Nested(Author, required=True)
    created = Date()
    inner = Object()


class BlogPostWithStatus(Document):
    """Document with a single annotated boolean field."""

    published: bool = mapped_field(init=False)


class AutoNowDate(Date):
    """Date field that substitutes the current time for a missing value."""

    def clean(self, data: Any) -> Any:
        """Replace None with datetime.now(), then defer to Date.clean()."""
        value = datetime.now() if data is None else data
        return super().clean(value)


class Log(Document):
    """Document whose required timestamp fills itself in during cleaning."""

    timestamp = AutoNowDate(required=True)
    data = Text()


def test_required_int_can_be_0() -> None:
    """Zero satisfies a required Integer field."""

    class DT(Document):
        i = Integer(required=True)

    doc = DT(i=0)
    doc.full_clean()


def test_required_field_cannot_be_empty_list() -> None:
    """An empty list does not satisfy a required field."""

    class DT(Document):
        i = Integer(required=True)

    doc = DT(i=[])
    with raises(ValidationException):
        doc.full_clean()


def test_validation_works_for_lists_of_values() -> None:
    """Every element of a list value is validated."""

    class DT(Document):
        i = Date(required=True)

    doc = DT(i=[datetime.now(), "not date"])
    with raises(ValidationException):
        doc.full_clean()

    doc = DT(i=[datetime.now(), datetime.now()])
    doc.full_clean()


def test_field_with_custom_clean() -> None:
    """A field's custom clean() can supply a value during full_clean()."""
    entry = Log()
    entry.full_clean()

    assert isinstance(entry.timestamp, datetime)


def test_empty_object() -> None:
    """An empty dict assigned to an Object field passes validation."""
    post = BlogPost(authors=[{"name": "Honza", "email": "honza@elastic.co"}])
    post.inner = {}  # type: ignore[assignment]

    post.full_clean()


def test_missing_required_field_raises_validation_exception() -> None:
    """Validation fails without authors or with an incomplete author."""
    post = BlogPost()
    with raises(ValidationException):
        post.full_clean()

    post = BlogPost()
    post.authors.append({"name": "Honza"})
    with raises(ValidationException):
        post.full_clean()

    post = BlogPost()
    post.authors.append({"name": "Honza", "email": "honza@elastic.co"})
    post.full_clean()


def test_boolean_doesnt_treat_false_as_empty() -> None:
    """An unset boolean fails validation; False and True both pass."""
    post = BlogPostWithStatus()
    with raises(ValidationException):
        post.full_clean()
    post.published = False
    post.full_clean()
    post.published = True
    post.full_clean()


def test_custom_validation_on_nested_gets_run() -> None:
    """Author.clean() runs for nested authors during full_clean()."""
    post = BlogPost(
        authors=[Author(name="Honza", email="king@example.com")], created=None
    )

    assert isinstance(post.authors[0], Author)  # type: ignore[index]

    with raises(ValidationException):
        post.full_clean()


def test_accessing_known_fields_returns_empty_value() -> None:
    """Unset declared fields read as empty values instead of raising."""
    post = BlogPost()

    assert [] == post.authors

    post.authors.append({})
    assert None is post.authors[0].name  # type: ignore[index]
    assert None is post.authors[0].email


def test_empty_values_are_not_serialized() -> None:
    """Fields left at None are omitted from to_dict()."""
    post = BlogPost(
        authors=[{"name": "Honza", "email": "honza@elastic.co"}], created=None
    )

    post.full_clean()
    assert post.to_dict() == {
        "authors": [{"name": "Honza", "email": "honza@elastic.co"}]
    }


# --- patch metadata retained from the flattened diff (next file starts here) ---
# diff --git a/test_elasticsearch/test_dsl/test_wrappers.py
# b/test_elasticsearch/test_dsl/test_wrappers.py
# new file mode 100644
# index 000000000..8af6652a8
# --- /dev/null
# +++ b/test_elasticsearch/test_dsl/test_wrappers.py
# @@ -0,0 +1,111 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence

if TYPE_CHECKING:
    from _operator import _SupportsComparison

import pytest

from elasticsearch.dsl import Range


@pytest.mark.parametrize(
    "kwargs, item",
    [
        ({}, 1),
        ({}, -1),
        ({"gte": -1}, -1),
        ({"lte": 4}, 4),
        ({"lte": 4, "gte": 2}, 4),
        ({"lte": 4, "gte": 2}, 2),
        ({"gt": datetime.now() - timedelta(seconds=10)}, datetime.now()),
    ],
)
def test_range_contains(
    kwargs: Mapping[str, "_SupportsComparison"], item: "_SupportsComparison"
) -> None:
    """`item in Range(**kwargs)` holds for values inside the bounds."""
    assert item in Range(**kwargs)


@pytest.mark.parametrize(
    "kwargs, item",
    [
        ({"gt": -1}, -1),
        ({"lt": 4}, 4),
        ({"lt": 4}, 42),
        ({"lte": 4, "gte": 2}, 1),
        ({"lte": datetime.now() - timedelta(seconds=10)}, datetime.now()),
    ],
)
def test_range_not_contains(
    kwargs: Mapping[str, "_SupportsComparison"], item: "_SupportsComparison"
) -> None:
    """`item in Range(**kwargs)` is false on or beyond an exclusive bound."""
    assert item not in Range(**kwargs)


@pytest.mark.parametrize(
    "args,kwargs",
    [
        (({},), {"lt": 42}),
        ((), {"not_lt": 42}),
        ((object(),), {}),
        ((), {"lt": 1, "lte": 1}),
        ((), {"gt": 1, "gte": 1}),
    ],
)
def test_range_raises_value_error_on_wrong_params(
    args: Sequence[Any], kwargs: Mapping[str, "_SupportsComparison"]
) -> None:
    """Each listed argument combination makes Range() raise ValueError."""
    with pytest.raises(ValueError):
        Range(*args, **kwargs)


# The parametrized argument is spelled `range_` (not `range`) in the two tests
# below so the test bodies do not shadow the builtin.
@pytest.mark.parametrize(
    "range_,lower,inclusive",
    [
        (Range(gt=1), 1, False),
        (Range(gte=1), 1, True),
        (Range(), None, False),
        (Range(lt=42), None, False),
    ],
)
def test_range_lower(
    range_: Range["_SupportsComparison"],
    lower: Optional["_SupportsComparison"],
    inclusive: bool,
) -> None:
    """`Range.lower` is a (bound, inclusive) pair; (None, False) when unset."""
    assert (lower, inclusive) == range_.lower


@pytest.mark.parametrize(
    "range_,upper,inclusive",
    [
        (Range(lt=1), 1, False),
        (Range(lte=1), 1, True),
        (Range(), None, False),
        (Range(gt=42), None, False),
    ],
)
def test_range_upper(
    range_: Range["_SupportsComparison"],
    upper: Optional["_SupportsComparison"],
    inclusive: bool,
) -> None:
    """`Range.upper` is a (bound, inclusive) pair; (None, False) when unset."""
    assert (upper, inclusive) == range_.upper