Skip to content

Commit

Permalink
Fix lucene support
Browse files Browse the repository at this point in the history
  • Loading branch information
willholley committed Oct 26, 2023
1 parent c700f12 commit cdfb26d
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 77 deletions.
136 changes: 72 additions & 64 deletions src/docs/src/api/database/find.rst
Original file line number Diff line number Diff line change
Expand Up @@ -673,68 +673,74 @@ In addition, some 'meta' condition operators are available. Some condition
operators accept any valid JSON content as the argument. Other condition
operators require the argument to be in a specific JSON format.

+---------------+-------------+------------+-----------------------------------+
| Operator type | Operator | Argument | Purpose |
+===============+=============+============+===================================+
| (In)equality | ``$lt`` | Any JSON | The field is less than the |
| | | | argument. |
+---------------+-------------+------------+-----------------------------------+
| | ``$lte`` | Any JSON | The field is less than or equal to|
| | | | the argument. |
+---------------+-------------+------------+-----------------------------------+
| | ``$eq`` | Any JSON | The field is equal to the argument|
+---------------+-------------+------------+-----------------------------------+
| | ``$ne`` | Any JSON | The field is not equal to the |
| | | | argument. |
+---------------+-------------+------------+-----------------------------------+
| | ``$gte`` | Any JSON | The field is greater than or equal|
| | | | to the argument. |
+---------------+-------------+------------+-----------------------------------+
| | ``$gt`` | Any JSON | The field is greater than the |
| | | | argument. |
+---------------+-------------+------------+-----------------------------------+
| Object | ``$exists`` | Boolean | Check whether the field exists or |
| | | | not, regardless of its value. |
+---------------+-------------+------------+-----------------------------------+
| | ``$type`` | String | Check the document field's type. |
| | | | Valid values are ``"null"``, |
| | | | ``"boolean"``, ``"number"``, |
| | | | ``"string"``, ``"array"``, and |
| | | | ``"object"``. |
+---------------+-------------+------------+-----------------------------------+
| Array | ``$in`` | Array of | The document field must exist in |
| | | JSON values| the list provided. |
+---------------+-------------+------------+-----------------------------------+
| | ``$nin`` | Array of | The document field must not exist |
| | | JSON values| in the list provided. |
+---------------+-------------+------------+-----------------------------------+
| | ``$size`` | Integer | Special condition to match the |
| | | | length of an array field in a |
| | | | document. Non-array fields cannot |
| | | | match this condition. |
+---------------+-------------+------------+-----------------------------------+
| Miscellaneous | ``$mod`` | [Divisor, | Divisor is a non-zero integer, |
| | | Remainder] | Remainder is any integer. |
| | | | Non-integer values result in a |
| | | | 404. Matches documents where |
| | | | ``field % Divisor == Remainder`` |
| | | | is true, and only when the |
| | | | document field is an integer. |
+---------------+-------------+------------+-----------------------------------+
| | ``$regex`` | String | A regular expression pattern to |
| | | | match against the document field. |
| | | | Only matches when the field is a |
| | | | string value and matches the |
| | | | supplied regular expression. The |
| | | | matching algorithms are based on |
| | | | the Perl Compatible Regular |
| | | | Expression (PCRE) library. For |
| | | | more information about what is |
| | | | implemented, see the |
| | | | `Erlang Regular Expression |
| | | | <http://erlang.org/doc |
| | | | /man/re.html>`_. |
+---------------+-------------+------------+-----------------------------------+
+---------------+-----------------+-------------+------------------------------------+
| Operator type | Operator | Argument | Purpose |
+===============+=================+=============+====================================+
| (In)equality | ``$lt`` | Any JSON | The field is less than the |
| | | | argument. |
+---------------+-----------------+-------------+------------------------------------+
| | ``$lte`` | Any JSON | The field is less than or equal to |
| | | | the argument. |
+---------------+-----------------+-------------+------------------------------------+
| | ``$eq`` | Any JSON | The field is equal to the argument.|
+---------------+-----------------+-------------+------------------------------------+
| | ``$ne`` | Any JSON | The field is not equal to the |
| | | | argument. |
+---------------+-----------------+-------------+------------------------------------+
| | ``$gte`` | Any JSON | The field is greater than or equal |
| | | | to the argument. |
+---------------+-----------------+-------------+------------------------------------+
| | ``$gt`` | Any JSON | The field is greater than the |
| | | | argument. |
+---------------+-----------------+-------------+------------------------------------+
| Object | ``$exists`` | Boolean | Check whether the field exists or |
| | | | not, regardless of its value. |
+---------------+-----------------+-------------+------------------------------------+
| | ``$type`` | String | Check the document field's type. |
| | | | Valid values are ``"null"``, |
| | | | ``"boolean"``, ``"number"``, |
| | | | ``"string"``, ``"array"``, and |
| | | | ``"object"``. |
+---------------+-----------------+-------------+------------------------------------+
| Array | ``$in`` | Array of | The document field must exist in |
| | | JSON values | the list provided. |
+---------------+-----------------+-------------+------------------------------------+
| | ``$nin`` | Array of | The document field must not exist |
| | | JSON values | in the list provided. |
+---------------+-----------------+-------------+------------------------------------+
| | ``$size`` | Integer | Special condition to match the |
| | | | length of an array field in a |
| | | | document. Non-array fields cannot |
| | | | match this condition. |
+---------------+-----------------+-------------+------------------------------------+
| Miscellaneous | ``$mod`` | [Divisor, | Divisor is a non-zero integer, |
| | | Remainder] | Remainder is any integer. |
| | | | Non-integer values result in a |
| | | | 404. Matches documents where |
| | | | ``field % Divisor == Remainder`` |
| | | | is true, and only when the |
| | | | document field is an integer. |
+---------------+-----------------+-------------+------------------------------------+
| | ``$regex`` | String | A regular expression pattern to |
| | | | match against the document field. |
| | | | Only matches when the field is a |
| | | | string value and matches the |
| | | | supplied regular expression. The |
| | | | matching algorithms are based on |
| | | | the Perl Compatible Regular |
| | | | Expression (PCRE) library. For |
| | | | more information about what is |
| | | | implemented, see the |
| | | | `Erlang Regular Expression |
| | | | <http://erlang.org/doc |
| | | | /man/re.html>`_. |
+---------------+-----------------+-------------+------------------------------------+
| | ``$beginsWith`` | String | Matches where the document field |
| | | | begins with the specified prefix |
| | | | (case-sensitive). If the document |
| | | | field contains a non-string value, |
| | | | the document is not matched. |
+---------------+-----------------+-------------+------------------------------------+

.. warning::
Regular expressions do not work with indexes, so they should not be used to
Expand All @@ -754,8 +760,10 @@ can itself be another operator with arguments of its own. This enables us to
build up more complex selector expressions.

However, only equality operators such as ``$eq``, ``$gt``, ``$gte``, ``$lt``,
and ``$lte`` (but not ``$ne``) can be used as the basis of a query. You should
include at least one of these in a selector.
``$lte`` and ``$beginsWith`` (but not ``$ne``) can be used as the basis
of a query that can make efficient use of a ``json`` index. You should
include at least one of these in a selector, or consider using
a ``text`` index if more flexibility is required.

For example, if you try to perform a query that attempts to match all documents
that have a field called `afieldname` containing a value that begins with the
Expand Down
3 changes: 2 additions & 1 deletion src/mango/src/mango_selector_text.erl
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,9 @@ convert(Path, {[{<<"$exists">>, ShouldExist}]}) ->
false -> {op_not, {FieldExists, false}}
end;
convert(Path, {[{<<"$beginsWith">>, Arg}]}) when is_binary(Arg) ->
Prefix = mango_util:lucene_escape_query_value(Arg),
Suffix = <<"*">>,
PrefixSearch = value_str(<<Arg/binary, Suffix/binary>>),
PrefixSearch = <<Prefix/binary, Suffix/binary>>,
{op_field, {make_field(Path, Arg), PrefixSearch}};
% We're not checking the actual type here, just looking for
% anything that has a possibility of matching by checking
Expand Down
41 changes: 29 additions & 12 deletions src/mango/test/03-operator-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
# License for the specific language governing permissions and limitations under
# the License.

from requests.exceptions import HTTPError
import mango
import unittest


class BaseOperatorTests:
class Common(object):
class Common(unittest.TestCase):
def assertUserIds(self, user_ids, docs):
user_ids_returned = list(d["user_id"] for d in docs)
user_ids.sort()
Expand Down Expand Up @@ -142,20 +143,36 @@ def test_exists_false_returns_missing_but_not_null(self):
self.assertNotIn("twitter", d)

def test_beginswith(self):
docs = self.db.find({"location.state": {"$beginsWith": "New"}})
self.assertEqual(len(docs), 2)
self.assertUserIds([2, 10], docs)
cases = [
{"prefix": "New", "user_ids": [2, 10]},
{
# test escaped characters - note the space in the test string
"prefix": "New ",
"user_ids": [2, 10],
},
{
# non-string values in documents should not match the prefix,
# but should not error
"prefix": "Foo",
"user_ids": [],
},
{"prefix": " New", "user_ids": []},
]

# non-string prefixes should return an error
def test_beginswith_invalid_prefix(self):
docs = self.db.find({"location.state": {"$beginsWith": 123}})
self.assertEqual(len(docs), 2)
for case in cases:
with self.subTest(prefix=case["prefix"]):
selector = {"location.state": {"$beginsWith": case["prefix"]}}
docs = self.db.find(selector)
self.assertEqual(len(docs), len(case["user_ids"]))
self.assertUserIds(case["user_ids"], docs)

# non-string values in documents should not match the prefix,
# but should not error
# non-string prefixes should return an error
def test_beginswith_invalid_prefix(self):
docs = self.db.find({"user_id": {"$beginsWith": "Foo"}})
self.assertEqual(len(docs), 0)
cases = [123, True, [], {}]
for prefix in cases:
with self.subTest(prefix=prefix):
with self.assertRaises(HTTPError):
self.db.find({"location.state": {"$beginsWith": prefix}})


class OperatorJSONTests(mango.UserDocsTests, BaseOperatorTests.Common):
Expand Down

0 comments on commit cdfb26d

Please sign in to comment.