Add the `$beginsWith` Mango operator: index range selection for JSON/view
indexes, selector normalisation and in-memory matching, text-index
conversion, and the accompanying EUnit and Python tests.

Review notes:
 * The upper bound in mango_idx_view:range/5 and the "<MAX>" literals in
   25-beginswith-test.py were lost as markup in the collapsed copy of this
   patch and are reconstructed here -- TODO confirm against the original
   commit.
 * Context-line indentation is reconstructed from the collapsed copy --
   verify against the target tree before applying.
 * 03-operator-test.py: the two tests sharing the name
   test_beginswith_invalid_prefix are now distinct, and the non-string
   prefix test asserts the HTTP 400 its comment describes. The post-image
   blob id on that file's index line is therefore stale.

diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl
index 25d75d55d0a..d1650e987cc 100644
--- a/src/mango/src/mango_idx_view.erl
+++ b/src/mango/src/mango_idx_view.erl
@@ -306,6 +306,8 @@ indexable({[{<<"$gt">>, _}]}) ->
     true;
 indexable({[{<<"$gte">>, _}]}) ->
     true;
+indexable({[{<<"$beginsWith">>, _}]}) ->
+    true;
 % This is required to improve index selection for covering indexes.
 % Making `$exists` indexable should not cause problems in other cases.
 indexable({[{<<"$exists">>, _}]}) ->
@@ -412,6 +414,10 @@ range(_, _, LCmp, Low, HCmp, High) ->
 % operators but its all straight forward once you figure out how
 % we're basically just narrowing our logical ranges.
 
+% beginsWith requires both a high and low bound
+range({[{<<"$beginsWith">>, Arg}]}, LCmp, Low, HCmp, High) ->
+    {LCmp0, Low0, HCmp0, High0} = range({[{<<"$gte">>, Arg}]}, LCmp, Low, HCmp, High),
+    range({[{<<"$lte">>, <<Arg/binary, 16#10FFFF>>}]}, LCmp0, Low0, HCmp0, High0);
 range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) ->
     case range_pos(Low, Arg, High) of
         min ->
diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl
index 59be7a6ebaf..c1b4d7c282e 100644
--- a/src/mango/src/mango_selector.erl
+++ b/src/mango/src/mango_selector.erl
@@ -135,6 +135,8 @@ norm_ops({[{<<"$text">>, Arg}]}) when
     {[{<<"$default">>, {[{<<"$text">>, Arg}]}}]};
 norm_ops({[{<<"$text">>, Arg}]}) ->
     ?MANGO_ERROR({bad_arg, '$text', Arg});
+norm_ops({[{<<"$beginsWith">>, Arg}]} = Cond) when is_binary(Arg) ->
+    Cond;
 % Not technically an operator but we pass it through here
 % so that this function accepts its own output. This exists
 % so that $text can have a field name value which simplifies
@@ -514,6 +516,11 @@ match({[{<<"$mod">>, [D, R]}]}, Value, _Cmp) when is_integer(Value) ->
     Value rem D == R;
 match({[{<<"$mod">>, _}]}, _Value, _Cmp) ->
     false;
+match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix), is_binary(Value) ->
+    string:prefix(Value, Prefix) /= nomatch;
+% When Value is not a string, do not match
+match({[{<<"$beginsWith">>, Prefix}]}, _, _Cmp) when is_binary(Prefix) ->
+    false;
 match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) ->
     try
         match == re:run(Value, Regex, [{capture, none}])
@@ -1054,4 +1061,29 @@ fields_nor_test() ->
     },
     ?assertEqual([<<"field1">>, <<"field2">>], fields_of(Selector2)).
 
+match_beginswith_test() ->
+    Doc =
+        {[
+            {<<"_id">>, <<"foo">>},
+            {<<"_rev">>, <<"bar">>},
+            {<<"user_id">>, 11}
+        ]},
+    Check = fun(Field, Prefix) ->
+        Selector = {[{Field, {[{<<"$beginsWith">>, Prefix}]}}]},
+        % Call match_int/2 to avoid ERROR for missing metric; this is confusing
+        % in the middle of test output.
+        match_int(mango_selector:normalize(Selector), Doc)
+    end,
+    [
+        % matching
+        ?assertEqual(true, Check(<<"_id">>, <<"f">>)),
+        % no match (user_id is not a binary string)
+        ?assertEqual(false, Check(<<"user_id">>, <<"f">>)),
+        % invalid (prefix is not a binary string)
+        ?assertThrow(
+            {mango_error, mango_selector, {invalid_operator, <<"$beginsWith">>}},
+            Check(<<"user_id">>, 1)
+        )
+    ].
+
 -endif.
diff --git a/src/mango/src/mango_selector_text.erl b/src/mango/src/mango_selector_text.erl
index 1f8609ac27b..fc9280d85de 100644
--- a/src/mango/src/mango_selector_text.erl
+++ b/src/mango/src/mango_selector_text.erl
@@ -142,6 +142,9 @@ convert(Path, {[{<<"$exists">>, ShouldExist}]}) ->
         true -> FieldExists;
         false -> {op_not, {FieldExists, false}}
     end;
+convert(Path, {[{<<"$beginsWith">>, Arg}]}) ->
+    PrefixSearch = [value_str(Arg), <<"*">>],
+    {op_field, {make_field(Path, Arg), PrefixSearch}};
 % We're not checking the actual type here, just looking for
 % anything that has a possibility of matching by checking
 % for the field name. We use the same logic for $exists on
diff --git a/src/mango/test/03-operator-test.py b/src/mango/test/03-operator-test.py
index 70e3fbc5f24..b43aacf5f69 100644
--- a/src/mango/test/03-operator-test.py
+++ b/src/mango/test/03-operator-test.py
@@ -141,6 +141,23 @@ def test_exists_false_returns_missing_but_not_null(self):
         for d in docs:
             self.assertNotIn("twitter", d)
 
+    def test_beginswith(self):
+        docs = self.db.find({"location.state": {"$beginsWith": "New"}})
+        self.assertEqual(len(docs), 2)
+        self.assertUserIds([2, 10], docs)
+
+    # non-string prefixes should return an error
+    def test_beginswith_invalid_prefix(self):
+        with self.assertRaises(Exception) as ctx:
+            self.db.find({"location.state": {"$beginsWith": 123}})
+        self.assertEqual(ctx.exception.response.status_code, 400)
+
+    # non-string values in documents should not match the prefix,
+    # but should not error
+    def test_beginswith_non_string_value(self):
+        docs = self.db.find({"user_id": {"$beginsWith": "Foo"}})
+        self.assertEqual(len(docs), 0)
+
 
 class OperatorJSONTests(mango.UserDocsTests, BaseOperatorTests.Common):
     # START: text indexes do not support range queries across type boundaries so only
diff --git a/src/mango/test/25-beginswith-test.py b/src/mango/test/25-beginswith-test.py
new file mode 100644
index 00000000000..76772c24399
--- /dev/null
+++ b/src/mango/test/25-beginswith-test.py
@@ -0,0 +1,112 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+import copy
+import mango
+
+DOCS = [
+    {"_id": "aaa", "name": "Jimi", "location": "AUS", "age": 27},
+    {"_id": "abc", "name": "Eddie", "location": "AND", "age": 65},
+    {"_id": "bbb", "name": "Harry", "location": "CAN", "age": 21},
+    {"_id": "ccc", "name": "Eddie", "location": "DEN", "age": 37},
+    {"_id": "ddd", "name": "Jones", "location": "ETH", "age": 49},
+]
+
+
+def to_utf8_bytes(list):
+    return [x.encode() for x in list]
+
+
+class BeginsWithOperator(mango.DbPerClass):
+    def setUp(self):
+        self.db.recreate()
+        self.db.save_docs(copy.deepcopy(DOCS))
+        self.db.create_index(["location"])
+        self.db.create_index(["name", "location"])
+
+    def assertDocIds(self, user_ids, docs):
+        user_ids_returned = list(d["_id"] for d in docs)
+        user_ids.sort()
+        user_ids_returned.sort()
+        self.assertEqual(user_ids, user_ids_returned)
+
+    def test_basic(self):
+        docs = self.db.find({"location": {"$beginsWith": "A"}})
+
+        self.assertEqual(len(docs), 2)
+        self.assertDocIds(["aaa", "abc"], docs)
+
+    def test_json_range(self):
+        explain = self.db.find({"location": {"$beginsWith": "A"}}, explain=True)
+        self.assertEqual(explain["mrargs"]["start_key"], ["A"])
+        end_key_bytes = to_utf8_bytes(explain["mrargs"]["end_key"])
+        self.assertEqual(end_key_bytes, [b"A\xef\xbf\xbd", b"<MAX>"])
+
+    def test_compound_key(self):
+        selector = {"name": "Eddie", "location": {"$beginsWith": "A"}}
+        explain = self.db.find(selector, explain=True)
+
+        self.assertEqual(explain["mrargs"]["start_key"], ["Eddie", "A"])
+        end_key_bytes = to_utf8_bytes(explain["mrargs"]["end_key"])
+        self.assertEqual(end_key_bytes, [b"Eddie", b"A\xef\xbf\xbd", b"<MAX>"])
+
+        docs = self.db.find(selector)
+        self.assertEqual(len(docs), 1)
+        self.assertDocIds(["abc"], docs)
+
+    def test_sort_asc(self):
+        selector = {"location": {"$beginsWith": "A"}}
+        explain = self.db.find(selector, sort=["location"], explain=True)
+
+        self.assertEqual(explain["mrargs"]["start_key"], ["A"])
+        end_key_bytes = to_utf8_bytes(explain["mrargs"]["end_key"])
+        self.assertEqual(end_key_bytes, [b"A\xef\xbf\xbd", b"<MAX>"])
+        self.assertEqual(explain["mrargs"]["direction"], "fwd")
+
+    def test_sort_desc(self):
+        selector = {"location": {"$beginsWith": "A"}}
+        explain = self.db.find(selector, sort=[{"location": "desc"}], explain=True)
+
+        start_key_bytes = to_utf8_bytes(explain["mrargs"]["end_key"])
+        self.assertEqual(start_key_bytes, [b"A"])
+        self.assertEqual(explain["mrargs"]["end_key"], ["A"])
+        self.assertEqual(explain["mrargs"]["direction"], "rev")
+
+    def test_all_docs_range(self):
+        explain = self.db.find({"_id": {"$beginsWith": "a"}}, explain=True)
+        self.assertEqual(explain["mrargs"]["start_key"], "a")
+        end_key_bytes = to_utf8_bytes(explain["mrargs"]["end_key"])
+        self.assertEqual(end_key_bytes, [b"a", b"\xef\xbf\xbd"])
+
+    def test_no_index(self):
+        selector = {"foo": {"$beginsWith": "a"}}
+        resp_explain = self.db.find(selector, explain=True)
+
+        self.assertEqual(resp_explain["index"]["type"], "special")
+        self.assertEqual(resp_explain["mrargs"]["start_key"], None)
+        self.assertEqual(resp_explain["mrargs"]["end_key"], "<MAX>")
+
+    def test_invalid_operand(self):
+        try:
+            self.db.find({"_id": {"$beginsWith": True}})
+        except Exception as e:
+            self.assertEqual(e.response.status_code, 400)
+            resp = e.response.json()
+            self.assertEqual(resp["error"], "invalid_operator")
+        else:
+            raise AssertionError("expected find error")
+
+    def test_does_not_match_non_string_value(self):
+        selector = {"age": {"$beginsWith": "a"}}
+        docs = self.db.find(selector)
+
+        self.assertEqual(len(docs), 0)