-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
search: improve with CompositeSuggestQueryParser #151
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -1,7 +1,7 @@ | ||||||||
# -*- coding: utf-8 -*- | ||||||||
# | ||||||||
# Copyright (C) 2022 TU Wien. | ||||||||
# Copyright (C) 2022 CERN. | ||||||||
# Copyright (C) 2022-2024 CERN. | ||||||||
# Copyright (C) 2024 KTH Royal Institute of Technology. | ||||||||
# | ||||||||
# Invenio-Users-Resources is free software; you can redistribute it and/or | ||||||||
|
@@ -26,10 +26,10 @@ | |||||||
SortParam, | ||||||||
) | ||||||||
from invenio_records_resources.services.records.queryparser import ( | ||||||||
CompositeSuggestQueryParser, | ||||||||
FieldValueMapper, | ||||||||
QueryParser, | ||||||||
SearchFieldTransformer, | ||||||||
SuggestQueryParser, | ||||||||
) | ||||||||
from luqum.tree import Word | ||||||||
|
||||||||
|
@@ -68,9 +68,14 @@ class UserSearchOptions(SearchOptions, SearchOptionsMixin): | |||||||
# The user search needs to be highly restricted to avoid leaking | ||||||||
# account information, hence do not edit here unless you are | ||||||||
# absolutely sure what you're doing. | ||||||||
suggest_parser_cls = SuggestQueryParser.factory( | ||||||||
suggest_parser_cls = CompositeSuggestQueryParser.factory( | ||||||||
tree_transformer_cls=SearchFieldTransformer, | ||||||||
fields=["username^2", "email^2", "profile.full_name^3", "profile.affiliations"], | ||||||||
fields=[ | ||||||||
"username^2", | ||||||||
"email.keyword^2", | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor: Maybe we could keep the email text field as well, might be useful, wdyt?
Suggested change
|
||||||||
"profile.full_name^3", | ||||||||
"profile.affiliations", | ||||||||
], | ||||||||
# Only public emails because hidden emails are stored in email_hidden field. | ||||||||
allow_list=["username", "email"], | ||||||||
mapping={ | ||||||||
|
@@ -81,7 +86,6 @@ class UserSearchOptions(SearchOptions, SearchOptionsMixin): | |||||||
"name": "profile.full_name", | ||||||||
}, | ||||||||
type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types | ||||||||
fuzziness="AUTO", # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||
) | ||||||||
|
||||||||
params_interpreters_cls = [ | ||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright (C) 2022 CERN. | ||
# Copyright (C) 2022-2024 CERN. | ||
# | ||
# Invenio-Users-Resources is free software; you can redistribute it and/or | ||
# modify it under the terms of the MIT License; see LICENSE file for more | ||
|
@@ -98,24 +98,33 @@ def test_user_search_field_not_searchable(user_service, user_pub, query): | |
assert res["hits"]["total"] == 0 | ||
|
||
|
||
USERNAME_BOTH = ["pub", "pubres"] | ||
USERNAME_JOSE = ["pub"] | ||
USERNAME_TIM = ["pubres"] | ||
|
||
|
||
# | ||
# Read | ||
@pytest.mark.parametrize( | ||
"query", | ||
"query,expected_usernames", | ||
[ | ||
"CERN", | ||
"Jose CERN", | ||
"Jose AND CERN", | ||
"Tim", | ||
"Tim CERN", | ||
"Jose", | ||
"Jos", | ||
"[email protected]", | ||
"pub", | ||
("CERN", USERNAME_BOTH), | ||
("Jose", USERNAME_JOSE), | ||
("Jos", USERNAME_JOSE), | ||
("Jose CERN", USERNAME_JOSE), | ||
("Tim", USERNAME_TIM), | ||
("Tim CERN", USERNAME_TIM), | ||
("[email protected]", USERNAME_JOSE), | ||
("[email protected]", USERNAME_JOSE), | ||
("pub@inveniosoft", USERNAME_JOSE), | ||
("pub", USERNAME_BOTH), | ||
], | ||
) | ||
def test_user_search_field(user_service, user_pub, query): | ||
def test_user_search_field(user_service, user_pub, query, expected_usernames): | ||
"""Make sure certain fields ARE searchable.""" | ||
res = user_service.search(user_pub.identity, suggest=query).to_dict() | ||
assert res["hits"]["total"] > 0 | ||
usernames = [entry["username"] for entry in res["hits"]["hits"]] | ||
assert sorted(usernames) == expected_usernames | ||
|
||
|
||
# | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Searching for a username with a dash is still not working well.
For instance, searching for "one-two" seems to search for usernames starting with "one" and usernames starting with "two", and therefore does not find anything.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is the issue: the text field is split on `-` (see https://github.com/inveniosoftware/invenio-users-resources/blob/master/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json#L132).
If you search by username.keyword, it should work.