diff --git a/invenio_vocabularies/contrib/affiliations/config.py b/invenio_vocabularies/contrib/affiliations/config.py index abe574d8..fa15ff18 100644 --- a/invenio_vocabularies/contrib/affiliations/config.py +++ b/invenio_vocabularies/contrib/affiliations/config.py @@ -13,7 +13,9 @@ from invenio_i18n import lazy_gettext as _ from invenio_records_resources.services import SearchOptions from invenio_records_resources.services.records.components import DataComponent -from invenio_records_resources.services.records.params import SuggestQueryParser +from invenio_records_resources.services.records.queryparser import ( + CompositeSuggestQueryParser, +) from werkzeug.local import LocalProxy from ...services.components import PIDComponent @@ -24,23 +26,29 @@ affiliation_edmo_country_mappings = LocalProxy( lambda: current_app.config["VOCABULARIES_AFFILIATIONS_EDMO_COUNTRY_MAPPING"] ) -localized_title = LocalProxy(lambda: f"title.{get_locale()}^20") +localized_title = LocalProxy(lambda: f"title.{get_locale()}^7") class AffiliationsSearchOptions(SearchOptions): """Search options.""" - suggest_parser_cls = SuggestQueryParser.factory( + suggest_parser_cls = CompositeSuggestQueryParser.factory( fields=[ - "name^100", - "acronym.keyword^100", - "acronym^40", + # We boost the acronym fields, since they're smaller words and are more + # likely to be used in a query. + "acronym.keyword^50", + "acronym^10", + "name^10", + # Aliases can sometimes be shorter, so we boost them a bit. + "aliases^5", localized_title, - "id^20", - "aliases^20", + "id^2", + # Allow to search identifiers directly (e.g. ROR) + "identifiers.identifier", + "country", + "country_name", + "types", ], - type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types - fuzziness="AUTO", # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness ) sort_default = "bestmatch" diff --git a/invenio_vocabularies/contrib/funders/config.py b/invenio_vocabularies/contrib/funders/config.py index 54408e49..0a16487b 100644 --- a/invenio_vocabularies/contrib/funders/config.py +++ b/invenio_vocabularies/contrib/funders/config.py @@ -13,7 +13,9 @@ from invenio_i18n import lazy_gettext as _ from invenio_records_resources.services import SearchOptions from invenio_records_resources.services.records.components import DataComponent -from invenio_records_resources.services.records.params import SuggestQueryParser +from invenio_records_resources.services.records.queryparser import ( + CompositeSuggestQueryParser, +) from werkzeug.local import LocalProxy from ...services.components import ModelPIDComponent @@ -23,24 +25,29 @@ funder_fundref_doi_prefix = LocalProxy( lambda: current_app.config["VOCABULARIES_FUNDER_DOI_PREFIX"] ) -localized_title = LocalProxy(lambda: f"title.{get_locale()}^20") +localized_title = LocalProxy(lambda: f"title.{get_locale()}^7") class FundersSearchOptions(SearchOptions): """Search options.""" - suggest_parser_cls = SuggestQueryParser.factory( + suggest_parser_cls = CompositeSuggestQueryParser.factory( fields=[ - "name^100", - "acronym.keyword^100", - "acronym^40", + # We boost the acronym fields, since they're smaller words and are more + # likely to be used in a query. + "acronym.keyword^50", + "acronym^10", + "name^10", + # Aliases can sometimes be shorter, so we boost them a bit. + "aliases^5", localized_title, - "id^20", - "aliases^20", - "identifiers.identifier^10", - ], - type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types - fuzziness="AUTO", # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness + "id^2", + # Allow to search identifiers directly (e.g. ROR) + "identifiers.identifier", + "country", + "country_name", + "types", + ] ) sort_default = "bestmatch" diff --git a/invenio_vocabularies/contrib/names/config.py b/invenio_vocabularies/contrib/names/config.py index c2ec45b9..30ff9a62 100644 --- a/invenio_vocabularies/contrib/names/config.py +++ b/invenio_vocabularies/contrib/names/config.py @@ -15,7 +15,9 @@ DataComponent, RelationsComponent, ) -from invenio_records_resources.services.records.params import SuggestQueryParser +from invenio_records_resources.services.records.queryparser import ( + CompositeSuggestQueryParser, +) from werkzeug.local import LocalProxy from ...services.components import PIDComponent @@ -26,16 +28,17 @@ class NamesSearchOptions(SearchOptions): """Search options.""" - suggest_parser_cls = SuggestQueryParser.factory( + suggest_parser_cls = CompositeSuggestQueryParser.factory( fields=[ - "given_name^100", - "name^70", - "family_name^50", - "identifiers.identifier^20", - "affiliations.name^20", + "name^5", + # We boost the affiliation acronym fields, since they're short and more + # likely to be used in a query. + "affiliations.acronym.keyword^3", + "affiliations.acronym", + "affiliations.name", + # Allow to search identifiers directly (e.g. ORCID) + "identifiers.identifier", ], - type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types - fuzziness="AUTO", ) sort_default = "bestmatch" diff --git a/invenio_vocabularies/services/config.py b/invenio_vocabularies/services/config.py index 1702fc94..a622d3d4 100644 --- a/invenio_vocabularies/services/config.py +++ b/invenio_vocabularies/services/config.py @@ -14,17 +14,11 @@ from invenio_i18n import lazy_gettext as _ from invenio_records_resources.services import ( Link, - LinksTemplate, - RecordService, RecordServiceConfig, SearchOptions, pagination_links, ) -from invenio_records_resources.services.base import ( - ConditionalLink, - Service, - ServiceListResult, -) +from invenio_records_resources.services.base import ConditionalLink from invenio_records_resources.services.records.components import DataComponent from invenio_records_resources.services.records.params import ( FilterParam, diff --git a/tests/contrib/names/test_names_resource.py b/tests/contrib/names/test_names_resource.py index 3920be63..81968229 100644 --- a/tests/contrib/names/test_names_resource.py +++ b/tests/contrib/names/test_names_resource.py @@ -10,7 +10,6 @@ """Test the name vocabulary resource.""" import json -from copy import deepcopy import pytest @@ -193,8 +192,6 @@ def test_names_suggest_sort(client_with_credentials, example_multiple_names, h, # With affiliation res = client_with_credentials.get(f"{prefix}?suggest=john%20wwe", headers=h) assert res.status_code == 200 - assert ( - res.json["hits"]["total"] == 3 - ) # Will find 3 johns but WWE affiliation should be at the top + assert res.json["hits"]["total"] == 1 assert res.json["hits"]["hits"][0]["name"] == "Cena, John" assert res.json["hits"]["hits"][0]["affiliations"][0]["name"] == "WWE"