Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fix mypy issues #64

Merged
merged 5 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: 3.11
- uses: pre-commit/action@v3.0.0
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ repos:
rev: 5.12.0
hooks:
- id: isort
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v0.971
# hooks:
# - id: mypy
# additional_dependencies: [types-all]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.971
hooks:
- id: mypy
additional_dependencies: [types-all]
29 changes: 20 additions & 9 deletions app/api.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import json
from pathlib import Path
from typing import Annotated
from typing import Annotated, Any, cast

from elasticsearch_dsl import Search
from fastapi import FastAPI, HTTPException, Query, Request
from fastapi.responses import HTMLResponse, PlainTextResponse
from fastapi.templating import Jinja2Templates

from app import config
from app._types import SearchResponse
from app._types import SearchResponse, SuccessSearchResponse
from app.config import check_config_is_defined, settings
from app.postprocessing import (
BaseResultProcessor,
load_result_processor,
process_taxonomy_completion_response,
)
Expand Down Expand Up @@ -59,9 +60,10 @@
def get_document(identifier: str):
"""Fetch a document from Elasticsearch with specific ID."""
check_config_is_defined()
id_field_name = config.CONFIG.index.id_field_name
global_config = cast(config.Config, config.CONFIG)
id_field_name = global_config.index.id_field_name
results = (
Search(index=config.CONFIG.index.name)
Search(index=global_config.index.name)
.query("term", **{id_field_name: identifier})
.extra(size=1)
.execute()
Expand Down Expand Up @@ -124,6 +126,8 @@ def search(
] = None,
) -> SearchResponse:
check_config_is_defined()
global_config = cast(config.Config, config.CONFIG)
result_processor = cast(BaseResultProcessor, RESULT_PROCESSOR)
if q is None and sort_by is None:
raise HTTPException(
status_code=400, detail="`sort_by` must be provided when `q` is missing"
Expand Down Expand Up @@ -152,7 +156,7 @@ def search(
langs=langs_set,
size=page_size,
page=page,
config=config.CONFIG,
config=global_config,
sort_by=sort_by,
# filter query builder is generated from elasticsearch mapping and
# takes ~40ms to generate; build it beforehand as we're using global
Expand All @@ -163,7 +167,11 @@ def search(

projection = set(fields.split(",")) if fields else None
return execute_query(
query, RESULT_PROCESSOR, page=page, page_size=page_size, projection=projection
query,
result_processor,
page=page,
page_size=page_size,
projection=projection,
)


Expand All @@ -188,13 +196,15 @@ def taxonomy_autocomplete(
Query(description="Fuzziness level to use, default to no fuzziness."),
] = None,
):
check_config_is_defined()
global_config = cast(config.Config, config.CONFIG)
taxonomy_names_list = taxonomy_names.split(",")
query = build_completion_query(
q=q,
taxonomy_names=taxonomy_names_list,
lang=lang,
size=size,
config=config.CONFIG,
config=global_config,
fuzziness=fuzziness,
)
es_response = query.execute()
Expand All @@ -221,17 +231,18 @@ def html_search(
return templates.TemplateResponse("search.html", {"request": request})

results = search(q=q, langs=langs, page_size=page_size, page=page, sort_by=sort_by)
template_data = {
template_data: dict[str, Any] = {
"q": q or "",
"request": request,
"sort_by": sort_by,
"results": results,
"es_query": json.dumps(results.debug.query, indent=4),
}
if results.is_success():
results = cast(SuccessSearchResponse, results)
template_data["aggregations"] = results.aggregations
page_count = results.page_count
pagination = [
pagination: list[dict[str, Any]] = [
{"name": p, "selected": p == page, "page_id": p}
for p in range(1, page_count + 1)
# Allow scrolling over a window of 10 pages
Expand Down
13 changes: 10 additions & 3 deletions app/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def import_data(
):
"""Import data into Elasticsearch."""
import time
from typing import cast

from app import config
from app.cli.perform_import import perform_import
Expand All @@ -44,12 +45,13 @@ def import_data(

start_time = time.perf_counter()
check_config_is_defined()
global_config = cast(config.Config, config.CONFIG)
perform_import(
input_path,
num_items,
num_processes,
start_time,
config.CONFIG, # type: ignore
global_config,
)
end_time = time.perf_counter()
logger.info("Import time: %s seconds", end_time - start_time)
Expand All @@ -67,6 +69,7 @@ def import_taxonomies(
):
"""Import taxonomies into Elasticsearch."""
import time
from typing import cast

from app import config
from app.cli.perform_import import perform_taxonomy_import
Expand All @@ -79,9 +82,10 @@ def import_taxonomies(
set_global_config(config_path)

check_config_is_defined()
global_config = cast(config.Config, config.CONFIG)

start_time = time.perf_counter()
perform_taxonomy_import(config.CONFIG)
perform_taxonomy_import(global_config)
end_time = time.perf_counter()
logger.info("Import time: %s seconds", end_time - start_time)

Expand Down Expand Up @@ -112,6 +116,8 @@ def import_from_queue(
exists=True,
),
):
from typing import cast

from app import config
from app.config import check_config_is_defined, set_global_config, settings
from app.queue_helpers import run_queue_safe
Expand All @@ -129,9 +135,10 @@ def import_from_queue(
connection.get_es_client()

check_config_is_defined()
global_config = cast(config.Config, config.CONFIG)

# run queue
run_queue_safe(config.CONFIG) # type: ignore
run_queue_safe(global_config)


def main() -> None:
Expand Down
6 changes: 2 additions & 4 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,10 +236,8 @@ class IndexConfig(BaseModel):
class TaxonomyIndexConfig(BaseModel):
name: Annotated[
str,
Field(
default="taxonomy", description="name of the taxonomy index alias to use"
),
]
Field(description="name of the taxonomy index alias to use"),
] = "taxonomy"
number_of_shards: Annotated[
int, Field(description="number of shards to use for the index")
] = 4
Expand Down
7 changes: 5 additions & 2 deletions app/openfoodfacts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import copy
import re
from typing import Any

from app._types import JSONType
from app.indexing import BaseDocumentPreprocessor
Expand Down Expand Up @@ -149,7 +150,7 @@ def build_image_fields(product: JSONType):
# Python copy of the code from
# https://github.com/openfoodfacts/openfoodfacts-server/blob/b297ed858d526332649562cdec5f1d36be184984/lib/ProductOpener/Display.pm#L10128
code = product["code"]
fields = {}
fields: dict[str, Any] = {}

for image_type in ["front", "ingredients", "nutrition", "packaging"]:
display_ids = []
Expand Down Expand Up @@ -186,7 +187,9 @@ def build_image_fields(product: JSONType):
for language_code in product["languages_codes"]:
image_id = f"{image_type}_{language_code}"
if images and images.get(image_id) and images[image_id]["sizes"]:
fields.setdefault("selected_images", {}).update(
if "selected_images" not in fields:
fields["selected_images"] = {}
fields["selected_images"].update(
{
image_type: {
"display": {
Expand Down
10 changes: 8 additions & 2 deletions app/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def create_aggregation_clauses(config: Config) -> dict[str, Agg]:


def build_search_query(
q: str,
q: str | None,
langs: set[str],
size: int,
page: int,
Expand All @@ -225,7 +225,13 @@ def build_search_query(
:param sort_by: sorting key, defaults to None (=relevance-based sorting)
:return: the built Query
"""
filter_query, remaining_terms = parse_lucene_dsl_query(q, filter_query_builder)
filter_query: list[JSONType]
if q is None:
filter_query = []
remaining_terms = ""
else:
filter_query, remaining_terms = parse_lucene_dsl_query(q, filter_query_builder)

logger.debug("filter query: %s", filter_query)
logger.debug("remaining terms: '%s'", remaining_terms)

Expand Down
7 changes: 4 additions & 3 deletions app/queue_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,16 @@ def run_queue_safe(config: Config):
logger.info("Starting redis consumer")

# we need a dict to have a reference
queues = {"current": None}
queues: dict[str, QueueManager | None] = {"current": None}

atexit.register(handle_stop, queues)

alive = True
while alive:
try:
queues["current"] = QueueManager(config)
queues["current"].consume()
queue = QueueManager(config)
queues["current"] = queue
queue.consume()
alive = False
except Exception as e:
logger.info("Received %s, respawning a consumer", e)
59 changes: 59 additions & 0 deletions tests/unit/data/empty_query_with_sort_by.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"aggs": {
"brands_tags": {
"terms": {
"field": "brands_tags"
}
},
"lang": {
"terms": {
"field": "lang"
}
},
"owner": {
"terms": {
"field": "owner"
}
},
"categories_tags": {
"terms": {
"field": "categories_tags"
}
},
"labels_tags": {
"terms": {
"field": "labels_tags"
}
},
"countries_tags": {
"terms": {
"field": "countries_tags"
}
},
"states_tags": {
"terms": {
"field": "states_tags"
}
},
"nutrition_grades": {
"terms": {
"field": "nutrition_grades"
}
},
"ecoscore_grade": {
"terms": {
"field": "ecoscore_grade"
}
},
"nova_groups": {
"terms": {
"field": "nova_groups"
}
}
},
"sort": [
"unique_scans_n"
],
"size": 25,
"from": 25
}
2 changes: 1 addition & 1 deletion tests/unit/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_process_text_lang_field(data, input_field, split, expected):
sources=[
TaxonomySourceConfig(
name="category",
url="https://static.openfoodfacts.org/data/taxonomies/categories.full.json",
url="https://static.openfoodfacts.org/data/taxonomies/categories.full.json", # type: ignore
)
],
exported_langs=["en"],
Expand Down
8 changes: 8 additions & 0 deletions tests/unit/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,14 @@ def test_parse_lucene_dsl_query(
2,
None,
),
(
"empty_query_with_sort_by",
None,
{"en"},
25,
2,
"unique_scans_n",
),
],
)
def test_build_search_query(
Expand Down
Loading