From e2a68b1343f3d7368c2dbb3def5912e5cac0b142 Mon Sep 17 00:00:00 2001 From: winstxnhdw Date: Wed, 22 Nov 2023 21:26:50 +0800 Subject: [PATCH] fix: `Document` tuple expects `Section` --- server/features/extraction/extract_from_image.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/server/features/extraction/extract_from_image.py b/server/features/extraction/extract_from_image.py index 0d803b6..69b8d74 100644 --- a/server/features/extraction/extract_from_image.py +++ b/server/features/extraction/extract_from_image.py @@ -1,15 +1,15 @@ +from io import BytesIO from typing import Generator from uuid import uuid4 from fastapi import UploadFile - -from tesserocr import PyTessBaseAPI -from io import BytesIO from PIL import Image +from tesserocr import PyTessBaseAPI from server.features.extraction.models import Document from server.features.extraction.models.document import Section + def extract_texts_from_image(file_name: str, image: Image) -> Document: """ Summary @@ -27,11 +27,11 @@ def extract_texts_from_image(file_name: str, image: Image) -> Document: """ with PyTessBaseAPI(path='/usr/share/tesseract-ocr/5/tessdata') as ocr: ocr.SetImage(image) - sections = [ocr.GetUTF8Text()] + section = Section('', ocr.GetUTF8Text()) return Document( id=str(uuid4()), - sections=sections, + sections=[section], semantic_identifier=file_name ) @@ -55,4 +55,4 @@ def extract_texts_from_image_requests(requests: list[UploadFile]) -> Generator[D extract_texts_from_image(request.filename.rsplit('.', 1)[0], image=img) if request.filename else None - ) \ No newline at end of file + )