Skip to content

Commit

Permalink
fix: Document tuple expects Section
Browse files (browse the repository at this point in the history)
  • Loading branch information
winstxnhdw committed Nov 22, 2023
1 parent 4c699a7 commit e2a68b1
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions server/features/extraction/extract_from_image.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from io import BytesIO
from typing import Generator
from uuid import uuid4

from fastapi import UploadFile

from tesserocr import PyTessBaseAPI
from io import BytesIO
from PIL import Image
from tesserocr import PyTessBaseAPI

from server.features.extraction.models import Document
from server.features.extraction.models.document import Section


def extract_texts_from_image(file_name: str, image: Image) -> Document:
"""
Summary
Expand All
@@ -27,11 +27,11 @@ def extract_texts_from_image(file_name: str, image: Image) -> Document:
"""
with PyTessBaseAPI(path='/usr/share/tesseract-ocr/5/tessdata') as ocr:
ocr.SetImage(image)
sections = [ocr.GetUTF8Text()]
section = Section('', ocr.GetUTF8Text())

return Document(
id=str(uuid4()),
sections=sections,
sections=[section],
semantic_identifier=file_name
)

Expand All
@@ -55,4 +55,4 @@ def extract_texts_from_image_requests(requests: list[UploadFile]) -> Generator[D
extract_texts_from_image(request.filename.rsplit('.', 1)[0], image=img)
if request.filename
else None
)
)

0 comments on commit e2a68b1

Please sign in to comment.