Persist /pdf named-entity results to a bind-mounted ./data directory.

NOTE(review): line breaks and blank context lines were reconstructed from the
hunk line counts; YAML indentation and Makefile recipe tabs are assumed -
confirm against the working tree before applying. The app.py hunk was edited
after generation, so its "index" blob ids no longer match the post-image.

diff --git a/Dockerfile b/Dockerfile
index c9b6afc..cb5212b 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,7 +17,7 @@ RUN pip --default-timeout=1000 install -r requirements.txt
 WORKDIR /app
 COPY --chown=python:python ./src/. ./src
 COPY --chown=python:python ./models/. ./models/
+COPY --chown=python:python ./data/. ./data/
 RUN python src/download_models.py
 
 ENV PYTHONPATH "${PYTHONPATH}:/app/src"
-
diff --git a/Makefile b/Makefile
index a2e414b..9e7c642 100644
--- a/Makefile
+++ b/Makefile
@@ -26,8 +26,10 @@ remove_docker_images:
 start:
 ifeq ($(OS), Windows_NT)
 	if not exist models mkdir models
+	if not exist data mkdir data
 else
 	mkdir -p ./models
+	mkdir -p ./data
 endif
 ifeq ($(HAS_GPU), 1)
 	@echo "NVIDIA GPU detected, using docker-compose-gpu.yml"
@@ -38,6 +40,7 @@ else
 endif
 
 start_no_gpu:
+	mkdir -p ./data
 	mkdir -p ./models
 	docker compose up --build
 
diff --git a/docker-compose-gpu.yml b/docker-compose-gpu.yml
index cd7a07a..eccf679 100755
--- a/docker-compose-gpu.yml
+++ b/docker-compose-gpu.yml
@@ -3,10 +3,31 @@ services:
     extends:
       file: docker-compose.yml
       service: ner
+    volumes:
+      - ./data:/app/data
     deploy:
       resources:
         reservations:
           devices:
             - driver: nvidia
               count: 1
-              capabilities: [ gpu ]
\ No newline at end of file
+              capabilities: [ gpu ]
+
+  worker-pdf-layout-gpu:
+    extends:
+      file: docker-compose.yml
+      service: worker-pdf-layout-gpu
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [ gpu ]
+
+networks:
+  network-ner:
+    driver: bridge
+
+volumes:
+  data:
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 676fd45..3afcca3 100755
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -7,6 +7,8 @@ services:
     build:
       context: .
       dockerfile: Dockerfile
+    volumes:
+      - ./data:/app/data
     networks:
       - network-ner
     ports:
@@ -26,3 +28,6 @@ services:
 networks:
   network-ner:
     driver: bridge
+
+volumes:
+  data:
\ No newline at end of file
diff --git a/src/drivers/rest/app.py b/src/drivers/rest/app.py
index 9474671..6654a13 100644
--- a/src/drivers/rest/app.py
+++ b/src/drivers/rest/app.py
@@ -1,3 +1,4 @@
+import json
 import sys
 import tempfile
 import uuid
@@ -34,7 +35,23 @@ async def get_named_entities(text: str = Form("")):
 
 
 @app.post("/pdf")
-async def get_pdf_named_entities(file: UploadFile = File(...)):
+async def get_pdf_named_entities(file: UploadFile = File(...), save_locally: bool = Form(False)):
+    """Extract named entities from an uploaded PDF.
+
+    When ``save_locally`` is true, the entities are also written as JSON to
+    ``/app/data/<pdf file stem>.json``.
+    """
     repository = PDFLayoutAnalysisRepository()
     pdf_path: Path = pdf_content_to_pdf_path(file.file.read())
-    return [entity for entity in NamedEntitiesFromPDFUseCase(repository).get_entities(pdf_path)]
+    entities = list(NamedEntitiesFromPDFUseCase(repository).get_entities(pdf_path))
+
+    if save_locally:
+        entities_json = [entity.model_dump() for entity in entities]
+        # with_suffix swaps only the extension; str.replace would rewrite every
+        # ".pdf" occurrence and leave extension-less names unchanged.
+        save_path: Path = Path("/app/data", pdf_path.name).with_suffix(".json")
+        # The directory may be absent, e.g. when running outside the container.
+        save_path.parent.mkdir(parents=True, exist_ok=True)
+        save_path.write_text(json.dumps(entities_json, indent=2), encoding="utf-8")
+
+    return entities