From 669ed48297b90708fb4186c7fc3b3b637e67edd4 Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Thu, 31 Oct 2024 11:11:21 -0700 Subject: [PATCH 1/6] implement image alignment api endpoint --- OCR/ocr/api.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/OCR/ocr/api.py b/OCR/ocr/api.py index 9a10e173..c6976099 100644 --- a/OCR/ocr/api.py +++ b/OCR/ocr/api.py @@ -9,6 +9,7 @@ from fastapi.middleware.cors import CORSMiddleware from ocr.services.image_ocr import ImageOCR +from ocr.services.alignment import ImageAligner from ocr.services.image_segmenter import ImageSegmenter, segment_by_color_bounding_box app = FastAPI() @@ -36,6 +37,18 @@ async def health_check(): return {"status": "UP"} +@app.post("/image_alignment/") +async def image_alignment(source_image: UploadFile, segmentation_template: UploadFile): + source_image_np = np.frombuffer(await source_image.read(), np.uint8) + source_image_img = cv.imdecode(source_image_np, cv.IMREAD_COLOR) + + segmentation_template_np = np.frombuffer(await segmentation_template.read(), np.uint8) + segmentation_template_img = cv.imdecode(segmentation_template_np, cv.IMREAD_COLOR) + + aligner = ImageAligner() + return aligner.align(source_image_img, segmentation_template_img) + + @app.post("/image_file_to_text/") async def image_file_to_text(source_image: UploadFile, segmentation_template: UploadFile, labels: str = Form()): source_image_np = np.frombuffer(await source_image.read(), np.uint8) From fe808d7ba5432077cd09fe4aff479351334af307 Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Mon, 4 Nov 2024 14:46:37 -0800 Subject: [PATCH 2/6] Add frontend API function and interface types --- frontend/api/api.ts | 26 +++++++++++++++++++++++++- frontend/api/types/types.ts | 12 +++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/frontend/api/api.ts b/frontend/api/api.ts index 2ff009b0..a923fba2 100644 --- a/frontend/api/api.ts +++ b/frontend/api/api.ts @@ -1,4 +1,28 @@ -import { ImageToTextArgs, ImageToTextResponse } from "./types/types"; +import { ImageToTextArgs, ImageToTextResponse, AlignImageArgs, AlignImageResponse } from "./types/types"; + +export const AlignImage = async (args: AlignImageArgs): Promise => { + const { sourceImage, templateImage } = args; + const form = new URLSearchParams({ + source_image: sourceImage, + segmentation_template: templateImage, + }); + + const apiUrl = import.meta.env.VITE_API_URL || "http://localhost:8000/" + const alignImageURL = `${apiUrl}image_alignment/` + try { + const response = await fetch(alignImageURL, { + "method": "POST", + "headers": { + "Content-Type": "application/x-www-form-urlencoded" + }, + body: form + }) + return await response.json() as AlignImageResponse; + } catch (error) { + console.error(error); + return null; + } +} export const ImageToText = async (args: ImageToTextArgs): Promise => { diff --git a/frontend/api/types/types.ts b/frontend/api/types/types.ts index a64676fb..e9301bec 100644 --- a/frontend/api/types/types.ts +++ b/frontend/api/types/types.ts @@ -12,6 +12,10 @@ export type ImageToTextResponse = { [key: string]: [string, number]; }; +export type AlignImageResponse = { + [key: string]: [string, number]; +}; + export interface ResultItem { text: string; confidence: number; @@ -32,4 +36,10 @@ export interface ResultItem { sourceImage: string; templateImage: string; fieldNames: Field[]; -} \ No newline at end of file +} + +export interface AlignImageArgs { + // base 64 encoded image + sourceImage: string; + templateImage: string; +} From 98a0897bb20fed461a7d77594ad87a722d33d74f Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Mon, 4 Nov 2024 15:23:37 -0800 Subject: [PATCH 3/6] factor out api url --- frontend/api/api.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/api/api.ts b/frontend/api/api.ts index a923fba2..0e48b606 100644 --- a/frontend/api/api.ts +++ b/frontend/api/api.ts @@ -1,5 +1,7 @@ import { ImageToTextArgs, ImageToTextResponse, AlignImageArgs, AlignImageResponse } from "./types/types"; +const apiUrl = import.meta.env.VITE_API_URL || "http://localhost:8000/" + export const AlignImage = async (args: AlignImageArgs): Promise => { const { sourceImage, templateImage } = args; const form = new URLSearchParams({ @@ -7,7 +9,6 @@ export const AlignImage = async (args: AlignImageArgs): Promise Date: Mon, 4 Nov 2024 16:56:09 -0800 Subject: [PATCH 4/6] Properly encode/decode images at api boundary --- OCR/ocr/api.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/OCR/ocr/api.py b/OCR/ocr/api.py index c6976099..39276e30 100644 --- a/OCR/ocr/api.py +++ b/OCR/ocr/api.py @@ -1,4 +1,5 @@ import base64 +import io import uvicorn import json @@ -31,6 +32,16 @@ ) ocr = ImageOCR() +def data_uri_to_image(data_uri: str): + image_stripped = data_uri.replace("data:image/png;base64,", "", 1) + image_np = np.frombuffer(base64.b64decode(image_stripped), np.uint8) + return cv.imdecode(image_np, cv.IMREAD_COLOR) + +def image_to_data_uri(image: np.ndarray): + _, encoded = cv.imencode(".png", image) + return b"data:image/png;base64," + base64.b64encode(encoded) + + @app.get("/") async def health_check(): @@ -38,15 +49,13 @@ async def health_check(): @app.post("/image_alignment/") -async def image_alignment(source_image: UploadFile, segmentation_template: UploadFile): - source_image_np = np.frombuffer(await source_image.read(), np.uint8) - source_image_img = cv.imdecode(source_image_np, cv.IMREAD_COLOR) - - segmentation_template_np = np.frombuffer(await segmentation_template.read(), np.uint8) - segmentation_template_img = cv.imdecode(segmentation_template_np, cv.IMREAD_COLOR) +async def image_alignment(source_image: str = Form(), segmentation_template: str = Form()): + source_image_img = data_uri_to_image(source_image) + segmentation_template_img = data_uri_to_image(segmentation_template) aligner = ImageAligner() - return aligner.align(source_image_img, segmentation_template_img) + result = aligner.align(source_image_img, segmentation_template_img) + return {"result": image_to_data_uri(result)} @app.post("/image_file_to_text/") @@ -66,13 +75,8 @@ async def image_file_to_text(source_image: UploadFile, segmentation_template: Up @app.post("/image_to_text/") async def image_to_text(source_image: str = Form(), segmentation_template: str = Form(), labels: str = Form()): - source_image_stripped = source_image.replace("data:image/png;base64,", "", 1) - source_image_np = np.frombuffer(base64.b64decode(source_image_stripped), np.uint8) - source_image_img = cv.imdecode(source_image_np, cv.IMREAD_COLOR) - - segmentation_template_stripped = segmentation_template.replace("data:image/png;base64,", "", 1) - segmentation_template_np = np.frombuffer(base64.b64decode(segmentation_template_stripped), np.uint8) - segmentation_template_img = cv.imdecode(segmentation_template_np, cv.IMREAD_COLOR) + source_image_img = data_uri_to_image(source_image) + segmentation_template_img = data_uri_to_image(segmentation_template) loaded_json = json.loads(labels) segments = segmenter.segment(source_image_img, segmentation_template_img, loaded_json) From 193c1a651213ebb107b6adec07e77fd98f6e29dc Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Mon, 4 Nov 2024 17:11:35 -0800 Subject: [PATCH 5/6] lint --- OCR/ocr/api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/OCR/ocr/api.py b/OCR/ocr/api.py index 39276e30..0b01a6ba 100644 --- a/OCR/ocr/api.py +++ b/OCR/ocr/api.py @@ -1,5 +1,4 @@ import base64 -import io import uvicorn import json From a6f09be269d8b5d90cb1a49ec9835ce6c1a7719f Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Mon, 4 Nov 2024 17:25:09 -0800 Subject: [PATCH 6/6] lint --- OCR/ocr/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/OCR/ocr/api.py b/OCR/ocr/api.py index 0b01a6ba..444c834e 100644 --- a/OCR/ocr/api.py +++ b/OCR/ocr/api.py @@ -31,17 +31,18 @@ ) ocr = ImageOCR() + def data_uri_to_image(data_uri: str): image_stripped = data_uri.replace("data:image/png;base64,", "", 1) image_np = np.frombuffer(base64.b64decode(image_stripped), np.uint8) return cv.imdecode(image_np, cv.IMREAD_COLOR) + def image_to_data_uri(image: np.ndarray): _, encoded = cv.imencode(".png", image) return b"data:image/png;base64," + base64.b64encode(encoded) - @app.get("/") async def health_check(): return {"status": "UP"}