From 669ed48297b90708fb4186c7fc3b3b637e67edd4 Mon Sep 17 00:00:00 2001
From: Jonathan Chang <jonathan@skylight.digital>
Date: Thu, 31 Oct 2024 11:11:21 -0700
Subject: [PATCH 1/6] implement image alignment api endpoint

---
 OCR/ocr/api.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/OCR/ocr/api.py b/OCR/ocr/api.py
index 9a10e173..c6976099 100644
--- a/OCR/ocr/api.py
+++ b/OCR/ocr/api.py
@@ -9,6 +9,7 @@
 from fastapi.middleware.cors import CORSMiddleware
 
 from ocr.services.image_ocr import ImageOCR
+from ocr.services.alignment import ImageAligner
 from ocr.services.image_segmenter import ImageSegmenter, segment_by_color_bounding_box
 
 app = FastAPI()
@@ -36,6 +37,18 @@ async def health_check():
     return {"status": "UP"}
 
 
+@app.post("/image_alignment/")
+async def image_alignment(source_image: UploadFile, segmentation_template: UploadFile):
+    source_image_np = np.frombuffer(await source_image.read(), np.uint8)
+    source_image_img = cv.imdecode(source_image_np, cv.IMREAD_COLOR)
+
+    segmentation_template_np = np.frombuffer(await segmentation_template.read(), np.uint8)
+    segmentation_template_img = cv.imdecode(segmentation_template_np, cv.IMREAD_COLOR)
+
+    aligner = ImageAligner()
+    return aligner.align(source_image_img, segmentation_template_img)
+
+
 @app.post("/image_file_to_text/")
 async def image_file_to_text(source_image: UploadFile, segmentation_template: UploadFile, labels: str = Form()):
     source_image_np = np.frombuffer(await source_image.read(), np.uint8)

From fe808d7ba5432077cd09fe4aff479351334af307 Mon Sep 17 00:00:00 2001
From: Jonathan Chang <jonathan@skylight.digital>
Date: Mon, 4 Nov 2024 14:46:37 -0800
Subject: [PATCH 2/6] Add frontend API function and interface types

---
 frontend/api/api.ts         | 26 +++++++++++++++++++++++++-
 frontend/api/types/types.ts | 12 +++++++++++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/frontend/api/api.ts b/frontend/api/api.ts
index 2ff009b0..a923fba2 100644
--- a/frontend/api/api.ts
+++ b/frontend/api/api.ts
@@ -1,4 +1,28 @@
-import { ImageToTextArgs, ImageToTextResponse } from "./types/types";
+import { ImageToTextArgs, ImageToTextResponse, AlignImageArgs, AlignImageResponse } from "./types/types";
+
+export const AlignImage = async (args: AlignImageArgs): Promise<AlignImageResponse | null> => {
+    const { sourceImage, templateImage } = args;
+    const form = new URLSearchParams({
+        source_image: sourceImage,
+        segmentation_template: templateImage,
+      });
+
+    const apiUrl = import.meta.env.VITE_API_URL || "http://localhost:8000/"
+    const alignImageURL = `${apiUrl}image_alignment/`
+    try {
+        const response = await fetch(alignImageURL, {
+            "method": "POST",
+            "headers": {
+              "Content-Type": "application/x-www-form-urlencoded"
+            },
+            body: form
+          })
+          return await response.json() as AlignImageResponse;
+    } catch (error) {
+        console.error(error);
+        return null;
+    }
+}
 
 export const ImageToText = async (args: ImageToTextArgs): Promise<ImageToTextResponse | null> => {
 
diff --git a/frontend/api/types/types.ts b/frontend/api/types/types.ts
index a64676fb..e9301bec 100644
--- a/frontend/api/types/types.ts
+++ b/frontend/api/types/types.ts
@@ -12,6 +12,10 @@ export type ImageToTextResponse = {
     [key: string]: [string, number];
 };
 
+export type AlignImageResponse = {
+    result: string; // aligned image returned by /image_alignment/ as a PNG data URI
+};
+
 export interface ResultItem {
     text: string;
     confidence: number;
@@ -32,4 +36,10 @@ export interface ResultItem {
     sourceImage: string;
     templateImage: string;
     fieldNames: Field[];
-}
\ No newline at end of file
+}
+
+export interface AlignImageArgs {
+    // base64-encoded images; a leading "data:image/png;base64," prefix is stripped by the backend
+    sourceImage: string;
+    templateImage: string;
+}

From 98a0897bb20fed461a7d77594ad87a722d33d74f Mon Sep 17 00:00:00 2001
From: Jonathan Chang <jonathan@skylight.digital>
Date: Mon, 4 Nov 2024 15:23:37 -0800
Subject: [PATCH 3/6] Factor out API URL into a module-level constant

---
 frontend/api/api.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frontend/api/api.ts b/frontend/api/api.ts
index a923fba2..0e48b606 100644
--- a/frontend/api/api.ts
+++ b/frontend/api/api.ts
@@ -1,5 +1,7 @@
 import { ImageToTextArgs, ImageToTextResponse, AlignImageArgs, AlignImageResponse } from "./types/types";
 
+const apiUrl = import.meta.env.VITE_API_URL || "http://localhost:8000/"
+
 export const AlignImage = async (args: AlignImageArgs): Promise<AlignImageResponse | null> => {
     const { sourceImage, templateImage } = args;
     const form = new URLSearchParams({
@@ -7,7 +9,6 @@ export const AlignImage = async (args: AlignImageArgs): Promise<AlignImageRespon
         segmentation_template: templateImage,
       });
 
-    const apiUrl = import.meta.env.VITE_API_URL || "http://localhost:8000/"
     const alignImageURL = `${apiUrl}image_alignment/`
     try {
         const response = await fetch(alignImageURL, {
@@ -33,7 +34,6 @@ export const ImageToText = async (args: ImageToTextArgs): Promise<ImageToTextRes
         labels: JSON.stringify(fieldNames),
       });
 
-    const apiUrl = import.meta.env.VITE_API_URL || "http://localhost:8000/"
     const imageToTextURL = `${apiUrl}image_to_text/`
     try {
         const response = await fetch(imageToTextURL, {

From 77aa1e19c803401e100b3793c1c6a4cbc046d245 Mon Sep 17 00:00:00 2001
From: Jonathan Chang <jonathan@skylight.digital>
Date: Mon, 4 Nov 2024 16:56:09 -0800
Subject: [PATCH 4/6] Properly encode/decode images at api boundary

---
 OCR/ocr/api.py | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/OCR/ocr/api.py b/OCR/ocr/api.py
index c6976099..39276e30 100644
--- a/OCR/ocr/api.py
+++ b/OCR/ocr/api.py
@@ -1,4 +1,5 @@
 import base64
+import io
 
 import uvicorn
 import json
@@ -31,6 +32,16 @@
 )
 ocr = ImageOCR()
 
+def data_uri_to_image(data_uri: str):
+    image_stripped = data_uri.replace("data:image/png;base64,", "", 1)
+    image_np = np.frombuffer(base64.b64decode(image_stripped), np.uint8)
+    return cv.imdecode(image_np, cv.IMREAD_COLOR)
+
+def image_to_data_uri(image: np.ndarray):
+    _, encoded = cv.imencode(".png", image)
+    return b"data:image/png;base64," + base64.b64encode(encoded)
+
+
 
 @app.get("/")
 async def health_check():
@@ -38,15 +49,13 @@ async def health_check():
 
 
 @app.post("/image_alignment/")
-async def image_alignment(source_image: UploadFile, segmentation_template: UploadFile):
-    source_image_np = np.frombuffer(await source_image.read(), np.uint8)
-    source_image_img = cv.imdecode(source_image_np, cv.IMREAD_COLOR)
-
-    segmentation_template_np = np.frombuffer(await segmentation_template.read(), np.uint8)
-    segmentation_template_img = cv.imdecode(segmentation_template_np, cv.IMREAD_COLOR)
+async def image_alignment(source_image: str = Form(), segmentation_template: str = Form()):
+    source_image_img = data_uri_to_image(source_image)
+    segmentation_template_img = data_uri_to_image(segmentation_template)
 
     aligner = ImageAligner()
-    return aligner.align(source_image_img, segmentation_template_img)
+    result = aligner.align(source_image_img, segmentation_template_img)
+    return {"result": image_to_data_uri(result)}
 
 
 @app.post("/image_file_to_text/")
@@ -66,13 +75,8 @@ async def image_file_to_text(source_image: UploadFile, segmentation_template: Up
 
 @app.post("/image_to_text/")
 async def image_to_text(source_image: str = Form(), segmentation_template: str = Form(), labels: str = Form()):
-    source_image_stripped = source_image.replace("data:image/png;base64,", "", 1)
-    source_image_np = np.frombuffer(base64.b64decode(source_image_stripped), np.uint8)
-    source_image_img = cv.imdecode(source_image_np, cv.IMREAD_COLOR)
-
-    segmentation_template_stripped = segmentation_template.replace("data:image/png;base64,", "", 1)
-    segmentation_template_np = np.frombuffer(base64.b64decode(segmentation_template_stripped), np.uint8)
-    segmentation_template_img = cv.imdecode(segmentation_template_np, cv.IMREAD_COLOR)
+    source_image_img = data_uri_to_image(source_image)
+    segmentation_template_img = data_uri_to_image(segmentation_template)
 
     loaded_json = json.loads(labels)
     segments = segmenter.segment(source_image_img, segmentation_template_img, loaded_json)

From 193c1a651213ebb107b6adec07e77fd98f6e29dc Mon Sep 17 00:00:00 2001
From: Jonathan Chang <jonathan@skylight.digital>
Date: Mon, 4 Nov 2024 17:11:35 -0800
Subject: [PATCH 5/6] lint: remove unused io import

---
 OCR/ocr/api.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/OCR/ocr/api.py b/OCR/ocr/api.py
index 39276e30..0b01a6ba 100644
--- a/OCR/ocr/api.py
+++ b/OCR/ocr/api.py
@@ -1,5 +1,4 @@
 import base64
-import io
 
 import uvicorn
 import json

From a6f09be269d8b5d90cb1a49ec9835ce6c1a7719f Mon Sep 17 00:00:00 2001
From: Jonathan Chang <jonathan@skylight.digital>
Date: Mon, 4 Nov 2024 17:25:09 -0800
Subject: [PATCH 6/6] lint: fix blank-line spacing around data URI helpers

---
 OCR/ocr/api.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/OCR/ocr/api.py b/OCR/ocr/api.py
index 0b01a6ba..444c834e 100644
--- a/OCR/ocr/api.py
+++ b/OCR/ocr/api.py
@@ -31,17 +31,18 @@
 )
 ocr = ImageOCR()
 
+
 def data_uri_to_image(data_uri: str):
     image_stripped = data_uri.replace("data:image/png;base64,", "", 1)
     image_np = np.frombuffer(base64.b64decode(image_stripped), np.uint8)
     return cv.imdecode(image_np, cv.IMREAD_COLOR)
 
+
 def image_to_data_uri(image: np.ndarray):
     _, encoded = cv.imencode(".png", image)
     return b"data:image/png;base64," + base64.b64encode(encoded)
 
 
-
 @app.get("/")
 async def health_check():
     return {"status": "UP"}