Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement API endpoint for image alignment #364

Merged
merged 6 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 24 additions & 7 deletions OCR/ocr/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from fastapi.middleware.cors import CORSMiddleware

from ocr.services.image_ocr import ImageOCR
from ocr.services.alignment import ImageAligner
from ocr.services.image_segmenter import ImageSegmenter, segment_by_color_bounding_box

app = FastAPI()
Expand All @@ -31,11 +32,32 @@
ocr = ImageOCR()


def data_uri_to_image(data_uri: str):
    """Decode a base64 image, optionally wrapped in a data URI, into an OpenCV image.

    Any ``data:<media type>;base64,`` header is removed, not only the PNG one,
    so JPEG or other data URIs decode correctly as well. A string without a
    ``data:`` header is treated as a bare base64 payload.

    Args:
        data_uri: A data URI, or a plain base64-encoded image.

    Returns:
        The result of ``cv.imdecode`` with ``cv.IMREAD_COLOR``.
        NOTE(review): per the OpenCV docs ``imdecode`` yields an empty result
        (``None`` in Python) when the bytes are not a decodable image; callers
        do not currently check for that.
    """
    if data_uri.startswith("data:"):
        # Everything up to the first comma is the data-URI header; the base64
        # alphabet contains no comma, so the payload is never split.
        _, _, payload = data_uri.partition(",")
    else:
        payload = data_uri
    image_np = np.frombuffer(base64.b64decode(payload), np.uint8)
    return cv.imdecode(image_np, cv.IMREAD_COLOR)


def image_to_data_uri(image: np.ndarray) -> bytes:
    """Encode an image array as a PNG data URI.

    Args:
        image: Image array to pass to ``cv.imencode``.

    Returns:
        The bytes ``data:image/png;base64,`` followed by the base64-encoded
        PNG data.

    Raises:
        ValueError: If ``cv.imencode`` reports that encoding did not succeed.
    """
    success, encoded = cv.imencode(".png", image)
    if not success:
        # Previously the flag was discarded and a failed encode was still
        # wrapped into a data URI and handed back to the caller.
        raise ValueError("Failed to encode image as PNG")
    return b"data:image/png;base64," + base64.b64encode(encoded)


@app.get("/")
async def health_check():
    """Report that the service is running."""
    status_payload = {"status": "UP"}
    return status_payload


@app.post("/image_alignment/")
async def image_alignment(source_image: str = Form(), segmentation_template: str = Form()):
    """Align a submitted image against a segmentation template.

    Both form fields are decoded with ``data_uri_to_image``, handed to
    ``ImageAligner.align`` (source first, template second), and the aligned
    image is returned under the ``"result"`` key as produced by
    ``image_to_data_uri``.
    """
    # Decode in the same order as the form fields: source, then template.
    decoded_source, decoded_template = (
        data_uri_to_image(uri) for uri in (source_image, segmentation_template)
    )
    aligned = ImageAligner().align(decoded_source, decoded_template)
    return {"result": image_to_data_uri(aligned)}


@app.post("/image_file_to_text/")
async def image_file_to_text(source_image: UploadFile, segmentation_template: UploadFile, labels: str = Form()):
source_image_np = np.frombuffer(await source_image.read(), np.uint8)
Expand All @@ -53,13 +75,8 @@ async def image_file_to_text(source_image: UploadFile, segmentation_template: Up

@app.post("/image_to_text/")
async def image_to_text(source_image: str = Form(), segmentation_template: str = Form(), labels: str = Form()):
source_image_stripped = source_image.replace("data:image/png;base64,", "", 1)
source_image_np = np.frombuffer(base64.b64decode(source_image_stripped), np.uint8)
source_image_img = cv.imdecode(source_image_np, cv.IMREAD_COLOR)

segmentation_template_stripped = segmentation_template.replace("data:image/png;base64,", "", 1)
segmentation_template_np = np.frombuffer(base64.b64decode(segmentation_template_stripped), np.uint8)
segmentation_template_img = cv.imdecode(segmentation_template_np, cv.IMREAD_COLOR)
source_image_img = data_uri_to_image(source_image)
segmentation_template_img = data_uri_to_image(segmentation_template)

loaded_json = json.loads(labels)
segments = segmenter.segment(source_image_img, segmentation_template_img, loaded_json)
Expand Down
28 changes: 26 additions & 2 deletions frontend/api/api.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,29 @@
import { ImageToTextArgs, ImageToTextResponse } from "./types/types";
import { ImageToTextArgs, ImageToTextResponse, AlignImageArgs, AlignImageResponse } from "./types/types";

// Base URL of the OCR API. Endpoint paths are appended directly (e.g.
// `${apiUrl}image_alignment/`), so make sure it always ends with a slash even
// when VITE_API_URL is configured without one.
const configuredApiUrl: string = import.meta.env.VITE_API_URL || "http://localhost:8000/";
const apiUrl = configuredApiUrl.endsWith("/") ? configuredApiUrl : `${configuredApiUrl}/`;

/**
 * POST a source image and a template image to the `image_alignment/` endpoint.
 *
 * @param args - `sourceImage` and `templateImage`, sent URL-encoded as the
 *   `source_image` and `segmentation_template` form fields.
 * @returns The parsed JSON response body, or `null` when the request fails,
 *   the server answers with a non-2xx status, or the body is not valid JSON.
 */
export const AlignImage = async (args: AlignImageArgs): Promise<AlignImageResponse | null> => {
  const { sourceImage, templateImage } = args;
  const form = new URLSearchParams({
    source_image: sourceImage,
    segmentation_template: templateImage,
  });

  const alignImageURL = `${apiUrl}image_alignment/`;
  try {
    const response = await fetch(alignImageURL, {
      method: "POST",
      headers: {
        "Content-Type": "application/x-www-form-urlencoded",
      },
      body: form,
    });
    if (!response.ok) {
      // fetch() only rejects on network errors; an HTTP error status still
      // resolves, and its error body must not be passed off as a result.
      console.error(`Image alignment request failed: ${response.status} ${response.statusText}`);
      return null;
    }
    return (await response.json()) as AlignImageResponse;
  } catch (error) {
    console.error(error);
    return null;
  }
};

export const ImageToText = async (args: ImageToTextArgs): Promise<ImageToTextResponse | null> => {

Expand All @@ -9,7 +34,6 @@ export const ImageToText = async (args: ImageToTextArgs): Promise<ImageToTextRes
labels: JSON.stringify(fieldNames),
});

const apiUrl = import.meta.env.VITE_API_URL || "http://localhost:8000/"
const imageToTextURL = `${apiUrl}image_to_text/`
try {
const response = await fetch(imageToTextURL, {
Expand Down
12 changes: 11 additions & 1 deletion frontend/api/types/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ export type ImageToTextResponse = {
[key: string]: [string, number];
};

/**
 * JSON body that `AlignImage` casts its response to: string keys mapped to
 * `[string, number]` tuples.
 *
 * NOTE(review): the `/image_alignment/` handler returns `{"result": <data URI>}`,
 * which does not look like a tuple map — confirm and tighten this type.
 */
export type AlignImageResponse = Record<string, [string, number]>;

export interface ResultItem {
text: string;
confidence: number;
Expand All @@ -32,4 +36,10 @@ export interface ResultItem {
sourceImage: string;
templateImage: string;
fieldNames: Field[];
}
}

/** Arguments accepted by `AlignImage`. */
export interface AlignImageArgs {
// base 64 encoded image; sent as the `source_image` form field
sourceImage: string;
// sent as the `segmentation_template` form field
// (presumably base 64 encoded like sourceImage — confirm against callers)
templateImage: string;
}
Loading