Commit

ruff lint
jonchang committed Jan 15, 2025
1 parent 721c1a5 commit 90b2299
Showing 11 changed files with 42 additions and 15 deletions.
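
The changes are consistent with running Ruff's lint autofix and formatter over the OCR package: a blank line is inserted after module and class docstrings, over-long function signatures are wrapped across multiple lines, and trailing whitespace is stripped from docstring lines (those whitespace-only hunks show no visible difference in the rendered diff). The exact invocation and configuration are not recorded in the commit; a run along these lines is an assumption that would produce equivalent edits:

    ruff check --fix .
    ruff format .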
1 change: 1 addition & 0 deletions OCR/ocr/api.py
@@ -1,4 +1,5 @@
"""Module for FastAPI interface functions."""

import base64

import uvicorn
@@ -16,6 +16,7 @@ class FourPointTransform:
Attributes:
image (np.ndarray): The input image as a NumPy array.
"""

def __init__(self, image: Path | np.ndarray):
"""Initializes the FourPointTransform object with an image.
2 changes: 2 additions & 0 deletions OCR/ocr/services/alignment/backends/image_homography.py
@@ -1,4 +1,5 @@
"""Aligns two images using image homography algorithms."""

from pathlib import Path

import numpy as np
@@ -17,6 +18,7 @@ class ImageHomography:
match_ratio (float): The ratio used for Lowe's ratio test to filter good matches.
_sift (cv.SIFT): The SIFT detector used to find keypoints and descriptors.
"""

def __init__(self, template: Path | np.ndarray, match_ratio=0.3):
"""Initializes the ImageHomography object with a template image.
@@ -9,7 +9,7 @@

class RandomPerspectiveTransform:
"""Class to generates a random perspective transform based on a template image.
This class allows you to apply random distortions to an image by computing
a perspective transformation matrix and warping the image accordingly.
@@ -85,7 +85,7 @@ def transform(self, transformer: np.ndarray) -> np.ndarray:
def random_transform(self, distortion_scale: float) -> np.ndarray:
"""Warp the template image with specified distortion_scale.
This method internally calls `make_transform` to generate the transformation matrix
and applies it using `transform`.
Args:
4 changes: 3 additions & 1 deletion OCR/ocr/services/alignment/image_alignment.py
@@ -1,4 +1,5 @@
"""Module for aligning images using a specified image alignment backend."""

import numpy as np

from ocr.services.alignment.backends import ImageHomography
@@ -8,9 +9,10 @@ class ImageAligner:
"""Class for aligning images using a specified image alignment backend.
Attributes:
aligner: An alignment backend class or instance that provides an `align` method.
Default is the ImageHomography backend from the ocr.services.alignment module.
"""

def __init__(self, aligner=ImageHomography):
"""Initializes an ImageAligner instance with the specified image alignment backend.
2 changes: 2 additions & 0 deletions OCR/ocr/services/batch_metrics.py
@@ -1,4 +1,5 @@
"""Module for batch processing OCR and ground truth files and calculating metrics."""

from ocr.services.metrics_analysis import OCRMetrics
import os
import csv
@@ -16,6 +17,7 @@ class BatchMetricsAnalysis:
ground_truth_folder (str): Path to the folder containing ground truth files.
csv_output_folder (str): Path to the folder where CSV output files will be saved.
"""

def __init__(self, ocr_folder: str, ground_truth_folder: str, csv_output_folder: str) -> None:
"""Initializes the BatchMetricsAnalysis class with paths to OCR, ground truth, and output folders.
10 changes: 8 additions & 2 deletions OCR/ocr/services/batch_segmentation.py
@@ -1,4 +1,5 @@
"""Process and segment a batch of images and perform OCR on the results."""

import os
import json
import time
@@ -18,7 +19,10 @@ class BatchSegmentationOCR:
output_folder (str): Path to the folder where OCR results and timing information will be saved.
model (ImageOCR): An optional pre-defined OCR model; if None, a default instance of ImageOCR is used.
"""
-    def __init__(self, image_folder: str, segmentation_template: str, labels_path: str, output_folder: str, model=None) -> None:
+
+    def __init__(
+        self, image_folder: str, segmentation_template: str, labels_path: str, output_folder: str, model=None
+    ) -> None:
"""Initializes the BatchSegmentationOCR instance with the specified paths and an optional OCR model.
Args:
@@ -76,7 +80,9 @@ def process_images(self) -> list[dict]:
print("Processing complete.")
return results

-    def segment_ocr_image(self, segmenter: ImageSegmenter, ocr, image_path: str, image_file: str) -> tuple[dict[str, tuple[str, float]], float]:
+    def segment_ocr_image(
+        self, segmenter: ImageSegmenter, ocr, image_path: str, image_file: str
+    ) -> tuple[dict[str, tuple[str, float]], float]:
"""Segments the image and runs OCR, returning results and time taken.
Args:
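
The signature wrapping in this file (and in image_segmenter.py below) implies a configured line length well above Ruff's 88-character default, since the wrapped argument lines still run to roughly 110 characters. A pyproject.toml setting along these lines would yield this layout; the exact value is an assumption, as the configuration file is not part of this commit:

    [tool.ruff]
    line-length = 120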
10 changes: 6 additions & 4 deletions OCR/ocr/services/image_ocr.py
@@ -1,4 +1,5 @@
"""Module for OCR using a transformers-based OCR model."""

from collections.abc import Iterator

from transformers import TrOCRProcessor, VisionEncoderDecoderModel
@@ -16,6 +17,7 @@ class ImageOCR:
processor (TrOCRProcessor): Processor for TrOCR model that prepares images for OCR.
model (VisionEncoderDecoderModel): Pre-trained TrOCR model for extracting text from images.
"""

def __init__(self, model="microsoft/trocr-large-printed"):
"""Initializes the ImageOCR class with the specified OCR model.
@@ -32,7 +34,7 @@ def __init__(self, model="microsoft/trocr-large-printed"):
def compute_line_angle(lines: list) -> Iterator[float]:
"""Computes the angle in degrees of the lines detected by the Hough transform, based on their endpoints.
This method processes the output of `cv.HoughLinesP` (lines in (x1, y1, x2, y2) format) and computes the angle
between each line and the horizontal axis.
Args:
@@ -186,12 +188,12 @@ def image_to_text(self, segments: dict[str, np.ndarray]) -> dict[str, tuple[str,
For each segment, it extracts the text and the average confidence score.
Args:
segments (dict[str, np.ndarray]): A dictionary where keys are segment labels (e.g., 'header', 'body'),
and values are NumPy arrays representing the corresponding image segments.
Returns:
dict[str, tuple[str, float]]: A dictionary where each key corresponds to a segment label, and each value is
a tuple containing the recognized text (as a string) and the confidence score
(as a float) for the recognition.
"""
digitized: dict[str, tuple[str, float]] = {}
14 changes: 11 additions & 3 deletions OCR/ocr/services/image_segmenter.py
@@ -1,4 +1,5 @@
"""Module to segment images based on a segmentation template and a set of labels."""

import cv2 as cv
import numpy as np
import json
@@ -31,7 +32,9 @@ def crop_zeros(image: np.ndarray) -> np.ndarray:
] # inclusive


-def segment_by_mask_then_crop(raw_image: np.ndarray, segmentation_template: np.ndarray, labels: list[dict[str, str]], debug: bool) -> dict[str, np.ndarray]:
+def segment_by_mask_then_crop(
+    raw_image: np.ndarray, segmentation_template: np.ndarray, labels: list[dict[str, str]], debug: bool
+) -> dict[str, np.ndarray]:
"""Segments a raw image based on a color mask in the segmentation template, and then crops the resulting regions to remove zero (black) areas.
Args:
@@ -75,7 +78,9 @@ def segment_by_mask_then_crop(raw_image: np.ndarray, segmentation_template: np.n
return segments


-def segment_by_color_bounding_box(raw_image: np.ndarray, segmentation_template: np.ndarray, labels: list[dict[str, str]], debug: bool) -> dict[str, np.ndarray]:
+def segment_by_color_bounding_box(
+    raw_image: np.ndarray, segmentation_template: np.ndarray, labels: list[dict[str, str]], debug: bool
+) -> dict[str, np.ndarray]:
"""Segments a raw image by detecting colored boundary boxes in the segmentation template.
Args:
@@ -119,6 +124,7 @@ class ImageSegmenter:
`segment_by_mask_then_crop` or `segment_by_color_bounding_box`.
debug (bool): If `True`, saves debug images and prints additional information.
"""

def __init__(
self,
segmentation_function=segment_by_mask_then_crop,
@@ -152,7 +158,9 @@ def segment(
"""
return self.segmentation_function(raw_image, segmentation_template, labels, self.debug)

-    def load_and_segment(self, raw_image_path: str, segmentation_template_path: str, labels_path: str) -> dict[str, np.ndarray]:
+    def load_and_segment(
+        self, raw_image_path: str, segmentation_template_path: str, labels_path: str
+    ) -> dict[str, np.ndarray]:
"""Loads image files and labels from specified paths, and then segments the image.
Args:
2 changes: 2 additions & 0 deletions OCR/ocr/services/metrics_analysis.py
@@ -1,4 +1,5 @@
"""Module to calculate OCR metrics and compare results to ground truth data."""

import json
import csv
import Levenshtein
@@ -15,6 +16,7 @@ class OCRMetrics:
ocr_json (dict): A dict from JSON data containing OCR results.
ground_truth_json (dict): A dict from JSON data containing the ground truth values.
"""

def __init__(self, ocr_json_path=None, ground_truth_json_path=None, ocr_json=None, ground_truth_json=None):
"""Initializes the OCRMetrics object with OCR and ground truth data, either loaded from files or provided as dictionaries.
7 changes: 4 additions & 3 deletions OCR/ocr/services/tesseract_ocr.py
@@ -22,6 +22,7 @@ class TesseractOCR:
* https://github.com/sirfz/tesserocr/blob/bbe0fb8edabdcc990f1e6fa9334c0747c2ac76ee/tesserocr/__init__.pyi#L47
* https://tesseract-ocr.github.io/tessdoc/tess3/ControlParams.html
"""

def __init__(self, psm=PSM.AUTO, variables=dict()):
"""Initializes the TesseractOCR object with the specified page segmentation mode and internal variables.
@@ -88,12 +89,12 @@ def image_to_text(self, segments: dict[str, np.ndarray]) -> dict[str, tuple[str,
For each segment, it extracts the text and the average confidence score returned from the Tesseract API.
Args:
segments (dict[str, np.ndarray]): A dictionary where keys are segment labels (e.g., 'header', 'body'),
and values are NumPy arrays representing the corresponding image segments.
Returns:
dict[str, tuple[str, float]]: A dictionary where each key corresponds to a segment label, and each value is
a tuple containing the recognized text (as a string) and the confidence score
(as a float) for the recognition.
"""
digitized: dict[str, tuple[str, float]] = {}
