From 90b2299b6e4cec6509ca8ee71a780a605449f759 Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Wed, 15 Jan 2025 15:06:25 -0800 Subject: [PATCH] ruff lint --- OCR/ocr/api.py | 1 + .../alignment/backends/four_point_transform.py | 1 + .../alignment/backends/image_homography.py | 2 ++ .../backends/random_perspective_transform.py | 4 ++-- OCR/ocr/services/alignment/image_alignment.py | 4 +++- OCR/ocr/services/batch_metrics.py | 2 ++ OCR/ocr/services/batch_segmentation.py | 10 ++++++++-- OCR/ocr/services/image_ocr.py | 10 ++++++---- OCR/ocr/services/image_segmenter.py | 14 +++++++++++--- OCR/ocr/services/metrics_analysis.py | 2 ++ OCR/ocr/services/tesseract_ocr.py | 7 ++++--- 11 files changed, 42 insertions(+), 15 deletions(-) diff --git a/OCR/ocr/api.py b/OCR/ocr/api.py index 4d42caf3..8ea4b936 100644 --- a/OCR/ocr/api.py +++ b/OCR/ocr/api.py @@ -1,4 +1,5 @@ """Module for FastAPI interface functions.""" + import base64 import uvicorn diff --git a/OCR/ocr/services/alignment/backends/four_point_transform.py b/OCR/ocr/services/alignment/backends/four_point_transform.py index a3535aea..e3eeb13c 100644 --- a/OCR/ocr/services/alignment/backends/four_point_transform.py +++ b/OCR/ocr/services/alignment/backends/four_point_transform.py @@ -16,6 +16,7 @@ class FourPointTransform: Attributes: image (np.ndarray): The input image as a NumPy array. """ + def __init__(self, image: Path | np.ndarray): """Initializes the FourPointTransform object with an image. diff --git a/OCR/ocr/services/alignment/backends/image_homography.py b/OCR/ocr/services/alignment/backends/image_homography.py index 00a4af11..b001083b 100644 --- a/OCR/ocr/services/alignment/backends/image_homography.py +++ b/OCR/ocr/services/alignment/backends/image_homography.py @@ -1,4 +1,5 @@ """Aligns two images using image homography algorithms.""" + from pathlib import Path import numpy as np @@ -17,6 +18,7 @@ class ImageHomography: match_ratio (float): The ratio used for Lowe's ratio test to filter good matches. _sift (cv.SIFT): The SIFT detector used to find keypoints and descriptors. """ + def __init__(self, template: Path | np.ndarray, match_ratio=0.3): """Initializes the ImageHomography object with a template image. diff --git a/OCR/ocr/services/alignment/backends/random_perspective_transform.py b/OCR/ocr/services/alignment/backends/random_perspective_transform.py index c49d7df7..34a35b38 100644 --- a/OCR/ocr/services/alignment/backends/random_perspective_transform.py +++ b/OCR/ocr/services/alignment/backends/random_perspective_transform.py @@ -9,7 +9,7 @@ class RandomPerspectiveTransform: """Class to generates a random perspective transform based on a template image. - + This class allows you to apply random distortions to an image by computing a perspective transformation matrix and warping the image accordingly. @@ -85,7 +85,7 @@ def transform(self, transformer: np.ndarray) -> np.ndarray: def random_transform(self, distortion_scale: float) -> np.ndarray: """Warp the template image with specified distortion_scale. - This method internally calls `make_transform` to generate the transformation matrix + This method internally calls `make_transform` to generate the transformation matrix and applies it using `transform`. Args: diff --git a/OCR/ocr/services/alignment/image_alignment.py b/OCR/ocr/services/alignment/image_alignment.py index d88cc3b0..155b2b73 100644 --- a/OCR/ocr/services/alignment/image_alignment.py +++ b/OCR/ocr/services/alignment/image_alignment.py @@ -1,4 +1,5 @@ """Module for aligning images using a specified image alignment backend.""" + import numpy as np from ocr.services.alignment.backends import ImageHomography @@ -8,9 +9,10 @@ class ImageAligner: """Class for aligning images using a specified image alignment backend. Attributes: - aligner: An alignment backend class or instance that provides an `align` method. + aligner: An alignment backend class or instance that provides an `align` method. Default is the ImageHomography backend from the ocr.services.alignment module. """ + def __init__(self, aligner=ImageHomography): """Initializes an ImageAligner instance with the specified image alignment backend. diff --git a/OCR/ocr/services/batch_metrics.py b/OCR/ocr/services/batch_metrics.py index 9bf5137b..cec24e61 100644 --- a/OCR/ocr/services/batch_metrics.py +++ b/OCR/ocr/services/batch_metrics.py @@ -1,4 +1,5 @@ """Module for batch processing OCR and ground truth files and calculating metrics.""" + from ocr.services.metrics_analysis import OCRMetrics import os import csv @@ -16,6 +17,7 @@ class BatchMetricsAnalysis: ground_truth_folder (str): Path to the folder containing ground truth files. csv_output_folder (str): Path to the folder where CSV output files will be saved. """ + def __init__(self, ocr_folder: str, ground_truth_folder: str, csv_output_folder: str) -> None: """Initializes the BatchMetricsAnalysis class with paths to OCR, ground truth, and output folders. diff --git a/OCR/ocr/services/batch_segmentation.py b/OCR/ocr/services/batch_segmentation.py index a3193ca0..ff37edbe 100644 --- a/OCR/ocr/services/batch_segmentation.py +++ b/OCR/ocr/services/batch_segmentation.py @@ -1,4 +1,5 @@ """Process and segment a batch of images and perform OCR on the results.""" + import os import json import time @@ -18,7 +19,10 @@ class BatchSegmentationOCR: output_folder (str): Path to the folder where OCR results and timing information will be saved. model (ImageOCR): An optional pre-defined OCR model; if None, a default instance of ImageOCR is used. """ - def __init__(self, image_folder: str, segmentation_template: str, labels_path: str, output_folder: str, model=None) -> None: + + def __init__( + self, image_folder: str, segmentation_template: str, labels_path: str, output_folder: str, model=None + ) -> None: """Initializes the BatchSegmentationOCR instance with the specified paths and an optional OCR model. Args: @@ -76,7 +80,9 @@ def process_images(self) -> list[dict]: print("Processing complete.") return results - def segment_ocr_image(self, segmenter: ImageSegmenter, ocr, image_path: str, image_file: str) -> tuple[dict[str, tuple[str, float]], float]: + def segment_ocr_image( + self, segmenter: ImageSegmenter, ocr, image_path: str, image_file: str + ) -> tuple[dict[str, tuple[str, float]], float]: """Segments the image and runs OCR, returning results and time taken. Args: diff --git a/OCR/ocr/services/image_ocr.py b/OCR/ocr/services/image_ocr.py index e9fdb784..b482c75c 100644 --- a/OCR/ocr/services/image_ocr.py +++ b/OCR/ocr/services/image_ocr.py @@ -1,4 +1,5 @@ """Module for OCR using a transformers-based OCR model.""" + from collections.abc import Iterator from transformers import TrOCRProcessor, VisionEncoderDecoderModel @@ -16,6 +17,7 @@ class ImageOCR: processor (TrOCRProcessor): Processor for TrOCR model that prepares images for OCR. model (VisionEncoderDecoderModel): Pre-trained TrOCR model for extracting text from images. """ + def __init__(self, model="microsoft/trocr-large-printed"): """Initializes the ImageOCR class with the specified OCR model. @@ -32,7 +34,7 @@ def __init__(self, model="microsoft/trocr-large-printed"): def compute_line_angle(lines: list) -> Iterator[float]: """Computes the angle in degrees of the lines detected by the Hough transform, based on their endpoints. - This method processes the output of `cv.HoughLinesP` (lines in (x1, y1, x2, y2) format) and computes the angle + This method processes the output of `cv.HoughLinesP` (lines in (x1, y1, x2, y2) format) and computes the angle between each line and the horizontal axis. Args: @@ -186,12 +188,12 @@ def image_to_text(self, segments: dict[str, np.ndarray]) -> dict[str, tuple[str, For each segment, it extracts the text and the average confidence score. Args: - segments (dict[str, np.ndarray]): A dictionary where keys are segment labels (e.g., 'header', 'body'), + segments (dict[str, np.ndarray]): A dictionary where keys are segment labels (e.g., 'header', 'body'), and values are NumPy arrays representing the corresponding image segments. Returns: - dict[str, tuple[str, float]]: A dictionary where each key corresponds to a segment label, and each value is - a tuple containing the recognized text (as a string) and the confidence score + dict[str, tuple[str, float]]: A dictionary where each key corresponds to a segment label, and each value is + a tuple containing the recognized text (as a string) and the confidence score (as a float) for the recognition. """ digitized: dict[str, tuple[str, float]] = {} diff --git a/OCR/ocr/services/image_segmenter.py b/OCR/ocr/services/image_segmenter.py index b4a15cc8..b1eb8b1c 100644 --- a/OCR/ocr/services/image_segmenter.py +++ b/OCR/ocr/services/image_segmenter.py @@ -1,4 +1,5 @@ """Module to segment images based on a segmentation template and a set of labels.""" + import cv2 as cv import numpy as np import json @@ -31,7 +32,9 @@ def crop_zeros(image: np.ndarray) -> np.ndarray: ] # inclusive -def segment_by_mask_then_crop(raw_image: np.ndarray, segmentation_template: np.ndarray, labels: list[dict[str, str]], debug: bool) -> dict[str, np.ndarray]: +def segment_by_mask_then_crop( + raw_image: np.ndarray, segmentation_template: np.ndarray, labels: list[dict[str, str]], debug: bool +) -> dict[str, np.ndarray]: """Segments a raw image based on a color mask in the segmentation template, and then crops the resulting regions to remove zero (black) areas. Args: @@ -75,7 +78,9 @@ def segment_by_mask_then_crop(raw_image: np.ndarray, segmentation_template: np.n return segments -def segment_by_color_bounding_box(raw_image: np.ndarray, segmentation_template: np.ndarray, labels: list[dict[str, str]], debug: bool) -> dict[str, np.ndarray]: +def segment_by_color_bounding_box( + raw_image: np.ndarray, segmentation_template: np.ndarray, labels: list[dict[str, str]], debug: bool +) -> dict[str, np.ndarray]: """Segments a raw image by detecting colored boundary boxes in the segmentation template. Args: @@ -119,6 +124,7 @@ class ImageSegmenter: `segment_by_mask_then_crop` or `segment_by_color_bounding_box`. debug (bool): If `True`, saves debug images and prints additional information. """ + def __init__( self, segmentation_function=segment_by_mask_then_crop, @@ -152,7 +158,9 @@ def segment( """ return self.segmentation_function(raw_image, segmentation_template, labels, self.debug) - def load_and_segment(self, raw_image_path: str, segmentation_template_path: str, labels_path: str) -> dict[str, np.ndarray]: + def load_and_segment( + self, raw_image_path: str, segmentation_template_path: str, labels_path: str + ) -> dict[str, np.ndarray]: """Loads image files and labels from specified paths, and then segments the image. Args: diff --git a/OCR/ocr/services/metrics_analysis.py b/OCR/ocr/services/metrics_analysis.py index f3d6c256..1370c49c 100644 --- a/OCR/ocr/services/metrics_analysis.py +++ b/OCR/ocr/services/metrics_analysis.py @@ -1,4 +1,5 @@ """Module to calculate OCR metrics and compare results to ground truth data.""" + import json import csv import Levenshtein @@ -15,6 +16,7 @@ class OCRMetrics: ocr_json (dict): A dict from JSON data containing OCR results. ground_truth_json (dict): A dict from JSON data containing the ground truth values. """ + def __init__(self, ocr_json_path=None, ground_truth_json_path=None, ocr_json=None, ground_truth_json=None): """Initializes the OCRMetrics object with OCR and ground truth data, either loaded from files or provided as dictionaries. diff --git a/OCR/ocr/services/tesseract_ocr.py b/OCR/ocr/services/tesseract_ocr.py index 03872a77..6bb3102d 100644 --- a/OCR/ocr/services/tesseract_ocr.py +++ b/OCR/ocr/services/tesseract_ocr.py @@ -22,6 +22,7 @@ class TesseractOCR: * https://github.com/sirfz/tesserocr/blob/bbe0fb8edabdcc990f1e6fa9334c0747c2ac76ee/tesserocr/__init__.pyi#L47 * https://tesseract-ocr.github.io/tessdoc/tess3/ControlParams.html """ + def __init__(self, psm=PSM.AUTO, variables=dict()): """Initializes the TesseractOCR object with the specified page segmentation mode and internal variables. @@ -88,12 +89,12 @@ def image_to_text(self, segments: dict[str, np.ndarray]) -> dict[str, tuple[str, For each segment, it extracts the text and the average confidence score returned from the Tesseract API. Args: - segments (dict[str, np.ndarray]): A dictionary where keys are segment labels (e.g., 'header', 'body'), + segments (dict[str, np.ndarray]): A dictionary where keys are segment labels (e.g., 'header', 'body'), and values are NumPy arrays representing the corresponding image segments. Returns: - dict[str, tuple[str, float]]: A dictionary where each key corresponds to a segment label, and each value is - a tuple containing the recognized text (as a string) and the confidence score + dict[str, tuple[str, float]]: A dictionary where each key corresponds to a segment label, and each value is + a tuple containing the recognized text (as a string) and the confidence score (as a float) for the recognition. """ digitized: dict[str, tuple[str, float]] = {}