-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathOCR_reader.py
119 lines (103 loc) · 3.56 KB
/
OCR_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import cv2
import numpy as np
import pytesseract
from scipy import signal
from PIL import Image
# Point pytesseract at the Tesseract executable it should run.
# NOTE: this is a hard-coded Windows-style path; adjust it if Tesseract is
# installed elsewhere or the script is run on another operating system.
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
class OCRReader:
    """
    Clean up an image and extract its text with Tesseract.

    The whole pipeline runs inside the constructor:
    load -> gray scale -> float array -> background estimate -> foreground
    mask -> masked image -> PIL image -> OCR -> text file.
    Every intermediate result is kept as an attribute (``image``,
    ``gray_scale``, ``gray_array``, ``background``, ``mask``,
    ``processed_image``, ``final_image``, ``text``).
    """

    # Window size handed to the median filter that estimates the background.
    # scipy's medfilt2d expects an odd size.
    MEDIAN_KERNEL_SIZE = 15
    # A pixel counts as foreground when it is darker than the estimated
    # background by more than this amount (gray values are scaled to [0, 1]).
    FOREGROUND_MARGIN = 0.1

    def __init__(
        self,
        image_path: str,
        display_bg: bool = True,
        display_image: bool = True,
        display_text: bool = False,
    ):
        """
        Run the complete OCR pipeline on one image
        Param:
            image_path : path of the image file to read
            display_bg : show the estimated background in a window (blocks
                until a key is pressed)
            display_image : show the processed image in a window (blocks
                until a key is pressed)
            display_text : print the recognized text
        Raises:
            OSError if the image cannot be loaded
        """
        self.image = cv2.imread(image_path)  # load image
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than deep inside cvtColor.
        if self.image is None:
            raise OSError(
                f"Could not read image (missing file or unsupported format): {image_path!r}"
            )

        # Preprocessing
        self.gray_scale = self.to_gray()
        self.gray_array = self.gray_to_array()
        self.background = self.get_background_color()
        if display_bg:
            self.display_image("Background")

        self.mask = self.compute_mask()
        self.processed_image = self.process_image()
        if display_image:
            self.display_image("Processed")

        # OCR and output
        self.final_image = self.array_to_image()
        self.text = self.get_image_text()
        self.write_output()
        if display_text:
            print(self.text)

    def to_gray(self):
        """
        Convert the loaded BGR image to gray scale
        return:
            gray scale image
        """
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)

    def gray_to_array(self) -> np.ndarray:
        """
        Convert the gray image to a float32 array scaled by 1/255
        return:
            gray image in array format
        """
        return np.asarray(self.gray_scale).astype('float32') / 255.0

    def get_background_color(self) -> np.ndarray:
        """
        Estimate the 'background' color with a median filter
        return:
            filtered gray image
        """
        return signal.medfilt2d(self.gray_array, self.MEDIAN_KERNEL_SIZE)

    def display_image(self, image: str = "Background"):
        """
        Display the background estimate or the processed image
        Param:
            image : window title, also used as a flag. "Background" shows
                the background estimate; any other value shows the
                processed image
        return:
            None
        """
        if image == "Background":
            cv2.imshow(image, self.background)
        else:
            # Processed
            cv2.imshow(image, self.processed_image)
        cv2.waitKey(0)  # waits until a key is pressed
        cv2.destroyAllWindows()  # destroys the window showing image

    def compute_mask(self) -> np.ndarray:
        """
        Compute the 'foreground' mask: everything that is darker than the
        background by more than FOREGROUND_MARGIN
        return:
            boolean mask, True where the pixel is foreground
        """
        return self.gray_array < (self.background - self.FOREGROUND_MARGIN)

    def process_image(self) -> np.ndarray:
        """
        Keep the gray value of every pixel in the mask, pure white otherwise
        return:
            selection based on mask image
        """
        return np.where(self.mask, self.gray_array, 1.0)

    @staticmethod
    def _to_uint8(unit_array) -> np.ndarray:
        """
        Scale a float array with values in [0, 1] to uint8 values in [0, 255]
        return:
            uint8 array of the same shape
        """
        # Scale BEFORE casting: casting first would truncate every value
        # below 1.0 to 0 and throw away all gray levels. Rounding avoids
        # off-by-one results from float error; clipping avoids wraparound.
        scaled = np.rint(np.asarray(unit_array) * 255.0)
        return np.clip(scaled, 0, 255).astype(np.uint8)

    def array_to_image(self) -> "Image.Image":
        """
        Convert the processed array back to image format
        return:
            Final image
        """
        return Image.fromarray(self._to_uint8(self.processed_image))

    def get_image_text(self) -> str:
        """
        Apply OCR reading on the filtered image
        return:
            Text from the image
        """
        return pytesseract.image_to_string(self.final_image)

    def write_output(self, file_name="recognized.txt"):
        """
        Output the recognized text to a .txt file
        param
            file_name (str) : path of the file to write (overwritten)
        return:
            None. .txt file is saved to local
        """
        # The context manager closes the file even if the write fails. The
        # explicit UTF-8 encoding keeps the write from failing on characters
        # that a platform-default code page cannot represent.
        with open(file_name, "w", encoding="utf-8") as text_file:
            text_file.write(self.text)
if __name__ == "__main__":
    # Script entry point: run the OCR pipeline on example_image.png using
    # the constructor's default display options.
    ocr_object = OCRReader(image_path="example_image.png")