-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathmain.py
96 lines (82 loc) · 3.03 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import argparse
import os
import cv2
import imutils
import pytesseract
from imutils.perspective import four_point_transform
def _load_image(path):
    """Load the image stored at *path* with OpenCV.

    Args:
        path: filesystem path of the image to read.

    Returns:
        The decoded image as returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: if nothing exists at *path*.
        ValueError: if OpenCV could not decode the file as an image.
    """
    if not os.path.exists(path):
        raise FileNotFoundError("The given image does not exist.")

    image = cv2.imread(path)
    # cv2.imread reports failure by returning None rather than raising, so
    # check explicitly instead of crashing later on an attribute access.
    if image is None:
        raise ValueError(
            "The given file could not be read as an image: {}".format(path)
        )
    return image


def _find_receipt_contour(edged):
    """Return the largest four-point contour in the edge map, or None.

    Args:
        edged: edge map (output of ``cv2.Canny``) to search for contours.

    Returns:
        The polygon approximation of the largest-area external contour that
        has exactly four points, or ``None`` if no contour qualifies.
    """
    cnts = cv2.findContours(
        edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    cnts = imutils.grab_contours(cnts)

    # visit contours from largest to smallest area so the first four-point
    # polygon found is the biggest candidate
    for c in sorted(cnts, key=cv2.contourArea, reverse=True):
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        # a four-point approximation is assumed to be the receipt outline
        if len(approx) == 4:
            return approx
    return None


def main(argv=None):
    """Locate a receipt in an image, rectify it, and print its OCR text.

    Args:
        argv: optional list of command-line arguments (without the program
            name). When ``None``, argparse reads them from ``sys.argv``.

    Raises:
        FileNotFoundError: if the path given with ``--image`` does not exist.
        ValueError: if the file exists but cannot be decoded as an image.
        RuntimeError: if no four-point contour (receipt outline) is found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--image", type=str, required=True, help="path to input image"
    )
    args = parser.parse_args(argv)

    # load the image, resize a working copy and remember the scale factor
    # between the original and the resized version
    img_orig = _load_image(args.image)
    image = imutils.resize(img_orig, width=500)
    ratio = img_orig.shape[1] / float(image.shape[1])

    # convert the image to grayscale, blur it slightly, and then apply
    # edge detection
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(blurred, 75, 200)

    # if no outline was found the later steps cannot run, so fail loudly
    receiptCnt = _find_receipt_contour(edged)
    if receiptCnt is None:
        raise RuntimeError(
            "Could not find receipt outline. "
            "Try debugging your edge detection and contour steps."
        )

    # apply a four-point perspective transform to the *original* image to
    # obtain a top-down bird's-eye view of the receipt; the contour was found
    # on the resized image, so scale its points back up by `ratio`
    receipt = four_point_transform(img_orig, receiptCnt.reshape(4, 2) * ratio)

    # apply OCR to the receipt image by assuming column data, ensuring
    # the text is *concatenated across the row* (additionally, for your
    # own images you may need to apply additional processing to cleanup
    # the image, including resizing, thresholding, etc.)
    options = "--psm 6"
    text = pytesseract.image_to_string(
        cv2.cvtColor(receipt, cv2.COLOR_BGR2RGB), config=options
    )

    # show the raw output of the OCR process
    print("[INFO] raw output:")
    print("==================")
    print(text)
    print("\n")
# Run the receipt OCR pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()