-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathutils.py
69 lines (60 loc) · 2.27 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import cv2
import imutils
import numpy as np
import pytesseract
from imutils.perspective import four_point_transform
def perform_ocr(img: np.ndarray) -> str:
    """Locate a receipt in an encoded image and return its OCR'd text.

    The buffer is decoded with ``cv2.imdecode``, a downscaled copy is used
    to find the largest four-cornered contour, the original-resolution
    image is perspective-warped to that contour, and the warped image is
    passed to Tesseract.

    Args:
        img: Encoded image data (the input to ``cv2.imdecode``), i.e. the
            raw bytes of an image file held in a NumPy array.

    Returns:
        The text Tesseract recognised in the warped receipt image.

    Raises:
        ValueError: If ``img`` is ``None`` or empty, or if the data cannot
            be decoded as an image.
        Exception: If no four-point contour is found in the edge map.
    """
    # Reject unusable input up front instead of failing deep inside OpenCV.
    if img is None or img.size == 0:
        raise ValueError("Cannot perform OCR on an empty image buffer.")

    img_orig = cv2.imdecode(img, cv2.IMREAD_COLOR)
    # imdecode signals an undecodable buffer by returning None rather than
    # raising, so check explicitly before touching the result.
    if img_orig is None:
        raise ValueError("Could not decode image data.")

    # Work on a downscaled image for contour detection and remember the
    # scale factor so the detected corners can be mapped back onto the
    # original-resolution image.
    image = imutils.resize(img_orig, width=500)
    ratio = img_orig.shape[1] / float(image.shape[1])

    # convert the image to grayscale, blur it slightly, and then apply
    # edge detection
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(blurred, 75, 200)

    # find contours in the edge map and sort them by size in descending
    # order (the copy guards against findContours modifying its input,
    # which older OpenCV releases do)
    cnts = cv2.findContours(
        edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    cnts = imutils.grab_contours(cnts)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

    # the first (i.e. largest) contour whose polygonal approximation has
    # exactly four points is taken to be the receipt outline
    receipt_cnt = None
    for c in cnts:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        if len(approx) == 4:
            receipt_cnt = approx
            break

    # if the receipt contour is empty then we could not find the outline
    # and the caller should be notified (exception type and message are
    # kept as-is for callers that depend on them)
    if receipt_cnt is None:
        raise Exception(
            (
                "Could not find receipt outline. "
                "Try debugging your edge detection and contour steps."
            )
        )

    # apply a four-point perspective transform to the *original* image to
    # obtain a top-down bird's-eye view of the receipt; the corners were
    # found on the resized image, hence the multiplication by ratio
    receipt = four_point_transform(img_orig, receipt_cnt.reshape(4, 2) * ratio)

    # apply OCR to the receipt image using page segmentation mode 6
    # (additional cleanup such as resizing or thresholding may be needed
    # for other images)
    options = "--psm 6"
    text = pytesseract.image_to_string(
        cv2.cvtColor(receipt, cv2.COLOR_BGR2RGB), config=options
    )
    return text