-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsample.txt
283 lines (233 loc) · 9.79 KB
/
sample.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# USAGE
# python text_recognition.py --east frozen_east_text_detection.pb --image images/example_01.jpg
# python text_recognition.py --east frozen_east_text_detection.pb --image images/example_04.jpg --padding 0.05
# import the necessary packages
from imutils.object_detection import non_max_suppression
import numpy as np
import pytesseract
import argparse
import cv2
def decode_predictions(scores, geometry, min_confidence=None):
    """Turn the raw EAST output volumes into boxes and confidences.

    Args:
        scores: score volume, read as ``scores[0, 0, y, x]``; each entry
            is compared against ``min_confidence``.
        geometry: geometry volume, read as ``geometry[0, c, y, x]``.
            Channels 0 and 2 are summed into the box height, channels
            1 and 3 into the box width, and channel 4 is the rotation
            angle passed to ``np.cos`` / ``np.sin``.
        min_confidence: cells scoring below this value are skipped.
            When ``None`` (the default) the threshold is taken from the
            module-level ``args["min_confidence"]``, which keeps existing
            two-argument calls working unchanged.

    Returns:
        A ``(rects, confidences)`` tuple of parallel lists: ``rects``
        holds ``(startX, startY, endX, endY)`` integer tuples and
        ``confidences`` holds the matching score for each rectangle.
    """
    # fall back to the command-line setting only when the caller did not
    # supply a threshold, so the function is usable without the parser
    if min_confidence is None:
        min_confidence = args["min_confidence"]

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the
        # geometrical data used to derive potential bounding box
        # coordinates that surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability,
            # ignore it
            if scoresData[x] < min_confidence:
                continue

            # compute the offset factor as our resulting feature
            # maps will be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and
            # then compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height
            # of the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates
            # for the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score
            # to our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # return a tuple of the bounding boxes and associated confidences
    return (rects, confidences)
# build the command-line interface: one option per setting the script
# exposes, each with its short flag, long flag, type and help text
ap = argparse.ArgumentParser()

# input paths
ap.add_argument(
    "-i", "--image",
    type=str,
    help="path to input image",
)
ap.add_argument(
    "-east", "--east",
    type=str,
    help="path to input EAST text detector",
)

# detection threshold
ap.add_argument(
    "-c", "--min-confidence",
    type=float,
    default=0.5,
    help="minimum probability required to inspect a region",
)

# size the image is resized to before it is fed to the detector
ap.add_argument(
    "-w", "--width",
    type=int,
    default=320,
    help="nearest multiple of 32 for resized width",
)
ap.add_argument(
    "-e", "--height",
    type=int,
    default=320,
    help="nearest multiple of 32 for resized height",
)

# fractional padding applied around each detected region
ap.add_argument(
    "-p", "--padding",
    type=float,
    default=0.0,
    help="amount of padding to add to each border of ROI",
)

# parse the command line and expose the options as a plain dictionary
args = vars(ap.parse_args())
# load the input image and grab the image dimensions -- honour the
# --image option when it was supplied and fall back to the previously
# hard-coded sample path otherwise
imagePath = args.get("image") or "images/syamil.jpg"
image = cv2.imread(imagePath)

# cv2.imread reports an unreadable or missing file by returning None
# rather than raising, so stop here with a clear message instead of
# failing later on image.copy()
if image is None:
    raise SystemExit("[ERROR] could not read image: {}".format(imagePath))

orig = image.copy()
(origH, origW) = image.shape[:2]

# set the new width and height and then determine the ratio in change
# for both the width and height (used later to map detections made on
# the resized image back onto the original)
(newW, newH) = (args["width"], args["height"])
rW = origW / float(newW)
rH = origH / float(newH)

# resize the image, denoise the resized copy and grab the new image
# dimensions
image = cv2.resize(image, (newW, newH))
image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
(H, W) = image.shape[:2]
# define the two output layer names for the EAST detector model that
# we are interested in -- the first is the output probabilities and the
# second can be used to derive the bounding box coordinates of text
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]

# load the pre-trained EAST text detector -- honour the --east option
# when it was supplied and fall back to the previously hard-coded model
# file otherwise
eastPath = args.get("east") or "frozen_east_text_detection.pb"
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(eastPath)

# construct a blob from the image and then perform a forward pass of
# the model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
    (123.68, 116.78, 103.94), swapRB=True, crop=False)
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)

# decode the predictions, then apply non-maxima suppression to
# suppress weak, overlapping bounding boxes
(rects, confidences) = decode_predictions(scores, geometry)
boxes = non_max_suppression(np.array(rects), probs=confidences)
# initialize the list of results
results = []

# the blob detector settings are identical for every region, so build
# the detector once up front instead of once per bounding box
params = cv2.SimpleBlobDetector_Params()
# Filter by Inertia
params.filterByInertia = True
params.minInertiaRatio = 0.3
params.maxInertiaRatio = 0.9
# the factory function was renamed in OpenCV 3, so pick it by version
ver = (cv2.__version__).split('.')
if int(ver[0]) < 3:
    detector = cv2.SimpleBlobDetector(params)
else:
    detector = cv2.SimpleBlobDetector_create(params)

# Tesseract options, also constant across regions: "--oem 3" selects the
# default OCR engine mode, "--psm 13" treats the ROI as a single raw
# line of text, and the whitelist restricts the output to digits
config = ("--oem 3 --psm 13 -c tessedit_char_whitelist=0123456789")

# loop over the bounding boxes
for (startX, startY, endX, endY) in boxes:
    # scale the bounding box coordinates based on the respective
    # ratios
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)

    # in order to obtain a better OCR of the text we can potentially
    # apply a bit of padding surrounding the bounding box -- here we
    # are computing the deltas in both the x and y directions
    dX = int((endX - startX) * args["padding"])
    dY = int((endY - startY) * args["padding"])

    # apply padding to each side of the bounding box, respectively
    startX = max(0, startX - dX)
    startY = max(0, startY - dY)
    endX = min(origW, endX + (dX * 2))
    endY = min(origH, endY + (dY * 2))

    # extract the actual padded ROI
    roi = orig[startY:endY, startX:endX]

    # a box that collapses to nothing after scaling/clamping would make
    # cv2.cvtColor fail on an empty image, so skip it
    if roi.size == 0:
        continue

    # type 1 - convert into gray,blurring, denoising and convert into b&W
    # gray = cv2.cvtColor(roi , cv2.COLOR_BGR2GRAY)
    # blur = cv2.GaussianBlur(gray, (31, 120), 0)
    # denoising = cv2.fastNlMeansDenoising(blur,blur,7,10,21)
    # bw = cv2.adaptiveThreshold(denoising,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
    #     cv2.THRESH_BINARY,11,2)

    # type 2 - convert into gray,blurring and convert into b&W
    # blur = cv2.pyrMeanShiftFiltering(roi,4,91)
    # gray = cv2.cvtColor(blur , cv2.COLOR_BGR2GRAY)
    # bw = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
    #     cv2.THRESH_BINARY,11,2)

    # type 3 - to convert into b&W, clean the threshold
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 11, 2)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 5))
    bw = cv2.morphologyEx(bw, cv2.MORPH_OPEN, kernel)

    # Detect blobs.
    keypoints = detector.detect(bw)

    # Draw detected blobs as red circles.
    # cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS ensures the size of the
    # circle corresponds to the size of blob
    im_with_keypoints = cv2.drawKeypoints(bw, keypoints, np.array([]),
        (0, 0, 255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

    # Show keypoints
    cv2.imshow("Keypoints", im_with_keypoints)
    cv2.waitKey(0)

    # type 4 - doesn't work but legit
    # gray = cv2.cvtColor(roi,cv2.COLOR_BGR2GRAY) # grayscale
    # thresh = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
    #     cv2.THRESH_BINARY,11,2)
    # kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(1,5))
    # thresh = cv2.morphologyEx(kernel, cv2.MORPH_OPEN, kernel)
    # dilated = cv2.dilate(thresh,kernel,iterations = 5) # dilate
    # _, contours, hierarchy = cv2.findContours(dilated,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE) # get contours

    # cv2.findContours returns (image, contours, hierarchy) on OpenCV 3
    # but (contours, hierarchy) on OpenCV 2 and 4; the contour list is
    # the second-to-last element in every case
    contours = cv2.findContours(bw, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2]
    print(len(contours))
    # cv2.drawContours(roi,contours,-1,(0,0,255),1)

    # type 4's but trying to implement with type 3
    # for each contour found, draw a rectangle around it on original image
    for contour in contours:
        # get rectangle bounding contour
        (x, y, w, h) = cv2.boundingRect(contour)

        # discard areas that are too large
        # if h>2 and w>25:
        if h > 20 and w > 30:
            continue

        # discard areas that are too small
        if h < 15 or w < 10:
            continue

        # draw rectangle around contour on original image
        # NOTE(review): roi is a slice of orig, so these rectangles are
        # drawn into orig and are present in the ROI handed to Tesseract
        # below -- confirm that is intended
        cv2.rectangle(roi, (x, y), (x + w, y + h), (255, 0, 255), 2)

        # # to read every bondingbox
        # endw = x+w
        # endh = y+h
        # digit = roi[y:endh,x:endw]
        # config = ("--oem 3 --psm 13 -c tessedit_char_whitelist=0123456789")
        # text = pytesseract.image_to_string(digit, lang="eng" ,config=config)
        # print (text)
        # cv2.imshow("digit",digit)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()

    # run Tesseract on the ROI using the English language data and the
    # options prepared before the loop
    text = pytesseract.image_to_string(roi, lang="eng", config=config)

    # show the output image
    cv2.imshow("Text Detection", bw)
    print(text)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
# # add the bounding box coordinates and OCR'd text to the list
# # of results
# results.append(((startX, startY, endX, endY), text))
# # sort the results bounding box coordinates from top to bottom
# results = sorted(results, key=lambda r:r[0][1])
# # loop over the results
# for ((startX, startY, endX, endY), text) in results:
# # display the text OCR'd by Tesseract
# print("OCR TEXT")
# print("========")
# print("{}\n".format(text))
# # strip out non-ASCII text so we can draw the text on the image
# # using OpenCV, then draw the text and a bounding box surrounding
# # the text region of the input image
# text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
# output = orig.copy()
# cv2.rectangle(output, (startX, startY), (endX, endY),
# (0, 0, 255), 2)
# cv2.putText(output, text, (startX, startY - 20),
# cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)
# # show the output image
# cv2.imshow("Text Detection", output)
# cv2.waitKey(0)