-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathdetector.py
290 lines (247 loc) · 9.28 KB
/
detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# -*- coding: utf8 -*-
"""
detector.py
author: Septimus Liu
Provide QRCode detection, text region detection and face detection.
"""
import cv2
import math
import numpy as np
import os
TEXT_LAPLACIAN_THRESHOLD = 2000
TEXT_MAX_REGION = 3
TEXT_AREA_THRESHOLD = 0.09
def text_detect(img):
"""
Detect if picture contains so many text regions
:param img: image object
:return: True - Too many text regions; False - Otherwise
"""
img = cv2.imread(img)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Pre-processing to bring out text
dilation = _preproc_txt(gray)
# Calculate area and number of text region
region, area_text = _find_txt_region(dilation, gray)
area_total = img.shape[0] * img.shape[1]
area_portion = float(area_text) / float(area_total)
return area_portion > TEXT_AREA_THRESHOLD or len(region) > TEXT_MAX_REGION
def qrcode_detect(img):
"""
Detect if picture contains QR code
:param img: image object
:return: True - QR code has been detected; False - Otherwise
"""
img = cv2.imread(img)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gb = cv2.GaussianBlur(gray, (5, 5), 0)
edges = cv2.Canny(gb, 100, 200)
th, bi_img = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
def cv_distance(P, Q):
return int(math.sqrt(pow((P[0] - Q[0]), 2) + pow((P[1] - Q[1]),2)))
def is_timing_pattern(line):
# Remove white pixel on the begin and the end
while line[0] != 0:
line = line[1:]
while line[-1] != 0:
line = line[:-1]
# Count continuing white or black pixels
c = []
count = 1
l = line[0]
for p in line[1:]:
if p == l:
count += 1
else:
c.append(count)
count = 1
l = p
c.append(count)
# If black and white interval is few
if len(c) < 5:
return False
# Calculate variance to judge if it is a Timing Pattern
threshold = 4
return np.var(c) < threshold
def check(a, b):
# Store two endpoints of the shortest line a and b
s1_ab = ()
s2_ab = ()
# Store distance of two endpoints of the shortest line a and b
s1 = np.iinfo('i').max
s2 = s1
for ai in a:
for bi in b:
d = cv_distance(ai, bi)
if d < s2:
if d < s1:
s1_ab, s2_ab = (ai, bi), s1_ab
s1, s2 = d, s1
else:
s2_ab = (ai, bi)
s2 = d
a1, a2 = s1_ab[0], s2_ab[0]
b1, b2 = s1_ab[1], s2_ab[1]
a1 = (a1[0] + (a2[0]-a1[0])*1/14, a1[1] + (a2[1]-a1[1])*1/14)
b1 = (b1[0] + (b2[0]-b1[0])*1/14, b1[1] + (b2[1]-b1[1])*1/14)
a2 = (a2[0] + (a1[0]-a2[0])*1/14, a2[1] + (a1[1]-a2[1])*1/14)
b2 = (b2[0] + (b1[0]-b2[0])*1/14, b2[1] + (b1[1]-b2[1])*1/14)
# Pick out the shortest two lines
try:
line1 = [x[2] for x in _create_line_iterator(a1, b1, bi_img)]
if is_timing_pattern(line1):
return True
else:
line2 = [x[2] for x in _create_line_iterator(a2, b2, bi_img)]
if is_timing_pattern(line2):
return True
except Exception, err:
pass
return False
# Pick out all QRcode-like contours
contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
if hierarchy is None or contours is None or not (len(hierarchy) and len(contours)):
return False
hierarchy = hierarchy[0]
found = []
for i in range(len(contours)):
k = i
c = 0
while hierarchy[k][2] != -1:
k = hierarchy[k][2]
c += 1
# QR code may has contours nested more than 5 layers
if c >= 5:
found.append(i)
boxes = []
for i in found:
rect = cv2.minAreaRect(contours[i])
box = np.asarray(cv2.cv.BoxPoints(rect))
box = map(tuple, box.astype(int))
boxes.append(box)
# Store valid QR code endpoints
valid = set()
for i in range(len(boxes)):
for j in range(i + 1, len(boxes)):
if check(boxes[i], boxes[j]):
valid.add(i)
valid.add(j)
if len(valid) > 0 and len(valid) % 3 == 0:
return True
else:
return False
def _preproc_txt(gray):
"""
Bring out text region, make them prepared to be find
:param gray: gray-scaled image object
:return: Dilated and erosed image object
"""
sobel = cv2.Sobel(gray, cv2.CV_8U, 1, 0, ksize=3)
ret, binary = cv2.threshold(sobel, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)
ele1 = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 9))
ele2 = cv2.getStructuringElement(cv2.MORPH_RECT, (24, 6))
# Dilate and erose contours to outline the text region
dilation = cv2.dilate(binary, ele2, iterations=1)
# Remove details like table lines
erosion = cv2.erode(dilation, ele1, iterations=1)
dilation2 = cv2.dilate(erosion, ele2, iterations=1)
return dilation2
def _find_txt_region(img, gray):
"""
Find text regions and calculate their area and numbers
:param img: image object
:param gray: gray-scaled image object
:return: Region box points and their total area size
"""
area_text = 0
region = []
contours, hierarchy = cv2.findContours(img.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
for i in range(len(contours)):
cnt = contours[i]
area = cv2.contourArea(cnt)
# Exclude contours that are too small
if area < 1000:
continue
rect = cv2.minAreaRect(cnt)
box = cv2.cv.BoxPoints(rect)
box = np.asarray(box)
# If the region is blurry, it may not be text region
lap = cv2.Laplacian(gray[box[1][1]:box[0][1], box[0][0]:box[3][0]], cv2.CV_64F)
if lap is None or lap.var() < TEXT_LAPLACIAN_THRESHOLD:
continue
box = box.astype(int)
x0 = box[0][0] if box[0][0] > 0 else 0
x1 = box[2][0] if box[2][0] > 0 else 0
y0 = box[0][1] if box[0][1] > 0 else 0
y1 = box[2][1] if box[2][1] > 0 else 0
height = abs(y0 - y1)
width = abs(x0 - x1)
# Exclude vertical rectangle
if height > width * 0.3:
continue
area_text += height * width
region.append(box)
return region, area_text
def _create_line_iterator(p1, p2, img):
"""
Produces and array that consists of the coordinates and intensities of each pixel in a line between two points
Parameters:
-P1: a numpy array that consists of the coordinate of the first point (x,y)
-P2: a numpy array that consists of the coordinate of the second point (x,y)
-img: the image being processed
Returns:
-it: a numpy array that consists of the coordinates and intensities of each pixel in the radii (shape: [numPixels, 3], row = [x,y,intensity])
""" # define local variables for readability
image_h = img.shape[0]
image_w = img.shape[1]
p1_x = p1[0]
p1_y = p1[1]
p2_x = p2[0]
p2_y = p2[1]
# difference and absolute difference between points
# used to calculate slope and relative location between points
d_x = p2_x - p1_x
d_y = p2_y - p1_y
d_x_a = np.abs(d_x)
d_y_a = np.abs(d_y)
# predefine numpy array for output based on distance between points
itbuffer = np.empty(shape=(np.maximum(d_y_a, d_x_a), 3), dtype=np.float32)
itbuffer.fill(np.nan)
# Obtain coordinates along the line using a form of Bresenham's algorithm
neg_y = p1_y > p2_y
neg_x = p1_x > p2_x
if p1_x == p2_x: # vertical line segment
itbuffer[:, 0] = p1_x
if neg_y:
itbuffer[:, 1] = np.arange(p1_y - 1, p1_y - d_y_a - 1, -1)
else:
itbuffer[:, 1] = np.arange(p1_y + 1, p1_y + d_y_a + 1)
elif p1_y == p2_y: # horizontal line segment
itbuffer[:, 1] = p1_y
if neg_x:
itbuffer[:, 0] = np.arange(p1_x - 1, p1_x - d_x_a - 1, -1)
else:
itbuffer[:, 0] = np.arange(p1_x + 1, p1_x + d_x_a + 1)
else: # diagonal line segment
steep_slope = d_y_a > d_x_a
if steep_slope:
slope = d_x.astype(np.float32) / d_y.astype(np.float32)
if neg_y:
itbuffer[:, 1] = np.arange(p1_y - 1, p1_y - d_y_a - 1, -1)
else:
itbuffer[:, 1] = np.arange(p1_y + 1, p1_y + d_y_a + 1)
itbuffer[:, 0] = (slope * (itbuffer[:, 1] - p1_y)).astype(np.int) + p1_x
else:
slope = d_y.astype(np.float32) / d_x.astype(np.float32)
if neg_x:
itbuffer[:, 0] = np.arange(p1_x - 1, p1_x - d_x_a - 1, -1)
else:
itbuffer[:, 0] = np.arange(p1_x + 1, p1_x + d_x_a + 1)
itbuffer[:, 1] = (slope * (itbuffer[:, 0] - p1_x)).astype(np.int) + p1_y
# Remove points outside of image
col_x = itbuffer[:, 0]
col_y = itbuffer[:, 1]
itbuffer = itbuffer[(col_x >= 0) & (col_y >= 0) & (col_x < image_w) & (col_y < image_h)]
# Get intensities from img ndarray
itbuffer[:, 2] = img[itbuffer[:, 1].astype(np.uint), itbuffer[:, 0].astype(np.uint)]
return itbuffer