-
Notifications
You must be signed in to change notification settings - Fork 0
/
Code
336 lines (252 loc) · 12.5 KB
/
Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
import os
import os.path
import cv2
import glob
import imutils
CAPTCHA_IMAGE_FOLDER = "D:\Machine Learning Projects\Captcha Cracking\solving_captchas_code_examples\generated_captcha_images"
OUTPUT_FOLDER = "D:\Machine Learning Projects\Captcha Cracking\solving_captchas_code_examples\extracted_captcha_images"
# Get a list of all the captcha images we need to process
captcha_image_files = glob.glob(os.path.join(CAPTCHA_IMAGE_FOLDER, "*"))
counts = {}
# loop over the image paths
for (i, captcha_image_file) in enumerate(captcha_image_files):
print("[INFO] processing image {}/{}".format(i + 1, len(captcha_image_files)))
# Since the filename contains the captcha text (i.e. "2A2X.png" has the text "2A2X"),
# grab the base filename as the text
filename = os.path.basename(captcha_image_file)
captcha_correct_text = os.path.splitext(filename)[0]
# Load the image and convert it to grayscale
image = cv2.imread(captcha_image_file)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# gray = cv2.resize(gray, (100, 100))
# Add some extra padding around the image
gray = cv2.copyMakeBorder(gray, 8, 8, 8, 8, cv2.BORDER_REPLICATE)
# threshold the image (convert it to pure black and white)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
# find the contours (continuous blobs of pixels) the image
contours = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Hack for compatibility with different OpenCV versions
contours = contours[1] if imutils.is_cv2() else contours[0]
letter_image_regions = []
# Now we can loop through each of the four contours and extract the letter
# inside of each one
for contour in contours:
# Get the rectangle that contains the contour
(x, y, w, h) = cv2.boundingRect(contour)
# Compare the width and height of the contour to detect letters that
# are conjoined into one chunk
if w / h > 1.25:
# This contour is too wide to be a single letter!
# Split it in half into two letter regions!
half_width = int(w / 2)
letter_image_regions.append((x, y, half_width, h))
letter_image_regions.append((x + half_width, y, half_width, h))
else:
# This is a normal letter by itself
letter_image_regions.append((x, y, w, h))
# If we found more or less than 4 letters in the captcha, our letter extraction
# didn't work correcly. Skip the image instead of saving bad training data!
if len(letter_image_regions) != 4:
continue
# Sort the detected letter images based on the x coordinate to make sure
# we are processing them from left-to-right so we match the right image
# with the right letter
letter_image_regions = sorted(letter_image_regions, key=lambda x: x[0])
# Save out each letter as a single image
for letter_bounding_box, letter_text in zip(letter_image_regions, captcha_correct_text):
# Grab the coordinates of the letter in the image
x, y, w, h = letter_bounding_box
# Extract the letter from the original image with a 2-pixel margin around the edge
letter_image = gray[y - 2:y + h + 2, x - 2:x + w + 2]
# Get the folder to save the image in
save_path = os.path.join(OUTPUT_FOLDER, letter_text)
# if the output directory does not exist, create it
if not os.path.exists(save_path):
os.makedirs(save_path)
# write the letter image to a file
count = counts.get(letter_text, 1)
p = os.path.join(save_path, "{}.png".format(str(count).zfill(6)))
cv2.imwrite(p, letter_image)
# increment the count for the current key
counts[letter_text] = count + 1
import imutils
import cv2
def resize_to_fit(image, width, height):
"""
A helper function to resize an image to fit within a given size
:param image: image to resize
:param width: desired width in pixels
:param height: desired height in pixels
:return: the resized image
"""
# grab the dimensions of the image, then initialize
# the padding values
(h, w) = image.shape[:2]
# if the width is greater than the height then resize along
# the width
if w > h:
image = imutils.resize(image, width=width)
# otherwise, the height is greater than the width so resize
# along the height
else:
image = imutils.resize(image, height=height)
# determine the padding values for the width and height to
# obtain the target dimensions
padW = int((width - image.shape[1]) / 2.0)
padH = int((height - image.shape[0]) / 2.0)
# pad the image then apply one more resizing to handle any
# rounding issues
image = cv2.copyMakeBorder(image, padH, padH, padW, padW,
cv2.BORDER_REPLICATE)
image = cv2.resize(image, (width, height))
# return the pre-processed image
return image
import cv2
import pickle
import os.path
import numpy as np
from imutils import paths
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Flatten, Dense
from helpers import resize_to_fit
LETTER_IMAGES_FOLDER = "D:\Machine Learning Projects\Captcha Cracking\solving_captchas_code_examples\extracted_captcha_images"
MODEL_FILENAME = "D:\Machine Learning Projects\Captcha Cracking\solving_captchas_code_examples\captcha_model.hdf5"
MODEL_LABELS_FILENAME = "D:\Machine Learning Projects\Captcha Cracking\solving_captchas_code_examples\model_labels.dat"
# initialize the data and labels
data = []
labels = []
# loop over the input images
for image_file in paths.list_images(LETTER_IMAGES_FOLDER):
# Load the image and convert it to grayscale
image = cv2.imread(image_file)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Resize the letter so it fits in a 20x20 pixel box
image = resize_to_fit(image, 20, 20)
# Add a third channel dimension to the image to make Keras happy
image = np.expand_dims(image, axis=2)
# Grab the name of the letter based on the folder it was in
label = image_file.split(os.path.sep)[-2]
# Add the letter image and it's label to our training data
data.append(image)
labels.append(label)
# scale the raw pixel intensities to the range [0, 1] (this improves training)
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
# Split the training data into separate train and test sets
(X_train, X_test, Y_train, Y_test) = train_test_split(data, labels, test_size=0.25, random_state=0)
# Convert the labels (letters) into one-hot encodings that Keras can work with
lb = LabelBinarizer().fit(Y_train)
Y_train = lb.transform(Y_train)
Y_test = lb.transform(Y_test)
# Save the mapping from labels to one-hot encodings.
# We'll need this later when we use the model to decode what it's predictions mean
with open(MODEL_LABELS_FILENAME, "wb") as f:
pickle.dump(lb, f)
# Build the neural network!
model = Sequential()
# First convolutional layer with max pooling
model.add(Conv2D(20, (5, 5), padding="same", input_shape=(20, 20, 1), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
# Second convolutional layer with max pooling
model.add(Conv2D(50, (5, 5), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
# Hidden layer with 500 nodes
model.add(Flatten())
model.add(Dense(500, activation="relu"))
# Output layer with 32 nodes (one for each possible letter/number we predict)
model.add(Dense(32, activation="softmax"))
# Ask Keras to build the TensorFlow model behind the scenes
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
# Train the neural network
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), batch_size=32, epochs=10, verbose=1)
# Save the trained model to disk
model.save(MODEL_FILENAME)
from keras.models import load_model
from helpers import resize_to_fit
from imutils import paths
import numpy as np
import imutils
import cv2
import pickle
MODEL_FILENAME = "D:\Machine Learning Projects\Captcha Cracking\solving_captchas_code_examples\captcha_model.hdf5"
MODEL_LABELS_FILENAME = "D:\Machine Learning Projects\Captcha Cracking\solving_captchas_code_examples\model_labels.dat"
CAPTCHA_IMAGE_FOLDER = "D:\Machine Learning Projects\Captcha Cracking\solving_captchas_code_examples\generated_captcha_images"
# Load up the model labels (so we can translate model predictions to actual letters)
with open(MODEL_LABELS_FILENAME, "rb") as f:
lb = pickle.load(f)
# Load the trained neural network
model = load_model(MODEL_FILENAME)
# Grab some random CAPTCHA images to test against.
# In the real world, you'd replace this section with code to grab a real
# CAPTCHA image from a live website.
captcha_image_files = list(paths.list_images(CAPTCHA_IMAGE_FOLDER))
captcha_image_files = np.random.choice(captcha_image_files, size=(10,), replace=False)
# loop over the image paths
for image_file in captcha_image_files:
# Load the image and convert it to grayscale
image = cv2.imread(image_file)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Add some extra padding around the image
image = cv2.copyMakeBorder(image, 20, 20, 20, 20, cv2.BORDER_REPLICATE)
# threshold the image (convert it to pure black and white)
thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
# find the contours (continuous blobs of pixels) the image
contours = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Hack for compatibility with different OpenCV versions
contours = contours[1] if imutils.is_cv2() else contours[0]
letter_image_regions = []
# Now we can loop through each of the four contours and extract the letter
# inside of each one
for contour in contours:
# Get the rectangle that contains the contour
(x, y, w, h) = cv2.boundingRect(contour)
# Compare the width and height of the contour to detect letters that
# are conjoined into one chunk
if w / h > 1.25:
# This contour is too wide to be a single letter!
# Split it in half into two letter regions!
half_width = int(w / 2)
letter_image_regions.append((x, y, half_width, h))
letter_image_regions.append((x + half_width, y, half_width, h))
else:
# This is a normal letter by itself
letter_image_regions.append((x, y, w, h))
# If we found more or less than 4 letters in the captcha, our letter extraction
# didn't work correcly. Skip the image instead of saving bad training data!
if len(letter_image_regions) != 4:
continue
# Sort the detected letter images based on the x coordinate to make sure
# we are processing them from left-to-right so we match the right image
# with the right letter
letter_image_regions = sorted(letter_image_regions, key=lambda x: x[0])
# Create an output image and a list to hold our predicted letters
output = cv2.merge([image] * 3)
predictions = []
# loop over the lektters
for letter_bounding_box in letter_image_regions:
# Grab the coordinates of the letter in the image
x, y, w, h = letter_bounding_box
# Extract the letter from the original image with a 2-pixel margin around the edge
letter_image = image[y - 2:y + h + 2, x - 2:x + w + 2]
# Re-size the letter image to 20x20 pixels to match training data
letter_image = resize_to_fit(letter_image, 20, 20)
# Turn the single image into a 4d list of images to make Keras happy
letter_image = np.expand_dims(letter_image, axis=2)
letter_image = np.expand_dims(letter_image, axis=0)
# Ask the neural network to make a prediction
prediction = model.predict(letter_image)
# Convert the one-hot-encoded prediction back to a normal letter
letter = lb.inverse_transform(prediction)[0]
predictions.append(letter)
# draw the prediction on the output image
cv2.rectangle(output, (x - 2, y - 2), (x + w + 4, y + h + 4), (0, 255, 0), 1)
cv2.putText(output, letter, (x - 5, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)
# Print the captcha's text
captcha_text = "".join(predictions)
print("CAPTCHA text is: {}".format(captcha_text))
# Show the annotated image
cv2.imshow("Output", output)
cv2.waitKey()