Skip to content

Commit fcaff0c

Browse files
committed
基于tensorflow的验证码识别
1 parent 9663c7c commit fcaff0c

File tree

4 files changed

+414
-0
lines changed

4 files changed

+414
-0
lines changed

README.md

+18
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,24 @@ nice_download.py是多线程模式,所以去除断点下载功能,否则维护
1818
```
1919
![](https://github.com/LockGit/Py/blob/master/img/download.gif)
2020

21+
22+
### 基于tensorflow的验证码识别
23+
```
24+
依赖:
25+
pip install tensorflow
26+
pip install numpy
27+
28+
0x01,cd tensorflow
29+
0x02,模型训练:python train.py
30+
0x03,模型验证:python cnn_test.py
31+
32+
网上已有很多相关案例,本次测试的总结与截图如下:
33+
```
34+
![](https://github.com/LockGit/Hacking/blob/master/img/cnn_test.png)
35+
[相关截图](https://github.com/LockGit/Hacking#基于机器学习tensorflow的复杂验证码识别)
36+
总结文档:[基于机器学习(TensorFlow)的复杂验证码识别.pdf](https://github.com/LockGit/Hacking/blob/master/res/doc/基于机器学习(TensorFlow)的复杂验证码识别.pdf)
37+
38+
2139
### ac.py 字符串搜索算法(trie树+AC自动机)
2240
```
2341
学习记录:

tensorflow/cnn_test.py

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/env python
2+
# encoding: utf-8
3+
# author: Lock
4+
# time: 2018/3/18 17:26
5+
6+
import tensorflow as tf
7+
from train import cnn_graph
8+
from train import get_random_captcha_text_and_image
9+
from train import vec2text, convert2gray
10+
from create_captcha_img import CAPTCHA_LIST, CAPTCHA_WIDTH, CAPTCHA_HEIGHT, CAPTCHA_LEN
11+
12+
13+
def captcha_to_text(image_list, height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH):
    """Recognise the text of a captcha image with the trained CNN.

    The network is rebuilt with ``cnn_graph`` and its weights are restored
    from the latest checkpoint found in the current directory.

    :param image_list: batch of images fed to the input placeholder; each
        entry must hold ``height * width`` values
    :param height: image height used to size the input placeholder
    :param width: image width used to size the input placeholder
    :return: the decoded text of the first image in ``image_list``
    """
    inputs = tf.placeholder(tf.float32, [None, height * width])
    keep_prob = tf.placeholder(tf.float32)
    logits = cnn_graph(inputs, keep_prob, (height, width))
    # One row per captcha position, one column per candidate character;
    # argmax over the last axis picks the character index for each position.
    per_char = tf.reshape(logits, [-1, CAPTCHA_LEN, len(CAPTCHA_LIST)])
    predict = tf.argmax(per_char, 2)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        checkpoint = tf.train.latest_checkpoint('.')
        saver.restore(sess, checkpoint)
        feed = {inputs: image_list, keep_prob: 1}
        index_rows = sess.run(predict, feed_dict=feed).tolist()
        decoded = [vec2text(row) for row in index_rows]
        return decoded[0]
32+
33+
34+
def multi_test(height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH):
    """Predict freshly generated captchas in an endless loop and print results.

    The network is rebuilt with ``cnn_graph`` and its weights are restored
    from the latest checkpoint found in the current directory. Each iteration
    generates one random captcha, runs it through the network and prints the
    actual text, the predicted text and whether they match. The loop has no
    exit condition; it runs until the process is interrupted.

    :param height: image height used to size the input placeholder
    :param width: image width used to size the input placeholder
    """
    x = tf.placeholder(tf.float32, [None, height * width])
    keep_prob = tf.placeholder(tf.float32)
    y_conv = cnn_graph(x, keep_prob, (height, width))
    # Build the prediction op exactly once. Creating it inside the loop adds
    # new reshape/argmax nodes to the default graph on every iteration, so the
    # graph (and memory use) grows without bound.
    predict = tf.argmax(tf.reshape(y_conv, [-1, CAPTCHA_LEN, len(CAPTCHA_LIST)]), 2)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint('.'))
        while True:
            text, image = get_random_captcha_text_and_image()
            image = convert2gray(image)
            # NOTE(review): assumes convert2gray returns a float array; with an
            # integer array this would be floor division under Python 2.
            image = image.flatten() / 255
            image_list = [image]
            # keep_prob is fed as 1 (presumably disables dropout at inference).
            vector_list = sess.run(predict, feed_dict={x: image_list, keep_prob: 1})
            vector_list = vector_list.tolist()
            text_list = [vec2text(vector) for vector in vector_list]
            pre_text = text_list[0]
            flag = u'正确' if text == pre_text else u'错误'
            print(u"实际值(actual):%s, 预测值(predict):%s, 预测结果:%s" % (text, pre_text, flag,))
55+
56+
57+
if __name__ == '__main__':
    try:
        # Batch mode: keep predicting generated captchas until interrupted.
        multi_test()
        exit()

        # Single-sample mode. Not reached while the multi_test()/exit() pair
        # above is present; remove those two calls to run one prediction only.
        text, image = get_random_captcha_text_and_image()
        image = convert2gray(image).flatten() / 255
        pre_text = captcha_to_text([image])
        flag = u'正确' if text == pre_text else u'错误'
        print(u"实际值(actual):%s, 预测值(predict):%s, 预测结果:%s" % (text, pre_text, flag,))
    except KeyboardInterrupt as e:
        # Ctrl-C ends the run; print whatever message the interrupt carries.
        print(e.message)

tensorflow/create_captcha_img.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!/usr/bin/env python
2+
# encoding: utf-8
3+
# author: Lock
4+
# time: 2018/3/18 13:25
5+
6+
import string
7+
import random
8+
from captcha.image import ImageCaptcha
9+
from PIL import Image
10+
import numpy as np
11+
import os
12+
13+
CAPTCHA_HEIGHT = 60  # captcha image height
CAPTCHA_WIDTH = 160  # captcha image width
CAPTCHA_LEN = 4  # number of characters in one captcha
# Character set the captchas are drawn from. Only digits are used: a smaller
# set is faster to work with. To include letters as well, use
# list(string.digits) + list(string.ascii_letters) instead.
CAPTCHA_LIST = list(string.digits)
18+
19+
20+
def get_random_captcha_text(char_set=CAPTCHA_LIST, length=CAPTCHA_LEN):
    """Return a random string of ``length`` characters drawn from ``char_set``.

    :param char_set: sequence of candidate characters
    :param length: number of characters to pick (with replacement)
    :return: the picked characters joined into one string
    """
    return ''.join(random.choice(char_set) for _ in range(length))
23+
24+
25+
def get_random_captcha_text_and_image(width=CAPTCHA_WIDTH, height=CAPTCHA_HEIGHT, save=None):
    """Generate a random captcha and return its text and pixel data.

    :param width: width passed to ``ImageCaptcha``
    :param height: height passed to ``ImageCaptcha``
    :param save: when truthy, also store the rendered image as
        ``image/<text>.jpg`` (the directory is created if missing)
    :return: tuple ``(captcha_text, captcha_image_np)`` where the second item
        is the rendered image converted to a numpy array
    """
    generator = ImageCaptcha(width=width, height=height)
    captcha_text = get_random_captcha_text()
    # Render exactly once. Calling generate() and then write() produces two
    # different random renderings, so the file on disk would not match the
    # array returned to the caller.
    captcha_image = generator.generate_image(captcha_text)
    if save:
        if not os.path.isdir('image'):
            os.makedirs('image')
        # No explicit format: Pillow infers JPEG from the ".jpg" extension, so
        # the file content matches its name (write() defaults to PNG data).
        captcha_image.save(os.path.join('image', captcha_text + '.jpg'))
    # Convert the PIL image to a numpy array.
    captcha_image_np = np.array(captcha_image)
    return captcha_text, captcha_image_np
35+
36+
37+
if __name__ == "__main__":
    # Make sure the output directory for saved captchas exists.
    if not os.path.exists('image'):
        os.mkdir('image')

    # Generate and save captchas forever, echoing each captcha's text.
    while True:
        text, _ = get_random_captcha_text_and_image(CAPTCHA_WIDTH, CAPTCHA_HEIGHT, 1)
        print(text)

0 commit comments

Comments
 (0)