基于tensorflow的验证码识别

LockGit · LockGit · commit fcaff0cad7a5 · 2018-04-18T18:07:21.000+08:00
diff --git a/README.md b/README.md
@@ -18,6 +18,24 @@ nice_download.py是多线程模式,所以去除断点下载功能，否则维护
 ```
 ![](https://github.com/LockGit/Py/blob/master/img/download.gif)
 
+
+### 基于tensorflow的验证码识别
+```
+依赖:
+pip install tensorflow
+pip install numpy
+
+0x01,cd tensorflow
+0x02,模型训练：python train.py
+0x03,模型验证：python cnn_test.py
+
+网上已有较多相关案例，测试总结与截图如下:
+```
+![](https://github.com/LockGit/Hacking/blob/master/img/cnn_test.png)
++[相关截图](https://github.com/LockGit/Hacking#基于机器学习tensorflow的复杂验证码识别)
+总结文档：[基于机器学习(TensorFlow)的复杂验证码识别.pdf](https://github.com/LockGit/Hacking/blob/master/res/doc/基于机器学习(TensorFlow)的复杂验证码识别.pdf)
+
+
 ### ac.py 字符串搜索算法（tire树+AC自动机)
 ```
 学习记录:
diff --git a/tensorflow/cnn_test.py b/tensorflow/cnn_test.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# encoding: utf-8
+# author: Lock
+# time: 2018/3/18 17:26
+
+import tensorflow as tf
+from train import cnn_graph
+from train import get_random_captcha_text_and_image
+from train import vec2text, convert2gray
+from create_captcha_img import CAPTCHA_LIST, CAPTCHA_WIDTH, CAPTCHA_HEIGHT, CAPTCHA_LEN
+
+
+def captcha_to_text(image_list, height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH):
+    '''
+    Recognize captcha images and return the predicted text of the first one.
+
+    The network is built inside a private ``tf.Graph`` so the function can be
+    called more than once per process. Building it again in the shared
+    default graph would add a second set of variables under new unique names,
+    which would likely no longer match the names stored in the checkpoint.
+
+    :param image_list: batch of flattened images with ``height * width``
+        values each (the script entry point passes
+        ``convert2gray(image).flatten() / 255``)
+    :param height: captcha image height
+    :param width: captcha image width
+    :return: predicted text for ``image_list[0]``
+    '''
+    with tf.Graph().as_default():
+        x = tf.placeholder(tf.float32, [None, height * width])
+        keep_prob = tf.placeholder(tf.float32)
+        y_conv = cnn_graph(x, keep_prob, (height, width))
+        # One class index per character position of every image in the batch.
+        predict = tf.argmax(tf.reshape(y_conv, [-1, CAPTCHA_LEN, len(CAPTCHA_LIST)]), 2)
+        saver = tf.train.Saver()
+        with tf.Session() as sess:
+            # Load the weights from the newest checkpoint in the working directory.
+            saver.restore(sess, tf.train.latest_checkpoint('.'))
+            # keep_prob is presumably the dropout keep probability; 1 keeps every unit.
+            vector_list = sess.run(predict, feed_dict={x: image_list, keep_prob: 1})
+    text_list = [vec2text(vector) for vector in vector_list.tolist()]
+    return text_list[0]
+
+
+def multi_test(height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH):
+    '''
+    Endlessly generate random captchas, predict them and print each outcome.
+
+    Restores the newest checkpoint found in the working directory, then loops
+    forever; the loop only ends through an exception such as
+    ``KeyboardInterrupt``.
+
+    :param height: captcha image height
+    :param width: captcha image width
+    :return: never returns normally
+    '''
+    x = tf.placeholder(tf.float32, [None, height * width])
+    keep_prob = tf.placeholder(tf.float32)
+    y_conv = cnn_graph(x, keep_prob, (height, width))
+    # Build the prediction op once. Creating it inside the loop would add new
+    # nodes to the graph on every iteration and make memory grow without bound.
+    predict = tf.argmax(tf.reshape(y_conv, [-1, CAPTCHA_LEN, len(CAPTCHA_LIST)]), 2)
+    saver = tf.train.Saver()
+    with tf.Session() as sess:
+        saver.restore(sess, tf.train.latest_checkpoint('.'))
+        while 1:
+            text, image = get_random_captcha_text_and_image()
+            image = convert2gray(image)
+            image = image.flatten() / 255
+            # Feed a batch holding just this one image.
+            vector_list = sess.run(predict, feed_dict={x: [image], keep_prob: 1})
+            text_list = [vec2text(vector) for vector in vector_list.tolist()]
+            pre_text = text_list[0]
+            flag = u'正确' if text == pre_text else u'错误'
+            print u"实际值(actual):%s, 预测值(predict):%s, 预测结果:%s" % (text, pre_text, flag,)
+
+
+if __name__ == '__main__':
+    try:
+        # Run the endless batch test. Because of exit(), the single-image
+        # check below only runs if these two lines are removed.
+        multi_test()
+        exit()
+
+        text, image = get_random_captcha_text_and_image()
+        image = convert2gray(image).flatten() / 255
+        pre_text = captcha_to_text([image])
+        flag = u'正确' if text == pre_text else u'错误'
+        print u"实际值(actual):%s, 预测值(predict):%s, 预测结果:%s" % (text, pre_text, flag,)
+    except KeyboardInterrupt as e:
+        print e.message
diff --git a/tensorflow/create_captcha_img.py b/tensorflow/create_captcha_img.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# encoding: utf-8
+# author: Lock
+# time: 2018/3/18 13:25
+
+import string
+import random
+from captcha.image import ImageCaptcha
+from PIL import Image
+import numpy as np
+import os
+
+CAPTCHA_HEIGHT = 60  # captcha image height
+CAPTCHA_WIDTH = 160  # captcha image width
+CAPTCHA_LEN = 4  # number of characters per captcha
+# CAPTCHA_LIST = [str(i) for i in range(0, 10)] + list(string.ascii_letters)  # captcha character set
+CAPTCHA_LIST = [str(i) for i in range(0, 10)]  # captcha character set, reduced to digits only to speed things up
+
+
+def get_random_captcha_text(char_set=CAPTCHA_LIST, length=CAPTCHA_LEN):
+    '''
+    Build a random captcha string.
+
+    :param char_set: sequence of characters to draw from
+    :param length: number of characters to draw
+    :return: string of ``length`` characters picked with ``random.choice``
+    '''
+    return ''.join(random.choice(char_set) for _ in range(length))
+
+
+def get_random_captcha_text_and_image(width=CAPTCHA_WIDTH, height=CAPTCHA_HEIGHT, save=None):
+    '''
+    Generate a random captcha and return its text together with its image.
+
+    :param width: captcha image width passed to ``ImageCaptcha``
+    :param height: captcha image height passed to ``ImageCaptcha``
+    :param save: when truthy, also store the image as ``image/<text>.jpg``
+        (the ``image`` directory must already exist)
+    :return: tuple ``(captcha_text, captcha_image_np)`` where the second item
+        is the image converted with ``np.array``
+    '''
+    image = ImageCaptcha(width=width, height=height)
+    captcha_text = get_random_captcha_text()
+    captcha = image.generate(captcha_text)
+    if save:
+        # Store the very image that is returned. ImageCaptcha.write() would
+        # render the text a second time and save a different random picture.
+        with open('image/' + captcha_text + '.jpg', 'wb') as image_file:
+            image_file.write(captcha.getvalue())
+    captcha_image = Image.open(captcha)
+    # Convert to a numpy array.
+    captcha_image_np = np.array(captcha_image)
+    return captcha_text, captcha_image_np
+
+
+if __name__ == "__main__":
+    # Make sure the directory for saved captcha images exists.
+    if not os.path.exists('image'):
+        os.mkdir('image')
+
+    # Generate, save and print captchas until the process is interrupted.
+    while True:
+        text, _ = get_random_captcha_text_and_image(width=CAPTCHA_WIDTH, height=CAPTCHA_HEIGHT, save=1)
+        print text
diff --git a/tensorflow/train.py b/tensorflow/train.py