Recognizing Captchas with TensorFlow

Preface

TensorFlow is an excellent deep learning framework that can be used for many areas of AI, and the convolutional neural network is a very effective kind of model whose building blocks ultimately trace back to linear regression. In my previous post I showed how to recognize MNIST handwritten digits with a two-layer convolutional network; in this post I use a convolutional neural network to recognize captchas.

Steps

There are two ways to set this up. The first is to generate the images and labels ahead of time and store them together in a TFRecords file, which makes reading them back very convenient. This post focuses mainly on the second approach: generating images and labels on the fly while training the model.

Generating Images and Labels

from PIL import Image, ImageDraw, ImageFont
import random
import pandas as pd
import string

# All candidate characters: ASCII letters and digits
characters = string.ascii_letters + string.digits

def selectedCharacters(length):
    '''length: the number of characters to show'''
    result = ""
    for i in range(length):
        result += random.choice(characters)
    return result

def getColor():
    '''Get a random RGB color'''
    r = random.randint(0, 255)
    g = random.randint(0, 255)
    b = random.randint(0, 255)
    return (r, g, b)

def main(size, characterNumber, bgcolor, n):
    imageTemp = Image.new('RGB', size, bgcolor)
    # Set the font and font size
    font = ImageFont.truetype('c:\\windows\\fonts\\TIMESBD.TTF', 48)
    draw = ImageDraw.Draw(imageTemp)
    text = selectedCharacters(characterNumber)
    strs = str(text) + "\t"
    width, height = draw.textsize(text, font)

    # Draw the captcha string
    offset = 2
    for i in range(characterNumber):
        offset += width // characterNumber
        position = (offset, (size[1] - height) // 2 + random.randint(-10, 10))
        draw.text(xy=position, text=text[i], font=font, fill=getColor())

    # Apply a simple per-pixel transform: random horizontal jitter per row
    imageFinal = Image.new('RGB', size, bgcolor)
    pixelsFinal = imageFinal.load()
    pixelsTemp = imageTemp.load()
    for y in range(0, size[1]):
        offset = random.randint(-1, 1)
        for x in range(0, size[0]):
            newx = x + offset
            if newx >= size[0]:
                newx = size[0] - 1
            elif newx < 0:
                newx = 0
            pixelsFinal[newx, y] = pixelsTemp[x, y]
    draw = ImageDraw.Draw(imageFinal)

    # Draw noise pixels
    for i in range(int(size[0] * size[1] * 0.07)):
        draw.point((random.randint(0, size[0]), random.randint(0, size[1])), fill=getColor())

    # Draw interference lines
    for i in range(8):
        start = (0, random.randint(0, size[1] - 1))
        end = (size[0], random.randint(0, size[1] - 1))
        draw.line([start, end], fill=getColor(), width=1)

    # Draw interference arcs
    for i in range(8):
        start = (-50, -50)
        end = (size[0] + 10, random.randint(0, size[1] + 10))
        draw.arc(start + end, 0, 360, fill=getColor())

    # Save the captcha image
    file_name = "I:\\crack\\DATA\\Code" + "\\" + str(n) + ".jpg"
    imageFinal.save(file_name)
    # imageFinal.show()
    return strs

if __name__ == "__main__":
    text_list = []
    for i in range(10000):
        text = main((200, 100), 4, (255, 255, 255), i)
        text_list.append(text)
        print("Image {}, text: {}".format(i, text))

    data_frame = pd.DataFrame(text_list)
    data_frame.to_csv("I:\\crack\\DATA\\code.csv", index=0)

I'll just give the code directly. It randomly generates four-character captcha images and then adds some interference, such as noise points and interference lines.

Note that storing the captcha text in a CSV file has a pitfall: some strings are automatically converted to numbers in scientific notation. My workaround is to append a \t to each string so it stays text, but the \t then has to be stripped out when the data is processed later.
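
As a minimal sketch of that cleanup step (assuming the code.csv written above, with its default pandas header row), reading the labels back and stripping the trailing \t might look like this:

import pandas as pd

# Hypothetical cleanup: read back the labels written above and strip the trailing "\t"
labels = pd.read_csv("I:\\crack\\DATA\\code.csv", header=0, dtype=str)
labels = labels.iloc[:, 0].str.rstrip("\t").tolist()
print(labels[:5])   # e.g. ['aB3k', '9xQz', ...]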

Storing the Image and Label Data in a TFRecords File

'''
Store the images and their label information in a TFRecords file
Created on September 10, 2019
@Author 小明
'''
import tensorflow as tf
import os

# A file list built with os comes back in arbitrary order, so build the list manually
def read_image():
    file_list = []
    for i in range(10000):
        file = "I:\\crack\\DATA\\Code" + "\\" + str(i) + ".jpg"
        file_list.append(file)
    print(file_list)

    # Read the images with a queue of reader threads
    file_queue = tf.train.string_input_producer(file_list, shuffle=False)
    read = tf.WholeFileReader()
    key, value = read.read(file_queue)
    image = tf.image.decode_jpeg(value)
    image.set_shape([100, 200, 3])

    image_batch = tf.train.batch([image], batch_size=10000, num_threads=1, capacity=20000)
    return image_batch

# Read the label data in a batch
def read_label():
    file_queue = tf.train.string_input_producer(["I:\\crack\\DATA\\code.csv"], shuffle=False)
    reader = tf.TextLineReader()
    key, value = reader.read(file_queue)
    record = [["None"]]
    values = tf.decode_csv(value, record_defaults=record)
    label_batch = tf.train.batch([values], batch_size=10000, num_threads=1, capacity=20000)

    return label_batch

# Convert each label character to its numeric class index
def deal_with_label(label_str):
    # Reference alphabet for the conversion
    consult = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    num_lettle = dict(enumerate(list(consult)))
    letter_num = dict(zip(num_lettle.values(), num_lettle.keys()))

    array = []
    for string in label_str:
        letter_list = []
        for letter in string[0].decode("utf-8")[:4]:
            letter_list.append(letter_num[letter])
        array.append(letter_list)
    label = tf.constant(array)

    print(array)
    return label

# Write everything into the TFRecords file
def write_tfrecords(image_batch, label_batch):
    n = 0
    write = tf.python_io.TFRecordWriter("I:\\crack\\tfrecords\\image.tfrecords")

    # Cast the labels to a smaller integer type
    label_batch = tf.cast(label_batch, dtype=tf.uint8)

    for i in range(10000):
        # The pixel and label arrays must be serialized to byte strings before writing
        image_str = image_batch[i].eval().tostring()
        label_str = label_batch[i].eval().tostring()

        example = tf.train.Example(features=tf.train.Features(feature={
                    "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_str])),
                    "label": tf.train.Feature(bytes_list=tf.train.BytesList(value=[label_str]))
                }))
        write.write(example.SerializeToString())
        print(n)
        n = n + 1

    write.close()


if __name__ == '__main__':
    label_batch = read_label()
    image_batch = read_image()
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        # Pull out the raw label strings, then convert them to class indices
        label_str = sess.run(label_batch)
        label_str = list(label_str)
        label_batchs = deal_with_label(label_str)

        write_tfrecords(image_batch, label_batchs)

        coord.request_stop()
        coord.join(threads)

Here the images are decoded into arrays, the labels are converted into class indices, and both are stored in a TFRecords file.

Note that when reading the image files and the CSV file with reader threads, they must be read in order (shuffle=False); otherwise the images and labels won't line up and training would be meaningless.

Storing 10,000 samples this way took me half a day, which is far too long, so I personally don't recommend this approach; it wastes a lot of time.
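
For completeness, here is a minimal sketch of how the records written above might be read back in the same TF 1.x queue style (the feature names "image" and "label" and the shapes match the writer above; treat it as an illustration rather than tested code):

import tensorflow as tf

def read_tfrecords(path="I:\\crack\\tfrecords\\image.tfrecords"):
    file_queue = tf.train.string_input_producer([path], shuffle=False)
    reader = tf.TFRecordReader()
    _, serialized = reader.read(file_queue)
    features = tf.parse_single_example(serialized, features={
        "image": tf.FixedLenFeature([], tf.string),
        "label": tf.FixedLenFeature([], tf.string),
    })
    # Decode the byte strings back into their original dtypes and shapes
    image = tf.reshape(tf.decode_raw(features["image"], tf.uint8), [100, 200, 3])
    label = tf.reshape(tf.decode_raw(features["label"], tf.uint8), [4])
    return tf.train.batch([image, label], batch_size=64, num_threads=1, capacity=200)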

Main Program

For the main program I borrowed the three-layer convolutional network commonly seen online. It does train to a result, though honestly I find the model somewhat underwhelming; I didn't have time to test other architectures, so this post sticks with the three-layer model. If you have time, try other structures.

I'll walk through this in three parts: 1) generating the image data, 2) flattening the image data and one-hot encoding the labels, and 3) training and saving the model.

Generating the Image Data

import random
import string
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Basic captcha settings
NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
LOW_CASE = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u','v', 'w', 'x', 'y', 'z']
UP_CASE = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U','V', 'W', 'X', 'Y', 'Z']
CAPTCHA_LIST = NUMBER + LOW_CASE + UP_CASE
CAPTCHA_LEN = 4
CAPTCHA_HEIGHT = 60
CAPTCHA_WIDTH = 160
size = (CAPTCHA_WIDTH, CAPTCHA_HEIGHT)
bgcolor = (255, 255, 255)

def gen_captcha_text_and_image():
    imageTemp = Image.new('RGB', size, (255, 255, 255))
    # Set the font and font size
    font = ImageFont.truetype('c:\\windows\\fonts\\TIMESBD.TTF', 44)
    draw = ImageDraw.Draw(imageTemp)
    # Generate a random string
    characters = string.ascii_letters + string.digits
    text = ""
    for i in range(CAPTCHA_LEN):
        text += random.choice(characters)

    width, height = draw.textsize(text, font)

    # Draw the captcha string
    offset = 2
    for i in range(CAPTCHA_LEN):
        offset += width // CAPTCHA_LEN
        position = (offset, (size[1] - height) // 2 + random.randint(-10, 10))
        r = random.randint(0, 255)
        g = random.randint(0, 255)
        b = random.randint(0, 255)
        draw.text(xy=position, text=text[i], font=font, fill=(r, g, b))

    # Apply a simple per-pixel transform: random horizontal jitter per row
    imageFinal = Image.new('RGB', size, bgcolor)
    pixelsFinal = imageFinal.load()
    pixelsTemp = imageTemp.load()
    for y in range(0, size[1]):
        offset = random.randint(-1, 1)
        for x in range(0, size[0]):
            newx = x + offset
            if newx >= size[0]:
                newx = size[0] - 1
            elif newx < 0:
                newx = 0
            pixelsFinal[newx, y] = pixelsTemp[x, y]
    draw = ImageDraw.Draw(imageFinal)

    # Draw interference lines (reusing the color of the last character drawn)
    for i in range(8):
        start = (0, random.randint(0, size[1] - 1))
        end = (size[0], random.randint(0, size[1] - 1))
        draw.line([start, end], fill=(r, g, b), width=1)

    # Draw interference arcs (disabled)
    # for i in range(8):
    #     start = (-50, -50)
    #     end = (size[0] + 10, random.randint(0, size[1] + 10))
    #     draw.arc(start + end, 0, 360, fill=(r, g, b))

    # Convert the PIL image to a numpy array
    captcha_image = np.array(imageFinal)
    return captcha_image, text

a = gen_captcha_text_and_image()
print(a[1])

import random
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from captcha.image import ImageCaptcha

# Basic captcha settings
NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
LOW_CASE = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u','v', 'w', 'x', 'y', 'z']
UP_CASE = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U','V', 'W', 'X', 'Y', 'Z']
CAPTCHA_LIST = NUMBER + LOW_CASE + UP_CASE
CAPTCHA_LEN = 4
CAPTCHA_HEIGHT = 60
CAPTCHA_WIDTH = 160


# Generate random captcha text
def random_captcha_text(char_set=CAPTCHA_LIST, captcha_size=CAPTCHA_LEN):
    captcha_text = [random.choice(char_set) for _ in range(captcha_size)]
    return ''.join(captcha_text)


# Generate a random captcha image
def gen_captcha_text_and_image(width=CAPTCHA_WIDTH, height=CAPTCHA_HEIGHT, save=None):
    image = ImageCaptcha(width=width, height=height)
    # Captcha text
    captcha_text = random_captcha_text()
    captcha = image.generate(captcha_text)
    # Optionally save the image to disk
    if save: image.write(captcha_text, captcha_text + '.jpg')
    captcha_image = Image.open(captcha)
    # Convert to a numpy array
    captcha_image = np.array(captcha_image)
    return captcha_text, captcha_image

if __name__ == '__main__':
    a = gen_captcha_text_and_image(CAPTCHA_WIDTH, CAPTCHA_HEIGHT, save=False)
    print(a[0])
    plt.imshow(a[1])
    plt.show()

These are two ways to generate captcha image data; I personally lean toward the first. The logic is much the same as the generator above, so I won't go into more detail.

Converting the Data

Convert the image to a single channel

def convert2gray(img):
    if len(img.shape) > 2:
        img = np.mean(img, -1)
    return img

The original images are three-channel color images, which makes the convolution more expensive, so we convert them to a single channel. The image's distinguishing features are essentially preserved, and the computation becomes simpler.

Flatten the image data to one dimension

image.flatten() / 255

Flattening the image to one dimension also makes the computation convenient, although in practice it makes little difference either way.
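
A small usage sketch putting the two steps together (assuming the captcha-library version of gen_captcha_text_and_image above, which returns (text, image), and the 160x60 size defined earlier):

text, image = gen_captcha_text_and_image()   # image shape: (60, 160, 3)
gray = convert2gray(image)                   # shape: (60, 160)
flat = gray.flatten() / 255                  # shape: (9600,), values scaled to [0, 1]
print(text, image.shape, gray.shape, flat.shape)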

One-hot encode the labels

def text2vec(text, captcha_len=CAPTCHA_LEN, captcha_list=CAPTCHA_LIST):
    text_len = len(text)
    if text_len > captcha_len:
        raise ValueError('The captcha text is at most 4 characters long')
    vector = np.zeros(captcha_len * len(captcha_list))
    for i in range(text_len):
        vector[captcha_list.index(text[i]) + i * len(captcha_list)] = 1
    return vector

This converts each label into a one-hot vector. Alternatively, you can first map each character to its class index and then expand the indices into a one-hot encoding; either method works, so use whichever you prefer.
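
A minimal sketch of that alternative, assuming the CAPTCHA_LIST defined above (text2index and index2onehot are hypothetical helper names; the result has the same layout as text2vec):

import numpy as np

def text2index(text, captcha_list=CAPTCHA_LIST):
    # Map each character to its class index, e.g. 'aB3k' -> [10, 37, 3, 20]
    return [captcha_list.index(c) for c in text]

def index2onehot(indices, num_classes=len(CAPTCHA_LIST)):
    # Expand the indices into a flat one-hot vector of length 4 * 62
    onehot = np.zeros((len(indices), num_classes))
    onehot[np.arange(len(indices)), indices] = 1
    return onehot.flatten()

vec = index2onehot(text2index('aB3k'))
print(vec.shape)   # (248,)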

Source Code

import numpy as np
from captchaCnn.captcha_create import gen_captcha_text_and_image
from captchaCnn.captcha_create import CAPTCHA_LIST, CAPTCHA_LEN, CAPTCHA_HEIGHT, CAPTCHA_WIDTH


# Convert the image to grayscale (3 channels -> 1 channel)
def convert2gray(img):
    if len(img.shape) > 2:
        img = np.mean(img, -1)
    return img


# Convert the captcha text to a one-hot vector
def text2vec(text, captcha_len=CAPTCHA_LEN, captcha_list=CAPTCHA_LIST):
    text_len = len(text)
    if text_len > captcha_len:
        raise ValueError('The captcha text is at most 4 characters long')
    vector = np.zeros(captcha_len * len(captcha_list))
    for i in range(text_len):
        vector[captcha_list.index(text[i]) + i * len(captcha_list)] = 1
    return vector


# Convert a vector of class indices back to text
def vec2text(vec, captcha_list=CAPTCHA_LIST, size=CAPTCHA_LEN):
    vec_idx = vec
    text_list = [captcha_list[v] for v in vec_idx]
    return ''.join(text_list)


# Keep generating until the image has the expected shape
def wrap_gen_captcha_text_and_image(shape=(CAPTCHA_HEIGHT, CAPTCHA_WIDTH, 3)):
    while True:
        t, im = gen_captcha_text_and_image()
        if im.shape == shape: return t, im


# Build one training batch
def next_batch(batch_count=60, width=CAPTCHA_WIDTH, height=CAPTCHA_HEIGHT):
    batch_x = np.zeros([batch_count, width * height])
    batch_y = np.zeros([batch_count, CAPTCHA_LEN * len(CAPTCHA_LIST)])
    for i in range(batch_count):
        text, image = wrap_gen_captcha_text_and_image()
        image = convert2gray(image)
        # Flatten the image to 1D; the text goes into the matching row of the label matrix
        batch_x[i, :] = image.flatten() / 255
        batch_y[i, :] = text2vec(text)
    # Return the batch
    return batch_x, batch_y

if __name__ == '__main__':
    x, y = next_batch(batch_count=1)
    print(x, '\n\n', y)

Training and Saving the Model

The model has five layers: three convolutional layers, a fully connected layer, and an output layer.

Define the functions that generate random weights and biases

def weight_variable(shape, w_alpha=0.01):
    initial = w_alpha * tf.random_normal(shape)
    return tf.Variable(initial)

def bias_variable(shape, b_alpha=0.1):
    initial = b_alpha * tf.random_normal(shape)
    return tf.Variable(initial)

Define the convolution and pooling functions

# Convolution with stride 1; 'SAME' padding keeps the spatial size unchanged
def conv2d(x, w):
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')


# Max pooling: take the max of each 2x2 region, halving the image size
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

First convolutional layer


image_height, image_width = size
x_image = tf.reshape(x, shape=[-1, image_height, image_width, 1])

# 3x3x1 filters, 32 output feature maps (i.e. 32 filters)
w_conv1 = weight_variable([3, 3, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(tf.nn.bias_add(conv2d(x_image, w_conv1), b_conv1))
h_pool1 = max_pool_2x2(h_conv1)
h_drop1 = tf.nn.dropout(h_pool1, keep_prob)

The first convolutional layer uses 32 filters of size 3x3 with stride 1. To reduce overfitting, dropout randomly discards part of the activations.

Second convolutional layer

w_conv2 = weight_variable([3, 3, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(tf.nn.bias_add(conv2d(h_drop1, w_conv2), b_conv2))
h_pool2 = max_pool_2x2(h_conv2)
h_drop2 = tf.nn.dropout(h_pool2, keep_prob)

The second convolutional layer uses 64 filters of size 3x3 with stride 1.

Third convolutional layer

w_conv3 = weight_variable([3, 3, 64, 64])
b_conv3 = bias_variable([64])
h_conv3 = tf.nn.relu(tf.nn.bias_add(conv2d(h_drop2, w_conv3), b_conv3))
h_pool3 = max_pool_2x2(h_conv3)
h_drop3 = tf.nn.dropout(h_pool3, keep_prob)

The third convolutional layer has the same structure as the second.

Fully connected layer

image_height = int(h_drop3.shape[1])
image_width = int(h_drop3.shape[2])
w_fc = weight_variable([image_height * image_width * 64, 1024])
b_fc = bias_variable([1024])
h_drop3_re = tf.reshape(h_drop3, [-1, image_height * image_width * 64])
h_fc = tf.nn.relu(tf.add(tf.matmul(h_drop3_re, w_fc), b_fc))
h_drop_fc = tf.nn.dropout(h_fc, keep_prob)

After the fully connected layer, the output has shape [-1, 1024].

Output layer

w_out = weight_variable([1024, len(captcha_list) * captcha_len])
b_out = bias_variable([len(captcha_list) * captcha_len])
y_conv = tf.add(tf.matmul(h_drop_fc, w_out), b_out)

The output layer produces logits with shape [-1, 4*62].
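
To double-check those shapes (assuming the 60x160 input and the 'SAME'-padded 2x2 pooling used above), a quick calculation:

import math

h, w = 60, 160
for _ in range(3):                    # three 2x2 max-pool layers with 'SAME' padding
    h, w = math.ceil(h / 2), math.ceil(w / 2)
print(h, w)                           # 8 20
print(h * w * 64)                     # 10240 inputs to the fully connected layer
print(4 * 62)                         # 248 output logits = CAPTCHA_LEN * len(CAPTCHA_LIST)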

Gradient Descent Optimization

def optimize_graph(y, y_conv):
    # Cross-entropy loss
    # sigmoid cross-entropy suits labels that are independent but not mutually exclusive,
    # e.g. an image can contain both letters and digits
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_conv, labels=y))
    # Minimize the loss
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    return optimizer

The learning rate here is relatively high. In my previous post, the two-layer network used a learning rate of 0.0001, while here it is 0.001; because the preprocessing above simplifies the image data quite a bit, a somewhat higher learning rate works fine.

Computing the Accuracy

def accuracy_graph(y, y_conv, width=len(CAPTCHA_LIST), height=CAPTCHA_LEN):
    # Predictions
    predict = tf.reshape(y_conv, [-1, height, width])
    max_predict_idx = tf.argmax(predict, 2)
    # Labels
    label = tf.reshape(y, [-1, height, width])
    max_label_idx = tf.argmax(label, 2)
    correct_p = tf.equal(max_predict_idx, max_label_idx)
    accuracy = tf.reduce_mean(tf.cast(correct_p, tf.float32))
    return accuracy

Two things to watch when computing the accuracy. First, both the predictions and the labels must be reshaped to [None, 4, 62] so they can be compared character by character. Second, the second argument of tf.argmax(label, 2) must be 2, because the argmax has to run over the innermost (class) dimension; using the wrong axis gives a meaningless accuracy.
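
A tiny numpy illustration of why the axis matters (hypothetical values):

import numpy as np

# One label reshaped to [-1, 4, 62]: 4 character positions, 62 classes each
y = np.zeros((1, 4, 62))
y[0, 0, 10] = 1    # first character is class 10 ('a' in CAPTCHA_LIST)
y[0, 1, 37] = 1    # second character is class 37 ('B')
print(np.argmax(y, axis=2))        # [[10 37  0  0]] -- class index per character position
print(np.argmax(y, axis=1).shape)  # (1, 62) -- wrong axis, not comparable per character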

Training the Model

def train(height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH, y_size=len(CAPTCHA_LIST) * CAPTCHA_LEN):
    acc_rate = 0.9
    # Placeholders sized to the flattened image and the one-hot labels
    x = tf.placeholder(tf.float32, [None, height * width])
    y = tf.placeholder(tf.float32, [None, y_size])
    # Dropout keep probability: enabled during training, disabled during testing
    keep_prob = tf.placeholder(tf.float32)
    # CNN model
    y_conv = cnn_graph(x, keep_prob, (height, width))
    # Optimizer
    optimizer = optimize_graph(y, y_conv)
    # Accuracy
    accuracy = accuracy_graph(y, y_conv)
    # Start the session and begin training
    saver = tf.train.Saver()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    step = 0
    while 1:
        # 64 samples per batch
        batch_x, batch_y = next_batch(64)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.75})
        if step % 100 == 0:
            batch_x_test, batch_y_test = next_batch(100)
            acc = sess.run(accuracy, feed_dict={x: batch_x_test, y: batch_y_test, keep_prob: 1.0})
            print(datetime.now().strftime('%c'), ' step:', step, ' accuracy:', acc)
            # Accuracy meets the current threshold: save the model
            if acc > acc_rate:
                model_path = "I:\\crack\\ckpt-model\\model"
                saver.save(sess, model_path, global_step=step)
                acc_rate += 0.01
                if acc_rate > 0.95: break
        step += 1
    sess.close()

if __name__ == '__main__':
    train()

Each training step draws 64 samples. Every 100 steps, 100 freshly generated samples are used to test the accuracy. Whenever the accuracy exceeds the current threshold (starting at 0.9), the model is saved and the threshold is raised by 0.01; training stops once the threshold passes 0.95.

Training takes a long time. For roughly the first 3,000 steps the accuracy stays low, and it only starts to improve after about 4,000 steps. I trained for more than 60,000 steps before the accuracy reached 0.95, which took over 30 hours.

Source Code

import os
import tensorflow as tf
from datetime import datetime
from captchaCnn.captcha_process import next_batch
from captchaCnn.captcha_create import CAPTCHA_HEIGHT, CAPTCHA_WIDTH, CAPTCHA_LEN, CAPTCHA_LIST


# Randomly initialized weights
def weight_variable(shape, w_alpha=0.01):
    initial = w_alpha * tf.random_normal(shape)
    return tf.Variable(initial)


# Randomly initialized biases
def bias_variable(shape, b_alpha=0.1):
    initial = b_alpha * tf.random_normal(shape)
    return tf.Variable(initial)


# Convolution with stride 1; 'SAME' padding keeps the spatial size unchanged (zero padding)
def conv2d(x, w):
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')


# Max pooling: take the max of each 2x2 region, halving the image size
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


# Three-layer convolutional network graph
def cnn_graph(x, keep_prob, size, captcha_list=CAPTCHA_LIST, captcha_len=CAPTCHA_LEN):
    # Reshape the flat input into a 4-D tensor
    image_height, image_width = size
    x_image = tf.reshape(x, shape=[-1, image_height, image_width, 1])

    # First layer
    # 3x3x1 filters, 32 output feature maps (i.e. 32 filters)
    w_conv1 = weight_variable([3, 3, 1, 32])
    b_conv1 = bias_variable([32])
    # ReLU activation
    h_conv1 = tf.nn.relu(tf.nn.bias_add(conv2d(x_image, w_conv1), b_conv1))
    # Pooling
    h_pool1 = max_pool_2x2(h_conv1)
    # Dropout to reduce overfitting
    h_drop1 = tf.nn.dropout(h_pool1, keep_prob)

    # Second layer
    w_conv2 = weight_variable([3, 3, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(tf.nn.bias_add(conv2d(h_drop1, w_conv2), b_conv2))
    h_pool2 = max_pool_2x2(h_conv2)
    h_drop2 = tf.nn.dropout(h_pool2, keep_prob)

    # Third layer
    w_conv3 = weight_variable([3, 3, 64, 64])
    b_conv3 = bias_variable([64])
    h_conv3 = tf.nn.relu(tf.nn.bias_add(conv2d(h_drop2, w_conv3), b_conv3))
    h_pool3 = max_pool_2x2(h_conv3)
    h_drop3 = tf.nn.dropout(h_pool3, keep_prob)

    # Fully connected layer
    image_height = int(h_drop3.shape[1])
    image_width = int(h_drop3.shape[2])
    w_fc = weight_variable([image_height * image_width * 64, 1024])
    b_fc = bias_variable([1024])
    h_drop3_re = tf.reshape(h_drop3, [-1, image_height * image_width * 64])
    h_fc = tf.nn.relu(tf.add(tf.matmul(h_drop3_re, w_fc), b_fc))
    h_drop_fc = tf.nn.dropout(h_fc, keep_prob)

    # Fully connected layer (output layer)
    w_out = weight_variable([1024, len(captcha_list) * captcha_len])
    b_out = bias_variable([len(captcha_list) * captcha_len])
    y_conv = tf.add(tf.matmul(h_drop_fc, w_out), b_out)
    return y_conv


# Minimize the loss
def optimize_graph(y, y_conv):
    # Cross-entropy loss
    # sigmoid cross-entropy suits labels that are independent but not mutually exclusive,
    # e.g. an image can contain both letters and digits
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_conv, labels=y))
    # Minimize the loss
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    return optimizer


# Compute the accuracy
def accuracy_graph(y, y_conv, width=len(CAPTCHA_LIST), height=CAPTCHA_LEN):
    # Predictions
    predict = tf.reshape(y_conv, [-1, height, width])
    max_predict_idx = tf.argmax(predict, 2)
    # Labels
    label = tf.reshape(y, [-1, height, width])
    max_label_idx = tf.argmax(label, 2)
    correct_p = tf.equal(max_predict_idx, max_label_idx)
    accuracy = tf.reduce_mean(tf.cast(correct_p, tf.float32))
    return accuracy


# Train the CNN
def train(height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH, y_size=len(CAPTCHA_LIST) * CAPTCHA_LEN):
    acc_rate = 0.9
    # Placeholders sized to the flattened image and the one-hot labels
    x = tf.placeholder(tf.float32, [None, height * width])
    y = tf.placeholder(tf.float32, [None, y_size])
    # Dropout keep probability: enabled during training, disabled during testing
    keep_prob = tf.placeholder(tf.float32)
    # CNN model
    y_conv = cnn_graph(x, keep_prob, (height, width))
    # Optimizer
    optimizer = optimize_graph(y, y_conv)
    # Accuracy
    accuracy = accuracy_graph(y, y_conv)
    # Start the session and begin training
    saver = tf.train.Saver()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    step = 0
    while 1:
        # 64 samples per batch
        batch_x, batch_y = next_batch(64)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.75})
        # Test every 100 training steps
        if step % 100 == 0:
            batch_x_test, batch_y_test = next_batch(100)
            acc = sess.run(accuracy, feed_dict={x: batch_x_test, y: batch_y_test, keep_prob: 1.0})
            print(datetime.now().strftime('%c'), ' step:', step, ' accuracy:', acc)
            # Accuracy meets the current threshold: save the model
            if acc > acc_rate:
                model_path = "I:\\crack\\ckpt-model\\model"
                saver.save(sess, model_path, global_step=step)
                acc_rate += 0.01
                if acc_rate > 0.95: break
        step += 1
    sess.close()

if __name__ == '__main__':
    train()

Loading the Model

import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from captchaCnn.cnn_train import cnn_graph
from captchaCnn.captcha_create import gen_captcha_text_and_image
from captchaCnn.captcha_process import vec2text, convert2gray
from captchaCnn.captcha_process import CAPTCHA_LIST, CAPTCHA_WIDTH, CAPTCHA_HEIGHT, CAPTCHA_LEN


# Convert captcha images to text with the trained model
def captcha2text(image_list, height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH):
    x = tf.placeholder(tf.float32, [None, height * width])
    keep_prob = tf.placeholder(tf.float32)
    y_conv = cnn_graph(x, keep_prob, (height, width))
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, "I:\\crack\\ckpt-model\\model-16700")
        predict = tf.argmax(tf.reshape(y_conv, [-1, CAPTCHA_LEN, len(CAPTCHA_LIST)]), 2)
        vector_list = sess.run(predict, feed_dict={x: image_list, keep_prob: 1})
        vector_list = vector_list.tolist()
        text_list = [vec2text(vector) for vector in vector_list]
        return text_list

if __name__ == '__main__':
    text, image = gen_captcha_text_and_image()
    plt.imshow(image)
    plt.show()
    # image = Image.open("I:\\crack\\DATA\\Code\\1.jpg")
    # plt.imshow(image)
    # plt.show()
    # image = np.array(image)
    image = convert2gray(image)
    image = image.flatten() / 255
    pre_text = captcha2text([image])
    print(' Predict:', pre_text)

Here the saved model is used to recognize a captcha. Let me test one image:

[captcha image]

The result is:

Predict: ['KKzR']
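
If you want a rough accuracy estimate rather than a single example, a sketch along these lines could run the restored model over a small batch of freshly generated captchas (it reuses captcha2text and the helpers above; the 100-sample count is arbitrary):

# Hypothetical batch evaluation of the restored model
texts, images = [], []
for _ in range(100):
    t, im = gen_captcha_text_and_image()
    texts.append(t)
    images.append(convert2gray(im).flatten() / 255)

predictions = captcha2text(images)
correct = sum(p == t for p, t in zip(predictions, texts))
print('batch accuracy:', correct / len(texts))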

Final Thoughts

To sum up the model's strengths and weaknesses. Strengths: it is simple, has few layers, is easy to understand, and can be applied to a wide range of captchas. Weakness: it overfits; with this exact model, adding a few more random curves to the captcha is enough to break recognition. I see two possible fixes: 1) push the accuracy much higher, to around 0.9995-1.0; 2) drop half or even sixty percent of each layer's outputs, so the model overfits less.
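
For the second fix, one simple way with the training loop above is to feed a lower keep probability during training, for example:

# Keep roughly half of the activations at each dropout layer during training (the code above uses 0.75)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})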

Once you've learned this model you can apply it to almost any captcha you come across, but you will need to prepare a large labeled dataset, which takes considerable time and effort.

If you still have questions after reading this post, or would like to discuss it, you can add me on QQ: 1693490575.


Reposted from blog.csdn.net/weixin_42304193/article/details/100978485