毕业设计——基于OpenCV和数字图像处理的图像识别项目（含信用卡号识别、停车场车位识别、文档扫描、答题卡识别）

信用卡号识别

这次做的项目是一个信用卡数字识别，也就是给定一个信用卡，最终要输出上面的卡号，且在原图上把卡号的位置圈出来。

本质上，这个任务是一个模板匹配问题。对于给定的信用卡，首先需要找到数字所在的区域，大体思路如下：

先使用轮廓检测算法，找到每个对象的大致轮廓
根据每个对象轮廓的长宽比例，找到中间的这一长串数字部分
然后对数字这一部分，用形态学操作使其更加突出
接下来，对这一部分再做一次轮廓检测，得到4个数字组的外接框；对每个框内部再做轮廓检测，就能定位到具体的每一个数字
对于每个数字，进行模板匹配，就能知道具体的数字

对于模板，也需要事先处理：先转成灰度图，再二值化，然后通过轮廓检测分割成10个小框（对应数字0-9）。这样信用卡里的每个数字都要和这10个小框逐一进行模板匹配，得分最高的即为识别结果。
部分代码：

import os
import numpy as np
from imutils import contours
import cv2
import pickle


def cv_show(title, img):
    """Display ``img`` in a window named ``title`` and block until a key is pressed.

    Every OpenCV window is destroyed once the key press has been received.
    """
    cv2.imshow(title, img)
    cv2.waitKey(0)  # a delay of 0 waits for a key press with no timeout
    cv2.destroyAllWindows()


def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Scale ``image`` to a target width or height, keeping the aspect ratio.

    Args:
        image: array whose first two dimensions are (height, width).
        width: target width in pixels, or None.
        height: target height in pixels, or None.
        inter: interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The input object itself when neither ``width`` nor ``height`` is
        given; otherwise a resized copy. When both are given, ``width``
        takes precedence and ``height`` is ignored.
    """
    src_h, src_w = image.shape[:2]

    if width is None:
        if height is None:
            # Nothing requested: hand the image back untouched.
            return image
        # Height drives the scale factor; width follows proportionally.
        scale = height / float(src_h)
        target = (int(src_w * scale), height)
    else:
        # Width drives the scale factor; height follows proportionally.
        scale = width / float(src_w)
        target = (width, int(src_h * scale))

    # cv2.resize expects the size as (width, height).
    return cv2.resize(image, target, interpolation=inter)


def sort_contours(cnts, method='left-to-right'):
    """Sort contours by the position of their bounding boxes.

    The original author uses this so that the template digits come out in
    0-9 order after contour detection.

    Args:
        cnts: sequence of contours (e.g. the output of ``cv2.findContours``).
        method: one of ``'left-to-right'`` (default), ``'right-to-left'``,
            ``'top-to-bottom'`` or ``'bottom-to-top'``. Any other value
            behaves like ``'left-to-right'``.

    Returns:
        ``(cnts, boundingBoxes)``: two tuples in sorted order. Each bounding
        box is the ``(x, y, w, h)`` tuple from ``cv2.boundingRect``. Two
        empty tuples are returned for empty input.
    """
    # zip(*sorted(...)) yields nothing for empty input, so the two-name
    # unpacking below would raise ValueError; short-circuit instead.
    if len(cnts) == 0:
        return (), ()

    reverse = method in ('right-to-left', 'bottom-to-top')
    # Sort on y (index 1) for vertical orderings, on x (index 0) otherwise.
    axis = 1 if method in ('top-to-bottom', 'bottom-to-top') else 0

    # Upright bounding rectangle of every contour: (x, y, w, h).
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]

    # Order the (contour, box) pairs by the box's top-left coordinate.
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
                                        key=lambda pair: pair[1][axis],
                                        reverse=reverse))

    return cnts, boundingBoxes


def credit_process(credit_gray):
    """Turn a grayscale card image into a binary mask of candidate digit groups.

    Pipeline: top-hat -> x-gradient -> min-max normalisation to uint8 ->
    closing -> Otsu threshold -> closing.

    Args:
        credit_gray: single-channel image of the card
            (presumably uint8 grayscale; confirm against the caller).

    Returns:
        The binary image produced by the final closing operation.
    """
    # Structuring elements: a wide rectangle and a small square.
    rectKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 3))
    sqKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))

    # Top-hat emphasises regions brighter than their surroundings.
    tophat = cv2.morphologyEx(credit_gray, cv2.MORPH_TOPHAT, rectKernel)

    # Derivative along x (dx=1, dy=0). ksize=-1 selects the 3x3 Scharr kernel.
    gradX = cv2.Sobel(tophat, ddepth=cv2.CV_32F, dx=1, dy=0, ksize=-1)

    # Manual min-max normalisation. The original author notes that
    # cv2.convertScaleAbs also picked up background edges as noise.
    gradX = np.absolute(gradX)
    (minVal, maxVal) = (np.min(gradX), np.max(gradX))
    span = maxVal - minVal
    if span > 0:
        gradX = 255 * ((gradX - minVal) / span)
    else:
        # Constant gradient image: avoid 0/0 (NaN) and emit an all-zero map.
        gradX = np.zeros_like(gradX)
    gradX = gradX.astype('uint8')

    # Closing (dilate then erode) joins neighbouring responses into blobs.
    gradX = cv2.morphologyEx(gradX, cv2.MORPH_CLOSE, rectKernel)

    # With THRESH_OTSU the threshold argument (0) is ignored and OpenCV
    # chooses the threshold itself.
    thresh = cv2.threshold(gradX, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # Second closing with the square kernel to fill remaining gaps.
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, sqKernel)

    return thresh

def comput_contours(thresh, ar_range=(2.5, 4.0), w_range=(40, 55), h_range=(10, 20)):
    """Locate the bounding boxes of the digit groups in a binary mask.

    Args:
        thresh: binary image (e.g. the output of ``credit_process``).
        ar_range: exclusive ``(low, high)`` bounds on width / height.
        w_range: exclusive ``(low, high)`` bounds on box width in pixels.
        h_range: exclusive ``(low, high)`` bounds on box height in pixels.
            The defaults reproduce the original hard-coded limits, which the
            author tuned for groups of four digits.

    Returns:
        List of ``(x, y, w, h)`` boxes that pass all three filters, sorted
        from left to right.
    """
    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x and
    # (image, contours, hierarchy) on 3.x; index -2 is the contour list in
    # every case.
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    (ar_lo, ar_hi) = ar_range
    (w_lo, w_hi) = w_range
    (h_lo, h_hi) = h_range

    locs = []
    for c in cnts:
        # Upright bounding rectangle of the contour.
        (x, y, w, h) = cv2.boundingRect(c)
        ar = w / float(h)

        # Keep only boxes whose shape and size match a digit group.
        if ar_lo < ar < ar_hi and w_lo < w < w_hi and h_lo < h < h_hi:
            locs.append((x, y, w, h))

    # Left-to-right order so the groups read like the card number.
    locs.sort(key=lambda box: box[0])
    return locs

def getOutput(locs, digits2Cnt, credit_card, credit_gray):
    """Recognise the digits of each group and annotate the card image.

    Args:
        locs: iterable of ``(x, y, w, h)`` group boxes.
        digits2Cnt: mapping from digit to its template image. Each template
            is compared against a 57x88 ROI, so it is presumably the same
            size (confirm where the templates are built).
        credit_card: image that is drawn on in place (box plus text).
        credit_gray: grayscale image the digit ROIs are cut from.

    Returns:
        Flat list of recognised digits as strings, in ``locs`` order.

    Raises:
        ValueError: if a digit is found but ``digits2Cnt`` is empty.
    """
    outputs = []
    pad = 5  # tolerance margin around each group, in pixels

    for (gX, gY, gW, gH) in locs:
        groupOutput = []

        # Clamp the start indices: a negative start would wrap around to the
        # far edge of the image and yield an empty or wrong crop.
        top = max(gY - pad, 0)
        left = max(gX - pad, 0)
        group = credit_gray[top:gY + gH + pad, left:gX + gW + pad]

        # Otsu binarisation to make the digits stand out.
        group = cv2.threshold(group, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

        # [-2] selects the contour list on OpenCV 2.x, 3.x and 4.x alike.
        digitCnts = cv2.findContours(group.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        # Only sort when something was found, so an empty group is skipped
        # instead of failing inside the sort.
        if len(digitCnts) > 0:
            digitCnts = sort_contours(digitCnts, method='left-to-right')[0]

        for c in digitCnts:
            # Cut out the digit and bring it to the size used for matching.
            (x, y, w, h) = cv2.boundingRect(c)
            roi = cv2.resize(group[y:y + h, x:x + w], (57, 88))

            # Template matching: remember the key of the best-scoring
            # template rather than its position in the dict, so the result
            # does not depend on insertion order. Ties keep the first one.
            best_digit, best_score = None, None
            for (digit, digitROI) in digits2Cnt.items():
                result = cv2.matchTemplate(roi, digitROI, cv2.TM_CCOEFF)
                (_, score, _, _) = cv2.minMaxLoc(result)
                if best_score is None or score > best_score:
                    best_digit, best_score = digit, score

            if best_score is None:
                raise ValueError("digits2Cnt contains no templates")

            groupOutput.append(str(best_digit))

        # Draw the group box and the recognised digits above it.
        cv2.rectangle(credit_card, (gX - 5, gY - 5), (gX + gW + 5, gY + gH + 5), (0, 0, 255), 1)
        cv2.putText(credit_card, "".join(groupOutput), (gX, gY - 15), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 0, 255), 2)

        outputs.extend(groupOutput)
    return outputs

停车场车位识别

部分源码

import os
import torch
import torch.nn as nn
from torchvision.models import resnet34
import torch.optim as optim

from utils.model_utils import get_dataloader, data_transform_pretrain, model_train

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def train_model():
    """Fine-tune a ResNet-34 as a 2-class classifier.

    Data loaders are built from ``<cwd>/train_data/train`` and
    ``<cwd>/train_data/test``. Weights are loaded from disk into a default
    (1000-class) ResNet-34, the final fully connected layer is replaced by a
    2-output layer, and the training loop is delegated to ``model_train``.
    """
    # Data loaders; the dataset lives under the current working directory.
    dataset_dir = os.path.join(os.getcwd(), "train_data")
    train_loader, validat_loader, _, val_num = get_dataloader(
        os.path.join(dataset_dir, "train"),
        os.path.join(dataset_dir, "test"),
        data_transform_pretrain,
        batch_size=8,
    )

    # Build the network with its default head so the checkpoint keys match,
    # then load the pretrained parameters for fine-tuning.
    net = resnet34()
    pretrained = torch.load(
        'saved_model_weight/resnet34_pretrain_ori_low_torch_version.pth',
        map_location='cpu',
    )
    net.load_state_dict(pretrained)

    # Swap the classifier head for a 2-output layer and move to the device.
    net.fc = nn.Linear(net.fc.in_features, 2)
    net.to(device)

    # Training configuration.
    criterion = nn.CrossEntropyLoss()
    adam = optim.Adam(net.parameters(), lr=0.0001)

    model_train(
        net,
        train_loader,
        validat_loader,
        30,                                          # epochs
        device,
        adam,
        criterion,
        len(train_loader),                           # steps per epoch
        val_num,
        "saved_model_weight/resnet34_pretrain.pth",  # where the weights are saved
        0.,                                          # initial best accuracy
    )

文本扫描OCR识别

部分源码：

import os
import cv2
import numpy as np
from PIL import Image
import pytesseract


def cv_show(title, img):
    """Display ``img`` in a window named ``title`` and block until a key is pressed.

    Every OpenCV window is destroyed once the key press has been received.
    """
    cv2.imshow(title, img)
    cv2.waitKey(0)  # a delay of 0 waits for a key press with no timeout
    cv2.destroyAllWindows()

def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image``, optionally specifying only one target dimension.

    * Only ``height`` given: the width is scaled by ``height / h`` so the
      aspect ratio is preserved.
    * Only ``width`` given: the height is scaled by ``width / w`` likewise.
    * Both given: the image is resized to exactly ``(width, height)``; the
      aspect ratio is not preserved.
    * Neither given: the input object is returned unchanged.

    Args:
        image: array whose first two dimensions are (height, width).
        width: target width in pixels, or None.
        height: target height in pixels, or None.
        inter: interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when no size was requested.
    """
    (h, w) = image.shape[:2]
    if width is None and height is None:
        return image
    if width is None:
        r = height / float(h)
        dim = (int(w * r), height)
    elif height is None:
        r = width / float(w)
        dim = (width, int(h * r))
    else:
        # Both dimensions were requested explicitly: honour them as given
        # (previously height was silently ignored in this case).
        dim = (width, height)
    # cv2.resize expects the size as (width, height).
    return cv2.resize(image, dim, interpolation=inter)

def order_points(pts):
    """Return four 2-D points ordered top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: numpy array of shape (4, 2) holding (x, y) points in any order.

    Returns:
        A new float32 array of shape (4, 2) in the order described above.
    """
    # x + y is smallest at the top-left corner and largest at the bottom-right.
    coord_sum = pts.sum(axis=1)
    # y - x is smallest at the top-right corner (large x, small y) and
    # largest at the bottom-left corner (small x, large y).
    coord_diff = np.diff(pts, axis=1)

    picks = [
        np.argmin(coord_sum),    # top-left
        np.argmin(coord_diff),   # top-right
        np.argmax(coord_sum),    # bottom-right
        np.argmax(coord_diff),   # bottom-left
    ]
    return pts[picks].astype("float32")

def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` onto an upright rectangle.

    Args:
        image: source image.
        pts: four (x, y) corner points in any order; they are ordered with
            ``order_points`` before use.

    Returns:
        The perspective-corrected image. Its width and height are the longer
        of the two opposite edge lengths, truncated to int.
    """
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    def _edge_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Output size: the longer of each pair of opposite edges.
    out_w = max(int(_edge_length(bottom_right, bottom_left)),
                int(_edge_length(top_right, top_left)))
    out_h = max(int(_edge_length(top_right, bottom_right)),
                int(_edge_length(top_left, bottom_left)))

    # Destination corners, in the same order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype="float32",
    )

    # getPerspectiveTransform solves for the 3x3 matrix W (last entry fixed
    # to 1, hence 8 unknowns) that maps each source corner (x, y, 1) onto its
    # destination corner; the 4 point pairs provide the 8 linear equations.
    matrix = cv2.getPerspectiveTransform(corners, target)

    # warpPerspective applies W to every pixel: (x, y, 1) -> (x', y', z),
    # and the final coordinate is (x'/z, y'/z).
    return cv2.warpPerspective(image, matrix, (out_w, out_h))

def get_screenCnt(cnts):
    """Return the first contour whose polygon approximation has four vertices.

    The original author uses this to pick the outer outline of the document,
    which presumably requires ``cnts`` to be sorted by area in descending
    order (confirm at the call site).

    Args:
        cnts: iterable of contours.

    Returns:
        The 4-vertex approximation from ``cv2.approxPolyDP``, or ``None``
        when no contour reduces to four points. Previously that case raised
        UnboundLocalError.
    """
    for c in cnts:
        # Perimeter of the closed contour.
        peri = cv2.arcLength(c, True)
        # Approximate the contour with a closed polygon; epsilon (2% of the
        # perimeter) is the maximum distance allowed between the original
        # contour and its approximation.
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        # Four vertices: treat it as the document outline.
        if len(approx) == 4:
            return approx

    return None

def get_rotated_img(ref):
    """Rotate ``ref`` by 90 degrees about its centre, keeping the canvas size.

    The angle passed to ``cv2.getRotationMatrix2D`` is +90; OpenCV documents
    positive angles as counter-clockwise.

    NOTE(review): the output canvas is (cols, rows), the same as the input,
    so a non-square image appears to lose the parts that rotate outside the
    frame. Confirm this is intended.

    Args:
        ref: input image.

    Returns:
        The rotated image, with the same width and height as ``ref``.
    """
    height, width = ref.shape[:2]

    # Rotation about the image centre, 90 degrees, scale factor 1.
    rotation = cv2.getRotationMatrix2D((width / 2, height / 2), 90, 1)

    return cv2.warpAffine(ref, rotation, (width, height))

def ocr_check(image, preprocess='thresh'):
    """Run Tesseract OCR on a BGR image after optional preprocessing.

    Args:
        image: BGR colour image.
        preprocess: ``'thresh'`` applies Otsu binarisation, ``'blur'``
            applies a 3x3 median blur; any other value leaves the grayscale
            image as is.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Binarise or denoise to make the text easier to read.
    if preprocess == "thresh":
        gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    elif preprocess == "blur":
        gray = cv2.medianBlur(gray, 3)

    # Hand the array to Tesseract through an in-memory PIL image. The
    # previous version wrote "<pid>.png" into the working directory and
    # never removed it, and left the handle from Image.open unclosed.
    return pytesseract.image_to_string(Image.fromarray(gray))

答题卡识别判卷

import numpy as np
import cv2
from utils import four_point_transform, sort_contours, get_dotCnts, filter_cnts, get_dect_res


def answer_dect(img, ANSWER_KEY):
    """Grade a photographed answer sheet and return the annotated image.

    Args:
        img: BGR image of the answer sheet.
        ANSWER_KEY: sized collection of correct answers, passed through to
            ``get_dect_res`` (presumably a mapping from question index to
            correct choice index; confirm against ``get_dect_res``).

    Returns:
        A copy of the warped sheet with the percentage score drawn on it.
        The score is also printed.
    """
    # Grayscale -> Gaussian blur -> Canny edges.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blured = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(blured, 75, 200)

    # Contours are taken from the edge map. The original author notes that
    # running this on the grayscale image fails to find the sheet outline.
    # [-2] selects the contour list on OpenCV 2.x, 3.x and 4.x alike.
    cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Four corner points of the sheet border.
    dotCnts = get_dotCnts(cnts)

    # Perspective-correct the sheet.
    warped = four_point_transform(gray, dotCnts.reshape(4, 2))

    # Binarise (inverted Otsu) so the bubbles are easier to pick out.
    thresh = cv2.threshold(warped, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    # Contours of the individual bubbles.
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Keep only the contours that pass the project's bubble filter.
    questionCnts = filter_cnts(cnts)

    # Order the bubbles from top to bottom so they are grouped question by
    # question (by increasing y).
    questionCnts = sort_contours(questionCnts, method='top-to-bottom')[0]

    # Evaluate every bubble of every question.
    correct, warped = get_dect_res(questionCnts, ANSWER_KEY, warped, thresh)

    # Score as a percentage of the number of questions in the key. This was
    # hard-coded to 5, which is only right for a five-question sheet.
    exam_img = warped.copy()
    total = len(ANSWER_KEY)
    score = (correct / total) * 100 if total else 0.0
    print("[INFO] score: {:.2f}%".format(score))
    cv2.putText(exam_img, "{:.2f}%".format(score), (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)

    return exam_img

完整源码私信获取

毕业设计——基于OpenCV和数字图像处理的图像识别项目（含信用卡号识别、停车场车位识别、文档扫描、答题卡识别）

信用卡号识别

停车场车位识别

文本扫描OCR识别

答题卡识别判卷

猜你喜欢