数字图像处理(名字的分割与识别)

内容

利用所学图像分割技术,将图片中包含的名字进行分割与识别

操作步骤

  1. 利用基于边界的分割技术,提取图像边缘信息,然后根据灰度分布,提供数个可能存在名字的选区框
  2. 训练一个能识别出图像中是否有名字的卷积神经网络
  3. 用CNN逐个检测选区,对检测到名字的选区去重后,显示其选区框

1. 提取边缘,产生可能存在名字的选区框

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import PIL.Image as im
from function import*
# Side length, in pixels, of the square working image.
dim = 256
# Read the photo, divide by 255 (presumably mapping 8-bit values into
# [0, 1] -- confirm the input is uint8) and resize it to dim x dim.
img = tf.image.resize(plt.imread('2.jpg') / 255.0, (dim, dim))
plt.title("original picture")
plt.imshow(img)
<matplotlib.image.AxesImage at 0x1968646a148>

在这里插入图片描述

# Convolve the image with a 5x5 LoG (Laplacian of Gaussian) template to
# extract edges.
# Channel 0 of the image is used as the grey-level input, shaped
# (1, dim, dim, 1): one image, one channel.
img_in = np.array(img[:, :, 0], dtype=np.float64).reshape(1, dim, dim, 1)
# Filter laid out as (height, width, in_channels, out_channels); the
# coefficients sum to zero.
fil = np.array(
    [
        [0, 0, -1, 0, 0],
        [0, -1, -2, -1, 0],
        [-1, -2, 16, -2, -1],
        [0, -1, -2, -1, 0],
        [0, 0, -1, 0, 0],
    ],
    dtype=np.float64,
).reshape(5, 5, 1, 1)
img_out = np.clip(
    tf.nn.conv2d(input=img_in, filters=fil, strides=1, padding='SAME'),
    0,
    1,
)
# Copy the single-channel edge map into three identical channels so it can
# be shown (and later annotated) as an RGB image.
edge_map = img_out[0, :, :, 0]
ccc = np.stack([edge_map, edge_map, edge_map], axis=-1)
plt.imshow(ccc)
# Generate regions that may contain a name and display these "proposals".
# A 100x100 window is moved over the edge map on a 30-pixel grid; a window
# becomes a proposal when its mean edge response is above the global mean.
co = []  # (x, y) window centres; x indexes axis 0 of the edge map, y axis 1
mat = img_out[0,:,:,0]
gray_mean = np.mean(mat)
gray_std = np.std(mat)  # standard deviation of the edge map (not used below)
for x in range(50, 230, 30):
    for y in range(50, 230, 30):
        if np.mean(mat[x-50:x+50, y-50:y+50]) > gray_mean:
            co.append((x, y))
# creat_RGBmat comes from the local `function` module; presumably it draws
# the proposal boxes onto the image -- confirm against that module.
RGBmat = creat_RGBmat(ccc, co)
plt.figure(1)
plt.title("proposal blocks")
plt.imshow(RGBmat)

plt.figure(2)
# creat_X_test comes from the local `function` module; presumably it crops
# one 100x100 patch per proposal -- confirm against that module.
picture_test = creat_X_test(img, co, 100)
print("已分割出选区数:",len(co))
print("block shape:",picture_test.shape)
# Size the preview grid from the number of patches: keep the 2x4 layout for
# up to 8 patches and add rows beyond that, so plt.subplot never receives an
# index outside the grid.
n_blocks = picture_test.shape[0]
grid_cols = 4
grid_rows = max(2, -(-n_blocks // grid_cols))  # ceiling division
for i in range(n_blocks):
    plt.subplot(grid_rows, grid_cols, i+1)
    plt.imshow(picture_test[i])
    plt.axis('off')

2. 训练一个卷积神经网络

# load_picture and pre_process come from the local `function` module.
# NOTE(review): the argument 6 is presumably the number of held-out test
# pictures -- confirm against pre_process.
xx, yy = load_picture()
xx_train, yy_train, xx_test, yy_test = pre_process(xx, yy, 6)
# Bring both splits to the 100x100 size the network is built for.
xx_train, xx_test = (
    tf.image.resize(batch, (100, 100)) for batch in (xx_train, xx_test)
)
print("xx_trian.shape:", xx_train.shape)
print("xx_test.shape:", xx_test.shape)
print("\t\t显示6张用于训练的图片")
# Preview the first six training pictures in a 2x3 grid, titled by label.
for slot in range(1, 7):
    sample = slot - 1
    plt.subplot(2, 3, slot)
    plt.title(yy_train[sample])
    plt.imshow(xx_train[sample])
    plt.axis('off')
# Define the model
def model_init_fun(input_shape, num_classes):
    """Build the (uncompiled) classification CNN.

    The network is three 'same'-padded ReLU convolutions (30 filters of
    10x10, 60 of 5x5, 120 of 3x3), a 4x4 max-pool, a flatten, a dense layer
    with ``num_classes`` outputs and a softmax.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.
        num_classes: Number of output units of the final dense layer.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    initializer = tf.initializers.VarianceScaling(scale=2.0)

    def conv_relu(name, filters, kernel_size, **extra):
        # All convolutions share padding, activation and initializer.
        return tf.keras.layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            padding='same',
            activation='relu',
            kernel_initializer=initializer,
            name=name,
            **extra,
        )

    stack = [
        conv_relu('conv1', 30, (10, 10), input_shape=input_shape),
        conv_relu('conv2', 60, (5, 5)),
        conv_relu('conv3', 120, (3, 3)),
        tf.keras.layers.MaxPool2D(pool_size=(4, 4)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(
            num_classes, kernel_initializer=initializer, name='fc'
        ),
        tf.keras.layers.Softmax(),
    ]
    return tf.keras.Sequential(stack)

def optimizer_init_fn(learning_rate):
    """Return an Adam optimizer that uses the given learning rate."""
    adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    return adam

# Build a two-class network for 100x100 three-channel inputs and an Adam
# optimizer with learning rate 1e-4.
cnn = model_init_fun((100, 100, 3), 2)
opt = optimizer_init_fn(1e-4)
# Train the model and show part of the validation results.
# The labels are fed to a sparse categorical loss, so they are expected to
# be integer class ids rather than one-hot vectors.
cnn.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.sparse_categorical_accuracy],
)
cnn.fit(x=xx_train, y=yy_train, epochs=20, verbose=0)
cnn.evaluate(x=xx_test, y=yy_test, verbose=2)

# Predicted class id per test picture: the index of the largest softmax
# output. (Sequential.predict_classes is not available in current
# TensorFlow releases, so the argmax is taken explicitly.)
y_pre = np.argmax(cnn.predict(xx_test), axis=-1)
print("\n\t\t\t验证结果")
# Two rows; the column count is an integer ceiling of n/2 so the grid also
# fits an odd number of pictures and plt.subplot never receives a float.
n_test = xx_test.shape[0]
n_cols = max(1, (n_test + 1) // 2)
for i in range(n_test):
    plt.subplot(2, n_cols, i+1)
    # Class 1 is shown as "detected", anything else as "no".
    title = "detected" if y_pre[i] == 1 else "no"
    plt.title(title)
    plt.imshow(xx_test[i])
    plt.axis('off')
6/6 - 0s - loss: 0.0607 - sparse_categorical_accuracy: 1.0000

			验证结果

在这里插入图片描述

3. 对选区进行检测,显示检测到名字的选区框

# Classify every proposed region.
# The predicted class id is the index of the largest softmax output.
# (Sequential.predict_classes is not available in current TensorFlow
# releases, so the argmax is taken explicitly.)
y_pre = np.argmax(cnn.predict(picture_test), axis=-1)
print("\n\t\t\t检测结果")
# Keep the 3x4 layout for up to 12 regions and add rows beyond that, so
# plt.subplot never receives an index outside the grid.
n_regions = y_pre.shape[0]
grid_cols = 4
grid_rows = max(3, -(-n_regions // grid_cols))  # ceiling division
for i in range(n_regions):
    plt.subplot(grid_rows, grid_cols, i+1)
    # Class 1 is shown as "detected", anything else as "no".
    title = "detected" if y_pre[i] == 1 else "no"
    plt.title(title)
    plt.imshow(picture_test[i])
    plt.axis('off')
			检测结果

在这里插入图片描述

# Deduplicate the detected regions and display the result.
# Detections are visited in proposal order; one is kept only if its centre
# is at least 50 pixels away from every centre already kept, so a cluster
# of overlapping detections is reduced to its first member.
min_dist_sq = 50**2
co_final = []
index = np.where(y_pre == 1)[0]  # positions in `co` classified as class 1
for k in index:
    x, y = co[k]
    is_new = all(
        (x - kx)**2 + (y - ky)**2 >= min_dist_sq for kx, ky in co_final
    )
    if is_new:
        co_final.append((x, y))

# creat_RGBmat comes from the local `function` module; presumably it draws
# the boxes for the given centres onto the image -- confirm against it.
final_picture = creat_RGBmat(np.array(img), co_final)
plt.title("picture detected")
plt.imshow(final_picture)
<matplotlib.image.AxesImage at 0x22244d0edc8>

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/qq_20493631/article/details/116354646