计算机视觉和图像处理
图像分类
一、图像分类简介
图像分类实质上就是从给定的类别集合中为图像分配对应标签的任务。也就是说我们的任务是分析一个输入图像并返回一个该图像类别的标签。
图像分类常用的数据集:MNIST、CIFAR-100、CIFAR-10、ImageNet
二、AlexNet
- AlexNet包含8层变换,有5层卷积和2层全连接隐藏层,以及一个全连接输出层。
- AlexNet通过DropOut来控制全连接层的模型复杂度。
- AlexNet模型
# 导包
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.datasets import mnist
# Build the AlexNet model one layer at a time.
model = Sequential()
# Stage 1: 11x11 convolution with stride 4, then 3x3 max pooling with stride 2.
model.add(layers.Conv2D(filters=96, kernel_size=11, strides=4, activation="relu"))
model.add(layers.MaxPool2D(pool_size=3, strides=2))
# Stage 2: 5x5 "same" convolution, then 3x3 max pooling with stride 2.
model.add(layers.Conv2D(filters=256, kernel_size=5, strides=1, padding="same", activation="relu"))
model.add(layers.MaxPool2D(pool_size=3, strides=2))
# Stage 3: three consecutive 3x3 "same" convolutions, then max pooling.
model.add(layers.Conv2D(filters=384, kernel_size=3, strides=1, padding="same", activation="relu"))
model.add(layers.Conv2D(filters=384, kernel_size=3, strides=1, padding="same", activation="relu"))
model.add(layers.Conv2D(filters=256, kernel_size=3, strides=1, padding="same", activation="relu"))
model.add(layers.MaxPool2D(pool_size=3, strides=2))
# Classifier head: two 4096-unit hidden layers, each followed by 50% dropout,
# and a 10-way softmax output.
model.add(layers.Flatten())
model.add(layers.Dense(4096, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(4096, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation='softmax'))
# Run one random 227x227 single-channel image through the network before
# printing the summary (the model has no Input layer of its own).
x = tf.random.uniform((1, 227, 227, 1))
y = model(x)
model.summary()
- 手写数字识别
# Load the MNIST training and test splits.
(train_image, train_label), (test_image, test_label) = mnist.load_data()
# Bare expression: shows both shapes when run as a notebook cell.
train_image.shape, test_image.shape
# Add a trailing channel axis -> (N, 28, 28, 1). Using -1 lets NumPy infer the
# sample count instead of hard-coding 60000 / 10000.
train_image = train_image.reshape(-1, 28, 28, 1)
test_image = test_image.reshape(-1, 28, 28, 1)
import numpy as np
# Randomly sample training examples
def get_trian(size):
    """Draw `size` distinct training samples and resize them to 227x227.

    Reads the module-level `train_image` and `train_label` arrays at call
    time. The script later rebinds those names to a sampled subset, after
    which this function samples from that subset.

    Args:
        size: number of samples to draw without replacement.

    Returns:
        A tuple `(images, labels)`: the resized (and padded) images as a NumPy
        array and the labels at the same indices.

    Raises:
        ValueError: from `np.random.choice` if `size` exceeds the number of
            available samples.
    """
    # Use the actual population size rather than a hard-coded 60000, so the
    # indices are always valid for whatever `train_image` currently holds.
    index = np.random.choice(len(train_image), size, replace=False)
    # Resize the samples to 227x227, padding to keep the aspect ratio.
    resize_image = tf.image.resize_with_pad(train_image[index], 227, 227)
    return resize_image.numpy(), train_label[index]
def get_test(size):
    """Draw `size` distinct test samples and resize them to 227x227.

    Reads the module-level `test_image` and `test_label` arrays at call time.
    The script later rebinds those names to a sampled subset, after which
    this function samples from that subset.

    Args:
        size: number of samples to draw without replacement.

    Returns:
        A tuple `(images, labels)`: the resized (and padded) images as a NumPy
        array and the labels at the same indices.

    Raises:
        ValueError: from `np.random.choice` if `size` exceeds the number of
            available samples.
    """
    # Use the actual population size rather than a hard-coded 10000, so the
    # indices are always valid for whatever `test_image` currently holds.
    index = np.random.choice(len(test_image), size, replace=False)
    resize_image = tf.image.resize_with_pad(test_image[index], 227, 227)
    return resize_image.numpy(), test_label[index]
# Draw the training and test subsets.
# NOTE: this rebinds the module-level train_image/train_label and
# test_image/test_label names that get_trian()/get_test() read, so from here
# on they refer to the resized subsets rather than the full MNIST arrays.
train_image, train_label = get_trian(256)
test_image, test_label = get_test(128)
# Show the first nine training samples in a 3x3 grid.
import matplotlib.pyplot as plt
for i in range(9):
    plt.subplot(3, 3, i + 1)
    # imshow is documented for (M, N), (M, N, 3) or (M, N, 4) arrays, so pass
    # the 2-D slice instead of relying on an (M, N, 1) array being accepted.
    plt.imshow(train_image[i][:, :, 0], cmap='gray')
    plt.title(train_label[i])
# Compile the model: plain SGD (learning rate 0.1), sparse categorical
# cross-entropy on integer labels, accuracy as the reported metric.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=['accuracy'],
)
# Train for 3 epochs in batches of 128, holding out 10% of the training
# samples for validation.
model.fit(
    x=train_image,
    y=train_label,
    batch_size=128,
    epochs=3,
    validation_split=0.1,
    verbose=1,
)
# Evaluate on the sampled test subset.
model.evaluate(x=test_image, y=test_label, verbose=1)
三、VGG网络架构
VGG可以看成加深版的AlexNet,整个网络由卷积层和全连接层叠加而成。VGGNet使用的全部都是3×3的小卷积核和2×2的池化核,通过不断加深网络来提升性能。VGG可以通过重复使用简单的基础块来构建深度模型。
# Model construction
def vgg(convs_arch, num_classes=10):
    """Build a VGG-style network from a list of convolution-block specs.

    Args:
        convs_arch: iterable of `(num_convs, num_filters)` pairs. Each pair
            produces one VGG block: `num_convs` 3x3 "same" ReLU convolutions
            with `num_filters` filters, followed by a 2x2 max pooling with
            stride 2.
        num_classes: number of units in the softmax output layer. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        A `Sequential` model: the convolution blocks followed by a nested
        `Sequential` classifier head.
    """
    model = Sequential()
    # Build the VGG blocks.
    for num_convs, num_filters in convs_arch:
        for _ in range(num_convs):
            model.add(layers.Conv2D(num_filters, kernel_size=3, padding='same', activation='relu'))
        # One pooling layer closes each block.
        model.add(layers.MaxPool2D(pool_size=2, strides=2))
    # Fully connected classifier head appended after the convolution blocks.
    model.add(Sequential([
        layers.Flatten(),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ]))
    return model
# Block specification: (number of conv layers, number of filters) per block.
convs_arch = (
    (2, 64),
    (2, 128),
    (3, 256),
    (3, 512),
    (3, 512),
)
model = vgg(convs_arch)
# Run one random 224x224 single-channel image through the network, then
# print the layer summary.
x = tf.random.uniform(shape=(1, 224, 224, 1))
y = model(x)
model.summary()
四、GoogLeNet
Inception 层通过多个路径(不同的卷积核大小和池化操作)提取不同尺度的特征,并将这些特征图拼接在一起。每次通过一个 Inception 层时,可能会增加特征图的数量。
4.1 Inception模块
Inception 模块的主要目的是通过并行使用不同大小的卷积核来捕捉不同尺度的空间信息,并通过拼接这些特征图来增强模型的表征能力。
class Inception(tf.keras.layers.Layer):
    """Inception block: four parallel paths concatenated on the last axis.

    Args:
        c1: number of filters of the 1x1 convolution on path 1.
        c2: pair `(reduce, out)`; filters of the 1x1 convolution and of the
            3x3 convolution on path 2.
        c3: pair `(reduce, out)`; filters of the 1x1 convolution and of the
            5x5 convolution on path 3.
        c4: number of filters of the 1x1 convolution that follows the 3x3 max
            pooling on path 4.
        **kwargs: forwarded to `tf.keras.layers.Layer` (e.g. `name`).
    """

    def __init__(self, c1, c2, c3, c4, **kwargs):
        super().__init__(**kwargs)
        # Remember the constructor arguments so get_config() can report them.
        self._c1 = c1
        self._c2 = tuple(c2)
        self._c3 = tuple(c3)
        self._c4 = c4
        # Path 1: 1x1 convolution.
        self.p1_1 = layers.Conv2D(c1, kernel_size=1, activation='relu', padding='same')
        # Path 2: 1x1 convolution followed by a 3x3 convolution.
        self.p2_1 = layers.Conv2D(c2[0], kernel_size=1, activation='relu', padding='same')
        self.p2_2 = layers.Conv2D(c2[1], kernel_size=3, activation='relu', padding='same')
        # Path 3: 1x1 convolution followed by a 5x5 convolution.
        self.p3_1 = layers.Conv2D(c3[0], kernel_size=1, activation='relu', padding='same')
        self.p3_2 = layers.Conv2D(c3[1], kernel_size=5, activation='relu', padding='same')
        # Path 4: 3x3 max pooling (stride 1) followed by a 1x1 convolution.
        self.p4_1 = layers.MaxPool2D(pool_size=3, padding='same', strides=1)
        self.p4_2 = layers.Conv2D(c4, kernel_size=1, activation='relu', padding='same')

    # Forward pass. The parameter name `input` is kept unchanged for callers.
    def call(self, input):
        """Apply the four paths to `input` and concatenate their outputs."""
        p1 = self.p1_1(input)
        p2 = self.p2_2(self.p2_1(input))
        p3 = self.p3_2(self.p3_1(input))
        p4 = self.p4_2(self.p4_1(input))
        # Every path uses "same" padding with stride 1, so only the last axis
        # differs between the four outputs.
        outputs = tf.concat([p1, p2, p3, p4], axis=-1)
        return outputs

    def get_config(self):
        """Return the layer config including the constructor arguments."""
        config = super().get_config()
        config.update({
            "c1": self._c1,
            "c2": list(self._c2),
            "c3": list(self._c3),
            "c4": self._c4,
        })
        return config
# Instantiate an Inception block with explicit per-path channel counts.
Inception(c1=64, c2=(96, 128), c3=(16, 32), c4=32)
4.2 GoogLeNet构建
# B1 block: 7x7 convolution with stride 2, then 3x3 max pooling with stride 2.
inputs = tf.keras.Input(shape=(224, 224, 3), name="input")
x = tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding="same", activation="relu")(inputs)
x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same")(x)
# B2 block: 1x1 convolution, 3x3 convolution, then 3x3 max pooling with
# stride 2. Both convolutions use stride 1; only the pooling layer
# downsamples (the previous stride-2 convolutions shrank the map 8x here).
x = tf.keras.layers.Conv2D(64, kernel_size=1, strides=1, padding="same", activation="relu")(x)
x = tf.keras.layers.Conv2D(192, kernel_size=3, strides=1, padding="same", activation="relu")(x)
x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same")(x)
# B3 block: two Inception blocks followed by 3x3 max pooling with stride 2.
# This replaces an accidental copy of the B2 block, which collapsed the
# feature map to 1x1 before it reached the B4 Inception blocks.
x = Inception(64, (96, 128), (16, 32), 32)(x)
x = Inception(128, (128, 192), (32, 96), 64)(x)
x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same")(x)
# B4 block
# Auxiliary classifier
def aux_classifier(x, filter_size):
    """Attach an auxiliary softmax classifier to the feature map `x`.

    Args:
        x: tensor to branch off from.
        filter_size: indexable with two entries; `filter_size[0]` is the
            number of filters of the 1x1 convolution and `filter_size[1]` the
            number of units of the hidden dense layer.

    Returns:
        The output tensor of the final 10-unit softmax layer.
    """
    conv_filters = filter_size[0]
    dense_units = filter_size[1]
    keras_layers = tf.keras.layers
    # 5x5 average pooling with stride 3 shrinks the map before the 1x1 conv.
    pooled = keras_layers.AveragePooling2D(pool_size=5, strides=3, padding="same")(x)
    reduced = keras_layers.Conv2D(
        filters=conv_filters,
        kernel_size=1,
        strides=1,
        padding='valid',
        activation="relu",
    )(pooled)
    flat = keras_layers.Flatten()(reduced)
    hidden = keras_layers.Dense(units=dense_units, activation="relu")(flat)
    return keras_layers.Dense(10, activation="softmax")(hidden)
# First Inception block of B4.
x = Inception(c1=192, c2=(96, 208), c3=(16, 48), c4=64)(x)
# First auxiliary output, branching off after that block.
aux_output1 = aux_classifier(x, [128, 1024])
# Three further Inception blocks.
x = Inception(c1=160, c2=(112, 224), c3=(24, 64), c4=64)(x)
x = Inception(c1=128, c2=(128, 256), c3=(24, 64), c4=64)(x)
x = Inception(c1=112, c2=(144, 288), c3=(32, 64), c4=64)(x)
# Second auxiliary output.
aux_output2 = aux_classifier(x, [128, 1024])
# Last Inception block of B4, then 3x3 max pooling with stride 2.
x = Inception(c1=256, c2=(160, 320), c3=(32, 128), c4=128)(x)
x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
# B5 block: two Inception blocks.
x = Inception(c1=256, c2=(160, 320), c3=(32, 128), c4=128)(x)
x = Inception(c1=384, c2=(192, 384), c3=(48, 128), c4=128)(x)
# Global average pooling (GAP) is used in place of fully connected hidden
# layers; a single 10-way softmax layer produces the main output.
x = layers.GlobalAvgPool2D()(x)
outputs = layers.Dense(10, activation='softmax')(x)
# Assemble the model: the main output plus the two auxiliary outputs.
model = tf.keras.Model(
    inputs=inputs,
    outputs=[outputs, aux_output1, aux_output2],
)
model.summary()
五、ResNet
5.1 定义ResNet的残差块
残差块主要用于解决深层神经网络中的梯度消失问题和训练过程中的退化问题。
class Residual(tf.keras.Model):
    """Residual unit: two 3x3 convolutions plus a skip connection.

    Args:
        num_filters: number of filters of every convolution in the unit.
        use_1X1conv: if True, the skip path goes through a 1x1 convolution
            (with the same `strides`) before it is added to the main path.
        strides: stride of the first 3x3 convolution and, when present, of
            the 1x1 skip convolution.
    """

    def __init__(self, num_filters, use_1X1conv=False, strides=1):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=strides, padding='same')
        self.conv2 = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=1, padding='same')
        # Optional projection on the skip path; None means identity skip.
        if use_1X1conv:
            self.conv3 = tf.keras.layers.Conv2D(num_filters, kernel_size=1, strides=strides, padding='same')
        else:
            self.conv3 = None
        # Batch-normalization layers, one per 3x3 convolution.
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.bn2 = tf.keras.layers.BatchNormalization()

    # Forward pass
    def call(self, x):
        """Return relu(main_path(x) + skip(x))."""
        y = tf.keras.activations.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        # Test for the optional layer explicitly instead of relying on the
        # truthiness of a layer object.
        if self.conv3 is not None:
            x = self.conv3(x)
        # Skip connection: add, then activate.
        outputs = tf.keras.activations.relu(y + x)
        return outputs
5.2 ResNet网络中模块的构成
ResnetBlock旨在通过串联多个残差块来构建深层神经网络,从而改善模型的训练稳定性和泛化能力。
# Composition of one module (stage) of the ResNet network
class ResnetBlock(tf.keras.layers.Layer):
    """A stack of `num_res` Residual units applied one after another.

    Args:
        num_filters: number of filters passed to every Residual unit.
        num_res: number of Residual units in the stack.
        first_block: when False (the default), the first unit is created with
            a 1x1 skip convolution and stride 2; every other unit uses the
            Residual defaults.
    """

    def __init__(self, num_filters, num_res, first_block=False):
        super().__init__()
        downsample_first = not first_block
        # Residual units in application order.
        self.listLayers = [
            Residual(num_filters, use_1X1conv=True, strides=2)
            if (idx == 0 and downsample_first)
            else Residual(num_filters)
            for idx in range(num_res)
        ]

    def call(self, x):
        """Feed `x` through every Residual unit in order."""
        out = x
        for residual in self.listLayers:
            out = residual(out)
        return out
5.3 ResNet网络的构建
# Build the ResNet network
class ResNet(tf.keras.Model):
    """ResNet classifier: stem, four residual stages, GAP and a softmax layer.

    Args:
        num_blocks: indexable with four entries; entry `i` is the number of
            Residual units in residual stage `i + 1`.
    """

    def __init__(self, num_blocks):
        super().__init__()
        # Stem: 7x7 convolution with stride 2, batch norm, ReLU, 3x3 max
        # pooling with stride 2.
        self.conv = tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same')
        self.bn = tf.keras.layers.BatchNormalization()
        self.relu = tf.keras.layers.Activation('relu')
        self.mp = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')
        # Residual stages with 64, 128, 256 and 512 filters; only the first
        # one is created with first_block=True.
        self.res_block1 = ResnetBlock(64, num_blocks[0], first_block=True)
        self.res_block2 = ResnetBlock(128, num_blocks[1])
        self.res_block3 = ResnetBlock(256, num_blocks[2])
        self.res_block4 = ResnetBlock(512, num_blocks[3])
        # Global average pooling followed by the 10-way softmax classifier.
        self.gap = tf.keras.layers.GlobalAvgPool2D()
        self.fc = tf.keras.layers.Dense(units=10, activation='softmax')

    def call(self, x):
        """Run `x` through the stem, the four stages, GAP and the classifier."""
        pipeline = (
            self.conv,
            self.bn,
            self.relu,
            self.mp,
            self.res_block1,
            self.res_block2,
            self.res_block3,
            self.res_block4,
            self.gap,
            self.fc,
        )
        for stage in pipeline:
            x = stage(x)
        return x
# Instantiate the network with two residual units in each of the four stages.
my_net = ResNet(num_blocks=[2, 2, 2, 2])
# Run one random 224x224 single-channel image through the network, then
# print the layer summary.
x = tf.random.uniform(shape=(1, 224, 224, 1))
y = my_net(x)
my_net.summary()
六、图像增强
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
# Read the image from disk and display it
cat = plt.imread("./cat.jpg")
plt.imshow(cat)
# Random left-right flip
# NOTE(review): this rebinds `cat`, so every augmentation below operates on the
# possibly flipped image rather than on the file as read — confirm this is intended.
cat = tf.image.random_flip_left_right(cat)
plt.imshow(cat)
# Random up-down flip
image = tf.image.random_flip_up_down(cat)
plt.imshow(image)
# Random crop to a 200x200x3 patch
# NOTE(review): assumes the image is at least 200x200 with 3 channels — TODO confirm
image_1 = tf.image.random_crop(cat,(200,200,3))
plt.imshow(image_1)
# Random brightness adjustment (second argument 0.5)
image_2 = tf.image.random_brightness(cat,0.5)
plt.imshow(image_2)
# Random hue adjustment (second argument 0.4)
image_3 = tf.image.random_hue(cat,0.4)
plt.imshow(image_3)
# Image augmentation with ImageDataGenerator(). The call below only lists some
# of its options; the instance it creates is not assigned to anything.
# NOTE(review): the TensorFlow docs mark this class as deprecated for new code — confirm
tf.keras.preprocessing.image.ImageDataGenerator(
rotation_range=0, # degree range for random rotations
width_shift_range=0.0, # width (horizontal) shift
height_shift_range=0.0, # height (vertical) shift
brightness_range=None, # brightness adjustment
shear_range=0.0, # shear
zoom_range=0.0, # zoom
horizontal_flip=False, # left-right flip
vertical_flip=False, # up-down flip
rescale=None, # rescaling
)
from tensorflow.keras.datasets import mnist
# Load the MNIST data.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Bare expression: shows both shapes when run as a notebook cell.
x_train.shape, x_test.shape
# Add a trailing channel axis -> (N, 28, 28, 1). Using -1 lets NumPy infer the
# sample count instead of hard-coding 60000 / 10000.
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)
# Create a generator that randomly flips images horizontally.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True)
# Take only the first augmented batch of nine images and show it as a 3x3 grid.
for x, y in datagen.flow(x_train, y_train, batch_size=9):
    for i in range(0, 9):
        plt.subplot(3, 3, i + 1)
        # imshow is documented for (M, N), (M, N, 3) or (M, N, 4) arrays, so
        # pass the 2-D slice instead of relying on (M, N, 1) being accepted.
        plt.imshow(x[i][:, :, 0])
        plt.title(y[i])
    plt.show()
    # flow() is iterated with an explicit break, so stop after one batch.
    break