这段时间在学习卷积神经网络,为了对CNN有更深的了解和认识,便搞了一个简单的人脸识别作为练习。
运行环境
- Windows
- Python 3.x (tensorflow, opencv, numpy, sklearn, dlib)
获取人脸
第一步就是要获取用于训练的人脸图像,其中自己的人脸,我们使用程序来拍照,数量需求比较大,我用了12000张自己的人脸图像。
其他人的人脸可以在网上找到,这里有一个人脸数据集:http://vis-www.cs.umass.edu/lfw/lfw.tgz
要获取人脸,首先要能检测出人脸来,OpenCV与dlib都能实现,OpenCV的检测速度更快,但是精度却不如dlib,这里我们采用dlib进行检测。这里我们将所有图片裁剪成64x64。
import cv2
import dlib
import os
import random
# Directory that receives the captured face crops, and the side length
# (in pixels) of the square crops written there.
output_dir = './my_faces'
size = 64

# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)
# Change the brightness and contrast of an image.
def relight(image, light=1, bias=0):
    """Scale and shift the first three channels of ``image`` in place.

    Every value ``v`` in channels 0-2 becomes ``v * light + bias``, clamped
    to the range ``[0, 255]``.

    Args:
        image: Array indexed as ``image[row, col, channel]``. It is modified
            in place. Assumed to have an integer dtype such as uint8, so the
            fractional part of each result is dropped on store.
        light: Multiplicative gain (contrast).
        bias: Additive offset (brightness).

    Returns:
        The same ``image`` object that was passed in.
    """
    channels = image[:, :, :3]
    # Compute in float64 so neither the gain nor the offset can wrap around
    # the narrow pixel dtype before clamping.
    adjusted = (channels.astype('float64') * light + bias).clip(0, 255)
    image[:, :, :3] = adjusted.astype(image.dtype)
    # Return the argument itself; the previous code returned the name
    # ``img``, which is not defined inside this function.
    return image
# Use dlib's built-in frontal_face_detector to locate faces.
detector = dlib.get_frontal_face_detector()
# Open the camera. The argument selects the input stream: a camera index or
# a video file.
camera = cv2.VideoCapture(0)
index = 1
try:
    # Stop once the running picture index exceeds 10000.
    while index <= 10000:
        print('Being processed picture %s' % index)
        # Grab one frame from the camera.
        success, img = camera.read()
        if not success:
            # No frame was delivered, so there is nothing to convert or crop.
            break
        # Detection runs on the grayscale version of the frame.
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        dets = detector(gray_img, 1)
        for d in dets:
            # Clamp the detected rectangle so no coordinate is negative.
            top = max(d.top(), 0)
            bottom = max(d.bottom(), 0)
            left = max(d.left(), 0)
            right = max(d.right(), 0)
            face = img[top:bottom, left:right]
            if face.size == 0:
                # A degenerate rectangle yields an empty crop that cannot be
                # resized.
                continue
            # Randomise contrast and brightness to diversify the samples.
            face = relight(face, random.uniform(0.5, 1.5), random.randint(-50, 50))
            face = cv2.resize(face, (size, size))
            cv2.imshow('image', face)
            cv2.imwrite(output_dir+'/'+str(index)+'.jpg', face)
            index += 1
        # Poll the keyboard once per frame; key code 27 (ESC) aborts capture.
        key = cv2.waitKey(30) & 0xff
        if key == 27:
            break
    else:
        # Reached only when the loop ended because the index limit was hit.
        print('Finished!')
finally:
    # Always hand the capture device and preview window back to the OS.
    camera.release()
    cv2.destroyAllWindows()
现在要处理其他人的人脸,将人脸从图中提取出来,并裁剪成64x64大小。
import sys
import os
import cv2
import dlib
# Source tree of photos, destination folder for the crops, and the side
# length (in pixels) of the square crops.
input_dir = './input'
output_dir = './others_faces'
size = 64
os.makedirs(output_dir, exist_ok=True)

# Use dlib's built-in frontal_face_detector to locate faces.
detector = dlib.get_frontal_face_detector()

index = 0
for path, _dirnames, filenames in os.walk(input_dir):
    for filename in filenames:
        if not filename.endswith('.jpg'):
            continue
        img_path = os.path.join(path, filename)
        # Read the picture from disk.
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None when the file cannot be decoded;
            # skip it instead of crashing in cvtColor.
            continue
        # Detection runs on the grayscale version of the picture.
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # dets holds the rectangles returned by the detector.
        dets = detector(gray_img, 1)
        for d in dets:
            # Clamp the detected rectangle so no coordinate is negative.
            top = max(d.top(), 0)
            bottom = max(d.bottom(), 0)
            left = max(d.left(), 0)
            right = max(d.right(), 0)
            # Rows are indexed first, then columns: img[y:y+h, x:x+w].
            face = img[top:bottom, left:right]
            if face.size == 0:
                # A degenerate rectangle yields an empty crop that cannot be
                # resized.
                continue
            # Bring the crop to the common size.
            face = cv2.resize(face, (size, size))
            cv2.imshow('image', face)
            # Save the crop.
            cv2.imwrite(output_dir+'/'+str(index)+'.jpg', face)
            index += 1
            print('Being processed picture %s' % index)
        # Poll the keyboard once per input file; key code 27 (ESC) quits.
        key = cv2.waitKey(30) & 0xff
        if key == 27:
            sys.exit(0)
数据处理
将获取到的图片转化成能处理的数据类型。
import cv2
import numpy as np
import os
# Folders holding the two classes of face crops.
my_faces_path, other_faces_path = './my_faces', './others_faces'

# Default height and width, in pixels, used when images are resized.
image_size = 64

# Module-level accumulators: image file paths, decoded images, and the
# per-image labels.
images_name, images, labels = [], [], []
def get_padding_size(image):
    """Return the border widths that pad ``image`` out to a square.

    The shorter side is padded up to the length of the longer one. When the
    missing amount is odd, the extra pixel goes to the bottom or right edge.

    Args:
        image: Array whose ``shape`` starts with ``(height, width)``. Any
            trailing dimensions, such as channels, are ignored, so both 2-D
            and 3-D images are accepted.

    Returns:
        A ``(top, bottom, left, right)`` tuple of non-negative ints.
    """
    h, w = image.shape[:2]
    longest = max(h, w)
    # At most one of these is non-zero, because one side already equals
    # ``longest``.
    pad_h = longest - h
    pad_w = longest - w
    top = pad_h // 2
    left = pad_w // 2
    return top, pad_h - top, left, pad_w - left
def read_data(path):
    """Record every ``.jpg`` file found directly inside ``path``.

    For each matching entry, ``path + '/' + name`` is appended to the
    module-level ``images_name`` list and ``path`` itself is appended to the
    module-level ``labels`` list, so the folder name serves as the label.

    Args:
        path: Directory to list.
    """
    jpg_entries = (entry for entry in os.listdir(path) if entry.endswith('.jpg'))
    for entry in jpg_entries:
        images_name.append(path + '/' + entry)
        labels.append(path)
def read_image(names, height=image_size, width=image_size):
    """Load, square-pad and resize images, appending them to ``images``.

    Each file is read with OpenCV, padded with black borders to a square
    (border sizes come from ``get_padding_size``), resized, and appended to
    the module-level ``images`` list in the order given.

    Args:
        names: Iterable of image file paths.
        height: Output height in pixels.
        width: Output width in pixels.

    Raises:
        IOError: If a file cannot be read as an image. Skipping it silently
            would leave ``images`` out of step with the label list.
    """
    for image_name in names:
        image = cv2.imread(image_name)
        if image is None:
            raise IOError('Cannot read image: %s' % image_name)
        top, bottom, left, right = get_padding_size(image)
        image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
        # cv2.resize expects dsize as (width, height), not (height, width).
        image = cv2.resize(image, (width, height))
        images.append(image)
# Gather (path, label) entries for both classes, own faces first.
for faces_dir in (my_faces_path, other_faces_path):
    read_data(faces_dir)

# Put each path next to its label in one row, so that a single row shuffle
# keeps the two aligned.
images_name = np.array(images_name)
labels = np.array(labels)
data = np.array([images_name, labels]).T
np.random.shuffle(data)

# Back to plain lists, now in shuffled order.
images_name = list(data[:, 0])
labels = list(data[:, 1])

# Decode the pictures in the shuffled order; read_image fills ``images``.
read_image(images_name)
images = np.array(images)

# One-hot encode: [1, 0] for the own-faces folder, [0, 1] for everything else.
one_hot = []
for label in labels:
    if label == my_faces_path:
        one_hot.append([1, 0])
    else:
        one_hot.append([0, 1])
labels = np.array(one_hot)

# Persist both arrays for the training step.
np.save('images.npy', images)
np.save('labels.npy', labels)
训练
使用卷积神经网络进行训练。
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import random
# Load the image and label arrays from disk.
image_data = np.load('images.npy')
image_label = np.load('labels.npy')

# Hold out 5% of the samples for testing; the split seed is drawn at random
# on every run.
train_data, test_data, train_label, test_label = train_test_split(
    image_data,
    image_label,
    test_size=0.05,
    random_state=random.randint(0, 100),
)

# Shape both sets as (N, 64, 64, 3) float32 and scale pixel values by 1/255.
train_images = train_data.reshape([train_data.shape[0], 64, 64, 3]).astype('float32') / 255.0
test_images = test_data.reshape([test_data.shape[0], 64, 64, 3]).astype('float32') / 255.0
tf.reset_default_graph()

# Inputs: a batch of 64x64 three-channel images and their two-class one-hot
# labels.
x = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name='x')
y = tf.placeholder(tf.float32, shape=[None, 2], name='y')

# Convolution block 1: 3x3 kernels, 3 -> 32 channels, then 2x2 max pooling
# with stride 2.
w1 = tf.Variable(tf.truncated_normal(shape=[3, 3, 3, 32], stddev=0.1), name='w1')
b1 = tf.Variable(tf.constant(0.1, shape=[32]), name='b1')
h_conv1 = tf.nn.relu(tf.nn.conv2d(input=x, filter=w1, strides=[1, 1, 1, 1], padding='SAME') + b1, name='h_conv1')
h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='h_pool1')

# Convolution block 2: 3x3 kernels, 32 -> 64 channels, then 2x2 max pooling
# with stride 2.
w2 = tf.Variable(tf.truncated_normal(shape=[3, 3, 32, 64], stddev=0.1), name='w2')
b2 = tf.Variable(tf.constant(0.1, shape=[64]), name='b2')
h_conv2 = tf.nn.relu(tf.nn.conv2d(input=h_pool1, filter=w2, strides=[1, 1, 1, 1], padding='SAME') + b2, name='h_conv2')
h_pool2 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='h_pool2')

# Fully connected layer: the pooled maps are flattened to 16 * 16 * 64
# features and mapped to 512 units.
w_fc1 = tf.Variable(tf.truncated_normal(shape=[16 * 16 * 64, 512], stddev=0.1), name='w_fc1')
b_fc1 = tf.Variable(tf.constant(0.1, shape=[512]), name='b_fc1')
h_pool2_flat = tf.reshape(h_pool2, [-1, 16 * 16 * 64], name='h_pool2_flat')
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1, name='h_fc1')

# Dropout keep probability. It defaults to 1.0 (dropout disabled) when it is
# not fed, so an evaluation that omits it still runs; training feeds a
# smaller value.
keep_prob = tf.placeholder_with_default(1.0, shape=[], name='keep_prob')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob, name='h_fc1_drop')

# Output layer. It must consume the dropout output; reading h_fc1 directly
# would leave dropout disconnected from the network.
w_fc2 = tf.Variable(tf.truncated_normal(shape=[512, 2], stddev=0.1), name='w_fc2')
b_fc2 = tf.Variable(tf.constant(0.1, shape=[2]), name='b_fc2')
logits = tf.matmul(h_fc1_drop, w_fc2) + b_fc2
y_conv = tf.nn.softmax(logits, name='y_conv')

# Compute the loss from the logits rather than from log(softmax(...)), which
# turns into NaN as soon as a probability underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits),
    name='cross_entropy')
train = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Accuracy: fraction of samples whose arg-max prediction equals the arg-max
# of the one-hot label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1), name='correct_prediction')
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch_size = 50
    num_epochs = 21
    # Only whole batches are used; a trailing partial batch is dropped.
    num_batch = train_images.shape[0] // batch_size
    for i in range(num_epochs):
        for j in range(num_batch):
            start = j * batch_size
            batch_data = train_images[start:start + batch_size]
            batch_label = train_label[start:start + batch_size]
            _, acc, loss = sess.run([train, accuracy, cross_entropy],
                                    feed_dict={x: batch_data, y: batch_label, keep_prob: 0.5})
            # Global step = batches completed so far across all epochs.
            step = i * num_batch + j
            if step % 200 == 0:
                print('Iter ', step, ' : acc = ', acc, ' loss = ', loss)
        # Evaluate on the held-out set once per epoch, with dropout disabled.
        print('Testing accuracy %g' % sess.run(accuracy, feed_dict={x: test_images, y: test_label, keep_prob: 1.0}))
    print("---Train end---")
    # Feed keep_prob explicitly so the final evaluation never depends on a
    # placeholder default.
    print('Testing accuracy %g' % sess.run(accuracy, feed_dict={x: test_images, y: test_label, keep_prob: 1.0}))
    saver = tf.train.Saver()
    saver.save(sess, './model/My_Model')
# Start from an empty default graph so the imported meta graph cannot collide
# with same-named nodes that may already exist in this process.
tf.reset_default_graph()
with tf.Session() as sess:
    # Rebuild the graph from the saved meta file, then load the latest
    # checkpoint's variable values into it.
    new_saver = tf.train.import_meta_graph('./model/My_Model.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint('./model/'))
    graph = tf.get_default_graph()
    # Look the tensors up by the names they were given when the graph was
    # built.
    x_new = graph.get_tensor_by_name('x:0')
    y_new = graph.get_tensor_by_name('y_conv:0')
    keep_prob_new = graph.get_tensor_by_name('keep_prob:0')
    pre_y = sess.run(y_new, feed_dict={x_new: test_images, keep_prob_new: 1.0})
    # Mean of the predicted class indices, i.e. the fraction of test images
    # predicted as class index 1. This is not an accuracy figure.
    print(np.mean(np.argmax(pre_y, axis=1)))