前言
2023-3-18 天小雨,午觉舒适程度5颗星。任务完成指数2颗星。续接上文:《MidiaPipe +stgcn(时空图卷积网络)实现人体姿态判断(单目标)》,我们这边需要实现的是一个多目标的检测,并且我们期望能够适用在家庭这里领域,因此,在前者算法改进的基础上,我们还需要加入这个人脸识别模块。调某度这些现成的开发的API当然可以,但是自己搭建价更高,能装13还不用小钱钱,数据牢牢在手,主打的就是一个安全。
同样的,我们还是按照以前的习惯,一个模块一个模块来进行说明,写完之后的话,再进行完整版本的开源,但是每一个独立的模块都是开源直接使用的,这个可以放心,也就是说按照本文给到项目结构和代码,是可以直接复现运行的。但是这里声明一下,这个项目是一个综合项目,有算法,有前端,有后端,技术体系覆盖vue , python(fastapi),java(SpringBoot)开发
这里开源的模块是算法模块。其他的模块不开源,因为这个也是我们技术团队做的一个大创项目,同时,这个项目也不算是个小项目,里面覆盖的技术不少,需要同时具备Python和Java开发能力以及以Python为体现的人工智能应用开发能力,所以就算开源全部模块,对于读者的要求也是比较高的。
之后我要说明的一点是,本篇文章中的相当一部分代码,其实是通过NewBing和ChatGPT生成的,使用的第三方库是dlib,我的工作是负责将这些代码进行整合、修改、测试调整。
原理
首先这个人脸识别的原理的话,这个说实话相关的博文已经烂大街了,所以这里就简简单单的过一下。这里的话就主要说到我们这个自己用到的这个方法,因为相关的方法其实挺多了。那么原理的话首先是这个样子的:
首先这个是一张图片:
然后,我们识别出它的脸部,也就是这样:
然后呢我们提取出脸部的68个关键点
然后呢,我们在这个基础上提取出长度为128的特征向量:
之后呢,我们进行对比的时候呢,先识别出一个人A,按照同样的流程拿到A的128维特征向量,然后呢,我们把这个向量和我们已经存起来了的向量进行一个对比,也就是计算一下欧氏距离,然后呢,得到一组距离,最后,如果其中最小的距离小于设定的阈值,我们取距离最小的那个对应的名字就好了。
流程呢大概就是这样的:
项目结构
都说了是个大工厂哈,所以项目结构是这样的:
这里的话有三个文件注意一下:
然后这个依赖模型的话,可以在这里拿到,也可以自己去下载:
链接:https://pan.baidu.com/s/1PQOj_gPkTN9Od1PQDbeCtw
提取码:6666
其他的自己按照结构去创建就好了。
编码
okey, 现在流程搞清楚了,那么就开始组装代码了。
配置
首先是我们的这个配置项。
import dlib
import os
"""
人脸识别配置
"""
def _list_person_folders(root):
    """Return the sub-directory names of *root* in a stable, natural order.

    ``os.listdir`` gives no ordering guarantee, but the position of a folder in
    this list is used as the person number elsewhere, so the order has to be
    deterministic.  A trailing number is compared numerically, which puts
    ``person_2`` before ``person_10``.  Plain files are skipped because every
    entry is later opened as a folder of images.
    """
    def order(name):
        stem = name.rstrip('0123456789')
        suffix = name[len(stem):]
        return (stem, int(suffix) if suffix else -1)

    folders = [
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    ]
    return sorted(folders, key=order)


class FACE_FILE(object):
    """Paths of the model files and data folders used by the face module.

    All paths are relative to the working directory of the process.
    ``imgs_folder_path`` is computed once, when this class body is executed.
    """
    # dlib 68-point landmark model and 128-D face descriptor model.
    shape_predictor_path = 'alg/faceRec/data/data_dlib/shape_predictor_68_face_landmarks.dat'
    recognition_model_path = 'alg/faceRec/data/data_dlib/dlib_face_recognition_resnet_model_v1.dat'
    # CSV file holding one descriptor row per enrolled person.
    csv_base_path = 'alg/faceRec/data/csv/features.csv'
    # One sub-folder of cropped face images per person.
    faceData_path = 'alg/faceRec/data/faceData/'
    # Annotated preview images written while building descriptors.
    points_faceData_path = 'alg/faceRec/data/faceData_points/'
    # Text file with one person name per line.
    faceName_path = 'alg/faceRec/data/faceName.txt'
    # Names of the person folders found under faceData_path.
    imgs_folder_path = _list_person_folders(faceData_path)
# Runtime settings shared by the collection, enrolment and recognition code.
FACE_CONFIG = dict(
    # Image collection from the camera stops once this many crops are saved.
    max_collection_image=50,
    # When True, the 68 landmark points are drawn on the preview images.
    get_points_faceData_flag=True,
    # When True, descriptors are rebuilt for everybody; otherwise only for
    # the indices listed in "face_needTo_update".
    import_all_features_flag=True,
    # Indices of the persons to rebuild; numbering starts at 0.
    face_needTo_update=list(range(1, 2)),
    # Number of person folders found when the configuration was loaded.
    num_of_person_in_lib=len(FACE_FILE.imgs_folder_path),
    # Largest Euclidean distance that still counts as a match.
    recognition_threshold=0.43,
    # dlib models, loaded once and reused by every module.
    predictor=dlib.shape_predictor(FACE_FILE.shape_predictor_path),
    recognition_model=dlib.face_recognition_model_v1(FACE_FILE.recognition_model_path),
    detector=dlib.get_frontal_face_detector(),
)
# Object-detection configuration (no settings are defined yet).
DECTION_CONFIG = dict()
主控函数
之后的,我们先来看到我们的这个主控函数是如何操作的。
我们这边的任何操作,都只能在主控文件同级目录下进行调用,原因无他,这个路径的问题,必须要进行一个统一。
import cv2 as cv
from client.server.alg.faceRec.buildFace import BuildFace
from client.server.alg.faceRec.collection import Collection
from client.server.alg.faceRec.detectFace import DetectFace
def collection_face():
    """Collect face crops from the default webcam (device 0).

    The camera and any OpenCV windows are released even when collection
    fails part-way through, so the device is not left locked.
    """
    cam = cv.VideoCapture(0)
    try:
        Collection().collection_cramer(cam)
    finally:
        cam.release()
        cv.destroyAllWindows()
    print("采集完毕,程序退出!!")
def build_face():
    """Rebuild the stored descriptors for every person in the library."""
    BuildFace().building_all()
def detect_face():
    """Run live face recognition on the default webcam (device 0).

    ``DetectFace()`` loads the descriptor and name files in its constructor,
    so it is created inside the ``try`` block: if that loading or the
    recognition loop raises, the camera is still released.
    """
    cam = cv.VideoCapture(0)
    try:
        DetectFace().detect_from_cam(cam)
    finally:
        cam.release()
        cv.destroyAllWindows()
if __name__ == "__main__":
    # Enable exactly one stage per run:
    # collection_face()
    # build_face()
    detect_face()
调用的话其实非常简单,所以非常容易就可以做整合,用在具体的功能模块当中。
人脸采集模块
接下来就是我们的这个采集模块,作用很简单,就是读取到视频流,然后呢,去识别到头像,然后裁剪,然后保存在目录下:
"""
负责收集人脸关键点位
"""
import cv2 as cv
import time
import os
from client.server.config import FACE_FILE,FACE_CONFIG
class Collection(object):
    """Detect faces in frames and save the cropped faces to disk.

    Two entry points are provided:

    1. ``collection_cramer`` reads frames from an OpenCV ``VideoCapture``.
    2. ``collection_images`` processes already decoded images.

    Both draw a box around every detected face on the processed frame.
    """

    def __init__(self):
        self.start_time = 0  # timestamp of the previous get_fps() call
        self.fps = 0
        self.image = None  # frame currently being processed
        self.face_img = None  # crop of the face currently being handled
        self.face_num = 0
        self.last_face_num = 0
        self.face_num_change_flag = False
        self.quit_flag = False  # set when "q" is pressed
        self.buildNewFolder = False  # True once a target folder exists
        self.save_flag = False  # toggled by the "s" key
        self.face_flag = False
        self.img_num = 0  # number of crops written so far
        self.collect_face_data = True
        # Reset by face_detecting() whenever the face count changes; it is
        # initialised here so that it exists before the first change.
        self.check_times = 0
        self._images_dir = None  # folder the next crop is written to
        self._auto_person_dir = None  # person folder created by this object

    def get_fps(self):
        """Update ``self.fps`` and draw it on the current frame.

        The value is drawn in red below 15 fps and in green otherwise.
        """
        now = time.time()
        elapsed = now - self.start_time
        self.start_time = now
        # Two calls within one clock tick would divide by zero; keep the
        # previous value in that case.
        if elapsed > 0:
            self.fps = 1.0 / elapsed
        color = (0, 0, 255) if self.fps < 15 else (0, 255, 0)
        cv.putText(self.image, str(round(self.fps, 2)), (20, 50),
                   cv.FONT_HERSHEY_DUPLEX, 1, color)

    def _crop_face(self, left, right, top, bottom):
        """Return an independent copy of the given box of the current frame.

        The box is clamped to the frame first because a negative start index
        would make the slice wrap around and come back empty.  A copy is
        returned so that annotations drawn on the frame afterwards do not show
        up in the saved image.
        """
        height, width = self.image.shape[:2]
        top, bottom = max(top, 0), min(bottom, height)
        left, right = max(left, 0), min(right, width)
        return self.image[top:bottom, left:right].copy()

    def _annotate_face(self, left, right, top, bottom):
        """Draw the face box (and a label while collecting) on the frame."""
        cv.rectangle(self.image, (left, top), (right, bottom), (0, 0, 255))
        if self.collect_face_data:
            cv.putText(self.image, "Face", (int((left + right) / 2) - 50, bottom + 20),
                       cv.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255))

    def _write_face_image(self):
        """Write ``self.face_img`` as the next numbered PNG of the target folder."""
        if self.face_img is None or self.face_img.size == 0:
            return
        folder = self._images_dir
        if folder is None:
            # buildNewFolder was set without going through this class:
            # fall back to the most recently numbered person folder.
            folder = FACE_FILE.faceData_path + 'person_{}'.format(
                FACE_CONFIG.get("num_of_person_in_lib") - 1)
        cv.imwrite(os.path.join(folder, '{}.png'.format(self.img_num)), self.face_img)
        self.img_num += 1

    def _save_if_armed(self):
        """Save the current crop when saving is switched on and a folder exists."""
        if self.collect_face_data and self.save_flag and self.buildNewFolder:
            self._write_face_image()

    def _start_new_person_folder(self):
        """Create the next ``person_<n>`` folder and make it the save target."""
        person_index = FACE_CONFIG.get("num_of_person_in_lib")
        folder = FACE_FILE.faceData_path + 'person_{}'.format(person_index)
        os.makedirs(folder, exist_ok=True)
        FACE_CONFIG["num_of_person_in_lib"] = person_index + 1
        print("新文件夹建立成功!!")
        self._auto_person_dir = self._images_dir = folder
        self.buildNewFolder = True

    def _handle_key(self, key):
        """React to one key code: "s" toggles saving, "n" starts a new person
        folder (both only while collecting) and "q" requests to quit."""
        if self.collect_face_data:
            if key == ord('s'):
                self.save_flag = not self.save_flag
            if key == ord('n'):
                self._start_new_person_folder()
        if key == ord('q'):
            self.quit_flag = True

    def save_face_image(self, build_path):
        """Save the current face crop.

        :param build_path: folder to save into.  When empty or ``None`` a new
            ``person_<n>`` folder is created on the first call and reused by
            later calls on the same object.
        """
        if build_path:
            target_dir = build_path
            os.makedirs(target_dir, exist_ok=True)
        else:
            if self._auto_person_dir is None:
                person_index = FACE_CONFIG.get("num_of_person_in_lib")
                new_dir = FACE_FILE.faceData_path + 'person_{}'.format(person_index)
                if not os.path.exists(new_dir):
                    os.makedirs(new_dir)
                    FACE_CONFIG["num_of_person_in_lib"] = person_index + 1
                    print("存放人脸数据的文件夹创建成功!!!")
                self._auto_person_dir = new_dir
            target_dir = self._auto_person_dir
        self._images_dir = target_dir
        self.buildNewFolder = True
        if self.collect_face_data:
            self._write_face_image()

    def key_scan(self, key):
        """Save the current crop if saving is active, then handle ``key``.

        :param key: key code as returned by ``cv.waitKey``.
        """
        self._save_if_armed()
        self._handle_key(key)

    def face_detecting(self):
        """Detect the faces in ``self.image``.

        Updates ``face_num``, ``face_num_change_flag`` and ``face_flag``.

        :return: ``(face_location, all_face_location)`` or ``None`` when no
            face was found.  ``face_location`` holds the detector rectangles
            and ``all_face_location`` the enlarged ``[left, right, top,
            bottom]`` boxes, which may extend beyond the frame.
        """
        faces = FACE_CONFIG.get("detector")(self.image, 0)
        self.face_num = len(faces)
        self.face_num_change_flag = self.face_num != self.last_face_num
        if self.face_num_change_flag:
            self.check_times = 0
            self.last_face_num = self.face_num
        self.face_flag = self.face_num != 0
        if not self.face_flag:
            return None

        face_location = []
        all_face_location = []
        for face in faces:
            face_location.append(face)
            w, h = (face.right() - face.left()), (face.bottom() - face.top())
            # Enlarge the box: a quarter of the width on each side, half the
            # height above and a quarter of the height below.
            all_face_location.append([
                face.left() - w // 4,
                face.right() + w // 4,
                face.top() - h // 2,
                face.bottom() + h // 4,
            ])
        return face_location, all_face_location

    def collection_cramer(self, camera, show=True):
        """Collect face crops from a camera or video stream.

        Keys: "n" creates a new person folder, "s" toggles saving and "q"
        quits.  The loop also ends when a frame cannot be read or when
        ``max_collection_image`` crops have been saved.  The capture is
        released when the method returns.

        :param camera: an opened ``cv.VideoCapture``.
        :param show: whether to display the annotated frames in a window.
        :return: None
        """
        try:
            while camera.isOpened() and not self.quit_flag:
                ok, self.image = camera.read()
                if not ok:
                    # No more frames (end of a video file or a lost device);
                    # retrying here would loop forever.
                    break
                key = cv.waitKey(1)
                res = self.face_detecting()
                if res is not None:
                    _, all_face_location = res
                    # Crop every face before anything is drawn on the frame.
                    crops = [self._crop_face(*box) for box in all_face_location]
                    for (left, right, top, bottom), crop in zip(all_face_location, crops):
                        self.face_img = crop
                        self._annotate_face(left, right, top, bottom)
                        self._save_if_armed()
                # Handle the key once per frame, so that a toggle is not
                # applied once per face and "q" works without a visible face.
                self._handle_key(key)
                self.get_fps()
                if show:
                    cv.namedWindow('camera', 0)
                    cv.imshow('camera', self.image)
                if self.img_num >= FACE_CONFIG.get("max_collection_image"):
                    print("采集完毕!!!")
                    break
        finally:
            camera.release()
            cv.destroyAllWindows()

    def collection_images(self, images, save_path=None):
        """Collect face crops from already decoded images.

        :param images: iterable of images as read by OpenCV.
        :param save_path: folder to save the crops into.  When ``None`` the
            crops go to a newly created ``person_<n>`` folder.
        :return: None
        """
        for image in images:
            self.image = image
            res = self.face_detecting()
            if res is None:
                continue
            _, all_face_location = res
            # Crop every face before anything is drawn on the image.
            crops = [self._crop_face(*box) for box in all_face_location]
            for (left, right, top, bottom), crop in zip(all_face_location, crops):
                self.face_img = crop
                self._annotate_face(left, right, top, bottom)
                self.save_face_image(save_path)
特征提取
这个特征向量的获取的话,其实是分两个部分的,当然最后我们暴露出来就只是一个方法罢了。这个特征提取就是建立连接的过程,也就是存储这个特征。
"""
负责读取采集到的人脸图像,然后去构建人脸对应的信息
"""
import cv2 as cv
import os
import numpy as np
import csv
from tqdm import tqdm
import shutil
from client.server.config import FACE_FILE,FACE_CONFIG
class BuildFace():
    """Turn the collected face images into descriptors stored in a CSV file.

    The CSV file holds one row per person; the row position is the person
    index.
    """

    def write2csv(self, data, mode):
        """Write one descriptor row to the CSV file.

        :param data: the values of the row.
        :param mode: file mode, e.g. ``'w'`` to start over or ``'a+'`` to append.
        :return: None
        """
        with open(FACE_FILE.csv_base_path, mode, newline='') as wf:
            csv.writer(wf).writerow(data)

    def get_features_from_csv(self):
        """Read all rows of the CSV file, converting the first 128 columns
        of every row to ``float``.  Empty rows are skipped.

        :return: list of rows.
        """
        features_in_csv = []
        with open(FACE_FILE.csv_base_path, 'r', newline='') as rf:
            for row in csv.reader(rf):
                if not row:
                    continue
                for i in range(0, 128):
                    row[i] = float(row[i])
                features_in_csv.append(row)
        return features_in_csv

    def save_select_in_csv(self, data, indexes=None):
        """Replace the rows of selected persons in the CSV file.

        :param data: new descriptor rows, in the same order as ``indexes``.
        :param indexes: person indices to replace; defaults to the
            ``face_needTo_update`` setting.
        :return: None
        :raises ValueError: if ``data`` and ``indexes`` differ in length.
        :raises IndexError: if an index has no row in the CSV file.
        """
        if indexes is None:
            indexes = FACE_CONFIG.get("face_needTo_update")
        indexes = list(indexes)
        if len(indexes) != len(data):
            raise ValueError(
                "got {} rows for {} person indices".format(len(data), len(indexes)))

        features_in_csv = self.get_features_from_csv()
        # Apply the replacements before the file is opened for writing, so a
        # bad index cannot leave a truncated file behind.
        for person_index, row in zip(indexes, data):
            features_in_csv[person_index] = row
        with open(FACE_FILE.csv_base_path, 'w', newline='') as wf:
            csv.writer(wf).writerows(features_in_csv)
        print("csv文件更新完成!!")

    def get_128_features(self, person_index):
        """Compute the descriptor of one person.

        Every face found in the person's images contributes one descriptor;
        the element-wise median of all of them is returned.  An annotated
        preview of each image is written to the points folder.

        :param person_index: index of the person folder to process.
        :return: the median descriptor as a numpy array.
        :raises ValueError: if no face was found in any image of the folder.
        """
        imgs_folder = FACE_FILE.imgs_folder_path[person_index]
        points_faceImage_path = FACE_FILE.points_faceData_path + imgs_folder
        imgs_path = FACE_FILE.faceData_path + imgs_folder + '/'
        list_imgs = os.listdir(imgs_path)

        # Start from an empty preview folder.
        if os.path.exists(points_faceImage_path):
            shutil.rmtree(points_faceImage_path)
        os.makedirs(points_faceImage_path)
        print("人脸点图文件夹建立成功!!")

        detector = FACE_CONFIG.get("detector")
        predictor = FACE_CONFIG.get("predictor")
        recognition_model = FACE_CONFIG.get("recognition_model")
        draw_points = FACE_CONFIG.get("get_points_faceData_flag") == True

        features = []
        with tqdm(total=len(list_imgs)) as pbar:
            pbar.set_description(str(imgs_folder))
            for j, file_name in enumerate(list_imgs):
                image = cv.imread(os.path.join(imgs_path, file_name))
                # imread returns None for files it cannot decode.
                if image is not None:
                    # Draw on a copy: the descriptor has to be computed from
                    # the untouched image, not from one with markers on it.
                    preview = image.copy()
                    faces = detector(image, 1)
                    for face in faces:
                        shape = predictor(image, face)
                        features.append(
                            list(recognition_model.compute_face_descriptor(image, shape)))
                        w, h = (face.right() - face.left()), (face.bottom() - face.top())
                        left, right = face.left() - w // 4, face.right() + w // 4
                        top, bottom = face.top() - h // 2, face.bottom() + h // 4
                        cv.rectangle(preview, (left, top), (right, bottom), (0, 0, 255))
                        if draw_points:
                            for p in range(0, 68):
                                cv.circle(preview, (shape.part(p).x, shape.part(p).y),
                                          2, (0, 0, 255))
                    if len(faces) != 0:
                        cv.imwrite(points_faceImage_path + '/{}.png'.format(j), preview)
                pbar.update(1)

        if not features:
            raise ValueError(
                "no face found in any image of folder {!r}".format(imgs_folder))
        return np.median(np.array(features), axis=0)

    def building_form_config(self):
        """Rebuild either all persons or only the configured ones, depending
        on the ``import_all_features_flag`` setting."""
        if FACE_CONFIG.get("import_all_features_flag") == True:
            self.building_all()
        else:
            self.building_select(FACE_CONFIG.get("face_needTo_update"))

    def building_all(self):
        """Recompute the descriptors of all persons and rewrite the CSV file.

        The file is only rewritten after every person was processed, so a
        failure does not leave a partially written file.

        :raises ValueError: if the library contains no person.
        """
        total = FACE_CONFIG.get("num_of_person_in_lib")
        all_features = [self.get_128_features(person_index=i) for i in range(total)]
        if not all_features:
            raise ValueError("the face library contains no person to build")
        with open(FACE_FILE.csv_base_path, 'w', newline='') as wf:
            csv.writer(wf).writerows(all_features)

    def building_select(self, peoples):
        """Recompute the descriptors of selected persons.

        :param peoples: person indices to update, e.g. ``[0, 2, 4]``.
        :return: None
        """
        peoples = list(peoples)
        select_res = [self.get_128_features(person_index=i) for i in peoples]
        # Pass the indices along so the rows that get replaced are the ones
        # that were just recomputed.
        self.save_select_in_csv(select_res, peoples)
识别
识别的话,这里也是有两个方法,一个是直接给你一张图片,然后返回里面的人脸名称,还有一个就是直接读取视频并显示,这个看你怎么用,如果你是做QT桌面开发,或者web开发并且需要持续显示视频的话,用这个方法不错,但是算力就上去了,我这边还需要跑别的算法,只有当那个算法执行完毕之后,并且触发了相应机制,我才会进行人脸识别。
"""
负责
"""
import numpy as np
import csv
import cv2 as cv
from client.server.config import FACE_CONFIG,FACE_FILE
from client.server.alg.faceRec.collection import Collection
class DetectFace(Collection):
    """Recognise faces by comparing their descriptors with the stored ones.

    A face is given the name of the stored descriptor with the smallest
    Euclidean distance, provided that distance is below the
    ``recognition_threshold`` setting.
    """

    def __init__(self):
        super(DetectFace, self).__init__()
        self.available_max_face_num = 50
        # Recognition never collects face images.
        self.collect_face_data = False
        # Frames processed since the number of faces last changed.
        self.check_times = 0
        # Descriptors of all persons in the library.
        self.all_features = []
        # Names of all persons in the library, in the same order.
        self.all_name = []
        # Boxes of all faces in the current frame.
        self.all_face_location = None
        # Centre point of the face currently being handled.
        self.middle_point = None
        # Distance of the current face centre to each face centre of the
        # previous frame.
        self.last_now_middlePoint_eDistance = [66666] * (self.available_max_face_num + 10)
        self.init_process()

        slots = self.available_max_face_num
        # Distances of each face to all stored descriptors.
        self.all_e_distance = [[] for _ in range(slots)]
        # Name assigned to each face slot; a string so it can always be drawn.
        self.person_name = ["Unknown" for _ in range(slots)]
        # Descriptors sampled for each face during the first five frames.
        self.check_features_from_cam = [[] for _ in range(slots)]
        # Centre point of each face in the previous frame.
        self.last_frame_middle_point = [[] for _ in range(slots)]

    def get_feature_in_csv(self):
        """Load all stored descriptors from the CSV file into ``all_features``."""
        with open(FACE_FILE.csv_base_path, 'r', newline='') as rf:
            for row in csv.reader(rf):
                if not row:
                    continue
                for i in range(128):
                    row[i] = float(row[i])
                self.all_features.append(row)

    def get_faceName(self):
        """Load the person names (one per line) into ``all_name``."""
        with open(FACE_FILE.faceName_path, 'r', encoding='utf-8') as f:
            for line in f:
                # Strip only the line ending, so a last line without a
                # trailing newline keeps its final character.
                self.all_name.append(line.rstrip('\r\n'))
        print("已经录入的人名有:{}".format(self.all_name))

    def calculate_EuclideanDistance(self, feature1, feature2):
        """Return the Euclidean distance between two vectors."""
        np_feature1 = np.array(feature1)
        np_feature2 = np.array(feature2)
        return np.sqrt(np.sum(np.square(np_feature1 - np_feature2)))

    def meadian_filter(self, the_list, num_of_data):
        """Return the element-wise mean after dropping the largest and the
        smallest value of every element.

        :param the_list: sequence of equally long vectors.
        :param num_of_data: number of vectors in ``the_list``; must be
            greater than 2.
        :return: numpy array.
        """
        np_list = np.array(the_list)
        feature_max = np.max(np_list, axis=0)
        feature_min = np.min(np_list, axis=0)
        return (np.sum(np_list, axis=0) - feature_max - feature_min) / (num_of_data - 2)

    def middle_filter(self, the_list):
        """Return the element-wise median of a sequence of vectors."""
        return np.median(np.array(the_list), axis=0)

    def init_process(self):
        """Load the stored descriptors and the person names."""
        self.get_feature_in_csv()
        self.get_faceName()

    def track_link(self):
        """Match the current face to a face of the previous frame.

        The slot whose previous centre point is closest to
        ``self.middle_point`` is chosen and its stored centre is updated.

        :return: index of the matched face slot.
        """
        for index in range(self.face_num):
            self.last_now_middlePoint_eDistance[index] = self.calculate_EuclideanDistance(
                self.middle_point, self.last_frame_middle_point[index])
        this_face_index = self.last_now_middlePoint_eDistance.index(
            min(self.last_now_middlePoint_eDistance))
        self.last_frame_middle_point[this_face_index] = self.middle_point
        return this_face_index

    def _draw_name(self, name, left, right, bottom):
        """Draw ``name`` below a face box on the current frame."""
        cv.putText(self.image, name, (int((left + right) / 2) - 50, bottom + 20),
                   cv.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)

    def _match_name(self, slot):
        """Name the face in ``slot`` from its sampled descriptors.

        The samples are consumed.  Returns ``"Unknown"`` when there are no
        samples, no stored descriptors, or no match below the threshold.
        """
        samples = self.check_features_from_cam[slot]
        self.check_features_from_cam[slot] = []
        if not samples or not self.all_features:
            return "Unknown"
        features_after_filter = self.middle_filter(samples)
        # Compare with every loaded row, not with the folder count, so the
        # comparison cannot run past the rows that were actually loaded.
        distances = [self.calculate_EuclideanDistance(known, features_after_filter)
                     for known in self.all_features]
        self.all_e_distance[slot] = distances
        best = int(np.argmin(distances))
        if distances[best] < FACE_CONFIG.get("recognition_threshold"):
            return self.all_name[best]
        return "Unknown"

    def detect_from_image(self, image):
        """Recognise the faces in a single image.

        A box is drawn on ``image`` around every detected face.

        :param image: image as read by OpenCV.
        :return: list with the names of the faces that matched a stored
            descriptor; faces without a match are left out.
        """
        self.image = image
        names = []
        res = self.face_detecting()
        if res is None or not self.all_features:
            return names

        faces, self.all_face_location = res
        predictor = FACE_CONFIG.get("predictor")
        recognition_model = FACE_CONFIG.get("recognition_model")
        threshold = FACE_CONFIG.get("recognition_threshold")
        # Descriptors are computed from a copy without annotations.
        clean = self.image.copy()
        for face, (left, right, top, bottom) in zip(faces, self.all_face_location):
            self.middle_point = [(left + right) / 2, (top + bottom) / 2]
            self.face_img = self.image[top:bottom, left:right]
            cv.rectangle(self.image, (left, top), (right, bottom), (0, 0, 255))
            shape = predictor(clean, face)
            the_features_from_image = list(
                recognition_model.compute_face_descriptor(clean, shape))
            e_distance = [self.calculate_EuclideanDistance(the_features_from_image, known)
                          for known in self.all_features]
            best = int(np.argmin(e_distance))
            if e_distance[best] < threshold:
                names.append(self.all_name[best])
        return names

    def _recognise_face(self, slot, face_rect, clean):
        """Handle the face in ``slot`` of the current frame.

        During the first five frames after the face count changed a
        descriptor is sampled per frame; on the sixth frame the face is named
        from the median of its samples; afterwards the stored name is drawn
        for the tracked face.
        """
        left, right, top, bottom = self.all_face_location[slot]
        self.middle_point = [(left + right) / 2, (top + bottom) / 2]
        self.face_img = self.image[top:bottom, left:right]
        cv.rectangle(self.image, (left, top), (right, bottom), (0, 0, 255))

        if self.face_num_change_flag or self.check_times <= 5:
            if self.check_times < 5:
                shape = FACE_CONFIG.get("predictor")(clean, face_rect)
                the_features_from_cam = list(
                    FACE_CONFIG.get("recognition_model").compute_face_descriptor(clean, shape))
                if self.check_times == 0:
                    # First frame: the detector order defines the slots.
                    self.check_features_from_cam[slot].append(the_features_from_cam)
                    self.last_frame_middle_point[slot] = self.middle_point
                else:
                    # Later frames: map the face back to its first-frame slot.
                    this_face_index = self.track_link()
                    self.check_features_from_cam[this_face_index].append(the_features_from_cam)
            elif self.check_times == 5:
                self.person_name[slot] = self._match_name(slot)
                self._draw_name(self.person_name[slot], left, right, bottom)
        else:
            this_face_index = self.track_link()
            self._draw_name(self.person_name[this_face_index], left, right, bottom)

    def detect_from_cam(self, camera):
        """Recognise faces live from a camera or video stream.

        Press "q" to quit.  The loop also ends when a frame cannot be read.
        The capture is released when the method returns.

        :param camera: an opened ``cv.VideoCapture``.
        :return: None
        """
        try:
            while camera.isOpened() and not self.quit_flag:
                val, self.image = camera.read()
                if not val:
                    # No more frames; retrying here would loop forever.
                    break
                key = cv.waitKey(1)
                res = self.face_detecting()
                if res is not None:
                    face, self.all_face_location = res
                    if self.face_num_change_flag:
                        # The number of faces changed: sample again.  This is
                        # done once per frame, before the faces are handled,
                        # so no sample of this frame is thrown away.
                        self.check_times = 0
                        self.last_now_middlePoint_eDistance = [
                            66666 for _ in range(self.available_max_face_num)]
                        for z in range(self.available_max_face_num):
                            self.check_features_from_cam[z] = []
                    # Descriptors are computed from a copy without annotations.
                    clean = self.image.copy()
                    for i in range(self.face_num):
                        self._recognise_face(i, face[i], clean)
                    self.check_times += 1
                    for j in range(self.available_max_face_num):
                        self.all_e_distance[j] = []
                # collect_face_data is False here, so "n" and "s" have no
                # effect; only "q" (quit) is acted upon.  Handled on every
                # frame so quitting also works while no face is visible.
                self.key_scan(key)
                self.get_fps()
                cv.namedWindow('camera', 0)
                cv.imshow('camera', self.image)
        finally:
            camera.release()
            cv.destroyAllWindows()
测试
最后的话就是这个测试。