使用YOLOv8等目标检测网络直接识别手势需要标注大量的数据集,效果也不好。本文主要基于一个手部关键点开源库mediapipe
,并用检测到的关键点基于一定策略实现手势识别。
目前支持的手势包括
1
2
3
3_variant
4
5
6
7
8
9
0
fist
good
bad
ok
agree
love
rubbish
despise
wish
swordfinger
Vulcan_salute
stop
Orchid1
Orchid2
catch
6+1
under_control
provocation
seduce
共计30种手势。还有一种手势 unknown
表示未识别。
先看效果:
代码部分:
需要安装的依赖:
pip install numpy
pip install opencv-python
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple mediapipe
新建一个utils.py
文件写入
import numpy as np
import itertools
# Test whether a point lies inside a triangle (after projecting it onto the
# triangle's plane). Name kept as-is ("piont") because other functions in
# this file call it by this spelling.
def piont_in_triangle(P, A, B, C):
    """Return True if P, projected onto the plane of ABC, lies inside triangle ABC.

    P, A, B, C: length-3 numpy arrays (3D coordinates).

    Bug fix: the original compared area(PAB) + area(PAC) against area(ABC),
    which also accepts points outside the triangle (e.g. slightly beyond
    vertex A). A same-side / barycentric sign test is correct for every
    position of P in the plane.
    """
    P = project_point_on_plane(P, A, B, C)
    n = np.cross(B - A, C - A)  # triangle plane normal
    # P is inside iff it lies on the same side of all three directed edges.
    d1 = np.dot(np.cross(B - A, P - A), n)
    d2 = np.dot(np.cross(C - B, P - B), n)
    d3 = np.dot(np.cross(A - C, P - C), n)
    return bool((d1 >= 0 and d2 >= 0 and d3 >= 0) or
                (d1 <= 0 and d2 <= 0 and d3 <= 0))
# Test whether a segment crosses the plane defined by three points, with the
# crossing point inside the triangle those points form.
def segments_cross_planes(p1, p2, A, B, C):
    """Return True if segment p1->p2 pierces the plane of triangle ABC
    strictly between its endpoints, at a point inside the triangle.

    Bug fix: the original solved a hand-built 3x3 Cramer system that is
    singular -- and therefore wrongly returned False -- whenever
    ``p1[0] == p2[0]``. The parametric form below has no such blind spot.
    """
    n_vec = np.cross(A - B, A - C)      # plane normal
    direction = p2 - p1
    denom = np.dot(n_vec, direction)
    if denom == 0:
        # Segment parallel to the plane (or zero-length): no crossing.
        return False
    # Parameter of the intersection point along p1->p2.
    t = np.dot(n_vec, A - p1) / denom
    if not (0 < t < 1):
        # Intersection lies outside the open segment (matches the original
        # strict dot-product sign test).
        return False
    P = p1 + t * direction
    return piont_in_triangle(P, A, B, C)
# Angle between two vectors, in degrees.
def angle_between_vectors(v1, v2):
    """Return the angle between v1 and v2 in degrees (0 if either is zero)."""
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        # Degenerate input: at least one zero vector -> define the angle as 0.
        return 0
    cosine = np.clip(np.dot(v1, v2) / norm_product, -1, 1)  # clamp float noise
    return np.degrees(np.arccos(cosine))
# Project a point onto the plane defined by three other points.
def project_point_on_plane(P, A, B, C):
    """Return the orthogonal projection of P onto the plane through A, B, C.

    If A, B, C are collinear (no unique plane), P is returned unchanged.

    Cleanup: the original divided the signed distance by ``||normal||**2``
    *after* normalising the normal -- that factor is 1 for a unit vector,
    so it has been dropped.
    """
    # Plane normal from two edge vectors.
    normal = np.cross(B - A, C - A)
    normal_len = np.linalg.norm(normal)
    if normal_len == 0:
        # Degenerate triangle: nothing to project onto.
        return P
    normal = normal / normal_len          # unit normal
    # Signed distance from P to the plane along the unit normal.
    distance = np.dot(P - A, normal)
    return P - distance * normal
# Distance from a point to the plane spanned by three other points.
def distance_point_plane(P, A, B, C):
    """Return the distance from P to the plane through A, B and C."""
    foot = project_point_on_plane(P, A, B, C)
    return np.linalg.norm(P - foot)
# Distance from a point to the (infinite) line through two other points.
def distance_point_line(P, A, B):
    """Return the distance from point P to the line through A and B.

    2D inputs are promoted to 3D (z = 0) so ``np.cross`` yields a vector.
    If A == B the line is degenerate and the point-to-point distance is used.

    Bug fix: the original early-exit guard tested ``norm(P - B) == 0`` twice;
    it now correctly checks whether P coincides with either A or B.
    """
    if P.shape[0] == 2:
        P = np.append(P, [0])
    if A.shape[0] == 2:
        A = np.append(A, [0])
    if B.shape[0] == 2:
        B = np.append(B, [0])
    if np.linalg.norm(A - B) == 0:
        # Degenerate "line": fall back to the distance to that single point.
        return np.linalg.norm(P - B)
    elif np.linalg.norm(P - A) == 0 or np.linalg.norm(P - B) == 0:
        # P sits exactly on one of the defining points.
        return 0
    else:
        # |cross| is the parallelogram area; dividing by base |AB| leaves the
        # height, i.e. the point-line distance.
        return abs(np.linalg.norm(np.cross(P - A, P - B)) / np.linalg.norm(A - B))
# Find the feet of the common perpendicular of lines p1p2 and p3p4, and
# report whether both feet fall strictly inside their respective segments.
def intersection_in_line_segment(p1, p2, p3, p4):
    """Return True if the common perpendicular of lines p1->p2 and p3->p4
    meets both lines strictly inside the segments [p1, p2] and [p3, p4].

    Parallel or degenerate (zero-length) lines return False.

    Bug fix: the original built two 3x3 Cramer systems that become singular
    -- and wrongly returned False -- whenever a segment had equal x at both
    endpoints. The standard closest-points-between-two-lines parametrisation
    below computes the same perpendicular feet without that blind spot.
    """
    d1 = p2 - p1                 # direction of line 1
    d2 = p4 - p3                 # direction of line 2
    r = p1 - p3
    a = np.dot(d1, d1)
    b = np.dot(d1, d2)
    c = np.dot(d2, d2)
    d = np.dot(d1, r)
    e = np.dot(d2, r)
    denom = a * c - b * b        # zero iff the lines are parallel/degenerate
    if denom == 0:
        return False
    t1 = (b * e - c * d) / denom  # parameter of the foot on line p1->p2
    t2 = (a * e - b * d) / denom  # parameter of the foot on line p3->p4
    # Strictly inside both segments, endpoints excluded -- this matches the
    # original ``dot(p - foot, q - foot) < 0`` sign tests.
    return bool(0 < t1 < 1 and 0 < t2 < 1)
# Classify the bend state of a single finger.
def process_finger(P0, P1, P2, P3):
    """Classify one finger's pose from the wrist P0 and joints P1..P3
    (given in order from the palm outward).

    Returns:
        0  -- straight
        1  -- half bent (upper joint bent, base joint straight)
        2  -- fully bent (both joints bent)
        -2 -- base joint bent, upper part straight
        -1 -- unclassified / odd pose
    """
    base_bend = angle_between_vectors(P1 - P0, P2 - P1)  # bend at the base joint
    tip_bend = angle_between_vectors(P2 - P1, P3 - P2)   # bend at the upper joint
    if (base_bend < 40 and tip_bend < 40) or base_bend + tip_bend < 50:
        return 0    # straight
    if base_bend < 40:
        return 1    # half bent (tip_bend >= 40 is implied here)
    if tip_bend >= 40:
        return 2    # fully bent
    if base_bend >= 40:
        return -2   # base joint bent, upper part straight
    return -1       # odd / undefined pose
# Classify the overall hand gesture from 21 hand landmarks.
def process_hand(hand_cds, details=False):
    """Return the gesture name for one hand.

    Args:
        hand_cds: (21, 3) array of hand-landmark coordinates (wrist = 0,
            thumb tip = 4, index tip = 8, middle = 12, ring = 16,
            little = 20 -- presumably MediaPipe's landmark ordering; verify
            against the caller).
        details: when True, print per-finger states and intermediate values.

    Returns:
        One of the supported gesture name strings, or "unknown".
    """
    # Per-finger bend states: thumb, index, middle, ring, little.
    # See process_finger for the meaning of the codes (0/1/2/-2/-1).
    hand_state = [process_finger(hand_cds[0], hand_cds[2], hand_cds[3], hand_cds[4])
                  ,process_finger(hand_cds[0], hand_cds[5], hand_cds[6], hand_cds[7])
                  ,process_finger(hand_cds[0], hand_cds[9], hand_cds[10], hand_cds[11])
                  ,process_finger(hand_cds[0], hand_cds[13], hand_cds[14], hand_cds[15])
                  ,process_finger(hand_cds[0], hand_cds[17], hand_cds[18], hand_cds[19])]
    # Mean bone length of the hand skeleton; every distance threshold below
    # is normalised by it so the rules are scale-invariant.
    ids = [(0, 1), (1, 2), (2, 3), (3, 4),  # thumb
           (5, 6), (6, 7), (7, 8),  # index finger
           (9, 10), (10, 11), (11, 12),  # middle finger
           (13, 14), (14, 15), (15, 16),  # ring finger
           (17, 18), (18, 19), (19, 20),  # little finger
           (0, 5), (0, 9), (0, 13), (0, 17)]
    mean_bone_length = 0
    for id in ids:
        mean_bone_length += np.linalg.norm(hand_cds[id[0]] - hand_cds[id[1]])
    mean_bone_length /= len(ids)
    # Minimum distance between the thumb tip (4) and any other finger joint,
    # in 3D and in the 2D image plane (both normalised).
    other_joints = [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
    thumb_dis, thumb_dis_2d = [], []
    for joint in other_joints:
        thumb_dis.append(np.linalg.norm(hand_cds[4] - hand_cds[joint]))
        thumb_dis_2d.append(np.linalg.norm(hand_cds[4,:2] - hand_cds[joint,:2]))
    min_thumb_dis = min(thumb_dis) / mean_bone_length
    min_thumb_dis_2d = min(thumb_dis_2d) / mean_bone_length
    # Hand direction: angle between the summed wrist->knuckle vectors and the
    # image "up" direction (image y grows downward, hence [0, -1, 0]).
    hand_vec = hand_cds[2] + hand_cds[5] + hand_cds[9] + hand_cds[13] + hand_cds[17] - 5 * hand_cds[0]
    hand_angle = angle_between_vectors(hand_vec, np.array([0, -1, 0]))
    # Palm normal (sum of two cross products across the palm) vs. the z axis.
    hand_normal = np.cross(hand_cds[5] - hand_cds[0], hand_cds[13] - hand_cds[0]) + \
                  np.cross(hand_cds[17] - hand_cds[0], hand_cds[9] - hand_cds[0])
    hand_phi = angle_between_vectors(hand_normal, np.array([0, 0, 1]))
    # Gesture "7" is the tricky one: tips 1-3 close with fingers 4+5 bent, or
    # tips 1-4 close with finger 5 bent, or all five tips close together.
    # dis_m[i] holds the pairwise 2D distances of the first 3+i fingertips.
    dis_m = [[], [], []]
    fingertips = [4, 8, 12, 16, 20]
    for i in range(3):
        for pair in itertools.combinations(fingertips[:3+i], 2):
            dis_m[i].append(np.linalg.norm(hand_cds[pair[0],:2] - hand_cds[pair[1],:2]))
    # Decision chain: several independent if/elif groups follow; each group
    # after the first is entered only while gesture_name is still "unknown".
    gesture_name = "unknown"
    # provocation: thumb tip pokes between two fingers (segment 3->4 crosses
    # one of the inter-finger triangles) while all four fingers curl near the
    # wrist.
    if (segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[5], hand_cds[6], hand_cds[9]) or\
        segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[6], hand_cds[9], hand_cds[10]) or\
        segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[9], hand_cds[10], hand_cds[13]) or\
        segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[10], hand_cds[13], hand_cds[14]) or\
        segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[13], hand_cds[14], hand_cds[17]) or\
        segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[14], hand_cds[17], hand_cds[18])) and\
        np.linalg.norm(hand_cds[8] - hand_cds[0]) / mean_bone_length < 2 and np.linalg.norm(hand_cds[12] - hand_cds[0]) / mean_bone_length < 2 and\
        np.linalg.norm(hand_cds[16] - hand_cds[0]) / mean_bone_length < 2 and np.linalg.norm(hand_cds[20] - hand_cds[0]) / mean_bone_length < 2 and \
        hand_state[1] in [1, 2, -2] and hand_state[2] in [1, 2, -2] and hand_state[3] in [1, 2, -2] and hand_state[4] in [1, 2, -2]:
        gesture_name = "provocation"
    # Closed-fist family: all four fingers bent, tips near each other and
    # near the wrist; the thumb then discriminates good/bad/agree/fist.
    elif hand_state[1] in [1, 2, -2] and hand_state[2] in [1, 2, -2] and hand_state[3] in [1, 2, -2] and hand_state[4] in [1, 2, -2] and np.linalg.norm(hand_cds[8,:2] - hand_cds[12,:2]) / mean_bone_length < 0.9 and\
        np.linalg.norm(hand_cds[12,:2] - hand_cds[16,:2]) / mean_bone_length < 0.9 and np.linalg.norm(hand_cds[16,:2] - hand_cds[20,:2]) / mean_bone_length < 0.9 and\
        angle_between_vectors(hand_cds[8] - hand_cds[7], hand_cds[7] - hand_cds[6]) + angle_between_vectors(hand_cds[7] - hand_cds[6], hand_cds[6] - hand_cds[5]) > 100 and\
        angle_between_vectors(hand_cds[12] - hand_cds[11], hand_cds[11] - hand_cds[10]) + angle_between_vectors(hand_cds[11] - hand_cds[10], hand_cds[10] - hand_cds[9]) > 100 and\
        angle_between_vectors(hand_cds[16] - hand_cds[15], hand_cds[15] - hand_cds[14]) + angle_between_vectors(hand_cds[15] - hand_cds[14], hand_cds[14] - hand_cds[13]) > 100 and\
        angle_between_vectors(hand_cds[20] - hand_cds[19], hand_cds[19] - hand_cds[18]) + angle_between_vectors(hand_cds[19] - hand_cds[18], hand_cds[18] - hand_cds[17]) > 100 and\
        np.linalg.norm(hand_cds[8] - hand_cds[0]) / mean_bone_length < 3 and np.linalg.norm(hand_cds[12] - hand_cds[0]) / mean_bone_length < 3 and\
        np.linalg.norm(hand_cds[16] - hand_cds[0]) / mean_bone_length < 3 and np.linalg.norm(hand_cds[20] - hand_cds[0]) / mean_bone_length < 3:
        # Thumb extended and pointing up -> thumbs-up.
        if min_thumb_dis > 0.6 and angle_between_vectors(hand_cds[4] - hand_cds[0], np.array([0, -1, 0])) < 80 and\
            np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 2:
            gesture_name = "good"
        # Thumb extended and pointing down -> thumbs-down.
        elif min_thumb_dis > 1 and angle_between_vectors(hand_cds[4] - hand_cds[0], np.array([0, -1, 0])) > 110 and\
            np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 2:
            gesture_name = "bad"
        # Thumb half-raised -> "agree".
        elif angle_between_vectors(hand_cds[4] - hand_cds[3], hand_cds[3] - hand_cds[2]) > 20 and\
            np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1 and angle_between_vectors(hand_cds[4] - hand_cds[0], np.array([0, -1, 0])) < 80:
            gesture_name = "agree"
        # Thumb tucked in -> plain fist.
        elif hand_state[0] >= 1 or min_thumb_dis_2d < 0.8 or hand_state[0] == -2:
            gesture_name = "fist"
    # "7": first fingertips pinched together, ring/little tips away and
    # curled close to the wrist.
    if gesture_name == 'unknown' and (max(dis_m[0]) / mean_bone_length < 1 and np.linalg.norm(hand_cds[16,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[20,:2] - hand_cds[8,:2]) / mean_bone_length > 1) and \
        np.linalg.norm(hand_cds[16] - hand_cds[0]) / mean_bone_length < 1.8 and np.linalg.norm(hand_cds[20] - hand_cds[0]) / mean_bone_length < 1.8:
        gesture_name = "7"
    # elif np.linalg.norm(hand_cds[12,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and\
    #     np.linalg.norm(hand_cds[8,:2] - hand_cds[16,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[8,:2] - hand_cds[20,:2]) / mean_bone_length > 1 and\
    #     np.linalg.norm(hand_cds[12,:2] - hand_cds[16,:2]) / mean_bone_length < 0.9 and np.linalg.norm(hand_cds[20,:2] - hand_cds[16,:2]) / mean_bone_length < 0.9 and\
    #     hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[4] >= 1 and np.linalg.norm(hand_cds[12] - hand_cds[0]) / mean_bone_length < 2 and np.linalg.norm(hand_cds[16] - hand_cds[0]) / mean_bone_length < 2 and np.linalg.norm(hand_cds[20] - hand_cds[0]) / mean_bone_length < 2 and\
    #     np.linalg.norm(hand_cds[4,:2] - hand_cds[8,:2]) / mean_bone_length < 0.65:
    #     pass
    # love / under_control: middle/ring/little bent, thumb straight, index
    # not fully bent -- then distinguish by the thumb-index geometry.
    elif hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[4] >= 1 and np.linalg.norm(hand_cds[12] - hand_cds[0]) / mean_bone_length < 2.5 and np.linalg.norm(hand_cds[16] - hand_cds[0]) / mean_bone_length < 2.5 and\
        np.linalg.norm(hand_cds[20] - hand_cds[0]) / mean_bone_length < 2.5 and hand_state[0] == 0 and hand_state[1] != 2:
        # Thumb and index form a crossing "finger heart".
        if intersection_in_line_segment(hand_cds[4], hand_cds[3] + 3 * (hand_cds[3] - hand_cds[4]), hand_cds[8], hand_cds[7] + 3 * (hand_cds[7] - hand_cds[8])) and np.linalg.norm(hand_cds[4] - hand_cds[12]) / mean_bone_length > 0.7 and\
            np.linalg.norm(hand_cds[8] - hand_cds[12]) / mean_bone_length > 0.7:
            gesture_name = "love"
        # Thumb and index tips nearly touching.
        elif np.linalg.norm(hand_cds[4] - hand_cds[8]) / mean_bone_length < 0.7:
            gesture_name = "under_control"
    # Single-finger family: thumb tucked, exactly one finger extended.
    if gesture_name == 'unknown' and hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[1] in [0, -2] and hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[4] >= 1 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1:
        gesture_name = "1"
    # Little finger only.
    elif hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[4] in [0, -2] and hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[1] >= 1 and\
        np.linalg.norm(hand_cds[20,:2] - hand_cds[16,:2]) / mean_bone_length > 1:
        gesture_name = "rubbish"
    # Middle finger only.
    elif hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[1] >= 1 and hand_state[2] in [0, -2] and hand_state[3] >= 1 and hand_state[4] >= 1:
        gesture_name = "despise"
    # Index + middle extended: split into 2 / wish / swordfinger by whether
    # the two fingers are spread, crossed, or held together.
    elif hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[1] in [0, -2] and hand_state[2] in [0, -2] and hand_state[3] >= 1 and hand_state[4] >= 1 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1:
        # Fingers spread apart (index tip farther from the middle-finger line
        # than its own knuckle) and not crossed -> "V".
        if distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) > distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12]) and\
            (not intersection_in_line_segment(hand_cds[12], hand_cds[9], hand_cds[8], hand_cds[5])):
            gesture_name = "2"
        # Fingers crossed -> "wish".
        elif intersection_in_line_segment(hand_cds[12], hand_cds[9], hand_cds[8], hand_cds[5]):
            gesture_name = "wish"
        # Fingers together -> sword finger.
        else:
            gesture_name = "swordfinger"
    # Open-finger counts: 3, 4, then 5/Vulcan/stop for the fully open hand.
    if gesture_name == 'unknown' and hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[1] == 0 and hand_state[2] == 0 and hand_state[3] == 0 and hand_state[4] >= 1 and\
        distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) > distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12]) and\
        distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) > distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16])and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1:
        gesture_name = "3"
    elif hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[1] == 0 and hand_state[2] == 0 and hand_state[3] == 0 and hand_state[4] == 0 and\
        distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) > distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12]) and\
        distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) > distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16]) and\
        distance_point_line(hand_cds[16], hand_cds[17], hand_cds[20]) > distance_point_line(hand_cds[13], hand_cds[17], hand_cds[20]) and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1:
        gesture_name = "4"
    # All five fingers straight, thumb clear of the palm: 5 vs. Vulcan salute
    # vs. stop, decided by which adjacent fingers are spread.
    elif hand_angle < 100 and (hand_state[0] == 0 and min_thumb_dis >= 0.5) and hand_state[1] == 0 and hand_state[2] == 0 and hand_state[3] == 0 and hand_state[4] == 0 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1 and \
        not piont_in_triangle(hand_cds[4], hand_cds[8], hand_cds[0], hand_cds[12]) and\
        not piont_in_triangle(hand_cds[4], hand_cds[16], hand_cds[0], hand_cds[12]) and\
        not piont_in_triangle(hand_cds[4], hand_cds[16], hand_cds[0], hand_cds[20]):
        # Every adjacent pair spread -> open palm "5".
        if distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) > distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12]) and\
            distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) > distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16]) and\
            distance_point_line(hand_cds[16], hand_cds[17], hand_cds[20]) > distance_point_line(hand_cds[13], hand_cds[17], hand_cds[20]):
            gesture_name = "5"
        # Only the middle/ring gap spread -> Vulcan salute.
        elif (distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) - distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12])) / mean_bone_length < 0.2 and\
            (distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) - distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16])) / mean_bone_length > 0.5 and\
            (distance_point_line(hand_cds[16], hand_cds[17], hand_cds[20]) - distance_point_line(hand_cds[13], hand_cds[17], hand_cds[20])) / mean_bone_length < 0.2:
            gesture_name = "Vulcan_salute"
        # All fingers held together, thumb in the palm plane -> stop.
        elif (distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) - distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12])) / mean_bone_length < 0.2 and\
            (distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) - distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16])) / mean_bone_length < 0.2 and\
            (distance_point_line(hand_cds[16], hand_cds[17], hand_cds[20]) - distance_point_line(hand_cds[13], hand_cds[17], hand_cds[20])) / mean_bone_length < 0.2 and\
            distance_point_plane(hand_cds[4], hand_cds[0], hand_cds[5], hand_cds[17]) / mean_bone_length < 1 :
            gesture_name = "stop"
    # Thumb + little finger family and the remaining numeric/figurative
    # gestures.
    if gesture_name == 'unknown' and hand_angle < 100 and (hand_state[0] == 0 and min_thumb_dis >= 0.5) and hand_state[1] >= 1 and hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[4] == 0 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12,:2] - hand_cds[4,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[16,:2] - hand_cds[4,:2]) / mean_bone_length > 1.5 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[20,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12,:2] - hand_cds[20,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[16,:2] - hand_cds[20,:2]) / mean_bone_length > 1.5:
        gesture_name = "6"
    elif hand_angle < 100 and (hand_state[0] == 0 and min_thumb_dis >= 0.5) and hand_state[1] == 0 and hand_state[2] >= 1 and hand_state[3] >= 1 and (hand_state[4] == 0 or hand_state[4] == -2) and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[12,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12,:2] - hand_cds[4,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[16,:2] - hand_cds[4,:2]) / mean_bone_length > 1.5 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[16,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12,:2] - hand_cds[20,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[16,:2] - hand_cds[20,:2]) / mean_bone_length > 1.5:
        gesture_name = "6+1"
    # ok: thumb-index ring with the other three fingers extended.
    # NOTE(review): the bare `or` below means hand_angle < 100 applies only
    # to the first alternative (Python `and` binds tighter than `or`) --
    # confirm this grouping is intended.
    elif hand_angle < 100 and ((hand_state[0] >= 1 or hand_state[0] == -2) and hand_state[1] >= 1 and hand_state[2] == 0 and hand_state[3] == 0 and hand_state[4] == 0) or\
        hand_state[2] == 0 and hand_state[3] == 0 and hand_state[4] == 0 and np.linalg.norm(hand_cds[4,:2] - hand_cds[8,:2]) / mean_bone_length < 0.5 and np.linalg.norm(hand_cds[4] - hand_cds[12]) / mean_bone_length > 2 and\
        np.linalg.norm(hand_cds[4] - hand_cds[16]) / mean_bone_length > 2 and np.linalg.norm(hand_cds[4] - hand_cds[20]) / mean_bone_length > 2 and\
        distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) > distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16]) and\
        distance_point_line(hand_cds[16], hand_cds[17], hand_cds[20]) > distance_point_line(hand_cds[13], hand_cds[17], hand_cds[20]):
        gesture_name = "ok"
    # Orchid-finger poses: thumb touches the middle (and possibly ring) tip.
    elif hand_angle < 100 and (np.linalg.norm(hand_cds[4] - hand_cds[12]) / mean_bone_length < 0.6) and \
        np.linalg.norm(hand_cds[4] - hand_cds[8]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[20]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[16]) / mean_bone_length > 1 and\
        np.linalg.norm(hand_cds[12] - hand_cds[8]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12] - hand_cds[20]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12] - hand_cds[16]) / mean_bone_length > 1.5 and\
        hand_state[3] == 0 and hand_state[4] == 0:
        gesture_name = "Orchid1"
    elif hand_angle < 100 and (np.linalg.norm(hand_cds[4] - hand_cds[12]) / mean_bone_length < 0.6) and \
        (np.linalg.norm(hand_cds[4] - hand_cds[16]) / mean_bone_length < 0.6 or np.linalg.norm(hand_cds[3] - hand_cds[16]) / mean_bone_length < 0.6 or np.linalg.norm(hand_cds[2] - hand_cds[16]) / mean_bone_length < 0.6) and\
        np.linalg.norm(hand_cds[4] - hand_cds[8]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[20]) / mean_bone_length > 1 and hand_state[4] == 0:
        gesture_name = "Orchid2"
    # 8: "pistol" shape -- thumb and index extended, others bent.
    elif hand_angle < 100 and (hand_state[0] == 0 and min_thumb_dis >= 0.5) and (hand_state[1] == 0 or hand_state[1] == -2) and hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[4] >= 1 and\
        np.linalg.norm(hand_cds[12,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[16,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and\
        np.linalg.norm(hand_cds[20,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 2:
        gesture_name = "8"
    # 3_variant: thumb + index + middle extended and spread.
    elif hand_angle < 100 and (hand_state[0] == 0 and min_thumb_dis >= 0.5) and (hand_state[1] == 0 or hand_state[1] == -2) and (hand_state[2] == 0 or hand_state[2] == -2) and hand_state[3] >= 1 and hand_state[4] >= 1 and\
        distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) > distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12]) and np.linalg.norm(hand_cds[16,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and\
        np.linalg.norm(hand_cds[20,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[12,:2] - hand_cds[16,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[20,:2] - hand_cds[12,:2]) / mean_bone_length > 1 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 2 and np.linalg.norm(hand_cds[12,:2] - hand_cds[4,:2]) / mean_bone_length > 2:
        gesture_name = "3_variant"
    # Hooked index finger: "9" when the hand points up, "seduce" otherwise.
    elif (hand_state[0] >= 1 or min_thumb_dis < 1 or hand_state[0] == -2) and hand_state[1] == 1 and hand_state[2] == 2 and hand_state[3] == 2 and hand_state[4] == 2 and \
        np.linalg.norm(hand_cds[8,:2] - hand_cds[12,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[20]) / mean_bone_length < 1.8:
        if hand_angle < 60:
            gesture_name = "9"
        else:
            gesture_name = "seduce"
    # catch / 0: fingers bent with tips lifted off the palm plane -- a claw
    # (tips away from the thumb) or a closed circle (tips near the thumb).
    if gesture_name == 'unknown' and hand_state[1] in [1, 2, -2] and hand_state[2] in [1, 2, -2] and hand_state[3] in [1, 2, -2] and\
        distance_point_plane(hand_cds[8], hand_cds[0], hand_cds[5], hand_cds[17]) / mean_bone_length > 0.7 and\
        distance_point_plane(hand_cds[12], hand_cds[0], hand_cds[5], hand_cds[17]) / mean_bone_length > 0.7 and\
        distance_point_plane(hand_cds[16], hand_cds[0], hand_cds[5], hand_cds[17]) / mean_bone_length > 0.7 and\
        distance_point_plane(hand_cds[20], hand_cds[0], hand_cds[5], hand_cds[17]) / mean_bone_length > 0.7:
        if np.linalg.norm(hand_cds[4,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[5]) / mean_bone_length > 1 and\
            np.linalg.norm(hand_cds[4] - hand_cds[9]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[9]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[17]) / mean_bone_length > 1:
            gesture_name = "catch"
        elif np.linalg.norm(hand_cds[4,:2] - hand_cds[8,:2]) / mean_bone_length < 1 and np.linalg.norm(hand_cds[4,:2] - hand_cds[12,:2]) / mean_bone_length < 1 and\
            np.linalg.norm(hand_cds[4,:2] - hand_cds[16,:2]) / mean_bone_length < 1 and np.linalg.norm(hand_cds[4,:2] - hand_cds[20,:2]) / mean_bone_length < 1:
            gesture_name = "0"
    if details:
        # hand_state values -2/-1 pick the last two entries of this list via
        # Python's negative indexing ("根弯" / 'unknown').
        prompt = ["直立", '半弯', '全弯', "根弯", 'unknown']
        print(f"手部平均大小{mean_bone_length:.3f}",
              f"手的方向{hand_angle:.3f}",
              f'法向量方向{hand_phi:.3f}',
              "1指"+prompt[hand_state[0]],
              "2指"+prompt[hand_state[1]],
              "3指"+prompt[hand_state[2]],
              "4指"+prompt[hand_state[3]],
              "5指"+prompt[hand_state[4]],
              "检测结果:",gesture_name )
    return gesture_name
# Non-maximum suppression: cluster overlapping boxes, keep the largest of
# each cluster.
def non_max_suppression(boxes, overlap_thresh):
    """Cluster boxes whose mutual overlap exceeds ``overlap_thresh`` and keep
    only the largest-area box of each cluster.

    Args:
        boxes: list of [x_min, y_min, x_max, y_max, ...] sequences; trailing
            items (e.g. a gesture label) are carried through untouched.
        overlap_thresh: overlap ratio above which two boxes are merged, where
            overlap = intersection area / area of the SMALLER box (more
            aggressive than IoU).

    Returns:
        (kept_boxes, kept_indices): the surviving boxes and their indices
        into the original ``boxes`` list.

    Fixes over the original: indices are tracked explicitly with enumerate
    (``boxes.index`` returned the FIRST duplicate, mis-attributing identical
    boxes), and a zero-area pair no longer raises ZeroDivisionError.
    """
    # Each entry of ``splits`` is one cluster: a list of (index, box) pairs.
    splits = []
    for idx, bbox in enumerate(boxes):
        add_flag = False
        bbox_area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        for split in splits:
            for _, split_item in split:
                # Intersection rectangle of the two boxes.
                xA = max(split_item[0], bbox[0])
                yA = max(split_item[1], bbox[1])
                xB = min(split_item[2], bbox[2])
                yB = min(split_item[3], bbox[3])
                interArea = max(0, xB - xA) * max(0, yB - yA)
                item_area = (split_item[2] - split_item[0]) * (split_item[3] - split_item[1])
                # overlap = interArea / (item_area + bbox_area - interArea)  # IoU alternative
                denom = min(item_area, bbox_area)
                overlap = interArea / denom if denom > 0 else 0
                if overlap > overlap_thresh:
                    add_flag = True
                    break
            if add_flag:
                split.append((idx, bbox))
                break
        if not add_flag:
            # No cluster matched: this box starts a new one.
            splits.append([(idx, bbox)])
    # Keep the largest-area member of every cluster.
    nms_boxes, nms_ids = [], []
    for split in splits:
        max_area = 0
        max_area_id = -1
        for item_idx, split_item in split:
            item_area = (split_item[2] - split_item[0]) * (split_item[3] - split_item[1])
            if item_area > max_area:
                max_area = item_area
                max_area_id = item_idx
        if max_area_id != -1:
            nms_boxes.append(boxes[max_area_id])
            nms_ids.append(max_area_id)
    return nms_boxes, nms_ids
然后新建一个hand_detection.py
文件写入
import cv2
import mediapipe as mp
import time
import numpy as np
from utils import process_hand, non_max_suppression
import os
class HandDetection():
    """Webcam hand-gesture detector built on MediaPipe Hands.

    Runs the MediaPipe hand-landmark model, converts each hand's landmarks
    to a gesture name via ``utils.process_hand``, and de-duplicates
    overlapping detections with ``utils.non_max_suppression``.
    """
    def __init__(self, max_num_hands=2,  # maximum number of hands to detect
                 min_detection_confidence=0.8,  # detection confidence threshold
                 min_tracking_confidence=0.5,  # tracking confidence threshold
                 overlap_thresh=0.8,  # non-maximum-suppression overlap threshold
                 ):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(static_image_mode=False,
                                         max_num_hands=max_num_hands,
                                         min_detection_confidence=min_detection_confidence,
                                         min_tracking_confidence=min_tracking_confidence
                                         )
        self.mpDraw = mp.solutions.drawing_utils
        self.overlap_thresh = overlap_thresh
        self.frame = None      # last frame passed to get_gesture
        self.results = None    # raw MediaPipe results for that frame
        self.gestures = []     # per-hand [x_min, y_min, x_max, y_max, name]
        self.nms_id = []       # indices into self.gestures kept by NMS
        self.nms_gesture = []  # gesture entries kept by NMS
    # Detect and annotate a single image.
    def process_frame(self, img, details=False):
        """Run detection on ``img``, draw boxes/labels on it, and return it."""
        self.get_gesture(img, details)
        img = self.frame
        if self.results.multi_hand_landmarks:  # at least one hand detected
            # Iterate only over the detections that survived NMS.
            # for hand_idx in range(len(self.results.multi_hand_landmarks)):
            for hand_idx in self.nms_id:
                # self.mpDraw.plot_landmarks(self.results.multi_hand_landmarks[0], self.mp_hands.HAND_CONNECTIONS)
                if details:
                    hand_pixels = self.results.multi_hand_landmarks[hand_idx]  # all landmarks of this hand
                    self.mpDraw.draw_landmarks(img, hand_pixels, self.mp_hands.HAND_CONNECTIONS)  # draw the skeleton
                item = self.gestures[hand_idx]
                # Draw corner ticks of the bounding box (instead of the full
                # rectangle, kept commented out below).
                cv2.line(img, (item[0], item[1]), (item[0] + 20, item[1]), (255, 0, 0), 4)
                cv2.line(img, (item[0], item[1]), (item[0], item[1] + 20), (255, 0, 0), 4)
                cv2.line(img, (item[2], item[1]), (item[2] - 20, item[1]), (255, 0, 0), 4)
                cv2.line(img, (item[2], item[1]), (item[2], item[1] + 20), (255, 0, 0), 4)
                cv2.line(img, (item[0], item[3]), (item[0] + 20, item[3]), (255, 0, 0), 4)
                cv2.line(img, (item[0], item[3]), (item[0], item[3] - 20), (255, 0, 0), 4)
                cv2.line(img, (item[2], item[3]), (item[2] - 20, item[3]), (255, 0, 0), 4)
                cv2.line(img, (item[2], item[3]), (item[2], item[3] - 20), (255, 0, 0), 4)
                # cv2.rectangle(img, (item[0], item[1]), (item[2], item[3]), (255, 0, 0), 2)
                # Gesture label: thick black outline first, white text on top.
                cv2.putText(img, item[4], (item[0], item[1]-8), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 8, cv2.LINE_AA)
                cv2.putText(img, item[4], (item[0], item[1]-8), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), 2, cv2.LINE_AA)
        return img
    # Detect gestures in the given frame.
    def get_gesture(self, img, details=False):
        """Run MediaPipe on ``img`` and fill self.gestures / self.nms_*.

        Returns the NMS-filtered gesture list.

        NOTE(review): ``img`` is handed to MediaPipe as-is; MediaPipe Hands
        expects RGB while OpenCV capture frames are BGR -- confirm whether a
        cv2.cvtColor conversion is needed upstream.
        """
        self.frame = img
        self.results = self.hands.process(img)
        self.gestures = []
        # Any hands found?
        if self.results.multi_hand_landmarks:
            for hand_landmarks in self.results.multi_hand_landmarks:
                # Collect the 21 normalised landmark coordinates.
                hand_cds = np.zeros((21, 3), dtype='float32')
                for idx, landmark in enumerate(hand_landmarks.landmark):
                    hand_cds[idx] = [landmark.x, landmark.y, landmark.z]
                x_min = min(hand_cds[:, 0])
                y_min = min(hand_cds[:, 1])
                x_max = max(hand_cds[:, 0])
                y_max = max(hand_cds[:, 1])
                # print(x_min, y_min, x_max, y_max)
                # x_min -= 0.1 * (x_max - x_min)
                # x_max += 0.1 * (x_max - x_min)
                # y_min -= 0.1 * (y_max - y_min)
                # y_max += 0.1 * (y_max - y_min)
                # Normalised coordinates -> pixel coordinates, clipped to the frame.
                x_min = max(int(x_min * self.frame.shape[1]),0)
                y_min = max(int(y_min * self.frame.shape[0]),0)
                x_max = min(int(x_max * self.frame.shape[1]),self.frame.shape[1])
                y_max = min(int(y_max * self.frame.shape[0]),self.frame.shape[0])
                gesture = process_hand(hand_cds, details)
                self.gestures.append([x_min, y_min, x_max, y_max, gesture])
        # Drop duplicate/overlapping detections of the same hand.
        self.nms_gesture, self.nms_id = non_max_suppression(self.gestures, self.overlap_thresh)
        # if len(self.nms_gesture) != len(self.gestures):
        #     print('nmsed!')
        return self.nms_gesture
    # Plot the first detected hand as a 3D model (separate matplotlib window).
    def draw_3d_module(self, img):
        img = cv2.flip(img, 1)  # mirror for a selfie view
        self.results = self.hands.process(img)
        if self.results.multi_hand_landmarks:
            self.mpDraw.plot_landmarks(self.results.multi_hand_landmarks[0], self.mp_hands.HAND_CONNECTIONS)
def run():
    """Open the default webcam and run live gesture detection.

    Keys: Esc quits; 'q' saves a crop of each detected hand to
    ./results/hand_gestures/.
    """
    hand_detection = HandDetection()
    capture = cv2.VideoCapture(0)
    # Probe one frame to fail fast when the camera cannot be opened.
    ref, frame = capture.read()
    if not ref:
        raise ValueError("未能正确读取摄像头(视频),请注意是否正确安装摄像头(是否正确填写视频路径)。")
    fps = 0.0
    while(True):
        t1 = time.time()
        # Grab the next frame.
        ref, frame = capture.read()
        if not ref:
            break
        frame = cv2.flip(frame, 1)  # mirror so the preview matches the user
        frame = hand_detection.process_frame(frame, details=True)
        # if len(hand_detection.gestures):
        #     print(hand_detection.gestures)
        # Exponentially smoothed FPS estimate (alpha = smoothing factor).
        alpha = 0.9
        if time.time()-t1 > 0:
            fps = alpha*fps + (1-alpha)*(1./(time.time()-t1))
        frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow("video",frame)
        key = cv2.waitKey(1) & 0xff
        if key == 27:  # Esc quits
            capture.release()
            break
        # Press 'q' to save the cropped hand images.
        if key == ord('q'):
            save_root = './results/hand_gestures/'
            if not os.path.exists(save_root):
                # Create the output directory on first use.
                os.makedirs(save_root)
            for gesture in hand_detection.gestures:
                # Find the first unused file name for this gesture label.
                i = 0
                while os.path.exists(save_root+gesture[4]+'-'+str(i)+'.jpg'):
                    i += 1
                # NOTE(review): the crop is taken from the annotated frame, so
                # saved images include the drawn ticks/label -- confirm this
                # is intended (crop before drawing to get clean samples).
                img_crop = frame[gesture[1]:gesture[3], gesture[0]:gesture[2]]
                cv2.imwrite(save_root+gesture[4]+'-'+str(i)+'.jpg', img_crop)
                print(save_root+gesture[4]+'-'+str(i)+'.jpg','Captured!')
    print("Video Detection Done!")
    capture.release()
    cv2.destroyAllWindows()
# Script entry point: start the webcam gesture-detection loop.
if __name__ == '__main__':
    run()
直接运行hand_detection.py
即可