算法思路
通过模型可以得到鼻子,左眼,右眼,左耳,右耳,左肩,右肩,左肘,右肘,左腕,右腕,左臀,右臀,左膝,右膝,左踝,右踝的17个关节点,通过这些关节点设计算法即可判断人体姿态。
姿态估计可以直接使用训练好的模型和权重:model="yolo_nas_pose_l"、weights="coco_pose"。
算法实现
import cv2
import torch
from PIL import Image
from torchvision import transforms
import super_gradients
from super_gradients.training import models
from super_gradients.common.object_names import Models
import math
import matplotlib
matplotlib.use('TkAgg')
# Human-readable joint labels (Chinese), index-aligned with `keypoints` below.
point_name = ['鼻子', '左眼', '右眼', '左耳', '右耳', '左肩', '右肩', '左肘', '右肘',
              '左腕', '右腕', '左臀', '右臀', '左膝', '右膝', '左踝', '右踝']

# The 17 COCO-order keypoint names as produced by the pose model.
keypoints = (
    'nose left_eye right_eye left_ear right_ear '
    'left_shoulder right_shoulder left_elbow right_elbow '
    'left_wrist right_wrist left_hip right_hip '
    'left_knee right_knee left_ankle right_ankle'
).split()
# Preprocessing pipeline: resize to the model's 640x640 input size, then
# convert the PIL image to a float tensor scaled to [0, 1].
transform = transforms.Compose(
    [transforms.Resize((640, 640)), transforms.ToTensor()]
)
def image_to_tensor(image_path):
    """Load an image file and return it as a batched float tensor.

    The image is forced to RGB, resized/normalized by the module-level
    `transform` (640x640, values in [0, 1]) and given a leading batch
    dimension, yielding shape (1, 3, 640, 640).
    """
    image = Image.open(image_path).convert('RGB')
    return transform(image).unsqueeze(0)
def pose_piont_txt(poses, creen):
    """Write each joint's keypoint name onto the image `creen` in place.

    `poses` is an iterable of (x, y, confidence) triples in the same order
    as the module-level `keypoints` list; the confidence value is unused.
    (The function name keeps its original public spelling for callers.)
    """
    for idx, (x, y, _confidence) in enumerate(poses):
        cv2.putText(
            creen,
            f'{keypoints[idx]}',
            (int(x), int(y)),
            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=1,
            color=(255, 0, 0),
            thickness=1,
        )
def posture_detection(poses):
    """Classify a person's posture from 17 pose keypoints.

    Parameters
    ----------
    poses : sequence of 17 points, each indexable as (x, y[, confidence]),
        in COCO order (nose, eyes, ears, shoulders, elbows, wrists, hips,
        knees, ankles; left before right), in image pixel coordinates
        (assumes the OpenCV convention that y grows downward — TODO confirm).

    Returns
    -------
    str
        One of "Lying Down", "Sitting" or "Standing". "Standing" is the
        fallback when neither of the first two rules matches.
    """
    (nose, left_eye, right_eye, left_ear, right_ear,
     left_shoulder, right_shoulder, left_elbow, right_elbow,
     left_wrist, right_wrist, left_hip, right_hip,
     left_knee, right_knee, left_ankle, right_ankle) = poses

    # Hip midpoint approximates the base of the spine.
    mid_hip = ((left_hip[0] + right_hip[0]) / 2, (left_hip[1] + right_hip[1]) / 2)
    # Angle of the nose -> mid-hip line, in degrees (-180, 180].
    spine_angle = math.degrees(math.atan2(mid_hip[1] - nose[1],
                                          mid_hip[0] - nose[0]))
    # Angle of the right shoulder -> right hip line (trunk orientation).
    torso_angle = math.degrees(math.atan2(right_hip[1] - right_shoulder[1],
                                          right_hip[0] - right_shoulder[0]))
    # Lower-leg (knee -> ankle) angles for each leg.
    left_leg_angle = math.degrees(math.atan2(left_ankle[1] - left_knee[1],
                                             left_ankle[0] - left_knee[0]))
    right_leg_angle = math.degrees(math.atan2(right_ankle[1] - right_knee[1],
                                              right_ankle[0] - right_knee[0]))
    # Vertical offset between the shoulder midline and the hip midline.
    shoulder_hip_diff = ((left_shoulder[1] + right_shoulder[1]) / 2
                         - (left_hip[1] + right_hip[1]) / 2)

    if abs(spine_angle) > 75 and abs(torso_angle) > 75:
        return "Lying Down"
    if (abs(shoulder_hip_diff) < 50 and abs(torso_angle) < 30
            and min(left_leg_angle, right_leg_angle) < 150):
        return "Sitting"
    # FIX: the original code had a separate elif testing for "Standing" here,
    # but its else branch also returned "Standing", making the test redundant.
    # Anything that is neither lying nor sitting is classified as standing;
    # observable behavior is unchanged.
    return "Standing"
def detect_img():
    """Run pose estimation on one test image and display the results.

    Loads the YOLO-NAS pose model (COCO pretrained weights) on the GPU,
    predicts keypoints for detections above 0.8 confidence, labels every
    joint, classifies each person's posture, shows each person crop, and
    finally shows the model's own annotated visualization.
    """
    image_path0 = 'data/test0.jpeg'
    image_path1 = 'data/test1.jpeg'
    image_path2 = 'data/test2.jpg'
    yolo_nas = super_gradients.training.models.get(
        "yolo_nas_pose_l", pretrained_weights="coco_pose").cuda()
    frame = cv2.imread(image_path2)
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    txt_img = frame.copy()
    model_predictions = yolo_nas.predict(img, conf=0.8)
    prediction = model_predictions.prediction
    bboxes = prediction.bboxes_xyxy
    poses = prediction.poses
    scores = prediction.scores
    for idx, box in enumerate(bboxes):
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        pose_piont_txt(poses[idx], txt_img)
        posture = posture_detection(poses[idx])
        creen = txt_img[y1:y2, x1:x2, :]
        # NOTE(review): the posture label is drawn on `img` (the RGB array
        # handed to the model), not on the `txt_img`/`creen` windows shown
        # below, so it may never be visible — confirm the intended canvas.
        cv2.putText(img, f'{posture}', (x1 + 60, y1),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                    color=(0, 0, 255), thickness=2)
        cv2.imshow(f'creen{idx}', creen)
    model_predictions.show()
    cv2.waitKey(0)
def detact_vidio():
    """Run pose estimation over a video file and save the annotated copy.

    Reads 'data/cs4.mp4', runs the COCO-pretrained YOLO-NAS pose model on
    the GPU with a 0.8 confidence threshold, and writes 'data/cs5.mp4'.
    (The misspelled public name is kept so existing callers still work.)
    """
    vidio_path = 'data/cs4.mp4'
    pose_model = super_gradients.training.models.get(
        "yolo_nas_pose_l", pretrained_weights="coco_pose").cuda()
    predictions = pose_model.predict(vidio_path, conf=0.8)
    predictions.save('data/cs5.mp4')
if __name__ == '__main__':
    # Image demo by default; call detact_vidio() instead to process a video.
    detect_img()
效果图