Python OpenCV practice — use mediapipe to make a gesture mouse (simple version)

        Use mediapipe's gesture recognition module to encapsulate a HandDetector, and then make a gesture virtual mouse based on this class.

        Gesture usage: The index finger of one hand (hand0) performs mouse movement, and the index finger of the other hand (hand1) performs click operation.

        Since this is a hands-on practice project, I only implemented the left-click function and did not distinguish between the left and right hands. Which hand becomes hand0 and which becomes hand1 depends on which hand is detected first; this could be optimized.

        The mouse operation uses pynput, which can be installed directly with pip install pynput.

        Obtaining the screen resolution uses tkinter, which is built-in in Python and does not require special installation.

        Without further ado, here’s the code:

import cv2 as cv
import math
import mediapipe as mp
import time
import ctypes
# Mouse control via pynput; install with: pip install pynput
from pynput import mouse
# tkinter (built into Python, no extra install) is used to query the
# Windows display resolution
import tkinter as tk

# When Windows display scaling is enabled, pynput's click coordinates can be
# off; declaring per-monitor DPI awareness fixes this.  References:
# https://blog.csdn.net/qq_33303386/article/details/133746983
# https://ask.csdn.net/questions/7471494
PROCESS_PER_MONITOR_DPI_AWARE = 1
ctypes.windll.shcore.SetProcessDpiAwareness(PROCESS_PER_MONITOR_DPI_AWARE)
# Screen scaling factor.  If the DPI-awareness call above still does not yield
# the real resolution, consider computing the native resolution from this
# ratio, or set Windows scaling to 100%.  (Unused below; kept for reference.)
screenScale = ctypes.windll.shcore.GetScaleFactorForDevice(0)

print("Screen Scaling Factor:", screenScale)

class HandDetector():
    """Thin wrapper around mediapipe's hand-tracking solution.

    findHands() runs detection on a BGR frame (optionally drawing the
    landmarks); findHandPositions() converts one detected hand's
    normalized landmarks into pixel coordinates.
    """

    def __init__(self, mode=False,
                 maxNumHands=2,
                 modelComplexity=1,
                 minDetectionConfidence=0.5,
                 minTrackingConfidence=0.5):
        self.mode = mode
        self.maxNumHands = maxNumHands
        self.modelComplexity = modelComplexity
        self.minDetectionConfidence = minDetectionConfidence
        self.minTrackingConfidence = minTrackingConfidence
        # Latest detection result; None until findHands() has been called.
        # Guards findHandPositions() against an AttributeError when it is
        # invoked before any detection has run.
        self.results = None
        # Create the mediapipe solutions.hands detector.  Keyword arguments
        # are used because the positional parameter order of Hands() has
        # varied across mediapipe releases.
        self.mpHands = mp.solutions.hands
        self.handsDetector = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxNumHands,
            model_complexity=self.modelComplexity,
            min_detection_confidence=self.minDetectionConfidence,
            min_tracking_confidence=self.minTrackingConfidence)
        # mediapipe drawing helpers for overlaying landmarks on a frame.
        self.mpDrawUtils = mp.solutions.drawing_utils

    def findHands(self, img, drawOnImage=True):
        """Detect hands in a BGR frame.

        Stores the raw detection result in self.results and, when
        drawOnImage is True, draws each detected hand's landmarks onto img.
        Returns img (possibly annotated).
        """
        # The mediapipe hand detector expects RGB input; OpenCV frames
        # are BGR, so convert first.
        imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        self.results = self.handsDetector.process(imgRGB)

        # multi_hand_landmarks is truthy only when at least one hand was found.
        if self.results.multi_hand_landmarks:
            for handLandmarks in self.results.multi_hand_landmarks:
                if drawOnImage:
                    self.mpDrawUtils.draw_landmarks(
                        img, handLandmarks, self.mpHands.HAND_CONNECTIONS)
        return img

    def findHandPositions(self, img, handID=0, drawOnImage=True, drawColor=(0,255,0)):
        """Return the landmark list [[id, x, y], ...] of one detected hand.

        Coordinates are pixel positions within img.  Returns an empty list
        when no detection has run yet or handID exceeds the number of
        detected hands.
        """
        landmarkList = []
        if self.results is None or not self.results.multi_hand_landmarks:
            return landmarkList
        if handID >= len(self.results.multi_hand_landmarks):
            return landmarkList
        handLandmarks = self.results.multi_hand_landmarks[handID]
        # The frame size is loop-invariant; read it once.
        h, w, c = img.shape
        for id, landmark in enumerate(handLandmarks.landmark):
            # Landmark X/Y are normalized [0,1]; scale to pixel coordinates.
            centerX, centerY = int(landmark.x * w), int(landmark.y * h)
            landmarkList.append([id, centerX, centerY])
            if drawOnImage:
                # Highlight the landmark with a small circle.
                cv.circle(img, (centerX, centerY), 8, drawColor)
        return landmarkList

def DisplayFPS(img, preTime):
    """Overlay the instantaneous FPS on img and return the current time.

    preTime is the timestamp returned by the previous call.  When no time
    has elapsed since then, the overlay is skipped to avoid a division by
    zero and the fresh timestamp is returned unchanged.
    """
    now = time.time()
    elapsed = now - preTime
    if elapsed == 0:
        return now
    cv.putText(img, "FPS:" + str(int(1 / elapsed)), (10, 70),
               cv.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 3)
    return now

def MouseMoveRel(mouseController, relX, relY):
    """Move the mouse cursor by a relative (dx, dy) offset via pynput."""
    mouseController.move(relX, relY)

def MouseMoveAbs(mouseController, x, y):
    """Jump the mouse cursor to the absolute screen position (x, y)."""
    mouseController.position = (x, y)

def MouseButtonDown(mouseController, button):
    """Press (and hold) the given mouse button via pynput."""
    mouseController.press(button)

def MouseButtonUp(mouseController, button):
    """Release a previously pressed mouse button via pynput."""
    mouseController.release(button)

def GetScreenSize():
    """Return (width, height) of the primary display in pixels, via tkinter."""
    root = tk.Tk()
    size = (root.winfo_screenwidth(), root.winfo_screenheight())
    root.destroy()
    return size

# Map a coordinate inside the frame's virtual-mouse rectangle to a screen
# coordinate (normalizing through the rectangle's size).
def FrameXY2ScreenXY(frameX, frameY, vMouseRectInfo, screenW, screenH):
    """Convert a camera-frame point to an absolute screen position.

    vMouseRectInfo is (x1, y1, x2, y2, w, h): the virtual-mouse
    rectangle's top-left corner, bottom-right corner, and size in frame
    pixels.  Points outside the rectangle are clamped to its edge so the
    cursor never maps off-screen.

    Returns (screenX, screenY) as floats.
    """
    (x1, y1, x2, y2, w, h) = vMouseRectInfo
    # Clamp into [0, w] x [0, h] relative to the rectangle's top-left.
    # The original used '> w' for X but '>= h' for Y; both collapse to the
    # same min/max clamp, applied consistently here.
    vMouseX = min(max(frameX - x1, 0), w)
    vMouseY = min(max(frameY - y1, 0), h)
    # Normalize to [0, 1] and scale to the screen resolution.
    return (vMouseX / w * screenW, vMouseY / h * screenH)

# Debounce: decide whether the cursor really moved, using a circular
# dead zone centred on the previous position.
def MouseDebounce(curX, curY, lastX, lastY, radius):
    """Suppress cursor jitter.

    Keeps the previous position (lastX, lastY) unless the new point is
    more than `radius` pixels away (Euclidean distance); otherwise the
    new position is accepted.
    """
    if math.hypot(curX - lastX, curY - lastY) <= radius:
        return (lastX, lastY)
    return (curX, curY)

# Preview window size (the captured frame is resized to this for display).
videoW = 640
videoH = 480
# Mirror the camera image horizontally so on-screen motion matches hand motion.
videoFlipX = True
# How many pixels the index fingertip must drop below the index finger's
# second joint (landmark 6) before a mouse-down (click) event is generated.
clickEventThreshold = 15

def main():
    """Run the gesture-mouse demo.

    hand0's index fingertip moves the cursor inside a rectangle in the
    camera frame; hand1 produces a left click by bending its index
    finger.  Press 'q' in the preview window to quit.
    """
    #video = cv.VideoCapture('../../SampleVideos/mouseMove.mp4')
    video = cv.VideoCapture(0)
    # Timestamp of the previous frame, consumed by the FPS overlay.
    preTime = 0
    handDetector = HandDetector(minDetectionConfidence=0.7)
    # pynput mouse controller used for all cursor moves and clicks.
    mouseController = mouse.Controller()
    # Last cursor position; -1 means "none yet", which disables debouncing.
    mouseLastX = -1
    mouseLastY = -1
    # Screen resolution the virtual-mouse rectangle is mapped onto.
    screenW, screenH = GetScreenSize()
    print("Screen Size: " + str(screenW) + "x" + str(screenH))

    # Camera frame size (VideoCapture property ids 3/4 = width/height).
    frameW = int(video.get(3))
    frameH = int(video.get(4))
    print("Camera Frame Resolution:", frameW, frameH)
    # The virtual mouse moves inside a rectangle centred in the frame whose
    # size is 2/3 of the frame in each dimension (adjustable).
    vMouseMoveAreaRatio = 2/3
    vMouseRectX, vMouseRectY = frameW / 2, frameH / 2
    vMouseRectW, vMouseRectH = frameW * vMouseMoveAreaRatio, frameH * vMouseMoveAreaRatio
    vMouseRectTopLeftX, vMouseRectTopLeftY = int(vMouseRectX - vMouseRectW / 2), int(vMouseRectY - vMouseRectH / 2)
    vMouseRectBtmRightX, vMouseRectBtmRightY = int(vMouseRectX + vMouseRectW / 2), int(vMouseRectY + vMouseRectH / 2)
    vMouseRectInfo = (vMouseRectTopLeftX, vMouseRectTopLeftY, vMouseRectBtmRightX, vMouseRectBtmRightY, vMouseRectW, vMouseRectH)
    # True while the click gesture is held, so one gesture yields one click.
    mouseButtonDown = False
    while True:
        ret, frame = video.read()
        if not ret:
            break
        if videoFlipX:
            frame = cv.flip(frame, 1)
        frame = handDetector.findHands(frame, drawOnImage=True)
        # Convention: hand0's index finger moves the mouse, hand1's clicks.
        hand0Landmarks = handDetector.findHandPositions(frame, handID=0)
        hand1Landmarks = handDetector.findHandPositions(frame, handID=1)
        if hand0Landmarks:
            # Landmark 8 is the index fingertip; [1]/[2] are its pixel X/Y.
            indexFingerX, indexFingerY = hand0Landmarks[8][1], hand0Landmarks[8][2]
            # Highlight the fingertip with a filled circle.
            cv.circle(frame, (indexFingerX, indexFingerY), 18, (0, 120, 255), cv.FILLED)

            mouseX, mouseY = FrameXY2ScreenXY(indexFingerX, indexFingerY, vMouseRectInfo, screenW, screenH)
            if mouseLastX >= 0:
                # Ignore jitter within 10 px of the previous cursor position.
                mouseX, mouseY = MouseDebounce(mouseX, mouseY, mouseLastX, mouseLastY, 10)
            MouseMoveAbs(mouseController, mouseX, mouseY)
            mouseLastX = mouseX
            mouseLastY = mouseY
            cv.putText(frame, "Mouse XY:(" + str(int(mouseX)) + "," + str(int(mouseY)) + ")", (indexFingerX, indexFingerY), cv.FONT_HERSHEY_PLAIN,
              3, (0,255,0), 3)

        if hand1Landmarks:
            # Landmark 8 = index fingertip, landmark 6 = index-finger second
            # joint.  When the tip drops below the joint (image Y grows
            # downward) by more than clickEventThreshold, the finger is
            # considered bent and a single left click is fired.
            fingerTipX, fingerTipY = hand1Landmarks[8][1], hand1Landmarks[8][2]
            fingerJointX, fingerJointY = hand1Landmarks[6][1], hand1Landmarks[6][2]
            cv.circle(frame, (fingerTipX, fingerTipY), 18, (0, 255, 0), cv.FILLED)
            cv.circle(frame, (fingerJointX, fingerJointY), 18, (0, 120, 255), cv.FILLED)
            if fingerTipY > fingerJointY + clickEventThreshold:
                if not mouseButtonDown:
                    mouseButtonDown = True
                    mouseController.click(mouse.Button.left)
                    print("Mouse Button Clicked!")
            else:
                # Finger straightened again: re-arm for the next click.
                if mouseButtonDown:
                    mouseButtonDown = False

        preTime = DisplayFPS(frame, preTime)
        # Draw the virtual-mouse movement rectangle.
        # NOTE(review): cv.FILLED (-1) is passed as the lineType argument
        # here, which is unusual -- confirm the intended line type.
        cv.rectangle(frame, (vMouseRectTopLeftX, vMouseRectTopLeftY), (vMouseRectBtmRightX, vMouseRectBtmRightY), (0, 255, 0), 2, cv.FILLED)
        frame = cv.resize(frame, (videoW, videoH))
        cv.imshow('Virtual Hand Mouse', frame)
        # Keep the preview window on top of other windows.
        cv.setWindowProperty('Virtual Hand Mouse', cv.WND_PROP_TOPMOST, 1)
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
    video.release()
    cv.destroyAllWindows()

# Standard script guard: run the demo only when executed directly.
if __name__ == "__main__":
    main()

operation result:

Video reference to my B station:

Python Opencv practice - mediapipe gesture mouse_bilibili_bilibili

Guess you like

Origin blog.csdn.net/vivo01/article/details/135351086