diff --git a/README.md b/README.md index 9901905..551182e 100644 --- a/README.md +++ b/README.md @@ -350,7 +350,41 @@ When deploying the code on headless environments like [Google Colab](https://git To overcome these errors just use the --novisualization switch to disable visualization windows -## Higher accuracy using OpenPose JSON files + + + +## Higher accuracy with relatively little work using Mediapipe Holistic +------------------------------------------------------------------ +To convert video files into input for MocapNET in a *relatively* easy way, I have included a Python converter that uses mediapipe/opencv to create the CSV files that MocapNET needs. + +![MediaPipe Video 2 CSV utility](https://raw.githubusercontent.com/FORTH-ModelBasedTracker/MocapNET/master/doc/mediapipeConverter.jpg) + +You can get mediapipe using the [setup.sh](https://github.com/FORTH-ModelBasedTracker/MocapNET/blob/master/src/python/mediapipe/setup.sh) script or by executing: + +``` +pip install --user mediapipe opencv-python +``` + +The converter utility receives an input video stream and creates an output directory with all image frames and the CSV file with 2D joint estimations. + +``` +python3 src/python/mediapipe/mediapipeHolistic2CSV.py --from shuffle.webm -o tester +``` + +After the conversion finishes you can process the generated "dataset" using MocapNET2CSV: + +``` +./MocapNET2CSV --from tester-mpdata/2dJoints_mediapipe.csv --show 3 +``` + + + + + + + + +## Higher accuracy with more work deploying Caffe/OpenPose and using OpenPose JSON files ------------------------------------------------------------------ In order to get higher accuracy output compared to the live demo which is more performance oriented, you can use OpenPose and the 2D output JSON files produced by it. The convertOpenPoseJSONToCSV application can convert them to a BVH file. 
After downloading [OpenPose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) and building it you can use it to acquire 2D JSON body pose data by running : diff --git a/src/python/mediapipe/RHD.py b/src/python/mediapipe/RHD.py old mode 100644 new mode 100755 index bbb31c9..f521e6c --- a/src/python/mediapipe/RHD.py +++ b/src/python/mediapipe/RHD.py @@ -1,3 +1,5 @@ +#!/usr/bin/python3 + import os import sys import cv2 diff --git a/src/python/mediapipe/handsWebcam.py b/src/python/mediapipe/handsWebcam.py old mode 100644 new mode 100755 index 5613912..4589174 --- a/src/python/mediapipe/handsWebcam.py +++ b/src/python/mediapipe/handsWebcam.py @@ -1,3 +1,4 @@ +#!/usr/bin/python3 import cv2 import mediapipe as mp import time diff --git a/src/python/mediapipe/holisticPartNames.py b/src/python/mediapipe/holisticPartNames.py index 10a91cf..7611abb 100644 --- a/src/python/mediapipe/holisticPartNames.py +++ b/src/python/mediapipe/holisticPartNames.py @@ -1,4 +1,62 @@ + +def processPoseLandmarks(mnetPose2D,correctLabels,holisticPose): + itemNumber=0 + if holisticPose is not None: + for item in holisticPose.landmark: + thisLandmarkName = correctLabels[itemNumber].lower() + if (thisLandmarkName!=''): + labelX = "2DX_"+thisLandmarkName + mnetPose2D[labelX]=1.0-item.x #Do Flip X + labelY = "2DY_"+thisLandmarkName + mnetPose2D[labelY]=item.y + labelV = "visible_"+thisLandmarkName + mnetPose2D[labelV]=item.visibility + #print("Joint ",thisLandmarkName,"(",itemNumber,") x=",item.x," y=",item.y," z=",item.z) + itemNumber = itemNumber +1 + return mnetPose2D + + +def guessLandmarks(mnetPose2D): + if mnetPose2D is not None: + if ("2DX_rshoulder" in mnetPose2D) and ("2DY_rshoulder" in mnetPose2D) and ("visible_rshoulder" in mnetPose2D) and ("2DX_lshoulder" in mnetPose2D) and ("2DY_lshoulder" in mnetPose2D) and ("visible_lshoulder" in mnetPose2D) : + #--------------------------------------------- + rX = float(mnetPose2D["2DX_rshoulder"]) + rY = float(mnetPose2D["2DY_rshoulder"]) + 
rV = float(mnetPose2D["visible_rshoulder"]) + #--------------------------------------------- + lX = float(mnetPose2D["2DX_lshoulder"]) + lY = float(mnetPose2D["2DY_lshoulder"]) + lV = float(mnetPose2D["visible_lshoulder"]) + #--------------------------------------------- + if (rV>0.0) and (lV>0.0): + mnetPose2D["2DX_neck"]=(rX+lX)/2 + mnetPose2D["2DY_neck"]=(rY+lY)/2 + mnetPose2D["visible_neck"]=(rV+lV)/2 + #--------------------------------------------------- + + if ("2DX_rhip" in mnetPose2D) and ("2DY_rhip" in mnetPose2D) and ("visible_rhip" in mnetPose2D) and ("2DX_lhip" in mnetPose2D) and ("2DY_lhip" in mnetPose2D) and ("visible_lhip" in mnetPose2D) : + #--------------------------------------------- + rX = float(mnetPose2D["2DX_rhip"]) + rY = float(mnetPose2D["2DY_rhip"]) + rV = float(mnetPose2D["visible_rhip"]) + #--------------------------------------------- + lX = float(mnetPose2D["2DX_lhip"]) + lY = float(mnetPose2D["2DY_lhip"]) + lV = float(mnetPose2D["visible_lhip"]) + #--------------------------------------------- + if (rV>0.0) and (lV>0.0): + mnetPose2D["2DX_hip"]=(rX+lX)/2 + mnetPose2D["2DY_hip"]=(rY+lY)/2 + mnetPose2D["visible_hip"]=(rV+lV)/2 + #--------------------------------------------------- + return mnetPose2D + + + + + + #MocapNET list of expected inputs 
#frameNumber,skeletonID,totalSkeletons,2DX_head,2DY_head,visible_head,2DX_neck,2DY_neck,visible_neck,2DX_rshoulder,2DY_rshoulder,visible_rshoulder,2DX_relbow,2DY_relbow,visible_relbow,2DX_rhand,2DY_rhand,visible_rhand,2DX_lshoulder,2DY_lshoulder,visible_lshoulder,2DX_lelbow,2DY_lelbow,visible_lelbow,2DX_lhand,2DY_lhand,visible_lhand,2DX_hip,2DY_hip,visible_hip,2DX_rhip,2DY_rhip,visible_rhip,2DX_rknee,2DY_rknee,visible_rknee,2DX_rfoot,2DY_rfoot,visible_rfoot,2DX_lhip,2DY_lhip,visible_lhip,2DX_lknee,2DY_lknee,visible_lknee,2DX_lfoot,2DY_lfoot,visible_lfoot,2DX_endsite_eye.r,2DY_endsite_eye.r,visible_endsite_eye.r,2DX_endsite_eye.l,2DY_endsite_eye.l,visible_endsite_eye.l,2DX_rear,2DY_rear,visible_rear,2DX_lear,2DY_lear,visible_lear,2DX_endsite_toe1-2.l,2DY_endsite_toe1-2.l,visible_endsite_toe1-2.l,2DX_endsite_toe5-3.l,2DY_endsite_toe5-3.l,visible_endsite_toe5-3.l,2DX_lheel,2DY_lheel,visible_lheel,2DX_endsite_toe1-2.r,2DY_endsite_toe1-2.r,visible_endsite_toe1-2.r,2DX_endsite_toe5-3.r,2DY_endsite_toe5-3.r,visible_endsite_toe5-3.r,2DX_rheel,2DY_rheel,visible_rheel,2DX_bkg,2DY_bkg,visible_bkg, diff --git a/src/python/mediapipe/holisticWebcam.py b/src/python/mediapipe/holisticWebcam.py old mode 100644 new mode 100755 index 9387de8..4fb1008 --- a/src/python/mediapipe/holisticWebcam.py +++ b/src/python/mediapipe/holisticWebcam.py @@ -1,3 +1,5 @@ +#!/usr/bin/python3 + import cv2 import mediapipe as mp import time @@ -33,14 +35,17 @@ end = time.time() # Time elapsed seconds = end - start - #print ("Time taken : {0} seconds".format(seconds)) # Calculate frames per second fps = 1 / seconds - print("{0} fps".format(fps)) - + print("\r Framerate : ",round(fps,2)," fps \r", end="", flush=True) annotated_image = image.copy() - mp_drawing.draw_landmarks(annotated_image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS) + #Compensate for name mediapipe change.. 
+ try: + mp_drawing.draw_landmarks(annotated_image, results.face_landmarks , mp_holistic.FACEMESH_TESSELATION) #This used to be called FACE_CONNECTIONS + except: + mp_drawing.draw_landmarks(annotated_image, results.face_landmarks , mp_holistic.FACE_CONNECTIONS) #This used to be called FACE_CONNECTIONS + mp_drawing.draw_landmarks(annotated_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) mp_drawing.draw_landmarks(annotated_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Use mp_holistic.UPPER_BODY_POSE_CONNECTIONS for drawing below when diff --git a/src/python/mediapipe/mediapipeHolistic2CSV.py b/src/python/mediapipe/mediapipeHolistic2CSV.py index ca79ce6..8ed1cbc 100755 --- a/src/python/mediapipe/mediapipeHolistic2CSV.py +++ b/src/python/mediapipe/mediapipeHolistic2CSV.py @@ -11,9 +11,8 @@ mp_drawing = mp.solutions.drawing_utils mp_holistic = mp.solutions.holistic - #I have added a seperate list with the joints -from holisticPartNames import getHolisticBodyNameList, getHolisticFaceNameList +from holisticPartNames import getHolisticBodyNameList, getHolisticFaceNameList, processPoseLandmarks, guessLandmarks MEDIAPIPE_POSE_LANDMARK_NAMES=getHolisticBodyNameList() MEDIAPIPE_FACE_LANDMARK_NAMES=getHolisticFaceNameList() @@ -102,60 +101,6 @@ def drawListNumbers(image,lst): -def processPoseLandmarks(mnetPose2D,correctLabels,holisticPose): - itemNumber=0 - if holisticPose is not None: - for item in holisticPose.landmark: - thisLandmarkName = correctLabels[itemNumber].lower() - if (thisLandmarkName!=''): - labelX = "2DX_"+thisLandmarkName - mnetPose2D[labelX]=1.0-item.x #Do Flip X - labelY = "2DY_"+thisLandmarkName - mnetPose2D[labelY]=item.y - labelV = "visible_"+thisLandmarkName - mnetPose2D[labelV]=item.visibility - #print("Joint ",thisLandmarkName,"(",itemNumber,") x=",item.x," y=",item.y," z=",item.z) - itemNumber = itemNumber +1 - return mnetPose2D - - -def guessLandmarks(mnetPose2D): - if mnetPose2D is not None: - if 
("2DX_rshoulder" in mnetPose2D) and ("2DY_rshoulder" in mnetPose2D) and ("visible_rshoulder" in mnetPose2D) and ("2DX_lshoulder" in mnetPose2D) and ("2DY_lshoulder" in mnetPose2D) and ("visible_lshoulder" in mnetPose2D) : - #--------------------------------------------- - rX = float(mnetPose2D["2DX_rshoulder"]) - rY = float(mnetPose2D["2DY_rshoulder"]) - rV = float(mnetPose2D["visible_rshoulder"]) - #--------------------------------------------- - lX = float(mnetPose2D["2DX_lshoulder"]) - lY = float(mnetPose2D["2DY_lshoulder"]) - lV = float(mnetPose2D["visible_lshoulder"]) - #--------------------------------------------- - if (rV>0.0) and (lV>0.0): - mnetPose2D["2DX_neck"]=(rX+lX)/2 - mnetPose2D["2DY_neck"]=(rY+lY)/2 - mnetPose2D["visible_neck"]=(rV+lV)/2 - #--------------------------------------------------- - - if ("2DX_rhip" in mnetPose2D) and ("2DY_rhip" in mnetPose2D) and ("visible_rhip" in mnetPose2D) and ("2DX_lhip" in mnetPose2D) and ("2DY_lhip" in mnetPose2D) and ("visible_lhip" in mnetPose2D) : - #--------------------------------------------- - rX = float(mnetPose2D["2DX_rhip"]) - rY = float(mnetPose2D["2DY_rhip"]) - rV = float(mnetPose2D["visible_rhip"]) - #--------------------------------------------- - lX = float(mnetPose2D["2DX_lhip"]) - lY = float(mnetPose2D["2DY_lhip"]) - lV = float(mnetPose2D["visible_lhip"]) - #--------------------------------------------- - if (rV>0.0) and (lV>0.0): - mnetPose2D["2DX_hip"]=(rX+lX)/2 - mnetPose2D["2DY_hip"]=(rY+lY)/2 - mnetPose2D["visible_hip"]=(rV+lV)/2 - #--------------------------------------------------- - return mnetPose2D - - - def convertStreamToMocapNETCSV(): videoFilePath ="shuffle.webm" outputDatasetPath="frames/shuffle.webm" @@ -226,7 +171,12 @@ def convertStreamToMocapNETCSV(): print("\r Frame : ",frameNumber," | ",round(fps,2)," fps \r", end="", flush=True) annotated_image = image.copy() - mp_drawing.draw_landmarks(annotated_image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS) + #Compensate 
for name mediapipe change.. + try: + mp_drawing.draw_landmarks(annotated_image, results.face_landmarks , mp_holistic.FACEMESH_TESSELATION) #This used to be called FACE_CONNECTIONS + except: + mp_drawing.draw_landmarks(annotated_image, results.face_landmarks , mp_holistic.FACE_CONNECTIONS) #This used to be called FACE_CONNECTIONS + mp_drawing.draw_landmarks(annotated_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) mp_drawing.draw_landmarks(annotated_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Use mp_holistic.UPPER_BODY_POSE_CONNECTIONS for drawing below when upper_body_only is set to True. diff --git a/src/python/mediapipe/setup.sh b/src/python/mediapipe/setup.sh index 937dcb3..e1e7c0b 100755 --- a/src/python/mediapipe/setup.sh +++ b/src/python/mediapipe/setup.sh @@ -1,4 +1,12 @@ python3 -m venv mp_env source mp_env/bin/activate -pip install mediapipe opencv-python + +#For RPI4 +#sudo apt install libxcb-shm0 libcdio-paranoia-dev libsdl2-2.0-0 libxv1 libtheora0 libva-drm2 libva-x11-2 libvdpau1 libharfbuzz0b libbluray2 libatlas-base-dev libhdf5-103 libgtk-3-0 libdc1394-22 libopenexr23 +#pip install mediapipe-rpi4 opencv-python + +#For Regular x86_64 +python3 -m pip install mediapipe opencv-python python3 holisticWebcam.py + +