updated mediapipe dumper and added it on README
AmmarkoV committed Jun 1, 2022
1 parent 06f9d43 commit 8b114f2
Showing 7 changed files with 121 additions and 63 deletions.
36 changes: 35 additions & 1 deletion README.md
@@ -350,7 +350,41 @@ When deploying the code on headless environments like [Google Colab](https://git
To overcome these errors, just use the --novisualization switch to disable visualization windows.


## Higher accuracy using OpenPose JSON files



## Higher accuracy with relatively little work using Mediapipe Holistic
------------------------------------------------------------------
To convert video files into input ready for MocapNET in a *relatively* easy way, I have included a Python converter that uses mediapipe/opencv to create the CSV files MocapNET needs.

![MediaPipe Video 2 CSV utility](https://raw.githubusercontent.com/FORTH-ModelBasedTracker/MocapNET/master/doc/mediapipeConverter.jpg)

You can get mediapipe using the [setup.sh](https://github.com/FORTH-ModelBasedTracker/MocapNET/blob/master/src/python/mediapipe/setup.sh) script or by executing:

```
pip install --user mediapipe opencv-python
```
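
If you are unsure whether the installation worked, a quick import check (just a sanity test, not a script shipped with the repository) should print the OpenCV version without errors:

```
python3 -c "import mediapipe, cv2; print('OpenCV',cv2.__version__,'and mediapipe imported OK')"
```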

The converter utility receives an input video stream and creates an output directory containing all image frames and a CSV file with the 2D joint estimations.

```
python3 src/python/mediapipe/mediapipeHolistic2CSV.py --from shuffle.webm -o tester
```

After the conversion finishes, you can process the generated "dataset" using MocapNET2CSV:

```
./MocapNET2CSV --from tester-mpdata/2dJoints_mediapipe.csv --show 3
```
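
The produced CSV follows the MocapNET input convention (see the joint list in holisticPartNames.py): one `2DX_`, `2DY_` and `visible_` column per joint, plus frame bookkeeping columns. If you want to peek at it yourself, a minimal sketch, assuming the tester-mpdata output of the commands above, could look like this:

```
#!/usr/bin/python3
#Minimal sketch: inspect the converter output, assuming the header listed in holisticPartNames.py
import csv

with open("tester-mpdata/2dJoints_mediapipe.csv") as f:
    for row in csv.DictReader(f):
        #Each joint carries normalized X/Y coordinates plus a visibility score
        print(row["frameNumber"], row["2DX_head"], row["2DY_head"], row["visible_head"])
```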

## Higher accuracy with more work deploying Caffe/OpenPose and using OpenPose JSON files
------------------------------------------------------------------

To get higher accuracy output than the live demo, which is more performance oriented, you can use OpenPose and the 2D JSON files it produces. The convertOpenPoseJSONToCSV application can convert them to a BVH file. After downloading [OpenPose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) and building it, you can use it to acquire 2D JSON body pose data by running:
2 changes: 2 additions & 0 deletions src/python/mediapipe/RHD.py
100644 → 100755
@@ -1,3 +1,5 @@
#!/usr/bin/python3

import os
import sys
import cv2
1 change: 1 addition & 0 deletions src/python/mediapipe/handsWebcam.py
100644 → 100755
@@ -1,3 +1,4 @@
#!/usr/bin/python3
import cv2
import mediapipe as mp
import time
58 changes: 58 additions & 0 deletions src/python/mediapipe/holisticPartNames.py
@@ -1,4 +1,62 @@


def processPoseLandmarks(mnetPose2D,correctLabels,holisticPose):
    #Copy each MediaPipe landmark into the mnetPose2D dictionary using MocapNET label conventions
    itemNumber=0
    if holisticPose is not None:
        for item in holisticPose.landmark:
            thisLandmarkName = correctLabels[itemNumber].lower()
            if (thisLandmarkName!=''):
                labelX = "2DX_"+thisLandmarkName
                mnetPose2D[labelX]=1.0-item.x #Do Flip X
                labelY = "2DY_"+thisLandmarkName
                mnetPose2D[labelY]=item.y
                labelV = "visible_"+thisLandmarkName
                mnetPose2D[labelV]=item.visibility
                #print("Joint ",thisLandmarkName,"(",itemNumber,") x=",item.x," y=",item.y," z=",item.z)
            itemNumber = itemNumber +1
    return mnetPose2D


def guessLandmarks(mnetPose2D):
    #Synthesize the neck and hip joints (expected by MocapNET) as shoulder and hip midpoints
    if mnetPose2D is not None:
        if ("2DX_rshoulder" in mnetPose2D) and ("2DY_rshoulder" in mnetPose2D) and ("visible_rshoulder" in mnetPose2D) and ("2DX_lshoulder" in mnetPose2D) and ("2DY_lshoulder" in mnetPose2D) and ("visible_lshoulder" in mnetPose2D) :
            #---------------------------------------------
            rX = float(mnetPose2D["2DX_rshoulder"])
            rY = float(mnetPose2D["2DY_rshoulder"])
            rV = float(mnetPose2D["visible_rshoulder"])
            #---------------------------------------------
            lX = float(mnetPose2D["2DX_lshoulder"])
            lY = float(mnetPose2D["2DY_lshoulder"])
            lV = float(mnetPose2D["visible_lshoulder"])
            #---------------------------------------------
            if (rV>0.0) and (lV>0.0):
                mnetPose2D["2DX_neck"]=(rX+lX)/2
                mnetPose2D["2DY_neck"]=(rY+lY)/2
                mnetPose2D["visible_neck"]=(rV+lV)/2
            #---------------------------------------------------

        if ("2DX_rhip" in mnetPose2D) and ("2DY_rhip" in mnetPose2D) and ("visible_rhip" in mnetPose2D) and ("2DX_lhip" in mnetPose2D) and ("2DY_lhip" in mnetPose2D) and ("visible_lhip" in mnetPose2D) :
            #---------------------------------------------
            rX = float(mnetPose2D["2DX_rhip"])
            rY = float(mnetPose2D["2DY_rhip"])
            rV = float(mnetPose2D["visible_rhip"])
            #---------------------------------------------
            lX = float(mnetPose2D["2DX_lhip"])
            lY = float(mnetPose2D["2DY_lhip"])
            lV = float(mnetPose2D["visible_lhip"])
            #---------------------------------------------
            if (rV>0.0) and (lV>0.0):
                mnetPose2D["2DX_hip"]=(rX+lX)/2
                mnetPose2D["2DY_hip"]=(rY+lY)/2
                mnetPose2D["visible_hip"]=(rV+lV)/2
            #---------------------------------------------------
    return mnetPose2D






#MocapNET list of expected inputs
#frameNumber,skeletonID,totalSkeletons,2DX_head,2DY_head,visible_head,2DX_neck,2DY_neck,visible_neck,2DX_rshoulder,2DY_rshoulder,visible_rshoulder,2DX_relbow,2DY_relbow,visible_relbow,2DX_rhand,2DY_rhand,visible_rhand,2DX_lshoulder,2DY_lshoulder,visible_lshoulder,2DX_lelbow,2DY_lelbow,visible_lelbow,2DX_lhand,2DY_lhand,visible_lhand,2DX_hip,2DY_hip,visible_hip,2DX_rhip,2DY_rhip,visible_rhip,2DX_rknee,2DY_rknee,visible_rknee,2DX_rfoot,2DY_rfoot,visible_rfoot,2DX_lhip,2DY_lhip,visible_lhip,2DX_lknee,2DY_lknee,visible_lknee,2DX_lfoot,2DY_lfoot,visible_lfoot,2DX_endsite_eye.r,2DY_endsite_eye.r,visible_endsite_eye.r,2DX_endsite_eye.l,2DY_endsite_eye.l,visible_endsite_eye.l,2DX_rear,2DY_rear,visible_rear,2DX_lear,2DY_lear,visible_lear,2DX_endsite_toe1-2.l,2DY_endsite_toe1-2.l,visible_endsite_toe1-2.l,2DX_endsite_toe5-3.l,2DY_endsite_toe5-3.l,visible_endsite_toe5-3.l,2DX_lheel,2DY_lheel,visible_lheel,2DX_endsite_toe1-2.r,2DY_endsite_toe1-2.r,visible_endsite_toe1-2.r,2DX_endsite_toe5-3.r,2DY_endsite_toe5-3.r,visible_endsite_toe5-3.r,2DX_rheel,2DY_rheel,visible_rheel,2DX_bkg,2DY_bkg,visible_bkg,

13 changes: 9 additions & 4 deletions src/python/mediapipe/holisticWebcam.py
100644 → 100755
@@ -1,3 +1,5 @@
#!/usr/bin/python3

import cv2
import mediapipe as mp
import time
@@ -33,14 +35,17 @@
    end = time.time()
    # Time elapsed
    seconds = end - start
    #print ("Time taken : {0} seconds".format(seconds))
    # Calculate frames per second
    fps = 1 / seconds
    print("{0} fps".format(fps))

    print("\r Framerate : ",round(fps,2)," fps \r", end="", flush=True)

    annotated_image = image.copy()
    mp_drawing.draw_landmarks(annotated_image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
    #Compensate for mediapipe name change..
    try:
        mp_drawing.draw_landmarks(annotated_image, results.face_landmarks , mp_holistic.FACEMESH_TESSELATION) #This used to be called FACE_CONNECTIONS
    except:
        mp_drawing.draw_landmarks(annotated_image, results.face_landmarks , mp_holistic.FACE_CONNECTIONS) #Fallback for older mediapipe versions

    mp_drawing.draw_landmarks(annotated_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(annotated_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    # Use mp_holistic.UPPER_BODY_POSE_CONNECTIONS for drawing below when
64 changes: 7 additions & 57 deletions src/python/mediapipe/mediapipeHolistic2CSV.py
@@ -11,9 +11,8 @@
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic


#I have added a separate list with the joints
from holisticPartNames import getHolisticBodyNameList, getHolisticFaceNameList
from holisticPartNames import getHolisticBodyNameList, getHolisticFaceNameList, processPoseLandmarks, guessLandmarks
MEDIAPIPE_POSE_LANDMARK_NAMES=getHolisticBodyNameList()
MEDIAPIPE_FACE_LANDMARK_NAMES=getHolisticFaceNameList()

@@ -102,60 +101,6 @@ def drawListNumbers(image,lst):



def processPoseLandmarks(mnetPose2D,correctLabels,holisticPose):
    itemNumber=0
    if holisticPose is not None:
        for item in holisticPose.landmark:
            thisLandmarkName = correctLabels[itemNumber].lower()
            if (thisLandmarkName!=''):
                labelX = "2DX_"+thisLandmarkName
                mnetPose2D[labelX]=1.0-item.x #Do Flip X
                labelY = "2DY_"+thisLandmarkName
                mnetPose2D[labelY]=item.y
                labelV = "visible_"+thisLandmarkName
                mnetPose2D[labelV]=item.visibility
                #print("Joint ",thisLandmarkName,"(",itemNumber,") x=",item.x," y=",item.y," z=",item.z)
            itemNumber = itemNumber +1
    return mnetPose2D


def guessLandmarks(mnetPose2D):
    if mnetPose2D is not None:
        if ("2DX_rshoulder" in mnetPose2D) and ("2DY_rshoulder" in mnetPose2D) and ("visible_rshoulder" in mnetPose2D) and ("2DX_lshoulder" in mnetPose2D) and ("2DY_lshoulder" in mnetPose2D) and ("visible_lshoulder" in mnetPose2D) :
            #---------------------------------------------
            rX = float(mnetPose2D["2DX_rshoulder"])
            rY = float(mnetPose2D["2DY_rshoulder"])
            rV = float(mnetPose2D["visible_rshoulder"])
            #---------------------------------------------
            lX = float(mnetPose2D["2DX_lshoulder"])
            lY = float(mnetPose2D["2DY_lshoulder"])
            lV = float(mnetPose2D["visible_lshoulder"])
            #---------------------------------------------
            if (rV>0.0) and (lV>0.0):
                mnetPose2D["2DX_neck"]=(rX+lX)/2
                mnetPose2D["2DY_neck"]=(rY+lY)/2
                mnetPose2D["visible_neck"]=(rV+lV)/2
            #---------------------------------------------------

        if ("2DX_rhip" in mnetPose2D) and ("2DY_rhip" in mnetPose2D) and ("visible_rhip" in mnetPose2D) and ("2DX_lhip" in mnetPose2D) and ("2DY_lhip" in mnetPose2D) and ("visible_lhip" in mnetPose2D) :
            #---------------------------------------------
            rX = float(mnetPose2D["2DX_rhip"])
            rY = float(mnetPose2D["2DY_rhip"])
            rV = float(mnetPose2D["visible_rhip"])
            #---------------------------------------------
            lX = float(mnetPose2D["2DX_lhip"])
            lY = float(mnetPose2D["2DY_lhip"])
            lV = float(mnetPose2D["visible_lhip"])
            #---------------------------------------------
            if (rV>0.0) and (lV>0.0):
                mnetPose2D["2DX_hip"]=(rX+lX)/2
                mnetPose2D["2DY_hip"]=(rY+lY)/2
                mnetPose2D["visible_hip"]=(rV+lV)/2
            #---------------------------------------------------
    return mnetPose2D



def convertStreamToMocapNETCSV():
    videoFilePath ="shuffle.webm"
    outputDatasetPath="frames/shuffle.webm"
@@ -226,7 +171,12 @@ def convertStreamToMocapNETCSV():
print("\r Frame : ",frameNumber," | ",round(fps,2)," fps \r", end="", flush=True)

annotated_image = image.copy()
mp_drawing.draw_landmarks(annotated_image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
#Compensate for name mediapipe change..
try:
mp_drawing.draw_landmarks(annotated_image, results.face_landmarks , mp_holistic.FACEMESH_TESSELATION) #This used to be called FACE_CONNECTIONS
except:
mp_drawing.draw_landmarks(annotated_image, results.face_landmarks , mp_holistic.FACE_CONNECTIONS) #This used to be called FACE_CONNECTIONS

mp_drawing.draw_landmarks(annotated_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
mp_drawing.draw_landmarks(annotated_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
# Use mp_holistic.UPPER_BODY_POSE_CONNECTIONS for drawing below when upper_body_only is set to True.
10 changes: 9 additions & 1 deletion src/python/mediapipe/setup.sh
@@ -1,4 +1,12 @@
python3 -m venv mp_env
source mp_env/bin/activate
pip install mediapipe opencv-python

#For RPI4
#sudo apt install libxcb-shm0 libcdio-paranoia-dev libsdl2-2.0-0 libxv1 libtheora0 libva-drm2 libva-x11-2 libvdpau1 libharfbuzz0b libbluray2 libatlas-base-dev libhdf5-103 libgtk-3-0 libdc1394-22 libopenexr23
#pip install mediapipe-rpi4 opencv-python

#For Regular x86_64
python3 -m pip install mediapipe opencv-python
python3 holisticWebcam.py

