'''
Main program
@Author: David Vu

To run face recognition from the webcam (default):
    main.py
To enroll a new user:
    main.py --mode "input"
'''
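# Example invocations, assuming dependencies are installed and the file is run
# from the repository root (the flag name matches the parser defined below):
#
#     python main.py                   # same as --mode "camera": live recognition
#     python main.py --mode "input"    # capture and save a new user's face data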
import cv2
from align_custom import AlignCustom
from face_feature import FaceFeature
from mtcnn_detect import MTCNNDetect
from tf_graph import FaceRecGraph
import argparse
import sys
import json
import time
import numpy as np

TIMEOUT = 10  # seconds


def main(args):
    mode = args.mode
    if mode == "camera":
        camera_recog()
    elif mode == "input":
        create_manual_data()
    else:
        raise ValueError("Unimplemented mode")


'''
Description:
Images from video capture -> detect face regions -> crop those faces and align them
-> each cropped face is categorized into one of 3 position types: Center, Left, Right
-> extract 128D feature vectors
-> search for matching subjects in the dataset, keyed by face position type
-> the preexisting 128D vector with the smallest distance to the 128D vector of the
   face on screen is the most likely match
   (distance threshold is 0.6, percentage threshold is 70%)
'''
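# A minimal sketch of the acceptance rule described above. The numbers are
# hypothetical; only the thresholds (0.6 and 70%) come from this file:
#
#     smallest = 0.45                                # best Euclidean distance found
#     percentage = min(100, 100 * 0.6 / smallest)    # = 100 -> accepted, since > 70
#     # with smallest = 1.2 -> percentage = 50 -> labeled "Unknown"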
def camera_recog():
    print("[INFO] camera sensor warming up...")
    vs = cv2.VideoCapture(0)  # get input from the webcam
    detect_time = time.time()
    while True:
        _, frame = vs.read()
        # a ROI could certainly be added here; it is left out to keep the demo simple
        rects, landmarks = face_detect.detect_face(frame, 80)  # min face size is set to 80x80
        aligns = []
        positions = []
        for (i, rect) in enumerate(rects):
            aligned_face, face_pos = aligner.align(160, frame, landmarks[:, i])
            if len(aligned_face) == 160 and len(aligned_face[0]) == 160:
                aligns.append(aligned_face)
                positions.append(face_pos)
            else:
                print("Align face failed")  # log
        if len(aligns) > 0:
            features_arr = extract_feature.get_features(aligns)
            recog_data = findPeople(features_arr, positions)
            for (i, rect) in enumerate(rects):
                cv2.rectangle(frame, (rect[0], rect[1]), (rect[2], rect[3]), (255, 0, 0))  # draw bounding box for the face
                cv2.putText(frame, recog_data[i][0] + " - " + str(recog_data[i][1]) + "%",
                            (rect[0], rect[1]), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 1, cv2.LINE_AA)
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
    vs.release()
    cv2.destroyAllWindows()


'''
facerec_128D.txt data structure:
{
    "Person ID": {
        "Center": [[128D vector]],
        "Left": [[128D vector]],
        "Right": [[128D vector]]
    }
}
This function does a simple linear search for the 128D vector
with the minimum distance to the 128D vector of the face on screen.
'''
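# An illustrative, hypothetical entry in that file, truncated for readability
# (real vectors hold 128 floats per position type):
#
#     {"Alice": {"Center": [[0.01, -0.12, 0.07]],
#                "Left":   [[0.03, -0.10, 0.05]],
#                "Right":  [[0.02, -0.09, 0.06]]}}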
def findPeople(features_arr, positions, thres=0.6, percent_thres=70):
    '''
    :param features_arr: a list of 128D features of all faces on screen
    :param positions: a list of face position types of all faces on screen
    :param thres: distance threshold
    :param percent_thres: minimum confidence percentage for a match to be accepted
    :return: a list of (person name, percentage) tuples, one per face
    '''
    with open('./facerec_128D.txt', 'r') as f:
        data_set = json.loads(f.read())
    returnRes = []
    for (i, features_128D) in enumerate(features_arr):
        result = "Unknown"
        smallest = sys.maxsize
        for person in data_set.keys():
            person_data = data_set[person][positions[i]]
            for data in person_data:
                distance = np.sqrt(np.sum(np.square(data - features_128D)))
                if distance < smallest:
                    smallest = distance
                    result = person
        percentage = min(100, 100 * thres / smallest)
        if percentage <= percent_thres:
            result = "Unknown"
        returnRes.append((result, percentage))
    return returnRes


'''
Description:
The user inputs his/her name or ID -> images from video capture -> detect the face
-> crop the face and align it
-> the face is categorized into one of 3 position types: Center, Left, Right
-> extract 128D feature vectors
-> append each newly extracted 128D vector to its corresponding position type (Center, Left, Right)
-> press 'q' to stop capturing
-> find the center (the mean) of the 128D vectors in each category (np.mean(...))
-> save
'''
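# A small sketch of the averaging step described above, assuming at least one
# face was captured for the position (shapes are those implied by this file):
#
#     vecs = extract_feature.get_features(person_imgs["Center"])  # N vectors of 128 floats
#     center = np.mean(vecs, axis=0).tolist()                     # one mean 128D vector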
def create_manual_data():
    vs = cv2.VideoCapture(0)  # get input from the webcam
    print("Please input new user ID:")
    new_name = input()
    with open('./facerec_128D.txt', 'r') as f:
        data_set = json.loads(f.read())
    person_imgs = {"Left": [], "Right": [], "Center": []}
    person_features = {"Left": [], "Right": [], "Center": []}
    print("Please start turning slowly. Press 'q' to save and add this new user to the dataset")
    while True:
        _, frame = vs.read()
        rects, landmarks = face_detect.detect_face(frame, 80)  # min face size is set to 80x80
        for (i, rect) in enumerate(rects):
            aligned_frame, pos = aligner.align(160, frame, landmarks[:, i])
            if len(aligned_frame) == 160 and len(aligned_frame[0]) == 160:
                person_imgs[pos].append(aligned_frame)
                cv2.imshow("Captured face", aligned_frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
    vs.release()
    cv2.destroyAllWindows()
    # there are some edge cases here (e.g. a position with no captures); kept simple for the demo
    for pos in person_imgs:
        person_features[pos] = [np.mean(extract_feature.get_features(person_imgs[pos]), axis=0).tolist()]
    data_set[new_name] = person_features
    with open('./facerec_128D.txt', 'w') as f:
        f.write(json.dumps(data_set))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str, default="camera",
                        help='"camera" to run recognition, "input" to enroll a new user')
    args = parser.parse_args(sys.argv[1:])
    FRGraph = FaceRecGraph()
    MTCNNGraph = FaceRecGraph()
    aligner = AlignCustom()
    extract_feature = FaceFeature(FRGraph)
    face_detect = MTCNNDetect(MTCNNGraph, scale_factor=2)  # scale_factor rescales the image for faster detection
    main(args)