forked from sthanhng/yoloface
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request sthanhng#1 from sthanhng/yoloface-gpu
face detection using YOLOv3 with gpu
- Loading branch information
Showing
9 changed files
with
451 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,72 @@ | ||
# YOLOFace | ||
# Face detection using YOLOv3 | ||
|
||
Face detection using YOLOv3 | ||
# Deep learning based Face detection using the YOLOv3 algorithm | ||
|
||
|
||
## Getting started | ||
|
||
YOLOv3 (You Only Look Once) is a state-of-the-art, real-time object detection algorithm. The published model recognizes 80 different objects in images and videos. For more details, you can refer to this [paper](https://pjreddie.com/media/files/papers/YOLOv3.pdf). | ||
|
||
## YOLOv3's architecture | ||
|
||
![Imgur](assets/yolo-architecture.png) | ||
|
||
Credit: [Ayoosh Kathuria](https://towardsdatascience.com/yolo-v3-object-detection-53fb7d3bfe6b) | ||
|
||
## OpenCV Deep Neural Networks (dnn module) | ||
|
||
OpenCV `dnn` module supports running inference on pre-trained deep learning models from popular frameworks such as TensorFlow, Torch, Darknet and Caffe. | ||
|
||
## Prerequisites | ||
|
||
* tensorflow | ||
* opencv-python | ||
* opencv-contrib-python | ||
* numpy | ||
|
||
Install the required packages by running the following command: | ||
|
||
```bash | ||
$ pip install -r requirements.txt | ||
``` | ||
|
||
**Note:** This repository works on Python 3.x. Using a Python virtual environment is highly recommended. | ||
|
||
## Usage | ||
|
||
* Clone this repository | ||
```bash | ||
$ git clone https://github.com/sthanhng/yoloface | ||
``` | ||
|
||
* For face detection, you should download the pre-trained YOLOv3 weights file, which was trained on the [WIDER FACE: A Face Detection Benchmark](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/index.html) dataset, from this [link](https://drive.google.com/file/d/1xYasjU52whXMLT5MtF7RCPQkV66993oR/view?usp=sharing) and place it in the `model-weights/` directory. | ||
|
||
* Run the following command: | ||
|
||
>**image input** | ||
```bash | ||
$ python yoloface.py --images samples/outside_000001.jpg --output-dir outputs/ | ||
``` | ||
|
||
>**video input** | ||
```bash | ||
$ python yoloface.py --video samples/subway.mp4 --output-dir outputs/ | ||
``` | ||
|
||
>**webcam** | ||
```bash | ||
$ python yoloface.py --src 1 --output-dir outputs/ | ||
``` | ||
|
||
## Sample outputs | ||
|
||
![Imgur](outputs/outside_000001_yoloface.jpg) | ||
|
||
![Imgur](outputs/meeting_11_304_yoloface.jpg) | ||
|
||
## License | ||
|
||
This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for more details. | ||
|
||
## References | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
# ******************************************************************* | ||
# | ||
# Author : Thanh Nguyen, 2018 | ||
# Email : [email protected] | ||
# Github : https://github.com/sthanhng | ||
# | ||
# Face detection using the YOLOv3 algorithm | ||
# | ||
# Description : YOLO.py | ||
# Contains methods of YOLO | ||
# | ||
# ******************************************************************* | ||
|
||
import os | ||
import colorsys | ||
import numpy as np | ||
|
||
from yolo.model import eval | ||
from yolo.utils import letterbox_image | ||
|
||
from keras import backend as K | ||
from keras.models import load_model | ||
from timeit import default_timer as timer | ||
from PIL import ImageDraw | ||
|
||
|
||
class YOLO(object):
    """Face detector built around a Keras YOLOv3 model.

    ``args`` must expose the attributes ``model`` (path to a ``.h5`` Keras
    model), ``classes`` (path to a text file with one class name per line),
    ``anchors`` (path to a text file whose first line is a comma-separated
    list of anchor sizes), ``img_size`` (network input size, or
    ``(None, None)`` to derive it from each image), ``score`` and ``iou``
    (thresholds forwarded to ``eval``).
    """

    def __init__(self, args):
        self.args = args
        self.model_path = args.model
        self.classes_path = args.classes
        self.anchors_path = args.anchors
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.boxes, self.scores, self.classes = self._generate()
        self.model_image_size = args.img_size

    def _get_class(self):
        """Return the class names listed, one per line, in ``classes_path``."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = [line.strip() for line in f.readlines()]
        print(class_names)
        return class_names

    def _get_anchors(self):
        """Return the anchors from ``anchors_path`` as an ``(N, 2)`` array.

        Only the first line of the file is read; it must hold an even number
        of comma-separated numbers.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        anchors = [float(x) for x in first_line.split(',')]
        return np.array(anchors).reshape(-1, 2)

    def _generate_colors(self):
        """Return one RGB tuple per class, evenly spaced in hue, shuffled.

        The shuffle uses a fixed seed so colors are stable across runs; the
        global NumPy seed is reset afterwards.
        """
        num_classes = len(self.class_names)
        hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
        colors = [
            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
            for hsv in hsv_tuples
        ]

        # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(102)
        np.random.shuffle(colors)
        np.random.seed(None)
        return colors

    @staticmethod
    def _box_thickness(image_size):
        """Return the outline thickness for an image of ``(width, height)``.

        Clamped to at least 1 so that boxes are still drawn on small images,
        where ``(width + height) // 400`` would otherwise be 0.
        """
        return max(1, (image_size[0] + image_size[1]) // 400)

    def _generate(self):
        """Load the model and build the filtered-detection output tensors.

        Sets ``self.yolo_model``, ``self.colors`` and
        ``self.input_image_shape`` as side effects.

        Returns:
            The ``(boxes, scores, classes)`` tensors produced by ``eval``.

        Raises:
            AssertionError: if the model path is not a ``.h5`` file or the
                model output does not match the anchors/classes.
            RuntimeError: if the file cannot be loaded as a full Keras model.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file'

        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except Exception as err:
            # This module has no model builder, so a weights-only file cannot
            # be loaded; the previous fallback called load_weights() on an
            # attribute that did not exist and hid the real error.
            raise RuntimeError(
                'Could not load a full Keras model from {}; weights-only '
                'files are not supported'.format(model_path)) from err

        # Make sure model, anchors and classes match.
        assert self.yolo_model.layers[-1].output_shape[-1] == \
            num_anchors / len(self.yolo_model.output) * (num_classes + 5), \
            'Mismatch between model and given anchor and class sizes'

        print(
            '[i] ==> {} model, anchors, and classes loaded.'.format(model_path))

        # Generate colors for drawing bounding boxes.
        self.colors = self._generate_colors()

        # Generate output tensor targets for filtered bounding boxes.
        self.input_image_shape = K.placeholder(shape=(2,))
        boxes, scores, classes = eval(self.yolo_model.output, self.anchors,
                                      len(self.class_names),
                                      self.input_image_shape,
                                      score_threshold=self.args.score,
                                      iou_threshold=self.args.iou)
        return boxes, scores, classes

    def detect_image(self, image):
        """Run detection on ``image`` and draw the boxes onto it in place.

        ``image`` is used as a PIL image (``width``, ``height``, ``size`` and
        ``ImageDraw.Draw`` are applied to it). The same object is returned.
        """
        start_time = timer()

        if self.model_image_size != (None, None):
            assert self.model_image_size[
                0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[
                1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image, tuple(
                reversed(self.model_image_size)))
        else:
            # No fixed input size: round the image size down to a multiple
            # of 32.
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        print(image_data.shape)
        image_data /= 255.
        # Add batch dimension
        image_data = np.expand_dims(image_data, 0)

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                # Original image shape as (height, width).
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        print('[i] ==> Found {} face(s) for this image'.format(len(out_boxes)))
        thickness = self._box_thickness(image.size)
        draw = ImageDraw.Draw(image)

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            text = '{} {:.2f}'.format(predicted_class, score)

            # Round to the nearest pixel and clip to the image bounds.
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            print(text, (left, top), (right, bottom))

            # Draw nested one-pixel rectangles to get the desired thickness.
            for thk in range(thickness):
                draw.rectangle(
                    [left + thk, top + thk, right - thk, bottom - thk],
                    outline=self.colors[c])
        del draw

        end_time = timer()
        print('[i] ==> Processing time: {:.2f}ms'.format((end_time -
                                                          start_time) * 1000))
        return image

    def close_session(self):
        """Close the backend session obtained in ``__init__``."""
        self.sess.close()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
face |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,7 @@ | ||
numpy | ||
tensorflow==1.8.0 | ||
opencv-python | ||
opencv-contrib-python | ||
opencv-contrib-python | ||
keras | ||
matplotlib | ||
pillow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
# ******************************************************************* | ||
# | ||
# Author : Thanh Nguyen, 2018 | ||
# Email : [email protected] | ||
# Github : https://github.com/sthanhng | ||
# | ||
# Face detection using the YOLOv3 algorithm | ||
# | ||
# Description : model.py | ||
# The YOLOv3 model defined in Keras framework | ||
# | ||
# ******************************************************************* | ||
|
||
import tensorflow as tf | ||
|
||
from keras import backend as K | ||
|
||
|
||
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Args:
        feats: output tensor of one detection layer; it is reshaped to
            ``[-1, grid_h, grid_w, num_anchors, num_classes + 5]``.
        anchors: the anchor sizes used by this layer, one ``(w, h)`` pair
            per anchor.
        num_classes: number of classes predicted per anchor.
        input_shape: network input shape as ``(height, width)``.
        calc_loss: when true, return the raw values needed for the loss.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is true,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # height, width
    grid_shape = K.shape(feats)[1:3]
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # Shapes are (height, width), so reverse them to divide (x, y) / (w, h).
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1],
                                                         K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1],
                                                              K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
|
||
|
||
def correct_boxes(box_xy, box_wh, input_shape, image_shape):
    """Map boxes from network-input coordinates to original-image pixels.

    The offset and scale computed here undo a resize that keeps the aspect
    ratio and centres the image inside ``input_shape``. The result holds
    ``(y_min, x_min, y_max, x_max)`` in the last axis.
    """
    dtype = K.dtype(box_xy[..., ::-1])
    input_shape = K.cast(input_shape, dtype)
    image_shape = K.cast(image_shape, dtype)

    # Size of the resized image inside the network input.
    new_shape = K.round(image_shape * K.min(input_shape / image_shape))
    offset = (input_shape - new_shape) / 2. / input_shape
    scale = input_shape / new_shape

    # Work in (y, x) / (h, w) order to match the shape tensors.
    centers_yx = (box_xy[..., ::-1] - offset) * scale
    sizes_hw = box_wh[..., ::-1] * scale

    half_hw = sizes_hw / 2.
    mins_yx = centers_yx - half_hw
    maxes_yx = centers_yx + half_hw
    corners = K.concatenate([
        mins_yx[..., 0:1],   # y_min
        mins_yx[..., 1:2],   # x_min
        maxes_yx[..., 0:1],  # y_max
        maxes_yx[..., 1:2]   # x_max
    ])

    # Scale boxes back to original image shape.
    return corners * K.concatenate([image_shape, image_shape])
|
||
|
||
def boxes_and_scores(feats, anchors, num_classes, input_shape,
                     image_shape):
    """Turn one detection layer's output into flat boxes and class scores.

    Returns ``(boxes, box_scores)`` reshaped to ``[-1, 4]`` and
    ``[-1, num_classes]``; each score is confidence times class
    probability.
    """
    head = yolo_head(feats, anchors, num_classes, input_shape)
    box_xy, box_wh, box_confidence, box_class_probs = head

    corrected = correct_boxes(box_xy, box_wh, input_shape, image_shape)
    flat_boxes = K.reshape(corrected, [-1, 4])
    flat_scores = K.reshape(box_confidence * box_class_probs,
                            [-1, num_classes])
    return flat_boxes, flat_scores
|
||
|
||
def eval(outputs, anchors, num_classes, image_shape,
         max_boxes=20, score_threshold=.6, iou_threshold=.5):
    """Evaluate the YOLO model on given input and return filtered boxes.

    Boxes from every output layer are pooled, thresholded on score, and then
    reduced per class with non-max suppression (at most ``max_boxes`` boxes
    per class).

    Returns:
        ``(boxes, scores, classes)`` tensors concatenated over all classes.
    """
    num_layers = len(outputs)
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]
    # The first output layer has a stride of 32 relative to the input.
    input_shape = K.shape(outputs[0])[1:3] * 32

    per_layer = [
        boxes_and_scores(outputs[layer_idx],
                         anchors[anchor_mask[layer_idx]],
                         num_classes, input_shape,
                         image_shape)
        for layer_idx in range(num_layers)
    ]
    boxes = K.concatenate([layer_boxes for layer_boxes, _ in per_layer],
                          axis=0)
    box_scores = K.concatenate([layer_scores for _, layer_scores in per_layer],
                               axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    kept_boxes = []
    kept_scores = []
    kept_classes = []

    for class_id in range(num_classes):
        # TODO: use Keras backend instead of tf.
        class_mask = mask[:, class_id]
        candidate_boxes = tf.boolean_mask(boxes, class_mask)
        candidate_scores = tf.boolean_mask(box_scores[:, class_id],
                                           class_mask)
        nms_index = tf.image.non_max_suppression(
            candidate_boxes, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        selected_boxes = K.gather(candidate_boxes, nms_index)
        selected_scores = K.gather(candidate_scores, nms_index)
        kept_boxes.append(selected_boxes)
        kept_scores.append(selected_scores)
        kept_classes.append(K.ones_like(selected_scores, 'int32') * class_id)

    boxes_ = K.concatenate(kept_boxes, axis=0)
    scores_ = K.concatenate(kept_scores, axis=0)
    classes_ = K.concatenate(kept_classes, axis=0)

    return boxes_, scores_, classes_
Oops, something went wrong.