Merge pull request sthanhng#1 from sthanhng/yoloface-gpu

face detection using YOLOv3 with gpu
glMa7 · Oct 17, 2018 · 130ce16 · 130ce16
2 parents 4867558 + 4aa74e9
commit 130ce16
Show file tree

Hide file tree

Showing 9 changed files with 451 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -1,4 +1,72 @@
 # YOLOFace
-# Face detection using YOLOv3
 
-Face detection using YOLOv3
+# Deep learning based Face detection using the YOLOv3 algorithm
+
+
+## Getting started
+
+YOLOv3 (You Only Look Once) is a state-of-the-art, real-time object detection algorithm. The published model recognizes 80 different objects in images and videos. For more details, you can refer to this [paper](https://pjreddie.com/media/files/papers/YOLOv3.pdf).
+
+## YOLOv3's architecture
+
+![Imgur](assets/yolo-architecture.png)
+
+Credit: [Ayoosh Kathuria](https://towardsdatascience.com/yolo-v3-object-detection-53fb7d3bfe6b)
+
+## OpenCV Deep Neural Networks (dnn module)
+
+OpenCV `dnn` module supports running inference on pre-trained deep learning models from popular frameworks such as TensorFlow, Torch, Darknet and Caffe.
+
+## Prerequisites
+
+* tensorflow
+* opencv-python
+* opencv-contrib-python
+* numpy
+
+Install the required packages by running the following command:
+
+```bash
+$ pip install -r requirements.txt
+```
+
+**Note:** This repository works on Python 3.x. Using a Python virtual environment is highly recommended.
+
+## Usage
+
+* Clone this repository
+```bash
+$ git clone https://github.com/sthanhng/yoloface
+```
+
+* For face detection, you should download the pre-trained YOLOv3 weights file, which was trained on the [WIDER FACE: A Face Detection Benchmark](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/index.html) dataset, from this [link](https://drive.google.com/file/d/1xYasjU52whXMLT5MtF7RCPQkV66993oR/view?usp=sharing) and place it in the `model-weights/` directory.
+
+* Run the following command:
+
+>**image input**
+```bash
+$ python yoloface.py --images samples/outside_000001.jpg --output-dir outputs/
+```
+
+>**video input**
+```bash
+$ python yoloface.py --video samples/subway.mp4 --output-dir outputs/
+```
+
+>**webcam**
+```bash
+$ python yoloface.py --src 1 --output-dir outputs/
+```
+
+## Sample outputs
+
+![Imgur](outputs/outside_000001_yoloface.jpg)
+
+![Imgur](outputs/meeting_11_304_yoloface.jpg)
+
+## License
+
+This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for more details.
+
+## References
+
diff --git a/YOLO.py b/YOLO.py
@@ -0,0 +1,158 @@
+# *******************************************************************
+#
+# Author : Thanh Nguyen, 2018
+# Email  : [email protected]
+# Github : https://github.com/sthanhng
+#
+# Face detection using the YOLOv3 algorithm
+#
+# Description : YOLO.py
+# Contains methods of YOLO
+#
+# *******************************************************************
+
+import os
+import colorsys
+import numpy as np
+
+from yolo.model import eval
+from yolo.utils import letterbox_image
+
+from keras import backend as K
+from keras.models import load_model
+from timeit import default_timer as timer
+from PIL import ImageDraw
+
+
+class YOLO(object):
+    '''YOLOv3 detector that draws its detections onto the input image.
+
+    Construction reads the class-name and anchor files, loads the Keras
+    model and builds the filtered box/score/class tensors once;
+    `detect_image` then reuses the same backend session for every image.
+    '''
+
+    def __init__(self, args):
+        '''Load everything described by `args`.
+
+        `args` must expose the attributes `model`, `classes`, `anchors`,
+        `img_size`, `score` and `iou` (the last two are read while the
+        output tensors are built).
+        '''
+        self.args = args
+        self.model_path = args.model
+        self.classes_path = args.classes
+        self.anchors_path = args.anchors
+        self.class_names = self._get_class()
+        self.anchors = self._get_anchors()
+        self.sess = K.get_session()
+        self.boxes, self.scores, self.classes = self._generate()
+        self.model_image_size = args.img_size
+
+    def _get_class(self):
+        '''Return the class names, one per line of `self.classes_path`.'''
+        classes_path = os.path.expanduser(self.classes_path)
+        with open(classes_path) as f:
+            class_names = [line.strip() for line in f]
+        print(class_names)
+        return class_names
+
+    def _get_anchors(self):
+        '''Return the anchors as an array of shape (num_anchors, 2).
+
+        Only the first line of `self.anchors_path` is read; it must hold
+        comma-separated numbers.
+        '''
+        anchors_path = os.path.expanduser(self.anchors_path)
+        with open(anchors_path) as f:
+            first_line = f.readline()
+        anchors = [float(x) for x in first_line.split(',')]
+        return np.array(anchors).reshape(-1, 2)
+
+    def _generate(self):
+        '''Load the Keras model and build the filtered detection tensors.
+
+        Sets `self.yolo_model`, `self.colors` and `self.input_image_shape`
+        and returns the `(boxes, scores, classes)` tensors produced by
+        `eval` from `yolo.model`.
+
+        Raises:
+            AssertionError: if the path does not end in `.h5`, or the
+                model's last layer does not match the anchor/class counts.
+            ValueError: if the file cannot be loaded as a complete model.
+        '''
+        model_path = os.path.expanduser(self.model_path)
+        assert model_path.endswith(
+            '.h5'), 'Keras model or weights must be a .h5 file'
+
+        num_anchors = len(self.anchors)
+        num_classes = len(self.class_names)
+        try:
+            self.yolo_model = load_model(model_path, compile=False)
+        except Exception as err:
+            # No network builder is available here to fall back on, so a
+            # file that is not a complete saved model cannot be used.
+            # Report that directly instead of tripping over the
+            # never-assigned `self.yolo_model` attribute.
+            raise ValueError(
+                'Could not load a complete Keras model from {!r}; '
+                'weights-only .h5 files are not supported.'.format(
+                    model_path)) from err
+
+        # Make sure model, anchors and classes match.
+        assert self.yolo_model.layers[-1].output_shape[-1] == \
+               num_anchors / len(self.yolo_model.output) * (
+                       num_classes + 5), \
+            'Mismatch between model and given anchor and class sizes'
+
+        print(
+            '[i] ==> {} model, anchors, and classes loaded.'.format(model_path))
+
+        # Generate colors for drawing bounding boxes: evenly spaced hues
+        # converted to 0-255 RGB tuples.
+        hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
+        self.colors = [
+            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
+            for hsv in hsv_tuples]
+
+        # Shuffle colors to decorrelate adjacent classes. The fixed seed
+        # keeps the palette stable between runs; the generator is reseeded
+        # from fresh entropy afterwards.
+        np.random.seed(102)
+        np.random.shuffle(self.colors)
+        np.random.seed(None)
+
+        # Generate output tensor targets for filtered bounding boxes.
+        self.input_image_shape = K.placeholder(shape=(2,))
+        boxes, scores, classes = eval(self.yolo_model.output, self.anchors,
+                                      num_classes,
+                                      self.input_image_shape,
+                                      score_threshold=self.args.score,
+                                      iou_threshold=self.args.iou)
+        return boxes, scores, classes
+
+    def detect_image(self, image):
+        '''Detect objects in `image`, draw their boxes on it and return it.
+
+        The boxes are drawn on `image` itself, so the returned object is
+        the one that was passed in. `image` must provide `size`, `width`
+        and `height` (presumably a PIL image -- confirm against the
+        caller).
+        '''
+        start_time = timer()
+
+        if self.model_image_size != (None, None):
+            assert self.model_image_size[
+                       0] % 32 == 0, 'Multiples of 32 required'
+            assert self.model_image_size[
+                       1] % 32 == 0, 'Multiples of 32 required'
+            boxed_image = letterbox_image(image, tuple(
+                reversed(self.model_image_size)))
+        else:
+            # No fixed input size: round each side down to a multiple of 32.
+            new_image_size = (image.width - (image.width % 32),
+                              image.height - (image.height % 32))
+            boxed_image = letterbox_image(image, new_image_size)
+        image_data = np.array(boxed_image, dtype='float32')
+
+        print(image_data.shape)
+        image_data /= 255.
+        # Add batch dimension
+        image_data = np.expand_dims(image_data, 0)
+
+        out_boxes, out_scores, out_classes = self.sess.run(
+            [self.boxes, self.scores, self.classes],
+            feed_dict={
+                self.yolo_model.input: image_data,
+                self.input_image_shape: [image.size[1], image.size[0]],
+                K.learning_phase(): 0
+            })
+
+        print('[i] ==> Found {} face(s) for this image'.format(len(out_boxes)))
+
+        # Keep at least one pixel of outline: the integer division yields 0
+        # whenever width + height < 400, which would draw nothing at all.
+        thickness = max(1, (image.size[0] + image.size[1]) // 400)
+
+        # One drawing context is enough for every box on this image.
+        draw = ImageDraw.Draw(image)
+
+        for i, c in reversed(list(enumerate(out_classes))):
+            predicted_class = self.class_names[c]
+            score = out_scores[i]
+            text = '{} {:.2f}'.format(predicted_class, score)
+
+            # Round to the nearest pixel and clamp to the image bounds.
+            top, left, bottom, right = out_boxes[i]
+            top = max(0, np.floor(top + 0.5).astype('int32'))
+            left = max(0, np.floor(left + 0.5).astype('int32'))
+            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
+            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
+
+            print(text, (left, top), (right, bottom))
+
+            # A thick outline is drawn as nested one-pixel rectangles.
+            for thk in range(thickness):
+                draw.rectangle(
+                    [left + thk, top + thk, right - thk, bottom - thk],
+                    outline=self.colors[c])
+
+        end_time = timer()
+        print('[i] ==> Processing time: {:.2f}ms'.format((end_time -
+                                                          start_time) * 1000))
+        return image
+
+    def close_session(self):
+        '''Close the backend session obtained in the constructor.'''
+        self.sess.close()
diff --git a/assets/yolo-architecture.png b/assets/yolo-architecture.png
diff --git a/cfg/face_classes.txt b/cfg/face_classes.txt
@@ -0,0 +1 @@
+face
diff --git a/cfg/yolo_anchors.txt b/cfg/yolo_anchors.txt
@@ -0,0 +1 @@
+10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,7 @@
 numpy
 tensorflow==1.8.0
 opencv-python
-opencv-contrib-python
+opencv-contrib-python
+keras
+matplotlib
+pillow
diff --git a/yolo/model.py b/yolo/model.py
@@ -0,0 +1,140 @@
+# *******************************************************************
+#
+# Author : Thanh Nguyen, 2018
+# Email  : [email protected]
+# Github : https://github.com/sthanhng
+#
+# Face detection using the YOLOv3 algorithm
+#
+# Description : model.py
+# The YOLOv3 model defined in Keras framework
+#
+# *******************************************************************
+
+import tensorflow as tf
+
+from keras import backend as K
+
+
+def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
+    '''Decode the raw features of one output layer into box parameters.
+
+    `feats` is reshaped to
+    [-1, grid_h, grid_w, num_anchors, num_classes + 5]; the last axis is
+    read as xy (0:2), wh (2:4), confidence (4:5) and class scores (5:).
+
+    Returns `(grid, feats, box_xy, box_wh)` when `calc_loss == True`,
+    otherwise `(box_xy, box_wh, box_confidence, box_class_probs)`.
+    '''
+
+    num_anchors = len(anchors)
+
+    # Anchors broadcast over batch, height and width.
+    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
+
+    # Cell-index grid: grid[y, x, 0] == (x, y).
+    grid_shape = K.shape(feats)[1:3]  # height, width
+    grid_h = grid_shape[0]
+    grid_w = grid_shape[1]
+    row_ids = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
+    col_ids = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
+    grid_y = K.tile(row_ids, [1, grid_w, 1, 1])
+    grid_x = K.tile(col_ids, [grid_h, 1, 1, 1])
+    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))
+
+    feats = K.reshape(
+        feats, [-1, grid_h, grid_w, num_anchors, num_classes + 5])
+    feats_dtype = K.dtype(feats)
+
+    # Adjust predictions to each spatial grid point and anchor size.
+    # Centres are divided by (grid_w, grid_h), sizes by the reversed
+    # input shape.
+    grid_wh = K.cast(grid_shape[::-1], feats_dtype)
+    box_xy = (K.sigmoid(feats[..., :2]) + grid) / grid_wh
+    input_wh = K.cast(input_shape[::-1], feats_dtype)
+    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / input_wh
+    box_confidence = K.sigmoid(feats[..., 4:5])
+    box_class_probs = K.sigmoid(feats[..., 5:])
+
+    # The explicit comparison is kept so only True (or a value equal to
+    # it) selects the loss outputs.
+    if calc_loss == True:
+        return grid, feats, box_xy, box_wh
+    return box_xy, box_wh, box_confidence, box_class_probs
+
+
+def correct_boxes(box_xy, box_wh, input_shape, image_shape):
+    '''Map boxes from network-input coordinates onto the original image.
+
+    Returns a tensor whose last axis is (y_min, x_min, y_max, x_max),
+    multiplied by `image_shape`.
+    '''
+
+    # Work in (y, x) / (h, w) order to line up with the shape tensors.
+    centers_yx = box_xy[..., ::-1]
+    sizes_hw = box_wh[..., ::-1]
+    work_dtype = K.dtype(centers_yx)
+    input_shape = K.cast(input_shape, work_dtype)
+    image_shape = K.cast(image_shape, work_dtype)
+
+    # Size the image takes inside the network input when scaled by the
+    # smaller of the two side ratios; the remainder is split evenly on
+    # both sides (presumably the letterbox padding -- confirm).
+    new_shape = K.round(image_shape * K.min(input_shape / image_shape))
+    offset = (input_shape - new_shape) / 2. / input_shape
+    scale = input_shape / new_shape
+    centers_yx = (centers_yx - offset) * scale
+    sizes_hw = sizes_hw * scale
+
+    half_hw = sizes_hw / 2.
+    box_mins = centers_yx - half_hw
+    box_maxes = centers_yx + half_hw
+    boxes = K.concatenate([
+        box_mins[..., 0:1],  # y_min
+        box_mins[..., 1:2],  # x_min
+        box_maxes[..., 0:1],  # y_max
+        box_maxes[..., 1:2]  # x_max
+    ])
+
+    # Scale boxes back to original image shape.
+    return boxes * K.concatenate([image_shape, image_shape])
+
+
+def boxes_and_scores(feats, anchors, num_classes, input_shape,
+                     image_shape):
+    '''Turn one output layer into flat box and per-class score tensors.
+
+    Returns `(boxes, box_scores)` reshaped to [-1, 4] and
+    [-1, num_classes]; each score is confidence times class probability.
+    '''
+
+    head = yolo_head(feats, anchors, num_classes, input_shape)
+    box_xy, box_wh, box_confidence, box_class_probs = head
+
+    corrected = correct_boxes(box_xy, box_wh, input_shape, image_shape)
+    flat_boxes = K.reshape(corrected, [-1, 4])
+    flat_scores = K.reshape(box_confidence * box_class_probs,
+                            [-1, num_classes])
+    return flat_boxes, flat_scores
+
+
+def eval(outputs, anchors, num_classes, image_shape,
+         max_boxes=20, score_threshold=.6, iou_threshold=.5):
+    '''Evaluate the YOLO model outputs and return the filtered boxes.
+
+    Boxes from every output layer are pooled, thresholded on
+    `score_threshold` and then reduced per class with non-max suppression
+    (at most `max_boxes` boxes per class, overlap limit `iou_threshold`).
+
+    Returns `(boxes, scores, classes)` concatenated over all classes.
+    '''
+
+    num_layers = len(outputs)
+    # Anchor indices used by each output layer.
+    # NOTE(review): the two-layer masks share index 3; this appears to
+    # mirror the upstream tiny-YOLOv3 setting -- confirm before changing.
+    if num_layers == 3:
+        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+    else:
+        anchor_mask = [[3, 4, 5], [1, 2, 3]]
+
+    # The input size is taken as 32 times the first output's grid size.
+    input_shape = K.shape(outputs[0])[1:3] * 32
+
+    layer_boxes = []
+    layer_scores = []
+    for layer_idx in range(num_layers):
+        decoded_boxes, decoded_scores = boxes_and_scores(
+            outputs[layer_idx], anchors[anchor_mask[layer_idx]],
+            num_classes, input_shape, image_shape)
+        layer_boxes.append(decoded_boxes)
+        layer_scores.append(decoded_scores)
+
+    boxes = K.concatenate(layer_boxes, axis=0)
+    box_scores = K.concatenate(layer_scores, axis=0)
+
+    mask = box_scores >= score_threshold
+    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
+
+    kept_boxes = []
+    kept_scores = []
+    kept_classes = []
+    for class_id in range(num_classes):
+        # TODO: use Keras backend instead of tf.
+        class_mask = mask[:, class_id]
+        candidates = tf.boolean_mask(boxes, class_mask)
+        candidate_scores = tf.boolean_mask(box_scores[:, class_id],
+                                           class_mask)
+        nms_index = tf.image.non_max_suppression(
+            candidates, candidate_scores, max_boxes_tensor,
+            iou_threshold=iou_threshold)
+        survivors = K.gather(candidates, nms_index)
+        survivor_scores = K.gather(candidate_scores, nms_index)
+        kept_boxes.append(survivors)
+        kept_scores.append(survivor_scores)
+        # Label every surviving box with its class index.
+        kept_classes.append(K.ones_like(survivor_scores, 'int32') * class_id)
+
+    boxes_ = K.concatenate(kept_boxes, axis=0)
+    scores_ = K.concatenate(kept_scores, axis=0)
+    classes_ = K.concatenate(kept_classes, axis=0)
+
+    return boxes_, scores_, classes_
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326