diff --git a/README.md b/README.md
index 11a33d5..28e6f03 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,72 @@
 # YOLOFace
-# Face detection using YOLOv3
-Face detection using YOLOv3
+# Deep learning-based face detection using the YOLOv3 algorithm
+
+
+## Getting started
+
+YOLOv3 (You Only Look Once) is a state-of-the-art, real-time object detection algorithm. The published model recognizes 80 different objects in images and videos. For more details, refer to this [paper](https://pjreddie.com/media/files/papers/YOLOv3.pdf).
+
+## YOLOv3's architecture
+
+![Imgur](assets/yolo-architecture.png)
+
+Credit: [Ayoosh Kathuria](https://towardsdatascience.com/yolo-v3-object-detection-53fb7d3bfe6b)
+
+## OpenCV Deep Neural Networks (dnn module)
+
+The OpenCV `dnn` module supports running inference on pre-trained deep learning models from popular frameworks such as TensorFlow, Torch, Darknet, and Caffe. A minimal loading sketch is given at the end of this README.
+
+## Prerequisites
+
+* tensorflow
+* opencv-python
+* opencv-contrib-python
+* numpy
+
+Install the required packages by running the following command:
+
+```bash
+$ pip install -r requirements.txt
+```
+
+**Note:** This repository works on Python 3.x. Using a Python virtual environment is highly recommended.
+
+## Usage
+
+* Clone this repository
+```bash
+$ git clone https://github.com/sthanhng/yoloface
+```
+
+* For face detection, download the pre-trained YOLOv3 weights file, which was trained on the [WIDER FACE: A Face Detection Benchmark](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/index.html) dataset, from this [link](https://drive.google.com/file/d/1xYasjU52whXMLT5MtF7RCPQkV66993oR/view?usp=sharing) and place it in the `model-weights/` directory.
+
+* Run the following command:
+
+>**image input**
+```bash
+$ python yoloface.py --images samples/outside_000001.jpg --output-dir outputs/
+```
+
+>**video input**
+```bash
+$ python yoloface.py --video samples/subway.mp4 --output-dir outputs/
+```
+
+>**webcam**
+```bash
+$ python yoloface.py --src 1 --output-dir outputs/
+```
+
+## Sample outputs
+
+![Imgur](outputs/outside_000001_yoloface.jpg)
+
+![Imgur](outputs/meeting_11_304_yoloface.jpg)
+
+## License
+
+This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for more details.
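+
+## Example: loading the Darknet model with OpenCV `dnn`
+
+As a minimal sketch of the `dnn` workflow described above: load the Darknet-format network, feed it a normalized 416x416 blob, and collect the raw detections. The cfg/weights file names here are assumptions for illustration, not files shipped in this repository; substitute whatever you downloaded.
+
+```python
+import cv2
+
+# Assumed paths -- replace with your own cfg/weights files.
+net = cv2.dnn.readNetFromDarknet('cfg/yolov3-face.cfg',
+                                 'model-weights/yolov3-wider_16000.weights')
+
+img = cv2.imread('samples/outside_000001.jpg')
+# Scale pixels to [0, 1], resize to the network input, swap BGR -> RGB.
+blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416),
+                             [0, 0, 0], swapRB=True, crop=False)
+net.setInput(blob)
+# One output array per YOLO detection layer.
+outs = net.forward(net.getUnconnectedOutLayersNames())
+print([o.shape for o in outs])
+```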
+
+## References
+
diff --git a/YOLO.py b/YOLO.py
new file mode 100644
index 0000000..6579f75
--- /dev/null
+++ b/YOLO.py
@@ -0,0 +1,158 @@
+# *******************************************************************
+#
+# Author : Thanh Nguyen, 2018
+# Email : sthanhng@gmail.com
+# Github : https://github.com/sthanhng
+#
+# Face detection using the YOLOv3 algorithm
+#
+# Description : YOLO.py
+# Contains methods of YOLO
+#
+# *******************************************************************
+
+import os
+import colorsys
+import numpy as np
+
+from yolo.model import eval
+from yolo.utils import letterbox_image
+
+from keras import backend as K
+from keras.models import load_model
+from timeit import default_timer as timer
+from PIL import ImageDraw
+
+
+class YOLO(object):
+    def __init__(self, args):
+        self.args = args
+        self.model_path = args.model
+        self.classes_path = args.classes
+        self.anchors_path = args.anchors
+        self.class_names = self._get_class()
+        self.anchors = self._get_anchors()
+        self.sess = K.get_session()
+        self.boxes, self.scores, self.classes = self._generate()
+        self.model_image_size = args.img_size
+
+    def _get_class(self):
+        classes_path = os.path.expanduser(self.classes_path)
+        with open(classes_path) as f:
+            class_names = f.readlines()
+        class_names = [c.strip() for c in class_names]
+        print(class_names)
+        return class_names
+
+    def _get_anchors(self):
+        anchors_path = os.path.expanduser(self.anchors_path)
+        with open(anchors_path) as f:
+            anchors = f.readline()
+        anchors = [float(x) for x in anchors.split(',')]
+        return np.array(anchors).reshape(-1, 2)
+
+    def _generate(self):
+        model_path = os.path.expanduser(self.model_path)
+        assert model_path.endswith(
+            '.h5'), 'Keras model or weights must be a .h5 file'
+
+        # Load model, or construct model and load weights
+        num_anchors = len(self.anchors)
+        num_classes = len(self.class_names)
+        try:
+            self.yolo_model = load_model(model_path, compile=False)
+        except Exception:
+            # Make sure model, anchors and classes match.
+            # NOTE: this fallback assumes the architecture was already
+            # built; as written, `self.yolo_model` is unset when
+            # `load_model` fails, so only complete .h5 models load here.
+            self.yolo_model.load_weights(self.model_path)
+        else:
+            assert self.yolo_model.layers[-1].output_shape[-1] == \
+                num_anchors / len(self.yolo_model.output) * (
+                    num_classes + 5), \
+                'Mismatch between model and given anchor and class sizes'
+
+        print(
+            '[i] ==> {} model, anchors, and classes loaded.'.format(model_path))
+
+        # Generate colors for drawing bounding boxes
+        hsv_tuples = [(x / len(self.class_names), 1., 1.)
+                      for x in range(len(self.class_names))]
+        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
+        self.colors = list(
+            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
+                self.colors))
+
+        # Shuffle colors to decorrelate adjacent classes.
+        np.random.seed(102)
+        np.random.shuffle(self.colors)
+        np.random.seed(None)
+
+        # Generate output tensor targets for filtered bounding boxes.
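+        # `eval` (yolo/model.py) wires up the post-processing graph:
+        # it decodes the raw YOLO feature maps, rescales the boxes to
+        # the original image size fed in through `input_image_shape`,
+        # then applies score filtering and per-class non-max suppression.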
+        self.input_image_shape = K.placeholder(shape=(2,))
+        boxes, scores, classes = eval(self.yolo_model.output, self.anchors,
+                                      len(self.class_names),
+                                      self.input_image_shape,
+                                      score_threshold=self.args.score,
+                                      iou_threshold=self.args.iou)
+        return boxes, scores, classes
+
+    def detect_image(self, image):
+        start_time = timer()
+
+        if self.model_image_size != (None, None):
+            assert self.model_image_size[
+                0] % 32 == 0, 'Multiples of 32 required'
+            assert self.model_image_size[
+                1] % 32 == 0, 'Multiples of 32 required'
+            boxed_image = letterbox_image(image, tuple(
+                reversed(self.model_image_size)))
+        else:
+            new_image_size = (image.width - (image.width % 32),
+                              image.height - (image.height % 32))
+            boxed_image = letterbox_image(image, new_image_size)
+        image_data = np.array(boxed_image, dtype='float32')
+
+        print(image_data.shape)
+        image_data /= 255.
+        # Add batch dimension
+        image_data = np.expand_dims(image_data, 0)
+
+        out_boxes, out_scores, out_classes = self.sess.run(
+            [self.boxes, self.scores, self.classes],
+            feed_dict={
+                self.yolo_model.input: image_data,
+                self.input_image_shape: [image.size[1], image.size[0]],
+                K.learning_phase(): 0
+            })
+
+        print('[i] ==> Found {} face(s) for this image'.format(len(out_boxes)))
+        thickness = (image.size[0] + image.size[1]) // 400
+
+        for i, c in reversed(list(enumerate(out_classes))):
+            predicted_class = self.class_names[c]
+            box = out_boxes[i]
+            score = out_scores[i]
+
+            text = '{} {:.2f}'.format(predicted_class, score)
+            draw = ImageDraw.Draw(image)
+
+            top, left, bottom, right = box
+            top = max(0, np.floor(top + 0.5).astype('int32'))
+            left = max(0, np.floor(left + 0.5).astype('int32'))
+            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
+            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
+
+            print(text, (left, top), (right, bottom))
+
+            # Draw the box as `thickness` nested 1-px rectangles.
+            for thk in range(thickness):
+                draw.rectangle(
+                    [left + thk, top + thk, right - thk, bottom - thk],
+                    outline=self.colors[c])
+            del draw
+
+        end_time = timer()
+        print('[i] ==> Processing time: {:.2f}ms'.format((end_time -
+                                                          start_time) * 1000))
+        return image
+
+    def close_session(self):
+        self.sess.close()
diff --git a/assets/yolo-architecture.png b/assets/yolo-architecture.png
new file mode 100644
index 0000000..0e24a9a
Binary files /dev/null and b/assets/yolo-architecture.png differ
diff --git a/cfg/face_classes.txt b/cfg/face_classes.txt
new file mode 100644
index 0000000..54429d9
--- /dev/null
+++ b/cfg/face_classes.txt
@@ -0,0 +1 @@
+face
\ No newline at end of file
diff --git a/cfg/yolo_anchors.txt b/cfg/yolo_anchors.txt
new file mode 100644
index 0000000..9cdfb96
--- /dev/null
+++ b/cfg/yolo_anchors.txt
@@ -0,0 +1 @@
+10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
diff --git a/requirements.txt b/requirements.txt
index 3c348a1..3b3eb6c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,7 @@
 numpy
 tensorflow==1.8.0
 opencv-python
-opencv-contrib-python
\ No newline at end of file
+opencv-contrib-python
+keras
+matplotlib
+pillow
\ No newline at end of file
diff --git a/yolo/model.py b/yolo/model.py
new file mode 100644
index 0000000..ed74468
--- /dev/null
+++ b/yolo/model.py
@@ -0,0 +1,140 @@
+# *******************************************************************
+#
+# Author : Thanh Nguyen, 2018
+# Email : sthanhng@gmail.com
+# Github : https://github.com/sthanhng
+#
+# Face detection using the YOLOv3 algorithm
+#
+# Description : model.py
+# The YOLOv3 model defined in the Keras framework
+#
+#
+# *******************************************************************
+
+import tensorflow as tf
+
+from keras import backend as K
+
+
+def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
+    '''Convert final layer features to bounding box parameters'''
+
+    num_anchors = len(anchors)
+
+    # Reshape to batch, height, width, num_anchors, box_params.
+    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
+
+    # height, width
+    grid_shape = K.shape(feats)[1:3]
+    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
+                    [1, grid_shape[1], 1, 1])
+    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
+                    [grid_shape[0], 1, 1, 1])
+    grid = K.concatenate([grid_x, grid_y])
+    grid = K.cast(grid, K.dtype(feats))
+
+    feats = K.reshape(
+        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
+
+    # Adjust predictions to each spatial grid point and anchor size.
+    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1],
+                                                         K.dtype(feats))
+    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1],
+                                                              K.dtype(feats))
+    box_confidence = K.sigmoid(feats[..., 4:5])
+    box_class_probs = K.sigmoid(feats[..., 5:])
+
+    if calc_loss:
+        return grid, feats, box_xy, box_wh
+    return box_xy, box_wh, box_confidence, box_class_probs
+
+
+def correct_boxes(box_xy, box_wh, input_shape, image_shape):
+    '''Get corrected boxes'''
+
+    box_yx = box_xy[..., ::-1]
+    box_hw = box_wh[..., ::-1]
+    input_shape = K.cast(input_shape, K.dtype(box_yx))
+    image_shape = K.cast(image_shape, K.dtype(box_yx))
+    new_shape = K.round(image_shape * K.min(input_shape / image_shape))
+    offset = (input_shape - new_shape) / 2. / input_shape
+    scale = input_shape / new_shape
+    box_yx = (box_yx - offset) * scale
+    box_hw *= scale
+
+    box_mins = box_yx - (box_hw / 2.)
+    box_maxes = box_yx + (box_hw / 2.)
+    boxes = K.concatenate([
+        box_mins[..., 0:1],  # y_min
+        box_mins[..., 1:2],  # x_min
+        box_maxes[..., 0:1],  # y_max
+        box_maxes[..., 1:2]  # x_max
+    ])
+
+    # Scale boxes back to original image shape.
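+    # At this point the coordinates are normalized to [0, 1] relative to
+    # the original image (the letterbox offset and scale were undone
+    # above), so multiplying by (h, w, h, w) yields pixel coordinates.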
+    boxes *= K.concatenate([image_shape, image_shape])
+    return boxes
+
+
+def boxes_and_scores(feats, anchors, num_classes, input_shape,
+                     image_shape):
+    '''Process convolutional layer output'''
+
+    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
+                                                                anchors,
+                                                                num_classes,
+                                                                input_shape)
+    boxes = correct_boxes(box_xy, box_wh, input_shape, image_shape)
+    boxes = K.reshape(boxes, [-1, 4])
+    box_scores = box_confidence * box_class_probs
+    box_scores = K.reshape(box_scores, [-1, num_classes])
+    return boxes, box_scores
+
+
+def eval(outputs, anchors, num_classes, image_shape,
+         max_boxes=20, score_threshold=.6, iou_threshold=.5):
+    '''Evaluate the YOLO model on given input and return filtered boxes'''
+
+    num_layers = len(outputs)
+    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [
+        [3, 4, 5], [1, 2, 3]]
+    input_shape = K.shape(outputs[0])[1:3] * 32
+    boxes = []
+    box_scores = []
+
+    for l in range(num_layers):
+        _boxes, _box_scores = boxes_and_scores(outputs[l],
+                                               anchors[anchor_mask[l]],
+                                               num_classes, input_shape,
+                                               image_shape)
+        boxes.append(_boxes)
+        box_scores.append(_box_scores)
+
+    boxes = K.concatenate(boxes, axis=0)
+    box_scores = K.concatenate(box_scores, axis=0)
+
+    mask = box_scores >= score_threshold
+    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
+    boxes_ = []
+    scores_ = []
+    classes_ = []
+
+    for c in range(num_classes):
+        # TODO: use Keras backend instead of tf.
+        class_boxes = tf.boolean_mask(boxes, mask[:, c])
+        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
+        nms_index = tf.image.non_max_suppression(
+            class_boxes, class_box_scores, max_boxes_tensor,
+            iou_threshold=iou_threshold)
+        class_boxes = K.gather(class_boxes, nms_index)
+        class_box_scores = K.gather(class_box_scores, nms_index)
+        classes = K.ones_like(class_box_scores, 'int32') * c
+        boxes_.append(class_boxes)
+        scores_.append(class_box_scores)
+        classes_.append(classes)
+
+    boxes_ = K.concatenate(boxes_, axis=0)
+    scores_ = K.concatenate(scores_, axis=0)
+    classes_ = K.concatenate(classes_, axis=0)
+
+    return boxes_, scores_, classes_
diff --git a/yolo/utils.py b/yolo/utils.py
new file mode 100644
index 0000000..0915d17
--- /dev/null
+++ b/yolo/utils.py
@@ -0,0 +1,16 @@
+from PIL import Image
+
+
+def letterbox_image(image, size):
+    '''Resize image with unchanged aspect ratio using padding'''
+
+    img_width, img_height = image.size
+    w, h = size
+    scale = min(w / img_width, h / img_height)
+    nw = int(img_width * scale)
+    nh = int(img_height * scale)
+
+    image = image.resize((nw, nh), Image.BICUBIC)
+    new_image = Image.new('RGB', size, (128, 128, 128))
+    new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
+    return new_image
diff --git a/yoloface_gpu.py b/yoloface_gpu.py
new file mode 100644
index 0000000..5ad5aef
--- /dev/null
+++ b/yoloface_gpu.py
@@ -0,0 +1,61 @@
+import argparse
+
+from PIL import Image
+from YOLO import YOLO
+
+
+#####################################################################
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model', type=str, default='model-weights/YOLO_Face.h5',
+                        help='path to model weights file')
+    parser.add_argument('--anchors', type=str, default='cfg/yolo_anchors.txt',
+                        help='path to anchor definitions')
+    parser.add_argument('--classes', type=str, default='cfg/face_classes.txt',
+                        help='path to class definitions')
+    parser.add_argument('--score', type=float, default=0.5,
+                        help='the score threshold')
+    parser.add_argument('--iou', type=float, default=0.45,
+                        help='the iou threshold')
+    # nargs=2 so "--img-size 416 416" parses as two ints; the original
+    # type=list would split a single argument into characters.
+    parser.add_argument('--img-size', type=int, nargs=2, action='store',
+                        default=(416, 416), help='input image size')
+    parser.add_argument('--image', default=False, action="store_true",
+                        help='image detection mode')
+    parser.add_argument('--output', type=str,
+                        default='', help='image/video output path')
+    args = parser.parse_args()
+    return args
+
+
+def detect_img(yolo):
+    while True:
+        img = input('[i] ==> Input image filename: ')
+        try:
+            image = Image.open(img)
+        except Exception:
+            print('[!] ==> Open Error! Try again!')
+            continue
+        else:
+            res_image = yolo.detect_image(image)
+            res_image.show()
+
+    # NOTE: unreachable as written -- the loop above only exits via Ctrl-C.
+    yolo.close_session()
+
+
+def _main():
+    # Get the arguments
+    args = get_args()
+
+    if args.image:
+        # Image detection mode
+        print('[i] ==> Image detection mode\n')
+        detect_img(YOLO(args))
+    else:
+        print('[i] ==> Video detection mode\n')
+        # Call the detect_video method here
+
+    print('Well done!!!')
+
+
+if __name__ == "__main__":
+    _main()
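+
+
+# -------------------------------------------------------------------
+# Hypothetical sketch (not part of the original script): _main() above
+# leaves video mode as a stub ("Call the detect_video method here").
+# One possible shape for it, assuming OpenCV for capture and display;
+# the function name and the BGR<->RGB conversions are illustrative only.
+def detect_video(yolo, video_path):
+    import cv2
+    import numpy as np
+
+    cap = cv2.VideoCapture(video_path)
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret:
+            break
+        # OpenCV delivers BGR frames; PIL expects RGB.
+        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        result = yolo.detect_image(image)
+        cv2.imshow('YOLOFace', cv2.cvtColor(np.asarray(result),
+                                            cv2.COLOR_RGB2BGR))
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+    cap.release()
+    cv2.destroyAllWindows()
+    yolo.close_session()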