diff --git a/yolo/model.py b/yolo/model.py
new file mode 100644
index 0000000..ed74468
--- /dev/null
+++ b/yolo/model.py
@@ -0,0 +1,140 @@
+# *******************************************************************
+#
+# Author : Thanh Nguyen, 2018
+# Email : sthanhng@gmail.com
+# Github : https://github.com/sthanhng
+#
+# Face detection using the YOLOv3 algorithm
+#
+# Description : model.py
+# The YOLOv3 model defined in the Keras framework
+#
+# *******************************************************************
+
+import tensorflow as tf
+
+from keras import backend as K
+
+
+def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
+    '''Convert final layer features to bounding box parameters'''
+
+    num_anchors = len(anchors)
+
+    # Reshape to batch, height, width, num_anchors, box_params.
+    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
+
+    # height, width
+    grid_shape = K.shape(feats)[1:3]
+    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
+                    [1, grid_shape[1], 1, 1])
+    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
+                    [grid_shape[0], 1, 1, 1])
+    grid = K.concatenate([grid_x, grid_y])
+    grid = K.cast(grid, K.dtype(feats))
+
+    feats = K.reshape(
+        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
+
+    # Adjust predictions to each spatial grid point and anchor size.
+    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1],
+                                                         K.dtype(feats))
+    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1],
+                                                              K.dtype(feats))
+    box_confidence = K.sigmoid(feats[..., 4:5])
+    box_class_probs = K.sigmoid(feats[..., 5:])
+
+    if calc_loss:
+        return grid, feats, box_xy, box_wh
+    return box_xy, box_wh, box_confidence, box_class_probs
+
+
+def correct_boxes(box_xy, box_wh, input_shape, image_shape):
+    '''Get corrected boxes'''
+
+    box_yx = box_xy[..., ::-1]
+    box_hw = box_wh[..., ::-1]
+    input_shape = K.cast(input_shape, K.dtype(box_yx))
+    image_shape = K.cast(image_shape, K.dtype(box_yx))
+    new_shape = K.round(image_shape * K.min(input_shape / image_shape))
+    offset = (input_shape - new_shape) / 2. / input_shape
+    scale = input_shape / new_shape
+    box_yx = (box_yx - offset) * scale
+    box_hw *= scale
+
+    box_mins = box_yx - (box_hw / 2.)
+    box_maxes = box_yx + (box_hw / 2.)
+    boxes = K.concatenate([
+        box_mins[..., 0:1],  # y_min
+        box_mins[..., 1:2],  # x_min
+        box_maxes[..., 0:1],  # y_max
+        box_maxes[..., 1:2]  # x_max
+    ])
+
+    # Scale boxes back to original image shape.
+    boxes *= K.concatenate([image_shape, image_shape])
+    return boxes
+
+
+def boxes_and_scores(feats, anchors, num_classes, input_shape,
+                     image_shape):
+    '''Process convolutional layer output'''
+
+    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
+                                                                anchors,
+                                                                num_classes,
+                                                                input_shape)
+    boxes = correct_boxes(box_xy, box_wh, input_shape, image_shape)
+    boxes = K.reshape(boxes, [-1, 4])
+    box_scores = box_confidence * box_class_probs
+    box_scores = K.reshape(box_scores, [-1, num_classes])
+    return boxes, box_scores
+
+
+def eval(outputs, anchors, num_classes, image_shape,
+         max_boxes=20, score_threshold=.6, iou_threshold=.5):
+    '''Evaluate the YOLO model on the given outputs and return filtered boxes'''
+
+    num_layers = len(outputs)
+    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [
+        [3, 4, 5], [1, 2, 3]]
+    input_shape = K.shape(outputs[0])[1:3] * 32
+    boxes = []
+    box_scores = []
+
+    for l in range(num_layers):
+        _boxes, _box_scores = boxes_and_scores(outputs[l],
+                                               anchors[anchor_mask[l]],
+                                               num_classes, input_shape,
+                                               image_shape)
+        boxes.append(_boxes)
+        box_scores.append(_box_scores)
+
+    boxes = K.concatenate(boxes, axis=0)
+    box_scores = K.concatenate(box_scores, axis=0)
+
+    mask = box_scores >= score_threshold
+    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
+    boxes_ = []
+    scores_ = []
+    classes_ = []
+
+    for c in range(num_classes):
+        # TODO: use Keras backend instead of tf.
+        class_boxes = tf.boolean_mask(boxes, mask[:, c])
+        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
+        nms_index = tf.image.non_max_suppression(
+            class_boxes, class_box_scores, max_boxes_tensor,
+            iou_threshold=iou_threshold)
+        class_boxes = K.gather(class_boxes, nms_index)
+        class_box_scores = K.gather(class_box_scores, nms_index)
+        classes = K.ones_like(class_box_scores, 'int32') * c
+        boxes_.append(class_boxes)
+        scores_.append(class_box_scores)
+        classes_.append(classes)
+
+    boxes_ = K.concatenate(boxes_, axis=0)
+    scores_ = K.concatenate(scores_, axis=0)
+    classes_ = K.concatenate(classes_, axis=0)
+
+    return boxes_, scores_, classes_
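Not part of the diff: a minimal usage sketch of how the eval() added above might be wired into a TF1/Keras inference graph. The anchor values are the standard nine YOLOv3 anchors, and the weights path, class count, and variable names are assumptions for illustration only.

# Usage sketch (assumptions: a converted three-scale YOLOv3 Keras model and a
# single 'face' class; the path 'model-weights/yolo.h5' is hypothetical).
import numpy as np
from keras import backend as K
from keras.models import load_model

from yolo.model import eval as yolo_eval

# Standard YOLOv3 anchors; eval() indexes these with anchor_mask,
# so a NumPy array is expected here.
anchors = np.array([(10, 13), (16, 30), (33, 23),
                    (30, 61), (62, 45), (59, 119),
                    (116, 90), (156, 198), (373, 326)], dtype='float32')

yolo_model = load_model('model-weights/yolo.h5', compile=False)
input_image_shape = K.placeholder(shape=(2,))  # original (height, width)

# Build the post-processing graph once; model.output is the list of three
# scale outputs that eval() expects.
boxes, scores, classes = yolo_eval(yolo_model.output, anchors,
                                   num_classes=1,
                                   image_shape=input_image_shape,
                                   score_threshold=0.5,
                                   iou_threshold=0.45)

# At prediction time (TF1-style session execution), roughly:
# sess = K.get_session()
# out_boxes, out_scores, out_classes = sess.run(
#     [boxes, scores, classes],
#     feed_dict={yolo_model.input: image_data,        # letterboxed, normalized batch
#                input_image_shape: [orig_h, orig_w],
#                K.learning_phase(): 0})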