diff --git a/yoeo/detect.py b/yoeo/detect.py index 76163fc..7f99ac6 100755 --- a/yoeo/detect.py +++ b/yoeo/detect.py @@ -1,7 +1,6 @@ #! /usr/bin/env python3 -from __future__ import division - +from __future__ import division, annotations import os import argparse import tqdm @@ -13,6 +12,8 @@ from torch.utils.data import DataLoader from torch.autograd import Variable +from typing import Optional, List + from imgaug.augmentables.segmaps import SegmentationMapsOnImage from yoeo.models import load_model @@ -26,7 +27,8 @@ def detect_directory(model_path, weights_path, img_path, classes, output_path, - batch_size=8, img_size=416, n_cpu=8, conf_thres=0.5, nms_thres=0.5): + batch_size=8, img_size=416, n_cpu=8, conf_thres=0.5, nms_thres=0.5, + robot_class_ids: Optional[List[int]] = None): """Detects objects on all images in specified directory and saves output images with drawn detections. :param model_path: Path to model definition file (.cfg) @@ -49,6 +51,8 @@ def detect_directory(model_path, weights_path, img_path, classes, output_path, :type conf_thres: float, optional :param nms_thres: IOU threshold for non-maximum suppression, defaults to 0.5 :type nms_thres: float, optional + :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist. 
+ :type robot_class_ids: List[int], optional """ dataloader = _create_data_loader(img_path, batch_size, img_size, n_cpu) model = load_model(model_path, weights_path) @@ -58,14 +62,16 @@ def detect_directory(model_path, weights_path, img_path, classes, output_path, dataloader, output_path, conf_thres, - nms_thres) + nms_thres, + robot_class_ids=robot_class_ids + ) _draw_and_save_output_images( img_detections, segmentations, imgs, img_size, output_path, classes) print(f"---- Detections were saved to: '{output_path}' ----") -def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5): +def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5, robot_class_ids: Optional[List[int]] = None): """Inferences one image with model. :param model: Model for inference @@ -78,6 +84,8 @@ def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5): :type conf_thres: float, optional :param nms_thres: IOU threshold for non-maximum suppression, defaults to 0.5 :type nms_thres: float, optional + :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist. 
+ :type robot_class_ids: List[int], optional :return: Detections on image with each detection in the format: [x1, y1, x2, y2, confidence, class], Segmentation as 2d numpy array with the coresponding class id in each cell :rtype: nd.array, nd.array """ @@ -97,13 +105,13 @@ def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5): # Get detections with torch.no_grad(): detections, segmentations = model(input_img) - detections = non_max_suppression(detections, conf_thres, nms_thres) + detections = non_max_suppression(detections, conf_thres, nms_thres, robot_class_ids=robot_class_ids) detections = rescale_boxes(detections[0], img_size, image.shape[0:2]) segmentations = rescale_segmentation(segmentations, image.shape[0:2]) return detections.numpy(), segmentations.cpu().detach().numpy() -def detect(model, dataloader, output_path, conf_thres, nms_thres): +def detect(model, dataloader, output_path, conf_thres, nms_thres, robot_class_ids: Optional[List[int]] = None): """Inferences images with model. :param model: Model for inference @@ -116,6 +124,8 @@ def detect(model, dataloader, output_path, conf_thres, nms_thres): :type conf_thres: float, optional :param nms_thres: IOU threshold for non-maximum suppression, defaults to 0.5 :type nms_thres: float, optional + :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist. + :type robot_class_ids: List[int], optional :return: List of detections. The coordinates are given for the padded image that is provided by the dataloader. 
Use `utils.rescale_boxes` to transform them into the desired input image coordinate system before its transformed by the dataloader), List of input image paths @@ -139,7 +149,7 @@ def detect(model, dataloader, output_path, conf_thres, nms_thres): # Get detections with torch.no_grad(): detections, segmentations = model(input_imgs) - detections = non_max_suppression(detections, conf_thres, nms_thres) + detections = non_max_suppression(detections, conf_thres, nms_thres, robot_class_ids=robot_class_ids) # Store image and detections img_detections.extend(detections) @@ -300,12 +310,21 @@ def run(): parser.add_argument("--n_cpu", type=int, default=8, help="Number of cpu threads to use during batch generation") parser.add_argument("--conf_thres", type=float, default=0.5, help="Object confidence threshold") parser.add_argument("--nms_thres", type=float, default=0.4, help="IOU threshold for non-maximum suppression") + parser.add_argument("--multiple_robot_classes", action="store_true", + help="If multiple robot classes exist and nms shall be performed across all robot classes") args = parser.parse_args() print(f"Command line arguments: {args}") # Extract class names from file classes = load_classes(args.classes)['detection'] # List of class names + robot_class_ids = None + if args.multiple_robot_classes: + robot_class_ids = [] + for idx, c in enumerate(classes): + if "robot" in c: + robot_class_ids.append(idx) + detect_directory( args.model, args.weights, @@ -316,7 +335,9 @@ def run(): img_size=args.img_size, n_cpu=args.n_cpu, conf_thres=args.conf_thres, - nms_thres=args.nms_thres) + nms_thres=args.nms_thres, + robot_class_ids=robot_class_ids + ) if __name__ == '__main__': diff --git a/yoeo/test.py b/yoeo/test.py index 4106ec5..d266572 100755 --- a/yoeo/test.py +++ b/yoeo/test.py @@ -1,7 +1,7 @@ #! 
/usr/bin/env python3 -from __future__ import division -from typing import List +from __future__ import division, annotations +from typing import List, Optional import argparse import tqdm @@ -14,14 +14,16 @@ from torch.autograd import Variable from yoeo.models import load_model -from yoeo.utils.utils import load_classes, ap_per_class, get_batch_statistics, non_max_suppression, to_cpu, xywh2xyxy, print_environment_info, seg_iou +from yoeo.utils.utils import load_classes, ap_per_class, get_batch_statistics, non_max_suppression, to_cpu, xywh2xyxy, \ + print_environment_info, seg_iou from yoeo.utils.datasets import ListDataset from yoeo.utils.transforms import DEFAULT_TRANSFORMS from yoeo.utils.parse_config import parse_data_config def evaluate_model_file(model_path, weights_path, img_path, class_names, batch_size=8, img_size=416, - n_cpu=8, iou_thres=0.5, conf_thres=0.5, nms_thres=0.5, verbose=True): + n_cpu=8, iou_thres=0.5, conf_thres=0.5, nms_thres=0.5, verbose=True, + robot_class_ids: Optional[List[int]] = None): """Evaluate model on validation dataset. :param model_path: Path to model definition file (.cfg) @@ -46,6 +48,8 @@ def evaluate_model_file(model_path, weights_path, img_path, class_names, batch_s :type nms_thres: float, optional :param verbose: If True, prints stats of model, defaults to True :type verbose: bool, optional + :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist. 
+ :type robot_class_ids: List[int], optional :return: Returns precision, recall, AP, f1, ap_class """ dataloader = _create_validation_data_loader( @@ -59,7 +63,8 @@ def evaluate_model_file(model_path, weights_path, img_path, class_names, batch_s iou_thres, conf_thres, nms_thres, - verbose) + verbose, + robot_class_ids=robot_class_ids) return metrics_output, seg_class_ious @@ -77,7 +82,6 @@ def print_eval_stats(metrics_output, seg_class_ious, class_names, verbose): else: print("---- mAP not measured (no detections found by model) ----") - # Print segmentation statistics if verbose: # Print IoU per segmentation class @@ -90,7 +94,8 @@ def print_eval_stats(metrics_output, seg_class_ious, class_names, verbose): print(f"----Average IoU {mean_seg_class_ious:.5f} ----") -def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, nms_thres, verbose): +def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, nms_thres, verbose, + robot_class_ids: Optional[List[int]] = None): """Evaluate model on validation dataset. :param model: Model to evaluate @@ -109,6 +114,8 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n :type nms_thres: float :param verbose: If True, prints stats of model :type verbose: bool + :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist. 
+ :type robot_class_ids: List[int], optional :return: Returns precision, recall, AP, f1, ap_class """ model.eval() # Set model to evaluation mode @@ -119,7 +126,7 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n sample_metrics = [] # List of tuples (TP, confs, pred) seg_ious = [] import time - times=[] + times = [] for _, imgs, bb_targets, mask_targets in tqdm.tqdm(dataloader, desc="Validating"): # Extract labels labels += bb_targets[:, 1].tolist() @@ -133,7 +140,12 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n t1 = time.time() yolo_outputs, segmentation_outputs = model(imgs) times.append(time.time() - t1) - yolo_outputs = non_max_suppression(yolo_outputs, conf_thres=conf_thres, iou_thres=nms_thres) + yolo_outputs = non_max_suppression( + yolo_outputs, + conf_thres=conf_thres, + iou_thres=nms_thres, + robot_class_ids=robot_class_ids + ) sample_metrics += get_batch_statistics(yolo_outputs, bb_targets, iou_threshold=iou_thres) @@ -143,7 +155,7 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n print("---- No detections over whole validation set ----") return None - print(f"Times: Mean {1/np.array(times).mean()}fps | Std: {np.array(times).std()} ms") + print(f"Times: Mean {1 / np.array(times).mean()}fps | Std: {np.array(times).std()} ms") # Concatenate sample statistics true_positives, pred_scores, pred_labels = [ @@ -159,7 +171,7 @@ def seg_iou_mean_without_nan(seg_iou: List[float]) -> np.ndarray: :return: Segmentation IOUs without NaN """ seg_iou = np.asarray(seg_iou) - return seg_iou[~np.isnan(seg_iou)].mean() + return seg_iou[~np.isnan(seg_iou)].mean() seg_class_ious = [seg_iou_mean_without_nan(class_ious) for class_ious in list(zip(*seg_ious))] @@ -197,8 +209,10 @@ def _create_validation_data_loader(img_path, batch_size, img_size, n_cpu): def run(): print_environment_info() parser = argparse.ArgumentParser(description="Evaluate validation data.") - 
parser.add_argument("-m", "--model", type=str, default="config/yoeo.cfg", help="Path to model definition file (.cfg)") - parser.add_argument("-w", "--weights", type=str, default="weights/yoeo.pth", help="Path to weights or checkpoint file (.weights or .pth)") + parser.add_argument("-m", "--model", type=str, default="config/yoeo.cfg", + help="Path to model definition file (.cfg)") + parser.add_argument("-w", "--weights", type=str, default="weights/yoeo.pth", + help="Path to weights or checkpoint file (.weights or .pth)") parser.add_argument("-d", "--data", type=str, default="config/torso.data", help="Path to data config file (.data)") parser.add_argument("-b", "--batch_size", type=int, default=8, help="Size of each image batch") parser.add_argument("-v", "--verbose", action='store_true', help="Makes the validation more verbose") @@ -207,6 +221,9 @@ def run(): parser.add_argument("--iou_thres", type=float, default=0.5, help="IOU threshold required to qualify as detected") parser.add_argument("--conf_thres", type=float, default=0.01, help="Object confidence threshold") parser.add_argument("--nms_thres", type=float, default=0.4, help="IOU threshold for non-maximum suppression") + parser.add_argument("--multiple_robot_classes", action="store_true", + help="If multiple robot classes exist and nms shall be performed across all robot classes") + args = parser.parse_args() print(f"Command line arguments: {args}") @@ -216,6 +233,13 @@ def run(): valid_path = data_config["valid"] class_names = load_classes(data_config["names"]) # Detection and segmentation class names + robot_class_ids = None + if args.multiple_robot_classes: + robot_class_ids = [] + for idx, c in enumerate(class_names["detection"]): + if "robot" in c: + robot_class_ids.append(idx) + evaluate_model_file( args.model, args.weights, @@ -227,7 +251,9 @@ def run(): iou_thres=args.iou_thres, conf_thres=args.conf_thres, nms_thres=args.nms_thres, - verbose=True) + verbose=True, + robot_class_ids=robot_class_ids + ) 
if __name__ == "__main__": diff --git a/yoeo/train.py b/yoeo/train.py index d8afdbf..ef328cd 100755 --- a/yoeo/train.py +++ b/yoeo/train.py @@ -1,6 +1,6 @@ #! /usr/bin/env python3 -from __future__ import division +from __future__ import division, annotations import os import argparse @@ -13,6 +13,8 @@ import torch.optim as optim from torch.autograd import Variable +from typing import List, Optional + from yoeo.models import load_model from yoeo.utils.logger import Logger from yoeo.utils.utils import to_cpu, load_classes, print_environment_info, provide_determinism, worker_seed_set @@ -78,6 +80,8 @@ def run(): parser.add_argument("--nms_thres", type=float, default=0.5, help="Evaluation: IOU threshold for non-maximum suppression") parser.add_argument("--logdir", type=str, default="logs", help="Directory for training log files (e.g. for TensorBoard)") parser.add_argument("--seed", type=int, default=-1, help="Makes results reproducable. Set -1 to disable.") + parser.add_argument("--multiple_robot_classes", action="store_true", + help="If multiple robot classes exist and nms shall be performed across all robot classes") args = parser.parse_args() print(f"Command line arguments: {args}") @@ -95,6 +99,14 @@ def run(): train_path = data_config["train"] valid_path = data_config["valid"] class_names = load_classes(data_config["names"]) + + robot_class_ids = None + if args.multiple_robot_classes: + robot_class_ids = [] + for idx, c in enumerate(class_names["detection"]): + if "robot" in c: + robot_class_ids.append(idx) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # ############ @@ -249,7 +261,8 @@ def run(): iou_thres=args.iou_thres, conf_thres=args.conf_thres, nms_thres=args.nms_thres, - verbose=args.verbose + verbose=args.verbose, + robot_class_ids=robot_class_ids ) if metrics_output is not None: diff --git a/yoeo/utils/utils.py b/yoeo/utils/utils.py index c384d6b..a11bbe8 100644 --- a/yoeo/utils/utils.py +++ b/yoeo/utils/utils.py @@ -1,4 +1,4 @@ 
-from __future__ import division +from __future__ import division, annotations from typing import Tuple @@ -11,7 +11,7 @@ import numpy as np import subprocess import random -from typing import List +from typing import List, Optional import yaml @@ -418,7 +418,8 @@ def box_area(box): return inter / (area1[:, None] + area2 - inter) -def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None): +def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, + robot_class_ids: Optional[List[int]] = None): """Performs Non-Maximum Suppression (NMS) on inference results Returns: detections with shape: nx6 (x1, y1, x2, y2, conf, cls) @@ -436,6 +437,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non t = time.time() output = [torch.zeros((0, 6), device="cpu")] * prediction.shape[0] + if robot_class_ids: + robot_class_ids = torch.tensor(robot_class_ids, device=prediction.device, dtype=prediction.dtype) for xi, x in enumerate(prediction): # image index, image inference # Apply constraints @@ -473,7 +476,15 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non x = x[x[:, 4].argsort(descending=True)[:max_nms]] # Batched NMS - c = x[:, 5:6] * max_wh # classes + if not torch.is_tensor(robot_class_ids): # None or empty list was passed: plain per-class NMS + c = x[:, 5:6] * max_wh # classes + else: + # If multiple robot classes are present, all robot classes are treated as one class in order to perform + # nms across all classes and not per class. For this, all robot classes get the same offset. + c = torch.clone(x[:, 5:6]) + c[torch.isin(c, robot_class_ids)] = robot_class_ids[0] + c *= max_wh + # boxes (offset by class), scores boxes, scores = x[:, :4] + c, x[:, 4] i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS