Merge pull request #95 from bit-bots/feature/nms_for_multiple_robot_classes

Feature/nms for multiple robot classes
bit-bots · Apr 25, 2023 · 1fe580b · 1fe580b
2 parents 3261443 + eb6bd85
commit 1fe580b
Show file tree

Hide file tree

Showing 4 changed files with 100 additions and 29 deletions.
diff --git a/yoeo/detect.py b/yoeo/detect.py
@@ -1,7 +1,6 @@
 #! /usr/bin/env python3
 
-from __future__ import division
-
+from __future__ import division, annotations
 import os
 import argparse
 import tqdm
@@ -13,6 +12,8 @@
 from torch.utils.data import DataLoader
 from torch.autograd import Variable
 
+from typing import Optional, List
+
 from imgaug.augmentables.segmaps import SegmentationMapsOnImage
 
 from yoeo.models import load_model
@@ -26,7 +27,8 @@
 
 
 def detect_directory(model_path, weights_path, img_path, classes, output_path,
-                     batch_size=8, img_size=416, n_cpu=8, conf_thres=0.5, nms_thres=0.5):
+                     batch_size=8, img_size=416, n_cpu=8, conf_thres=0.5, nms_thres=0.5,
+                     robot_class_ids: Optional[List[int]] = None):
     """Detects objects on all images in specified directory and saves output images with drawn detections.
 
     :param model_path: Path to model definition file (.cfg)
@@ -49,6 +51,8 @@ def detect_directory(model_path, weights_path, img_path, classes, output_path,
     :type conf_thres: float, optional
     :param nms_thres: IOU threshold for non-maximum suppression, defaults to 0.5
     :type nms_thres: float, optional
+    :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist.
+    :type robot_class_ids: List[int], optional
     """
     dataloader = _create_data_loader(img_path, batch_size, img_size, n_cpu)
     model = load_model(model_path, weights_path)
@@ -58,14 +62,16 @@ def detect_directory(model_path, weights_path, img_path, classes, output_path,
         dataloader,
         output_path,
         conf_thres,
-        nms_thres)
+        nms_thres,
+        robot_class_ids=robot_class_ids
+    )
     _draw_and_save_output_images(
         img_detections, segmentations, imgs, img_size, output_path, classes)
 
     print(f"---- Detections were saved to: '{output_path}' ----")
 
 
-def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5):
+def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5, robot_class_ids: Optional[List[int]] = None):
     """Inferences one image with model.
 
     :param model: Model for inference
@@ -78,6 +84,8 @@ def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5):
     :type conf_thres: float, optional
     :param nms_thres: IOU threshold for non-maximum suppression, defaults to 0.5
     :type nms_thres: float, optional
+    :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist.
+    :type robot_class_ids: List[int], optional
     :return: Detections on image with each detection in the format: [x1, y1, x2, y2, confidence, class], Segmentation as 2d numpy array with the coresponding class id in each cell
     :rtype: nd.array, nd.array
     """
@@ -97,13 +105,13 @@ def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5):
     # Get detections
     with torch.no_grad():
         detections, segmentations = model(input_img)
-        detections = non_max_suppression(detections, conf_thres, nms_thres)
+        detections = non_max_suppression(detections, conf_thres, nms_thres, robot_class_ids=robot_class_ids)
         detections = rescale_boxes(detections[0], img_size, image.shape[0:2])
         segmentations = rescale_segmentation(segmentations, image.shape[0:2])
     return detections.numpy(), segmentations.cpu().detach().numpy()
 
 
-def detect(model, dataloader, output_path, conf_thres, nms_thres):
+def detect(model, dataloader, output_path, conf_thres, nms_thres, robot_class_ids: Optional[List[int]] = None):
     """Inferences images with model.
 
     :param model: Model for inference
@@ -116,6 +124,8 @@ def detect(model, dataloader, output_path, conf_thres, nms_thres):
     :type conf_thres: float, optional
     :param nms_thres: IOU threshold for non-maximum suppression, defaults to 0.5
     :type nms_thres: float, optional
+    :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist.
+    :type robot_class_ids: List[int], optional
     :return: List of detections. The coordinates are given for the padded image that is provided by the dataloader.
         Use `utils.rescale_boxes` to transform them into the desired input image coordinate system before its transformed by the dataloader),
         List of input image paths
@@ -139,7 +149,7 @@ def detect(model, dataloader, output_path, conf_thres, nms_thres):
         # Get detections
         with torch.no_grad():
             detections, segmentations = model(input_imgs)
-            detections = non_max_suppression(detections, conf_thres, nms_thres)
+            detections = non_max_suppression(detections, conf_thres, nms_thres, robot_class_ids=robot_class_ids)
 
         # Store image and detections
         img_detections.extend(detections)
@@ -300,12 +310,21 @@ def run():
     parser.add_argument("--n_cpu", type=int, default=8, help="Number of cpu threads to use during batch generation")
     parser.add_argument("--conf_thres", type=float, default=0.5, help="Object confidence threshold")
     parser.add_argument("--nms_thres", type=float, default=0.4, help="IOU threshold for non-maximum suppression")
+    parser.add_argument("--multiple_robot_classes", action="store_true",
+                        help="If multiple robot classes exist and nms shall be performed across all robot classes")
     args = parser.parse_args()
     print(f"Command line arguments: {args}")
 
     # Extract class names from file
     classes = load_classes(args.classes)['detection']  # List of class names
 
+    robot_class_ids = None
+    if args.multiple_robot_classes:
+        robot_class_ids = []
+        for idx, c in enumerate(classes):
+            if "robot" in c:
+                robot_class_ids.append(idx)
+
     detect_directory(
         args.model,
         args.weights,
@@ -316,7 +335,9 @@ def run():
         img_size=args.img_size,
         n_cpu=args.n_cpu,
         conf_thres=args.conf_thres,
-        nms_thres=args.nms_thres)
+        nms_thres=args.nms_thres,
+        robot_class_ids=robot_class_ids
+    )
 
 
 if __name__ == '__main__':

diff --git a/yoeo/test.py b/yoeo/test.py
@@ -1,7 +1,7 @@
 #! /usr/bin/env python3
 
-from __future__ import division
-from typing import List
+from __future__ import division, annotations
+from typing import List, Optional
 
 import argparse
 import tqdm
@@ -14,14 +14,16 @@
 from torch.autograd import Variable
 
 from yoeo.models import load_model
-from yoeo.utils.utils import load_classes, ap_per_class, get_batch_statistics, non_max_suppression, to_cpu, xywh2xyxy, print_environment_info, seg_iou
+from yoeo.utils.utils import load_classes, ap_per_class, get_batch_statistics, non_max_suppression, to_cpu, xywh2xyxy, \
+    print_environment_info, seg_iou
 from yoeo.utils.datasets import ListDataset
 from yoeo.utils.transforms import DEFAULT_TRANSFORMS
 from yoeo.utils.parse_config import parse_data_config
 
 
 def evaluate_model_file(model_path, weights_path, img_path, class_names, batch_size=8, img_size=416,
-                        n_cpu=8, iou_thres=0.5, conf_thres=0.5, nms_thres=0.5, verbose=True):
+                        n_cpu=8, iou_thres=0.5, conf_thres=0.5, nms_thres=0.5, verbose=True,
+                        robot_class_ids: Optional[List[int]] = None):
     """Evaluate model on validation dataset.
 
     :param model_path: Path to model definition file (.cfg)
@@ -46,6 +48,8 @@ def evaluate_model_file(model_path, weights_path, img_path, class_names, batch_s
     :type nms_thres: float, optional
     :param verbose: If True, prints stats of model, defaults to True
     :type verbose: bool, optional
+    :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist.
+    :type robot_class_ids: List[int], optional
     :return: Returns precision, recall, AP, f1, ap_class
     """
     dataloader = _create_validation_data_loader(
@@ -59,7 +63,8 @@ def evaluate_model_file(model_path, weights_path, img_path, class_names, batch_s
         iou_thres,
         conf_thres,
         nms_thres,
-        verbose)
+        verbose,
+        robot_class_ids=robot_class_ids)
     return metrics_output, seg_class_ious
 
 
@@ -77,7 +82,6 @@ def print_eval_stats(metrics_output, seg_class_ious, class_names, verbose):
     else:
         print("---- mAP not measured (no detections found by model) ----")
 
-
     # Print segmentation statistics
     if verbose:
         # Print IoU per segmentation class
@@ -90,7 +94,8 @@ def print_eval_stats(metrics_output, seg_class_ious, class_names, verbose):
     print(f"----Average IoU {mean_seg_class_ious:.5f} ----")
 
 
-def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, nms_thres, verbose):
+def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, nms_thres, verbose,
+              robot_class_ids: Optional[List[int]] = None):
     """Evaluate model on validation dataset.
 
     :param model: Model to evaluate
@@ -109,6 +114,8 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n
     :type nms_thres: float
     :param verbose: If True, prints stats of model
     :type verbose: bool
+    :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist.
+    :type robot_class_ids: List[int], optional
     :return: Returns precision, recall, AP, f1, ap_class
     """
     model.eval()  # Set model to evaluation mode
@@ -119,7 +126,7 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n
     sample_metrics = []  # List of tuples (TP, confs, pred)
     seg_ious = []
     import time
-    times=[]
+    times = []
     for _, imgs, bb_targets, mask_targets in tqdm.tqdm(dataloader, desc="Validating"):
         # Extract labels
         labels += bb_targets[:, 1].tolist()
@@ -133,7 +140,12 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n
             t1 = time.time()
             yolo_outputs, segmentation_outputs = model(imgs)
             times.append(time.time() - t1)
-            yolo_outputs = non_max_suppression(yolo_outputs, conf_thres=conf_thres, iou_thres=nms_thres)
+            yolo_outputs = non_max_suppression(
+                yolo_outputs,
+                conf_thres=conf_thres,
+                iou_thres=nms_thres,
+                robot_class_ids=robot_class_ids
+            )
 
         sample_metrics += get_batch_statistics(yolo_outputs, bb_targets, iou_threshold=iou_thres)
 
@@ -143,7 +155,7 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n
         print("---- No detections over whole validation set ----")
         return None
 
-    print(f"Times: Mean {1/np.array(times).mean()}fps | Std: {np.array(times).std()} ms")
+    print(f"Times: Mean {1 / np.array(times).mean()}fps | Std: {np.array(times).std()} ms")
 
     # Concatenate sample statistics
     true_positives, pred_scores, pred_labels = [
@@ -159,7 +171,7 @@ def seg_iou_mean_without_nan(seg_iou: List[float]) -> np.ndarray:
         :return: Segmentation IOUs without NaN
         """
         seg_iou = np.asarray(seg_iou)
-        return seg_iou[~np.isnan(seg_iou)].mean() 
+        return seg_iou[~np.isnan(seg_iou)].mean()
 
     seg_class_ious = [seg_iou_mean_without_nan(class_ious) for class_ious in list(zip(*seg_ious))]
 
@@ -197,8 +209,10 @@ def _create_validation_data_loader(img_path, batch_size, img_size, n_cpu):
 def run():
     print_environment_info()
     parser = argparse.ArgumentParser(description="Evaluate validation data.")
-    parser.add_argument("-m", "--model", type=str, default="config/yoeo.cfg", help="Path to model definition file (.cfg)")
-    parser.add_argument("-w", "--weights", type=str, default="weights/yoeo.pth", help="Path to weights or checkpoint file (.weights or .pth)")
+    parser.add_argument("-m", "--model", type=str, default="config/yoeo.cfg",
+                        help="Path to model definition file (.cfg)")
+    parser.add_argument("-w", "--weights", type=str, default="weights/yoeo.pth",
+                        help="Path to weights or checkpoint file (.weights or .pth)")
     parser.add_argument("-d", "--data", type=str, default="config/torso.data", help="Path to data config file (.data)")
     parser.add_argument("-b", "--batch_size", type=int, default=8, help="Size of each image batch")
     parser.add_argument("-v", "--verbose", action='store_true', help="Makes the validation more verbose")
@@ -207,6 +221,9 @@ def run():
     parser.add_argument("--iou_thres", type=float, default=0.5, help="IOU threshold required to qualify as detected")
     parser.add_argument("--conf_thres", type=float, default=0.01, help="Object confidence threshold")
     parser.add_argument("--nms_thres", type=float, default=0.4, help="IOU threshold for non-maximum suppression")
+    parser.add_argument("--multiple_robot_classes", action="store_true",
+                        help="If multiple robot classes exist and nms shall be performed across all robot classes")
+
     args = parser.parse_args()
     print(f"Command line arguments: {args}")
 
@@ -216,6 +233,13 @@ def run():
     valid_path = data_config["valid"]
     class_names = load_classes(data_config["names"])  # Detection and segmentation class names
 
+    robot_class_ids = None
+    if args.multiple_robot_classes:
+        robot_class_ids = []
+        for idx, c in enumerate(class_names["detection"]):
+            if "robot" in c:
+                robot_class_ids.append(idx)
+
     evaluate_model_file(
         args.model,
         args.weights,
@@ -227,7 +251,9 @@ def run():
         iou_thres=args.iou_thres,
         conf_thres=args.conf_thres,
         nms_thres=args.nms_thres,
-        verbose=True)
+        verbose=True,
+        robot_class_ids=robot_class_ids
+    )
 
 
 if __name__ == "__main__":

diff --git a/yoeo/train.py b/yoeo/train.py
@@ -1,6 +1,6 @@
 #! /usr/bin/env python3
 
-from __future__ import division
+from __future__ import division, annotations
 
 import os
 import argparse
@@ -13,6 +13,8 @@
 import torch.optim as optim
 from torch.autograd import Variable
 
+from typing import List, Optional
+
 from yoeo.models import load_model
 from yoeo.utils.logger import Logger
 from yoeo.utils.utils import to_cpu, load_classes, print_environment_info, provide_determinism, worker_seed_set
@@ -78,6 +80,8 @@ def run():
     parser.add_argument("--nms_thres", type=float, default=0.5, help="Evaluation: IOU threshold for non-maximum suppression")
     parser.add_argument("--logdir", type=str, default="logs", help="Directory for training log files (e.g. for TensorBoard)")
     parser.add_argument("--seed", type=int, default=-1, help="Makes results reproducable. Set -1 to disable.")
+    parser.add_argument("--multiple_robot_classes", action="store_true",
+                        help="If multiple robot classes exist and nms shall be performed across all robot classes")
     args = parser.parse_args()
     print(f"Command line arguments: {args}")
 
@@ -95,6 +99,14 @@ def run():
     train_path = data_config["train"]
     valid_path = data_config["valid"]
     class_names = load_classes(data_config["names"])
+
+    robot_class_ids = None
+    if args.multiple_robot_classes:
+        robot_class_ids = []
+        for idx, c in enumerate(class_names["detection"]):
+            if "robot" in c:
+                robot_class_ids.append(idx)
+
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
     # ############
@@ -249,7 +261,8 @@ def run():
                 iou_thres=args.iou_thres,
                 conf_thres=args.conf_thres,
                 nms_thres=args.nms_thres,
-                verbose=args.verbose
+                verbose=args.verbose,
+                robot_class_ids=robot_class_ids
             )
 
             if metrics_output is not None:

diff --git a/yoeo/utils/utils.py b/yoeo/utils/utils.py
@@ -1,4 +1,4 @@
-from __future__ import division
+from __future__ import division, annotations
 
 from typing import Tuple
 
@@ -11,7 +11,7 @@
 import numpy as np
 import subprocess
 import random
-from typing import List
+from typing import List, Optional
 import yaml
 
 
@@ -418,7 +418,8 @@ def box_area(box):
     return inter / (area1[:, None] + area2 - inter)
 
 
-def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None):
+def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None,
+                        robot_class_ids: Optional[List[int]] = None):
     """Performs Non-Maximum Suppression (NMS) on inference results
     Returns:
          detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
@@ -436,6 +437,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
 
     t = time.time()
     output = [torch.zeros((0, 6), device="cpu")] * prediction.shape[0]
+    if robot_class_ids:
+        robot_class_ids = torch.tensor(robot_class_ids, device=prediction.device, dtype=prediction.dtype)
 
     for xi, x in enumerate(prediction):  # image index, image inference
         # Apply constraints
@@ -473,7 +476,15 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
             x = x[x[:, 4].argsort(descending=True)[:max_nms]]
 
         # Batched NMS
-        c = x[:, 5:6] * max_wh  # classes
+        if robot_class_ids is None:
+            c = x[:, 5:6] * max_wh  # classes
+        else:
+            # If multiple robot classes are present, all robot classes are treated as one class in order to perform
+            # nms across all classes and not per class. For this, all robot classes get the same offset.
+            c = torch.clone(x[:, 5:6])
+            c[torch.isin(c, robot_class_ids)] = robot_class_ids[0]
+            c *= max_wh
+
         # boxes (offset by class), scores
         boxes, scores = x[:, :4] + c, x[:, 4]
         i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS