538 Feature: update and use Ultralytics YOLOv11 for object detection #543

Merged
8 changes: 6 additions & 2 deletions code/perception/launch/perception.launch
@@ -58,10 +58,14 @@

Image-Segmentation:
- deeplabv3_resnet101
- yolov8x-seg
- yolo11n-seg
- yolo11s-seg
- yolo11m-seg
- yolo11l-seg
-->

<param name="model" value="rtdetr-l" />
<param name="model" value="yolo11n-seg" />
</node>

<node pkg="perception" type="traffic_light_node.py" name="TrafficLightNode" output="screen">
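The commented model list and the `model` param above are how a network is selected; the node resolves the configured string against a model registry in vision_node.py (shown below). A minimal sketch of reading such a param — the private-parameter name and default are assumptions, not taken from this PR:

import rospy

# assumed lookup; the node may equally use CompatibleNode's param helpers
model_name = rospy.get_param("~model", "yolo11n-seg")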
Empty file added code/perception/src/__init__.py
163 changes: 94 additions & 69 deletions code/perception/src/vision_node.py
@@ -15,6 +15,7 @@
)
import torchvision.transforms as t
import cv2
from vision_node_helper import get_carla_class_name, get_carla_color
from rospy.numpy_msg import numpy_msg
from sensor_msgs.msg import Image as ImageMsg
from std_msgs.msg import Header, Float32MultiArray
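The new vision_node_helper import supplies the per-class names and colors used for drawing later in this diff. The helper file itself is not part of this excerpt, so the following is a hypothetical sketch: the function names match the import, but the mappings are illustrative placeholders only.

# Hypothetical sketch of vision_node_helper; mappings are assumed, not from the PR.
carla_classes = {0: "person", 1: "bicycle", 2: "car"}  # assumed COCO-style ids
carla_colors = {0: (220, 20, 60), 1: (119, 11, 32), 2: (0, 0, 142)}

def get_carla_class_name(class_id) -> str:
    return carla_classes.get(int(class_id), "unknown")

def get_carla_color(class_id) -> tuple:
    return carla_colors.get(int(class_id), (128, 128, 128))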
@@ -24,6 +25,7 @@
from ultralytics import NAS, YOLO, RTDETR, SAM, FastSAM
import asyncio
import rospy
from ultralytics.utils.ops import scale_masks


class VisionNode(CompatibleNode):
@@ -77,6 +79,10 @@ def __init__(self, name, **kwargs):
"yolov8x-seg": (YOLO, "yolov8x-seg.pt", "segmentation", "ultralytics"),
"sam_l": (SAM, "sam_l.pt", "detection", "ultralytics"),
"FastSAM-x": (FastSAM, "FastSAM-x.pt", "detection", "ultralytics"),
"yolo11n-seg": (YOLO, "yolo11n-seg.pt", "segmentation", "ultralytics"),
"yolo11s-seg": (YOLO, "yolo11s-seg.pt", "segmentation", "ultralytics"),
"yolo11m-seg": (YOLO, "yolo11m-seg.pt", "segmentation", "ultralytics"),
"yolo11l-seg": (YOLO, "yolo11l-seg.pt", "segmentation", "ultralytics"),
}
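Each entry above is a (constructor, weights file, task, framework) tuple, so model selection reduces to a dictionary lookup and unpack. A minimal sketch of that pattern — the dictionary's real attribute name is outside this excerpt, so `registry` here is a stand-in:

from ultralytics import YOLO

registry = {
    "yolo11n-seg": (YOLO, "yolo11n-seg.pt", "segmentation", "ultralytics"),
}

model_cls, weights, task, framework = registry["yolo11n-seg"]
model = model_cls(weights)  # Ultralytics fetches the .pt weights on first use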

# general setup
@@ -239,7 +245,7 @@ def handle_camera_image(self, image):
vision_result = self.predict_ultralytics(image)

# publish vision result to rviz
img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="rgb8")
img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="bgr8")
img_msg.header = image.header

# publish img to corresponding angle topic
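The encoding change above (rgb8 to bgr8) matters because OpenCV arrays are BGR-ordered; declaring them as rgb8 makes consumers swap the red and blue channels. A small self-contained illustration of the corrected call:

import numpy as np
from cv_bridge import CvBridge

bridge = CvBridge()
frame = np.zeros((4, 4, 3), dtype=np.uint8)
frame[..., 2] = 255  # red in OpenCV's BGR channel order

msg = bridge.cv2_to_imgmsg(frame, encoding="bgr8")  # viewers now render red
# declared as "rgb8", the same buffer would display as blue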
@@ -341,14 +347,19 @@ def predict_ultralytics(self, image):
cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)

# run model prediction
output = self.model(cv_image, half=True, verbose=False)
output = self.model.track(cv_image, half=True, verbose=False, imgsz=640)

# handle distance of objects

# set up lists for visualization of distance outputs
distance_output = []
c_boxes = []
c_labels = []
c_colors = []
if hasattr(output[0], "masks") and output[0].masks is not None:
masks = output[0].masks.data
else:
masks = None

boxes = output[0].boxes
for box in boxes:
@@ -358,72 +369,74 @@
# only run distance calc when dist_array is available
# this check is needed because the lidar starts
# publishing with a delay
if self.dist_arrays is not None:

# crop bounding box area out of depth image
distances = np.asarray(
self.dist_arrays[
int(pixels[1]) : int(pixels[3]) : 1,
int(pixels[0]) : int(pixels[2]) : 1,
::,
]
)
if self.dist_arrays is None:
continue

# crop bounding box area out of depth image
distances = np.asarray(
self.dist_arrays[
int(pixels[1]) : int(pixels[3]) : 1,
int(pixels[0]) : int(pixels[2]) : 1,
::,
]
)

# set all 0 (black) values to np.inf (necessary if
# you want to search for minimum)
# these are all pixels where there is no
# corresponding lidar point in the depth image
condition = distances[:, :, 0] != 0
non_zero_filter = distances[condition]
distances_copy = distances.copy()
distances_copy[distances_copy == 0] = np.inf

# only proceed if there is at least one lidar
# point in the bounding box
if len(non_zero_filter) > 0:

"""
!Watch out:
The calculation of min x and min abs y is currently
only for center angle
For back, left and right the values are different in the
coordinate system of the lidar.
(Example: the closedt distance on the back view should the
max x since the back view is on the -x axis)
"""

# copy actual lidar points
obj_dist_min_x = self.min_x(dist_array=distances_copy)
obj_dist_min_abs_y = self.min_abs_y(dist_array=distances_copy)

# absolute distance to object for visualization
abs_distance = np.sqrt(
obj_dist_min_x[0] ** 2
+ obj_dist_min_x[1] ** 2
+ obj_dist_min_x[2] ** 2
)

# append class index, min x and min abs y to output array
distance_output.append(float(cls))
distance_output.append(float(obj_dist_min_x[0]))
distance_output.append(float(obj_dist_min_abs_y[1]))

else:
# fallback values for the bounding box if
# no lidar points were found
obj_dist_min_x = (np.inf, np.inf, np.inf)
obj_dist_min_abs_y = (np.inf, np.inf, np.inf)
abs_distance = np.inf

# add values for visualization
c_boxes.append(torch.tensor(pixels))
c_labels.append(
f"Class: {cls},"
f"Meters: {round(abs_distance, 2)},"
f"({round(float(obj_dist_min_x[0]), 2)},"
f"{round(float(obj_dist_min_abs_y[1]), 2)})"
# set all 0 (black) values to np.inf (necessary if
# you want to search for minimum)
# these are all pixels where there is no
# corresponding lidar point in the depth image
condition = distances[:, :, 0] != 0
non_zero_filter = distances[condition]
distances_copy = distances.copy()
distances_copy[distances_copy == 0] = np.inf

# only proceed if there is at least one lidar
# point in the bounding box
if len(non_zero_filter) > 0:
"""
!Watch out:
The calculation of min x and min abs y is currently
only for center angle
For back, left and right the values are different in the
coordinate system of the lidar.
(Example: the closedt distance on the back view should the
max x since the back view is on the -x axis)
"""

# copy actual lidar points
obj_dist_min_x = self.min_x(dist_array=distances_copy)
obj_dist_min_abs_y = self.min_abs_y(dist_array=distances_copy)

# absolute distance to object for visualization
abs_distance = np.sqrt(
obj_dist_min_x[0] ** 2
+ obj_dist_min_x[1] ** 2
+ obj_dist_min_x[2] ** 2
)

# append class index, min x and min abs y to output array
distance_output.append(float(cls))
distance_output.append(float(obj_dist_min_x[0]))
distance_output.append(float(obj_dist_min_abs_y[1]))

else:
# fallback values for the bounding box if
# no lidar points were found
obj_dist_min_x = (np.inf, np.inf, np.inf)
obj_dist_min_abs_y = (np.inf, np.inf, np.inf)
abs_distance = np.inf

# add values for visualization
c_boxes.append(torch.tensor(pixels))
c_labels.append(
f"Class: {get_carla_class_name(cls)}, "
f"Meters: {round(abs_distance, 2)}, "
f"TrackingId: {int(box.id)}, "
f"({round(float(obj_dist_min_x[0]), 2)}, "
f"{round(float(obj_dist_min_abs_y[1]), 2)})",
)
c_colors.append(get_carla_color(int(cls)))

# publish list of distances of objects for planning
self.distance_publisher.publish(Float32MultiArray(data=distance_output))
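The Float32MultiArray published above is a flat sequence of triples — class id, minimum x, and minimum absolute y — appended only for boxes that contained at least one lidar return (gaps were rewritten to np.inf so they cannot win the minimum search). A hypothetical consumer-side sketch; the topic name and callback wiring are not shown in this excerpt:

from std_msgs.msg import Float32MultiArray

def on_distances(msg: Float32MultiArray):
    data = msg.data
    for i in range(0, len(data), 3):  # one (cls, min_x, min_abs_y) triple per object
        cls_id, min_x, min_abs_y = data[i], data[i + 1], data[i + 2]
        print(f"class {int(cls_id)}: x={min_x:.2f} m, |y|={min_abs_y:.2f} m")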

@@ -437,17 +450,29 @@ def predict_ultralytics(self, image):

# draw bounding boxes and distance values on image
c_boxes = torch.stack(c_boxes)
box = draw_bounding_boxes(
drawn_images = draw_bounding_boxes(
image_np_with_detections,
c_boxes,
c_labels,
colors="blue",
width=3,
font_size=12,
)
np_box_img = np.transpose(box.detach().numpy(), (1, 2, 0))
box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)
return box_img
if masks is not None:
scaled_masks = np.squeeze(
scale_masks(masks.unsqueeze(1), cv_image.shape[:2], True).cpu().numpy(),
1,
)

drawn_images = draw_segmentation_masks(
drawn_images,
torch.from_numpy(scaled_masks > 0),
alpha=0.6,
colors=c_colors,
)

np_image = np.transpose(drawn_images.detach().numpy(), (1, 2, 0))
return cv2.cvtColor(np_image, cv2.COLOR_BGR2RGB)
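Shape bookkeeping drives the mask overlay above: ultralytics' scale_masks is fed (N, 1, H, W) via unsqueeze(1) and squeezed back afterwards, while torchvision's draw_segmentation_masks expects boolean (N, H, W) masks plus a uint8 CHW image. A standalone sketch of the drawing step, using dummy masks already scaled to the frame size:

import torch
from torchvision.utils import draw_segmentation_masks

image = torch.zeros((3, 480, 640), dtype=torch.uint8)  # uint8 CHW frame
masks = torch.zeros((2, 480, 640))                     # two instance masks, full size
masks[0, 100:200, 100:200] = 1.0

overlay = draw_segmentation_masks(
    image,
    masks > 0,  # boolean masks, one per instance
    alpha=0.6,
    colors=[(0, 0, 142), (220, 20, 60)],
)
print(overlay.shape)  # torch.Size([3, 480, 640])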

def min_x(self, dist_array):
"""