diff --git a/code/perception/launch/perception.launch b/code/perception/launch/perception.launch
index 67a9c2ab..1c3f487e 100644
--- a/code/perception/launch/perception.launch
+++ b/code/perception/launch/perception.launch
@@ -58,10 +58,14 @@
Image-Segmentation:
- deeplabv3_resnet101
- - yolov8x-seg
+ - yolov8x-seg
+ - yolo11n-seg
+ - yolo11s-seg
+ - yolo11m-seg
+ - yolo11l-seg
-->
-
+
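For reference, each launch-file entry is presumably just the key used to look up the model in the vision node's model dictionary (extended in `vision_node.py` below). A minimal sketch of that lookup — the `load_model` helper and the trimmed-down dictionary literal are illustrative, not the node's actual code:

from ultralytics import YOLO

# Mirrors the (class, weights, task, framework) tuples added in vision_node.py.
MODEL_DICT = {
    "yolov8x-seg": (YOLO, "yolov8x-seg.pt", "segmentation", "ultralytics"),
    "yolo11n-seg": (YOLO, "yolo11n-seg.pt", "segmentation", "ultralytics"),
}

def load_model(name: str):
    # Resolve the configured name and instantiate the wrapped weights.
    model_class, weights, task, framework = MODEL_DICT[name]
    return model_class(weights)

model = load_model("yolo11n-seg")  # hypothetical usage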
diff --git a/code/perception/src/__init__.py b/code/perception/src/__init__.py
new file mode 100755
index 00000000..e69de29b
diff --git a/code/perception/src/vision_node.py b/code/perception/src/vision_node.py
index 7039b9f3..1a558df2 100755
--- a/code/perception/src/vision_node.py
+++ b/code/perception/src/vision_node.py
@@ -15,6 +15,7 @@
)
import torchvision.transforms as t
import cv2
+from vision_node_helper import get_carla_class_name, get_carla_color
from rospy.numpy_msg import numpy_msg
from sensor_msgs.msg import Image as ImageMsg
from std_msgs.msg import Header, Float32MultiArray
@@ -24,6 +25,7 @@
from ultralytics import NAS, YOLO, RTDETR, SAM, FastSAM
import asyncio
import rospy
+from ultralytics.utils.ops import scale_masks
class VisionNode(CompatibleNode):
@@ -77,6 +79,10 @@ def __init__(self, name, **kwargs):
"yolov8x-seg": (YOLO, "yolov8x-seg.pt", "segmentation", "ultralytics"),
"sam_l": (SAM, "sam_l.pt", "detection", "ultralytics"),
"FastSAM-x": (FastSAM, "FastSAM-x.pt", "detection", "ultralytics"),
+ "yolo11n-seg": (YOLO, "yolo11n-seg.pt", "segmentation", "ultralytics"),
+ "yolo11s-seg": (YOLO, "yolo11s-seg.pt", "segmentation", "ultralytics"),
+ "yolo11m-seg": (YOLO, "yolo11m-seg.pt", "segmentation", "ultralytics"),
+ "yolo11l-seg": (YOLO, "yolo11l-seg.pt", "segmentation", "ultralytics"),
}
# general setup
@@ -239,7 +245,7 @@ def handle_camera_image(self, image):
vision_result = self.predict_ultralytics(image)
# publish vision result to rviz
- img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="rgb8")
+ img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="bgr8")
img_msg.header = image.header
# publish img to corresponding angle topic
@@ -341,7 +347,7 @@ def predict_ultralytics(self, image):
cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
# run model prediction
- output = self.model(cv_image, half=True, verbose=False)
+ output = self.model.track(cv_image, half=True, verbose=False, imgsz=640)
# handle distance of objects
@@ -349,6 +355,11 @@ def predict_ultralytics(self, image):
distance_output = []
c_boxes = []
c_labels = []
+ c_colors = []
+ if hasattr(output[0], "masks") and output[0].masks is not None:
+ masks = output[0].masks.data
+ else:
+ masks = None
boxes = output[0].boxes
for box in boxes:
@@ -358,72 +369,74 @@ def predict_ultralytics(self, image):
# only run distance calc when dist_array is available
# this if is needed because the lidar starts
# publishing with a delay
- if self.dist_arrays is not None:
-
- # crop bounding box area out of depth image
- distances = np.asarray(
- self.dist_arrays[
- int(pixels[1]) : int(pixels[3]) : 1,
- int(pixels[0]) : int(pixels[2]) : 1,
- ::,
- ]
- )
+ if self.dist_arrays is None:
+ continue
+
+ # crop bounding box area out of depth image
+ distances = np.asarray(
+ self.dist_arrays[
+ int(pixels[1]) : int(pixels[3]) : 1,
+ int(pixels[0]) : int(pixels[2]) : 1,
+ ::,
+ ]
+ )
- # set all 0 (black) values to np.inf (necessary if
- # you want to search for minimum)
- # these are all pixels where there is no
- # corresponding lidar point in the depth image
- condition = distances[:, :, 0] != 0
- non_zero_filter = distances[condition]
- distances_copy = distances.copy()
- distances_copy[distances_copy == 0] = np.inf
-
- # only proceed if there is more than one lidar
- # point in the bounding box
- if len(non_zero_filter) > 0:
-
- """
- !Watch out:
- The calculation of min x and min abs y is currently
- only for center angle
- For back, left and right the values are different in the
- coordinate system of the lidar.
- (Example: the closedt distance on the back view should the
- max x since the back view is on the -x axis)
- """
-
- # copy actual lidar points
- obj_dist_min_x = self.min_x(dist_array=distances_copy)
- obj_dist_min_abs_y = self.min_abs_y(dist_array=distances_copy)
-
- # absolut distance to object for visualization
- abs_distance = np.sqrt(
- obj_dist_min_x[0] ** 2
- + obj_dist_min_x[1] ** 2
- + obj_dist_min_x[2] ** 2
- )
-
- # append class index, min x and min abs y to output array
- distance_output.append(float(cls))
- distance_output.append(float(obj_dist_min_x[0]))
- distance_output.append(float(obj_dist_min_abs_y[1]))
-
- else:
- # fallback values for bounding box if
- # no lidar points where found
- obj_dist_min_x = (np.inf, np.inf, np.inf)
- obj_dist_min_abs_y = (np.inf, np.inf, np.inf)
- abs_distance = np.inf
-
- # add values for visualization
- c_boxes.append(torch.tensor(pixels))
- c_labels.append(
- f"Class: {cls},"
- f"Meters: {round(abs_distance, 2)},"
- f"({round(float(obj_dist_min_x[0]), 2)},"
- f"{round(float(obj_dist_min_abs_y[1]), 2)})"
+            # set all 0 (black) values to np.inf (necessary
+            # when searching for the minimum); these are
+            # all pixels where there is no corresponding
+            # lidar point in the depth image
+ condition = distances[:, :, 0] != 0
+ non_zero_filter = distances[condition]
+ distances_copy = distances.copy()
+ distances_copy[distances_copy == 0] = np.inf
+
+            # only proceed if there is at least one lidar
+            # point in the bounding box
+ if len(non_zero_filter) > 0:
+ """
+ !Watch out:
+ The calculation of min x and min abs y is currently
+ only for center angle
+ For back, left and right the values are different in the
+ coordinate system of the lidar.
+ (Example: the closedt distance on the back view should the
+ max x since the back view is on the -x axis)
+ """
+
+                # pick the closest lidar points (min x and min |y|)
+ obj_dist_min_x = self.min_x(dist_array=distances_copy)
+ obj_dist_min_abs_y = self.min_abs_y(dist_array=distances_copy)
+
+                # absolute distance to object for visualization
+ abs_distance = np.sqrt(
+ obj_dist_min_x[0] ** 2
+ + obj_dist_min_x[1] ** 2
+ + obj_dist_min_x[2] ** 2
)
+ # append class index, min x and min abs y to output array
+ distance_output.append(float(cls))
+ distance_output.append(float(obj_dist_min_x[0]))
+ distance_output.append(float(obj_dist_min_abs_y[1]))
+
+ else:
+                # fallback values for the bounding box if
+                # no lidar points were found
+ obj_dist_min_x = (np.inf, np.inf, np.inf)
+ obj_dist_min_abs_y = (np.inf, np.inf, np.inf)
+ abs_distance = np.inf
+
+ # add values for visualization
+ c_boxes.append(torch.tensor(pixels))
+ c_labels.append(
+ f"Class: {get_carla_class_name(cls)}, "
+ f"Meters: {round(abs_distance, 2)}, "
+ f"TrackingId: {int(box.id)}, "
+ f"({round(float(obj_dist_min_x[0]), 2)}, "
+ f"{round(float(obj_dist_min_abs_y[1]), 2)})",
+ )
+ c_colors.append(get_carla_color(int(cls)))
+
# publish list of distances of objects for planning
self.distance_publisher.publish(Float32MultiArray(data=distance_output))
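To make the distance step above concrete, here is a small numpy sketch under the same conventions: `distances` is the (rows, cols, 3) lidar crop for one bounding box, zeros mark pixels without a projected lidar point, and the argmin lines stand in for the node's `min_x`/`min_abs_y` helpers, whose implementation is not part of this diff:

import numpy as np

# Hypothetical 2x2 crop of the depth image; each entry is (x, y, z).
distances = np.array(
    [[[0.0, 0.0, 0.0], [8.0, -1.0, 0.5]],
     [[6.0, 2.0, 0.3], [0.0, 0.0, 0.0]]]
)

distances_copy = distances.copy()
distances_copy[distances_copy == 0] = np.inf  # exclude empty pixels from the min search

flat = distances_copy.reshape(-1, 3)
obj_dist_min_x = flat[np.argmin(flat[:, 0])]              # closest point ahead: [6., 2., 0.3]
obj_dist_min_abs_y = flat[np.argmin(np.abs(flat[:, 1]))]  # smallest |y| offset: [8., -1., 0.5]

abs_distance = np.sqrt((obj_dist_min_x ** 2).sum())       # ~6.33 m, used for the label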
@@ -437,7 +450,7 @@ def predict_ultralytics(self, image):
# draw bounding boxes and distance values on image
c_boxes = torch.stack(c_boxes)
- box = draw_bounding_boxes(
+ drawn_images = draw_bounding_boxes(
image_np_with_detections,
c_boxes,
c_labels,
@@ -445,9 +458,21 @@ def predict_ultralytics(self, image):
width=3,
font_size=12,
)
- np_box_img = np.transpose(box.detach().numpy(), (1, 2, 0))
- box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)
- return box_img
+ if masks is not None:
+ scaled_masks = np.squeeze(
+ scale_masks(masks.unsqueeze(1), cv_image.shape[:2], True).cpu().numpy(),
+ 1,
+ )
+
+ drawn_images = draw_segmentation_masks(
+ drawn_images,
+ torch.from_numpy(scaled_masks > 0),
+ alpha=0.6,
+ colors=c_colors,
+ )
+
+ np_image = np.transpose(drawn_images.detach().numpy(), (1, 2, 0))
+ return cv2.cvtColor(np_image, cv2.COLOR_BGR2RGB)
def min_x(self, dist_array):
"""
diff --git a/code/perception/src/vision_node_helper.py b/code/perception/src/vision_node_helper.py
new file mode 100644
index 00000000..6778f7b6
--- /dev/null
+++ b/code/perception/src/vision_node_helper.py
@@ -0,0 +1,158 @@
+from typing import List, Union
+
+
+# Carla class colors (RGB)
+carla_colors = [
+ [0, 0, 0], # 0: None
+ [70, 70, 70], # 1: Buildings
+ [190, 153, 153], # 2: Fences
+ [72, 0, 90], # 3: Other
+ [220, 20, 60], # 4: Pedestrians
+ [153, 153, 153], # 5: Poles
+ [157, 234, 50], # 6: RoadLines
+ [128, 64, 128], # 7: Roads
+ [244, 35, 232], # 8: Sidewalks
+ [107, 142, 35], # 9: Vegetation
+ [0, 0, 255], # 10: Vehicles
+ [102, 102, 156], # 11: Walls
+ [220, 220, 0], # 12: TrafficSigns
+]
+
+carla_class_names = [
+ "None", # 0
+ "Buildings", # 1
+ "Fences", # 2
+ "Other", # 3
+ "Pedestrians", # 4
+ "Poles", # 5
+ "RoadLines", # 6
+ "Roads", # 7
+ "Sidewalks", # 8
+ "Vegetation", # 9
+ "Vehicles", # 10
+ "Walls", # 11
+ "TrafficSigns", # 12
+]
+
+# Mapping from COCO classes to Carla classes
+coco_to_carla = [
+ 4, # 0: Person -> Pedestrians
+ 10, # 1: Bicycle -> Vehicles
+ 10, # 2: Car -> Vehicles
+ 10, # 3: Motorbike -> Vehicles
+ 10, # 4: Airplane -> Vehicles
+ 10, # 5: Bus -> Vehicles
+ 10, # 6: Train -> Vehicles
+ 10, # 7: Truck -> Vehicles
+ 10, # 8: Boat -> Vehicles
+ 12, # 9: Traffic Light -> TrafficSigns
+ 3, # 10: Fire Hydrant -> Other
+ 12, # 11: Stop Sign -> TrafficSigns
+ 3, # 12: Parking Meter -> Other
+ 3, # 13: Bench -> Other
+ 3, # 14: Bird -> Other
+ 3, # 15: Cat -> Other
+ 3, # 16: Dog -> Other
+ 3, # 17: Horse -> Other
+ 3, # 18: Sheep -> Other
+ 3, # 19: Cow -> Other
+ 3, # 20: Elephant -> Other
+ 3, # 21: Bear -> Other
+ 3, # 22: Zebra -> Other
+ 3, # 23: Giraffe -> Other
+ 3, # 24: Backpack -> Other
+ 3, # 25: Umbrella -> Other
+ 3, # 26: Handbag -> Other
+ 3, # 27: Tie -> Other
+ 3, # 28: Suitcase -> Other
+ 3, # 29: Frisbee -> Other
+ 3, # 30: Skis -> Other
+ 3, # 31: Snowboard -> Other
+ 3, # 32: Sports Ball -> Other
+ 3, # 33: Kite -> Other
+ 3, # 34: Baseball Bat -> Other
+ 3, # 35: Baseball Glove -> Other
+ 3, # 36: Skateboard -> Other
+ 3, # 37: Surfboard -> Other
+ 3, # 38: Tennis Racket -> Other
+ 3, # 39: Bottle -> Other
+ 3, # 40: Wine Glass -> Other
+ 3, # 41: Cup -> Other
+ 3, # 42: Fork -> Other
+ 3, # 43: Knife -> Other
+ 3, # 44: Spoon -> Other
+ 3, # 45: Bowl -> Other
+ 3, # 46: Banana -> Other
+ 3, # 47: Apple -> Other
+ 3, # 48: Sandwich -> Other
+ 3, # 49: Orange -> Other
+ 3, # 50: Broccoli -> Other
+ 3, # 51: Carrot -> Other
+ 3, # 52: Hot Dog -> Other
+ 3, # 53: Pizza -> Other
+ 3, # 54: Donut -> Other
+ 3, # 55: Cake -> Other
+ 3, # 56: Chair -> Other
+ 3, # 57: Couch -> Other
+ 3, # 58: Potted Plant -> Other
+ 3, # 59: Bed -> Other
+ 3, # 60: Dining Table -> Other
+ 3, # 61: Toilet -> Other
+ 3, # 62: TV -> Other
+ 3, # 63: Laptop -> Other
+ 3, # 64: Mouse -> Other
+ 3, # 65: Remote -> Other
+ 3, # 66: Keyboard -> Other
+ 3, # 67: Cell Phone -> Other
+ 3, # 68: Microwave -> Other
+ 3, # 69: Oven -> Other
+ 3, # 70: Toaster -> Other
+ 3, # 71: Sink -> Other
+ 3, # 72: Refrigerator -> Other
+ 3, # 73: Book -> Other
+ 3, # 74: Clock -> Other
+ 3, # 75: Vase -> Other
+ 3, # 76: Scissors -> Other
+ 3, # 77: Teddy Bear -> Other
+ 3, # 78: Hair Drier -> Other
+ 3, # 79: Toothbrush -> Other
+]
+
+COCO_CLASS_COUNT = 80
+
+
+def get_carla_color(coco_class: Union[int, float]) -> List[int]:
+ """Get the Carla color for a given COCO class.
+ Args:
+ coco_class: COCO class index (0-79)
+
+ Returns:
+ RGB color values for the corresponding Carla class
+
+ Raises:
+ ValueError: If coco_class is out of valid range
+ """
+ coco_idx = int(coco_class)
+ if not 0 <= coco_idx < COCO_CLASS_COUNT:
+ raise ValueError(f"Invalid COCO class index: {coco_idx}")
+ carla_class = coco_to_carla[coco_idx]
+ return carla_colors[carla_class]
+
+
+def get_carla_class_name(coco_class: Union[int, float]) -> str:
+ """Get the Carla class name for a given COCO class.
+ Args:
+ coco_class: COCO class index (0-79)
+
+ Returns:
+ Name of the corresponding Carla class
+
+ Raises:
+ ValueError: If coco_class is out of valid range
+ """
+ coco_idx = int(coco_class)
+
+ if not 0 <= coco_idx < COCO_CLASS_COUNT:
+ raise ValueError(f"Invalid COCO class index: {coco_idx}")
+ carla_class = coco_to_carla[coco_idx]
+ return carla_class_names[carla_class]
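Based on the mapping tables above, a short usage example of the two helpers (the output values follow directly from the lists in this file):

from vision_node_helper import get_carla_class_name, get_carla_color

print(get_carla_class_name(2))    # COCO "Car" -> "Vehicles"
print(get_carla_color(2))         # -> [0, 0, 255]
print(get_carla_class_name(9.0))  # float indices (e.g. box.cls values) work -> "TrafficSigns"
get_carla_color(80)               # raises ValueError: Invalid COCO class index: 80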
diff --git a/code/requirements.txt b/code/requirements.txt
index cbe57476..4f69a920 100644
--- a/code/requirements.txt
+++ b/code/requirements.txt
@@ -13,7 +13,7 @@ scipy==1.10.1
xmltodict==0.13.0
py-trees==2.2.3
numpy==1.23.5
-ultralytics==8.1.11
+ultralytics==8.3.32
scikit-learn>=0.18
pandas==2.0.3
debugpy==1.8.7