diff --git a/code/perception/launch/perception.launch b/code/perception/launch/perception.launch
index 67a9c2ab..1c3f487e 100644
--- a/code/perception/launch/perception.launch
+++ b/code/perception/launch/perception.launch
@@ -58,10 +58,14 @@
       Image-Segmentation:
       - deeplabv3_resnet101
-      - yolov8x-seg
+      - yolov8x-seg
+      - yolo11n-seg
+      - yolo11s-seg
+      - yolo11m-seg
+      - yolo11l-seg
     -->
-    
+    
diff --git a/code/perception/src/__init__.py b/code/perception/src/__init__.py
new file mode 100755
index 00000000..e69de29b
diff --git a/code/perception/src/vision_node.py b/code/perception/src/vision_node.py
index 7039b9f3..1a558df2 100755
--- a/code/perception/src/vision_node.py
+++ b/code/perception/src/vision_node.py
@@ -15,6 +15,7 @@
 )
 import torchvision.transforms as t
 import cv2
+from vision_node_helper import get_carla_class_name, get_carla_color
 from rospy.numpy_msg import numpy_msg
 from sensor_msgs.msg import Image as ImageMsg
 from std_msgs.msg import Header, Float32MultiArray
@@ -24,6 +25,7 @@
 from ultralytics import NAS, YOLO, RTDETR, SAM, FastSAM
 import asyncio
 import rospy
+from ultralytics.utils.ops import scale_masks
 
 
 class VisionNode(CompatibleNode):
@@ -77,6 +79,10 @@ def __init__(self, name, **kwargs):
             "yolov8x-seg": (YOLO, "yolov8x-seg.pt", "segmentation", "ultralytics"),
             "sam_l": (SAM, "sam_l.pt", "detection", "ultralytics"),
             "FastSAM-x": (FastSAM, "FastSAM-x.pt", "detection", "ultralytics"),
+            "yolo11n-seg": (YOLO, "yolo11n-seg.pt", "segmentation", "ultralytics"),
+            "yolo11s-seg": (YOLO, "yolo11s-seg.pt", "segmentation", "ultralytics"),
+            "yolo11m-seg": (YOLO, "yolo11m-seg.pt", "segmentation", "ultralytics"),
+            "yolo11l-seg": (YOLO, "yolo11l-seg.pt", "segmentation", "ultralytics"),
         }
 
         # general setup
@@ -239,7 +245,7 @@ def handle_camera_image(self, image):
         vision_result = self.predict_ultralytics(image)
 
         # publish vision result to rviz
-        img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="rgb8")
+        img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="bgr8")
         img_msg.header = image.header
 
         # publish img to corresponding angle topic
@@ -341,7 +347,7 @@ def predict_ultralytics(self, image):
         cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
 
         # run model prediction
-        output = self.model(cv_image, half=True, verbose=False)
+        output = self.model.track(cv_image, half=True, verbose=False, imgsz=640)
 
         # handle distance of objects
@@ -349,6 +355,11 @@
         distance_output = []
         c_boxes = []
         c_labels = []
+        c_colors = []
+        if hasattr(output[0], "masks") and output[0].masks is not None:
+            masks = output[0].masks.data
+        else:
+            masks = None
 
         boxes = output[0].boxes
         for box in boxes:
@@ -358,72 +369,74 @@
             # only run distance calc when dist_array is available
             # this if is needed because the lidar starts
             # publishing with a delay
-            if self.dist_arrays is not None:
-
-                # crop bounding box area out of depth image
-                distances = np.asarray(
-                    self.dist_arrays[
-                        int(pixels[1]) : int(pixels[3]) : 1,
-                        int(pixels[0]) : int(pixels[2]) : 1,
-                        ::,
-                    ]
-                )
+            if self.dist_arrays is None:
+                continue
+
+            # crop bounding box area out of depth image
+            distances = np.asarray(
+                self.dist_arrays[
+                    int(pixels[1]) : int(pixels[3]) : 1,
+                    int(pixels[0]) : int(pixels[2]) : 1,
+                    ::,
+                ]
+            )
 
-                # set all 0 (black) values to np.inf (necessary if
-                # you want to search for minimum)
-                # these are all pixels where there is no
-                # corresponding lidar point in the depth image
-                condition = distances[:, :, 0] != 0
-                non_zero_filter = distances[condition]
-                distances_copy = distances.copy()
-                distances_copy[distances_copy == 0] = np.inf
-
-                # only proceed if there is more than one lidar
-                # point in the bounding box
-                if len(non_zero_filter) > 0:
-
-                    """
-                    !Watch out:
-                    The calculation of min x and min abs y is currently
-                    only for center angle
-                    For back, left and right the values are different in the
-                    coordinate system of the lidar.
-                    (Example: the closedt distance on the back view should the
-                    max x since the back view is on the -x axis)
-                    """
-
-                    # copy actual lidar points
-                    obj_dist_min_x = self.min_x(dist_array=distances_copy)
-                    obj_dist_min_abs_y = self.min_abs_y(dist_array=distances_copy)
-
-                    # absolut distance to object for visualization
-                    abs_distance = np.sqrt(
-                        obj_dist_min_x[0] ** 2
-                        + obj_dist_min_x[1] ** 2
-                        + obj_dist_min_x[2] ** 2
-                    )
-
-                    # append class index, min x and min abs y to output array
-                    distance_output.append(float(cls))
-                    distance_output.append(float(obj_dist_min_x[0]))
-                    distance_output.append(float(obj_dist_min_abs_y[1]))
-
-                else:
-                    # fallback values for bounding box if
-                    # no lidar points where found
-                    obj_dist_min_x = (np.inf, np.inf, np.inf)
-                    obj_dist_min_abs_y = (np.inf, np.inf, np.inf)
-                    abs_distance = np.inf
-
-                # add values for visualization
-                c_boxes.append(torch.tensor(pixels))
-                c_labels.append(
-                    f"Class: {cls},"
-                    f"Meters: {round(abs_distance, 2)},"
-                    f"({round(float(obj_dist_min_x[0]), 2)},"
-                    f"{round(float(obj_dist_min_abs_y[1]), 2)})"
+            # set all 0 (black) values to np.inf (necessary if
+            # you want to search for minimum)
+            # these are all pixels where there is no
+            # corresponding lidar point in the depth image
+            condition = distances[:, :, 0] != 0
+            non_zero_filter = distances[condition]
+            distances_copy = distances.copy()
+            distances_copy[distances_copy == 0] = np.inf
+
+            # only proceed if there is at least one lidar
+            # point in the bounding box
+            if len(non_zero_filter) > 0:
+                """
+                !Watch out:
+                The calculation of min x and min abs y is currently
+                only for center angle
+                For back, left and right the values are different in the
+                coordinate system of the lidar.
+                (Example: the closest distance on the back view should be the
+                max x since the back view is on the -x axis)
+                """
+
+                # copy actual lidar points
+                obj_dist_min_x = self.min_x(dist_array=distances_copy)
+                obj_dist_min_abs_y = self.min_abs_y(dist_array=distances_copy)
+
+                # absolute distance to object for visualization
+                abs_distance = np.sqrt(
+                    obj_dist_min_x[0] ** 2
+                    + obj_dist_min_x[1] ** 2
+                    + obj_dist_min_x[2] ** 2
+                )
+
+                # append class index, min x and min abs y to output array
+                distance_output.append(float(cls))
+                distance_output.append(float(obj_dist_min_x[0]))
+                distance_output.append(float(obj_dist_min_abs_y[1]))
+
+            else:
+                # fallback values for bounding box if
+                # no lidar points were found
+                obj_dist_min_x = (np.inf, np.inf, np.inf)
+                obj_dist_min_abs_y = (np.inf, np.inf, np.inf)
+                abs_distance = np.inf
+
+            # add values for visualization
+            c_boxes.append(torch.tensor(pixels))
+            c_labels.append(
+                f"Class: {get_carla_class_name(cls)}, "
+                f"Meters: {round(abs_distance, 2)}, "
+                f"TrackingId: {int(box.id)}, "
+                f"({round(float(obj_dist_min_x[0]), 2)}, "
+                f"{round(float(obj_dist_min_abs_y[1]), 2)})",
             )
+            c_colors.append(get_carla_color(int(cls)))
+
         # publish list of distances of objects for planning
         self.distance_publisher.publish(Float32MultiArray(data=distance_output))
@@ -437,7 +450,7 @@ def predict_ultralytics(self, image):
 
         # draw bounding boxes and distance values on image
         c_boxes = torch.stack(c_boxes)
-        box = draw_bounding_boxes(
+        drawn_images = draw_bounding_boxes(
            image_np_with_detections,
            c_boxes,
            c_labels,
@@ -445,9 +458,21 @@
            width=3,
            font_size=12,
         )
-        np_box_img = np.transpose(box.detach().numpy(), (1, 2, 0))
-        box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)
-        return box_img
+        if masks is not None:
+            scaled_masks = np.squeeze(
+                scale_masks(masks.unsqueeze(1), cv_image.shape[:2], True).cpu().numpy(),
+                1,
+            )
+
+            drawn_images = draw_segmentation_masks(
+                drawn_images,
+                torch.from_numpy(scaled_masks > 0),
+                alpha=0.6,
+                colors=c_colors,
+            )
+
+        np_image = np.transpose(drawn_images.detach().numpy(), (1, 2, 0))
+        return cv2.cvtColor(np_image, cv2.COLOR_BGR2RGB)
 
     def min_x(self, dist_array):
         """
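For reviewers who want to try the new overlay path outside ROS: the following is a minimal sketch of what `predict_ultralytics` now does with `model.track`, `scale_masks`, and `draw_segmentation_masks`. The input file `sample.jpg` is a placeholder, and `yolo11n-seg.pt` is fetched by ultralytics on first use.

```python
import cv2
import torch
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
from ultralytics import YOLO
from ultralytics.utils.ops import scale_masks

model = YOLO("yolo11n-seg.pt")
frame = cv2.imread("sample.jpg")  # BGR uint8, like the node's cv_image

# track() behaves like a plain predict() call but also assigns tracking ids
result = model.track(frame, half=True, verbose=False, imgsz=640)[0]

# draw boxes onto a (3, H, W) uint8 tensor, mirroring the node's visualization
image_t = torch.from_numpy(frame).permute(2, 0, 1)
drawn = draw_bounding_boxes(image_t, result.boxes.xyxy.cpu(), width=3)

if result.masks is not None:
    # masks come back at the 640px inference size; rescale them to the frame
    masks = scale_masks(result.masks.data.unsqueeze(1), frame.shape[:2], True)
    drawn = draw_segmentation_masks(drawn, masks.squeeze(1).cpu() > 0, alpha=0.6)

cv2.imwrite("overlay.jpg", drawn.permute(1, 2, 0).contiguous().numpy())
```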
diff --git a/code/perception/src/vision_node_helper.py b/code/perception/src/vision_node_helper.py
new file mode 100644
index 00000000..6778f7b6
--- /dev/null
+++ b/code/perception/src/vision_node_helper.py
@@ -0,0 +1,158 @@
+from typing import List, Union
+
+
+# Carla colors
+carla_colors = [
+    [0, 0, 0],  # 0: None
+    [70, 70, 70],  # 1: Buildings
+    [190, 153, 153],  # 2: Fences
+    [72, 0, 90],  # 3: Other
+    [220, 20, 60],  # 4: Pedestrians
+    [153, 153, 153],  # 5: Poles
+    [157, 234, 50],  # 6: RoadLines
+    [128, 64, 128],  # 7: Roads
+    [244, 35, 232],  # 8: Sidewalks
+    [107, 142, 35],  # 9: Vegetation
+    [0, 0, 255],  # 10: Vehicles
+    [102, 102, 156],  # 11: Walls
+    [220, 220, 0],  # 12: TrafficSigns
+]
+
+carla_class_names = [
+    "None",  # 0
+    "Buildings",  # 1
+    "Fences",  # 2
+    "Other",  # 3
+    "Pedestrians",  # 4
+    "Poles",  # 5
+    "RoadLines",  # 6
+    "Roads",  # 7
+    "Sidewalks",  # 8
+    "Vegetation",  # 9
+    "Vehicles",  # 10
+    "Walls",  # 11
+    "TrafficSigns",  # 12
+]
+
+# COCO classes -> Carla classes mapping
+coco_to_carla = [
+    4,  # 0: Person -> Pedestrians
+    10,  # 1: Bicycle -> Vehicles
+    10,  # 2: Car -> Vehicles
+    10,  # 3: Motorbike -> Vehicles
+    10,  # 4: Airplane -> Vehicles
+    10,  # 5: Bus -> Vehicles
+    10,  # 6: Train -> Vehicles
+    10,  # 7: Truck -> Vehicles
+    10,  # 8: Boat -> Vehicles
+    12,  # 9: Traffic Light -> TrafficSigns
+    3,  # 10: Fire Hydrant -> Other
+    12,  # 11: Stop Sign -> TrafficSigns
+    3,  # 12: Parking Meter -> Other
+    3,  # 13: Bench -> Other
+    3,  # 14: Bird -> Other
+    3,  # 15: Cat -> Other
+    3,  # 16: Dog -> Other
+    3,  # 17: Horse -> Other
+    3,  # 18: Sheep -> Other
+    3,  # 19: Cow -> Other
+    3,  # 20: Elephant -> Other
+    3,  # 21: Bear -> Other
+    3,  # 22: Zebra -> Other
+    3,  # 23: Giraffe -> Other
+    3,  # 24: Backpack -> Other
+    3,  # 25: Umbrella -> Other
+    3,  # 26: Handbag -> Other
+    3,  # 27: Tie -> Other
+    3,  # 28: Suitcase -> Other
+    3,  # 29: Frisbee -> Other
+    3,  # 30: Skis -> Other
+    3,  # 31: Snowboard -> Other
+    3,  # 32: Sports Ball -> Other
+    3,  # 33: Kite -> Other
+    3,  # 34: Baseball Bat -> Other
+    3,  # 35: Baseball Glove -> Other
+    3,  # 36: Skateboard -> Other
+    3,  # 37: Surfboard -> Other
+    3,  # 38: Tennis Racket -> Other
+    3,  # 39: Bottle -> Other
+    3,  # 40: Wine Glass -> Other
+    3,  # 41: Cup -> Other
+    3,  # 42: Fork -> Other
+    3,  # 43: Knife -> Other
+    3,  # 44: Spoon -> Other
+    3,  # 45: Bowl -> Other
+    3,  # 46: Banana -> Other
+    3,  # 47: Apple -> Other
+    3,  # 48: Sandwich -> Other
+    3,  # 49: Orange -> Other
+    3,  # 50: Broccoli -> Other
+    3,  # 51: Carrot -> Other
+    3,  # 52: Hot Dog -> Other
+    3,  # 53: Pizza -> Other
+    3,  # 54: Donut -> Other
+    3,  # 55: Cake -> Other
+    3,  # 56: Chair -> Other
+    3,  # 57: Couch -> Other
+    3,  # 58: Potted Plant -> Other
+    3,  # 59: Bed -> Other
+    3,  # 60: Dining Table -> Other
+    3,  # 61: Toilet -> Other
+    3,  # 62: TV -> Other
+    3,  # 63: Laptop -> Other
+    3,  # 64: Mouse -> Other
+    3,  # 65: Remote -> Other
+    3,  # 66: Keyboard -> Other
+    3,  # 67: Cell Phone -> Other
+    3,  # 68: Microwave -> Other
+    3,  # 69: Oven -> Other
+    3,  # 70: Toaster -> Other
+    3,  # 71: Sink -> Other
+    3,  # 72: Refrigerator -> Other
+    3,  # 73: Book -> Other
+    3,  # 74: Clock -> Other
+    3,  # 75: Vase -> Other
+    3,  # 76: Scissors -> Other
+    3,  # 77: Teddy Bear -> Other
+    3,  # 78: Hair Drier -> Other
+    3,  # 79: Toothbrush -> Other
+]
+
+COCO_CLASS_COUNT = 80
+
+
+def get_carla_color(coco_class: Union[int, float]) -> List[int]:
+    """Get the Carla color for a given COCO class.
+    Args:
+        coco_class: COCO class index (0-79)
+
+    Returns:
+        RGB color values for the corresponding Carla class
+
+    Raises:
+        ValueError: If coco_class is out of valid range
+    """
+    coco_idx = int(coco_class)
+    if not 0 <= coco_idx < COCO_CLASS_COUNT:
+        raise ValueError(f"Invalid COCO class index: {coco_idx}")
+    carla_class = coco_to_carla[coco_idx]
+    return carla_colors[carla_class]
+
+
+def get_carla_class_name(coco_class: Union[int, float]) -> str:
+    """Get the Carla class name for a given COCO class.
+    Args:
+        coco_class: COCO class index (0-79)
+
+    Returns:
+        Name of the corresponding Carla class
+
+    Raises:
+        ValueError: If coco_class is out of valid range
+    """
+    coco_idx = int(coco_class)
+
+    if not 0 <= coco_idx < COCO_CLASS_COUNT:
+        raise ValueError(f"Invalid COCO class index: {coco_idx}")
+    carla_class = coco_to_carla[coco_idx]
+    return carla_class_names[carla_class]
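A quick usage sketch for the new helper, using values from the tables above (it assumes `code/perception/src` is on the import path, as it is for the node itself):

```python
from vision_node_helper import get_carla_class_name, get_carla_color

# COCO index 9 ("traffic light") maps to the Carla TrafficSigns class
print(get_carla_class_name(9.0))  # -> "TrafficSigns" (float box.cls values are accepted)
print(get_carla_color(9))         # -> [220, 220, 0]

# indices outside 0-79 raise instead of silently mis-coloring a mask
try:
    get_carla_color(80)
except ValueError as err:
    print(err)  # Invalid COCO class index: 80
```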
diff --git a/code/requirements.txt b/code/requirements.txt
index cbe57476..4f69a920 100644
--- a/code/requirements.txt
+++ b/code/requirements.txt
@@ -13,7 +13,7 @@ scipy==1.10.1
 xmltodict==0.13.0
 py-trees==2.2.3
 numpy==1.23.5
-ultralytics==8.1.11
+ultralytics==8.3.32
 scikit-learn>=0.18
 pandas==2.0.3
 debugpy==1.8.7
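Finally, a minimal sanity check that the pinned ultralytics release can load the new checkpoints (weights and the sample image are downloaded on first run, so this needs network access):

```python
import ultralytics
from ultralytics import YOLO

print(ultralytics.__version__)  # expected: 8.3.32, per requirements.txt
model = YOLO("yolo11n-seg.pt")  # YOLO11 weights require ultralytics >= 8.3
result = model("https://ultralytics.com/images/bus.jpg", verbose=False)[0]
print(result.masks.data.shape)  # (N, H, W): the segmentation head is active
```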