diff --git a/code/perception/launch/perception.launch b/code/perception/launch/perception.launch index 67a9c2ab..cca0dc68 100644 --- a/code/perception/launch/perception.launch +++ b/code/perception/launch/perception.launch @@ -1,6 +1,6 @@ - + - + diff --git a/code/perception/src/__init__.py b/code/perception/src/__init__.py new file mode 100755 index 00000000..e69de29b diff --git a/code/perception/src/lidar_distance.py b/code/perception/src/lidar_distance.py index 04394ee2..5c937be0 100755 --- a/code/perception/src/lidar_distance.py +++ b/code/perception/src/lidar_distance.py @@ -1,15 +1,15 @@ #!/usr/bin/env python +from joblib import Parallel, delayed import rospy import ros_numpy import numpy as np -import lidar_filter_utility -from sensor_msgs.msg import PointCloud2 +from lidar_filter_utility import bounding_box, remove_field_name +from sensor_msgs.msg import PointCloud2, Image as ImageMsg +from sklearn.cluster import DBSCAN +from cv_bridge import CvBridge # from mpl_toolkits.mplot3d import Axes3D # from itertools import combinations -from sensor_msgs.msg import Image as ImageMsg -from cv_bridge import CvBridge - # from matplotlib.colors import LinearSegmentedColormap @@ -18,234 +18,416 @@ class LidarDistance: how to configute this node """ - def callback(self, data): - """Callback function, filters a PontCloud2 message - by restrictions defined in the launchfile. + def __init__(self): + self.cluster_buffer = [] - Publishes a Depth image for the specified camera angle. - Each angle has do be delt with differently since the signs of the - coordinate system change with the view angle. - - :param data: a PointCloud2 + def callback(self, data): """ - coordinates = ros_numpy.point_cloud2.pointcloud2_to_array(data) + Callback function that processes LiDAR point cloud data. 
- # Center - reconstruct_bit_mask_center = lidar_filter_utility.bounding_box( - coordinates, - max_x=np.inf, - min_x=0.0, - min_z=-1.6, - ) - reconstruct_coordinates_center = coordinates[reconstruct_bit_mask_center] - reconstruct_coordinates_xyz_center = np.array( - lidar_filter_utility.remove_field_name( - reconstruct_coordinates_center, "intensity" - ).tolist() - ) - dist_array_center = self.reconstruct_img_from_lidar( - reconstruct_coordinates_xyz_center, focus="Center" - ) - dist_array_center_msg = self.bridge.cv2_to_imgmsg( - dist_array_center, encoding="passthrough" - ) - dist_array_center_msg.header = data.header - self.dist_array_center_publisher.publish(dist_array_center_msg) - - # Back - reconstruct_bit_mask_back = lidar_filter_utility.bounding_box( - coordinates, - max_x=0.0, - min_x=-np.inf, - min_z=-1.6, - ) - reconstruct_coordinates_back = coordinates[reconstruct_bit_mask_back] - reconstruct_coordinates_xyz_back = np.array( - lidar_filter_utility.remove_field_name( - reconstruct_coordinates_back, "intensity" - ).tolist() - ) - dist_array_back = self.reconstruct_img_from_lidar( - reconstruct_coordinates_xyz_back, focus="Back" - ) - dist_array_back_msg = self.bridge.cv2_to_imgmsg( - dist_array_back, encoding="passthrough" - ) - dist_array_back_msg.header = data.header - self.dist_array_back_publisher.publish(dist_array_back_msg) - - # Left - reconstruct_bit_mask_left = lidar_filter_utility.bounding_box( - coordinates, - max_y=np.inf, - min_y=0.0, - min_z=-1.6, - ) - reconstruct_coordinates_left = coordinates[reconstruct_bit_mask_left] - reconstruct_coordinates_xyz_left = np.array( - lidar_filter_utility.remove_field_name( - reconstruct_coordinates_left, "intensity" - ).tolist() - ) - dist_array_left = self.reconstruct_img_from_lidar( - reconstruct_coordinates_xyz_left, focus="Left" - ) - dist_array_left_msg = self.bridge.cv2_to_imgmsg( - dist_array_left, encoding="passthrough" - ) - dist_array_left_msg.header = data.header - self.dist_array_left_publisher.publish(dist_array_left_msg) + Executes clustering and image calculations for the provided point cloud. - # Right - reconstruct_bit_mask_right = lidar_filter_utility.bounding_box( - coordinates, max_y=-0.0, min_y=-np.inf, min_z=-1.6 - ) - reconstruct_coordinates_right = coordinates[reconstruct_bit_mask_right] - reconstruct_coordinates_xyz_right = np.array( - lidar_filter_utility.remove_field_name( - reconstruct_coordinates_right, "intensity" - ).tolist() - ) - dist_array_right = self.reconstruct_img_from_lidar( - reconstruct_coordinates_xyz_right, focus="Right" - ) - dist_array_right_msg = self.bridge.cv2_to_imgmsg( - dist_array_right, encoding="passthrough" - ) - dist_array_right_msg.header = data.header - self.dist_array_right_publisher.publish(dist_array_right_msg) + :param data: LiDAR point cloud as a ROS PointCloud2 message. + """ + self.start_clustering(data) + self.start_image_calculation(data) def listener(self): """ - Initializes the node and it's publishers + Initializes the ROS node, creates publishers/subscribers, and keeps it active. """ - # run simultaneously. 
rospy.init_node("lidar_distance") - self.bridge = CvBridge() + self.bridge = CvBridge() # OpenCV bridge for image conversions + # Publisher for filtered point clouds self.pub_pointcloud = rospy.Publisher( rospy.get_param( "~point_cloud_topic", "/carla/hero/" + rospy.get_namespace() + "_filtered", ), PointCloud2, - queue_size=10, + queue_size=1, ) - # publisher for dist_array + # Publishers for distance images in various directions self.dist_array_center_publisher = rospy.Publisher( rospy.get_param("~image_distance_topic", "/paf/hero/Center/dist_array"), ImageMsg, queue_size=10, ) - - # publisher for dist_array self.dist_array_back_publisher = rospy.Publisher( rospy.get_param("~image_distance_topic", "/paf/hero/Back/dist_array"), ImageMsg, queue_size=10, ) - - # publisher for dist_array + self.dist_array_lidar_publisher = rospy.Publisher( + rospy.get_param( + "~image_distance_topic_cluster", "/paf/hero/dist_clustered" + ), + PointCloud2, + queue_size=10, + ) + rospy.loginfo("dist_array_lidar_publisher successfully created.") self.dist_array_left_publisher = rospy.Publisher( rospy.get_param("~image_distance_topic", "/paf/hero/Left/dist_array"), ImageMsg, queue_size=10, ) - - # publisher for dist_array self.dist_array_right_publisher = rospy.Publisher( rospy.get_param("~image_distance_topic", "/paf/hero/Right/dist_array"), ImageMsg, queue_size=10, ) + # Subscriber for LiDAR data (point clouds) rospy.Subscriber( rospy.get_param("~source_topic", "/carla/hero/LIDAR"), PointCloud2, self.callback, ) + rospy.loginfo("Lidar Processor Node started.") rospy.spin() - def reconstruct_img_from_lidar(self, coordinates_xyz, focus): + def start_clustering(self, data): + """ + Filters LiDAR point clouds, performs clustering, + and publishes the combined clusters. + + :param data: LiDAR point clouds in ROS PointCloud2 format. + """ + + # Filter point clouds to remove irrelevant data + coordinates = ros_numpy.point_cloud2.pointcloud2_to_array(data) + filtered_coordinates = coordinates[ + ~( + (coordinates["x"] >= -2) + & (coordinates["x"] <= 2) + & (coordinates["y"] >= -1) + & (coordinates["y"] <= 1) + ) # Exclude points related to the ego vehicle + & ( + coordinates["z"] > -1.7 + 0.05 + ) # Minimum height in z to exclude the road + ] + + # Compute cluster data from the filtered coordinates + clustered_points = cluster_lidar_data_from_pointcloud( + coordinates=filtered_coordinates + ) + + # Only store valid cluster data + if clustered_points: + self.cluster_buffer.append(clustered_points) + else: + rospy.logwarn("No cluster data generated.") + + # Combine clusters + combined_clusters = combine_clusters(self.cluster_buffer) + + self.cluster_buffer = [] + + # Publish the combined clusters + self.publish_clusters(combined_clusters, data.header) + + def publish_clusters(self, combined_clusters, data_header): + """ + Publishes combined clusters as a ROS PointCloud2 message. + + :param combined_clusters: Combined point clouds of the clusters as a structured + NumPy array. + :param data_header: Header information for the ROS message. + """ + # Convert to a PointCloud2 message + pointcloud_msg = ros_numpy.point_cloud2.array_to_pointcloud2(combined_clusters) + pointcloud_msg.header = data_header + pointcloud_msg.header.stamp = rospy.Time.now() + # Publish the clusters + self.dist_array_lidar_publisher.publish(pointcloud_msg) + + def start_image_calculation(self, data): + """ + Computes distance images based on LiDAR data and publishes them. + + :param data: LiDAR point cloud in ROS PointCloud2 format. 
+ """ + coordinates = ros_numpy.point_cloud2.pointcloud2_to_array(data) + + # Directions to process + directions = ["Center", "Back", "Left", "Right"] + + # Process images for all directions + processed_images = { + direction: self.calculate_image(coordinates, focus=direction) + for direction in directions + } + + # Publish the processed images + self.publish_images(processed_images, data.header) + + def calculate_image(self, coordinates, focus): + """ + Calculates a distance image for a specific focus (view direction) from + LiDAR coordinates. + + :param coordinates: Filtered LiDAR coordinates as a NumPy array. + :param focus: The focus direction ("Center", "Back", "Left", "Right"). + :return: Distance image as a 2D array. + """ + # Define bounding box parameters based on focus direction + bounding_box_params = { + "Center": {"max_x": np.inf, "min_x": 0.0, "min_z": -1.6}, + "Back": {"max_x": 0.0, "min_x": -np.inf, "min_z": -1.6}, + "Left": {"max_y": np.inf, "min_y": 0.0, "min_z": -1.6}, + "Right": {"max_y": -0.0, "min_y": -np.inf, "min_z": -1.6}, + } + + # Select parameters for the given focus + params = bounding_box_params.get(focus) + if not params: + rospy.logwarn(f"Unknown focus: {focus}. Skipping image calculation.") + return None + + # Apply bounding box filter + reconstruct_bit_mask = bounding_box(coordinates, **params) + reconstruct_coordinates = coordinates[reconstruct_bit_mask] + + # Remove the "intensity" field and convert to a NumPy array + reconstruct_coordinates_xyz = np.array( + remove_field_name(reconstruct_coordinates, "intensity").tolist() + ) + + # Reconstruct the image based on the focus + return self.reconstruct_img_from_lidar(reconstruct_coordinates_xyz, focus=focus) + + def publish_images(self, processed_images, data_header): """ - reconstruct 3d LIDAR-Data and calculate 2D Pixel - according to Camera-World + Publishes distance images for various directions as ROS image messages. + + :param processed_images: Dictionary with directions ("Center", "Back", etc.) + as keys and image arrays as values. + :param data_header: Header for the ROS image messages. + """ + # Process only valid NumPy arrays + for direction, image_array in processed_images.items(): + if not isinstance(image_array, np.ndarray): + continue + + # Convert the image into a ROS image message + dist_array_msg = self.bridge.cv2_to_imgmsg( + image_array, encoding="passthrough" + ) + dist_array_msg.header = data_header - Args: - coordinates_xyz (np.array): filtered lidar points - focus (String): Camera Angle + if direction == "Center": + self.dist_array_center_publisher.publish(dist_array_msg) + if direction == "Back": + self.dist_array_back_publisher.publish(dist_array_msg) + if direction == "Left": + self.dist_array_left_publisher.publish(dist_array_msg) + if direction == "Right": + self.dist_array_right_publisher.publish(dist_array_msg) - Returns: - image: depth image for camera angle + def reconstruct_img_from_lidar(self, coordinates_xyz, focus): + """ + Reconstructs a 2D image from 3D LiDAR data for a given camera perspective. + + :param coordinates_xyz: 3D coordinates of the filtered LiDAR points. + :param focus: Camera view (e.g., "Center", "Back"). + :return: Reconstructed image as a 2D array. 
""" - # intrinsic matrix for camera: - # width -> 300, height -> 200, fov -> 100 (agent.py) + # Create the intrinsic camera matrix based on image parameters (FOV, resolution) im = np.identity(3) - im[0, 2] = 1280 / 2.0 - im[1, 2] = 720 / 2.0 - im[0, 0] = im[1, 1] = 1280 / (2.0 * np.tan(100 * np.pi / 360.0)) + im[0, 2] = 1280 / 2.0 # x-offset (image center) + im[1, 2] = 720 / 2.0 # y-offset (image center) + im[0, 0] = im[1, 1] = 1280 / ( + 2.0 * np.tan(100 * np.pi / 360.0) + ) # Scale factor based on FOV - # extrinsic matrix for camera + # Create the extrinsic camera matrix (identity matrix for no transformation) ex = np.zeros(shape=(3, 4)) - ex[0][0] = 1 - ex[1][1] = 1 - ex[2][2] = 1 - m = np.matmul(im, ex) + ex[0][0] = ex[1][1] = ex[2][2] = 1 + m = np.matmul(im, ex) # Combine intrinsic and extrinsic matrices - # reconstruct camera image with LIDAR-Data + # Initialize empty images for reconstruction img = np.zeros(shape=(720, 1280), dtype=np.float32) dist_array = np.zeros(shape=(720, 1280, 3), dtype=np.float32) - for c in coordinates_xyz: - # center depth image - if focus == "Center": - point = np.array([c[1], c[2], c[0], 1]) - pixel = np.matmul(m, point) - x, y = int(pixel[0] / pixel[2]), int(pixel[1] / pixel[2]) - if x >= 0 and x <= 1280 and y >= 0 and y <= 720: - img[719 - y][1279 - x] = c[0] - dist_array[719 - y][1279 - x] = np.array( - [c[0], c[1], c[2]], dtype=np.float32 - ) - - # back depth image - if focus == "Back": - point = np.array([c[1], c[2], c[0], 1]) - pixel = np.matmul(m, point) - x, y = int(pixel[0] / pixel[2]), int(pixel[1] / pixel[2]) - if x >= 0 and x <= 1280 and y >= 0 and y < 720: - img[y][1279 - x] = -c[0] - dist_array[y][1279 - x] = np.array( - [-c[0], c[1], c[2]], dtype=np.float32 - ) - - # left depth image - if focus == "Left": - point = np.array([c[0], c[2], c[1], 1]) - pixel = np.matmul(m, point) - x, y = int(pixel[0] / pixel[2]), int(pixel[1] / pixel[2]) - if x >= 0 and x <= 1280 and y >= 0 and y <= 720: - img[719 - y][1279 - x] = c[1] - dist_array[y][1279 - x] = np.array( - [c[0], c[1], c[2]], dtype=np.float32 - ) - - # right depth image - if focus == "Right": - point = np.array([c[0], c[2], c[1], 1]) - pixel = np.matmul(m, point) - x, y = int(pixel[0] / pixel[2]), int(pixel[1] / pixel[2]) - if x >= 0 and x < 1280 and y >= 0 and y < 720: - img[y][1279 - x] = -c[1] - dist_array[y][1279 - x] = np.array( - [c[0], c[1], c[2]], dtype=np.float32 - ) + + # Prepare points based on focus + if focus in ["Center", "Back"]: + points = np.column_stack( + ( + coordinates_xyz[:, 1], + coordinates_xyz[:, 2], + coordinates_xyz[:, 0], + np.ones(coordinates_xyz.shape[0]), + ) + ) + elif focus in ["Left", "Right"]: + points = np.column_stack( + ( + coordinates_xyz[:, 0], + coordinates_xyz[:, 2], + coordinates_xyz[:, 1], + np.ones(coordinates_xyz.shape[0]), + ) + ) + else: + rospy.logwarn(f"Unknown focus: {focus}. 
Skipping image calculation.") + return None + + # Project 3D points to 2D image coordinates + pixels = np.dot(m, points.T).T + x = (pixels[:, 0] / pixels[:, 2]).astype(int) + y = (pixels[:, 1] / pixels[:, 2]).astype(int) + + # Filter valid coordinates + valid_indices = (x >= 0) & (x < 1280) & (y >= 0) & (y < 720) + x = x[valid_indices] + y = y[valid_indices] + valid_coordinates = coordinates_xyz[valid_indices] + + if focus == "Center": + img[719 - y, 1279 - x] = valid_coordinates[:, 0] + dist_array[719 - y, 1279 - x] = valid_coordinates + elif focus == "Back": + img[y, 1279 - x] = -valid_coordinates[:, 0] + dist_array[y, 1279 - x] = np.column_stack( + ( + -valid_coordinates[:, 0], + valid_coordinates[:, 1], + valid_coordinates[:, 2], + ) + ) + elif focus == "Left": + img[719 - y, 1279 - x] = valid_coordinates[:, 1] + dist_array[719 - y, 1279 - x] = valid_coordinates + elif focus == "Right": + img[y, 1279 - x] = -valid_coordinates[:, 1] + dist_array[y, 1279 - x] = np.column_stack( + ( + valid_coordinates[:, 0], + -valid_coordinates[:, 1], + valid_coordinates[:, 2], + ) + ) return dist_array +def array_to_pointcloud2(points, header="hero/Lidar"): + """ + Converts an array of points into a ROS PointCloud2 message. + + :param points: Array of points with [x, y, z] coordinates. + :param header: Header information for the ROS PointCloud2 message. + :return: ROS PointCloud2 message. + """ + # Ensure the input is a NumPy array + points_array = np.array(points) + + # Convert the points into a structured array with fields "x", "y", "z" + points_structured = np.array( + [(p[0], p[1], p[2]) for p in points_array], + dtype=[("x", "f4"), ("y", "f4"), ("z", "f4")], + ) + + # Create a PointCloud2 message from the structured array + pointcloud_msg = ros_numpy.point_cloud2.array_to_pointcloud2(points_structured) + + # Set the timestamp and header for the message + pointcloud_msg.header.stamp = rospy.Time.now() + pointcloud_msg.header = header + + return pointcloud_msg + + +def combine_clusters(cluster_buffer): + """ + Combines clusters from multiple point clouds into a structured NumPy array. + + :param cluster_buffer: List of dictionaries containing cluster IDs and point clouds. + :return: Combined structured NumPy array with fields "x", "y", "z", "cluster_id". + """ + points_list = [] + cluster_ids_list = [] + + for clustered_points in cluster_buffer: + for cluster_id, points in clustered_points.items(): + if points.size > 0: # Ignore empty clusters + points_list.append(points) + # Create an array with the cluster ID for all points in the cluster + cluster_ids_list.append( + np.full(points.shape[0], cluster_id, dtype=np.float32) + ) + + if not points_list: # If no points are present + return np.array( + [], dtype=[("x", "f4"), ("y", "f4"), ("z", "f4"), ("cluster_id", "f4")] + ) + + # Combine all points and cluster IDs into two separate arrays + all_points = np.vstack(points_list) + all_cluster_ids = np.concatenate(cluster_ids_list) + + # Create a structured array for the combined data + combined_points = np.zeros( + all_points.shape[0], + dtype=[("x", "f4"), ("y", "f4"), ("z", "f4"), ("cluster_id", "f4")], + ) + combined_points["x"] = all_points[:, 0] + combined_points["y"] = all_points[:, 1] + combined_points["z"] = all_points[:, 2] + combined_points["cluster_id"] = all_cluster_ids + + return combined_points + + +def cluster_lidar_data_from_pointcloud(coordinates, eps=0.3, min_samples=10): + """ + Performs clustering on LiDAR data using DBSCAN and returns the clusters. 
+ + :param coordinates: LiDAR point cloud as a NumPy array with "x", "y", "z". + :param eps: Maximum distance between points to group them into a cluster. + :param min_samples: Minimum number of points required to form a cluster. + :return: Dictionary with cluster IDs and their corresponding point clouds. + """ + if coordinates.shape[0] == 0: + rospy.logerr("The input array 'coordinates' is empty.") + return {} + + # Extract x, y, and z from the coordinates for DBSCAN clustering + xyz = np.column_stack((coordinates["x"], coordinates["y"], coordinates["z"])) + + if xyz.shape[0] == 0: + rospy.logwarn("No data points available for DBSCAN. Skipping clustering.") + return {} + + # Apply DBSCAN to compute cluster labels for the point cloud + clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(xyz) + labels = clustering.labels_ + + # Remove noise (cluster ID: -1) and identify valid cluster IDs + unique_labels = np.unique(labels) + valid_labels = unique_labels[unique_labels != -1] + + # Create a dictionary with cluster IDs and their corresponding points + clusters = Parallel(n_jobs=-1)( + delayed(lambda cluster_label: (cluster_label, xyz[labels == cluster_label]))( + label + ) + for label in valid_labels + ) + + clusters = dict(clusters) + + return clusters + + if __name__ == "__main__": + """ + Initialisiert die LidarDistance-Klasse und startet die Listener-Methode. + """ lidar_distance = LidarDistance() lidar_distance.listener() diff --git a/code/perception/src/vision_node.py b/code/perception/src/vision_node.py index 7039b9f3..1a558df2 100755 --- a/code/perception/src/vision_node.py +++ b/code/perception/src/vision_node.py @@ -15,6 +15,7 @@ ) import torchvision.transforms as t import cv2 +from vision_node_helper import get_carla_class_name, get_carla_color from rospy.numpy_msg import numpy_msg from sensor_msgs.msg import Image as ImageMsg from std_msgs.msg import Header, Float32MultiArray @@ -24,6 +25,7 @@ from ultralytics import NAS, YOLO, RTDETR, SAM, FastSAM import asyncio import rospy +from ultralytics.utils.ops import scale_masks class VisionNode(CompatibleNode): @@ -77,6 +79,10 @@ def __init__(self, name, **kwargs): "yolov8x-seg": (YOLO, "yolov8x-seg.pt", "segmentation", "ultralytics"), "sam_l": (SAM, "sam_l.pt", "detection", "ultralytics"), "FastSAM-x": (FastSAM, "FastSAM-x.pt", "detection", "ultralytics"), + "yolo11n-seg": (YOLO, "yolo11n-seg.pt", "segmentation", "ultralytics"), + "yolo11s-seg": (YOLO, "yolo11s-seg.pt", "segmentation", "ultralytics"), + "yolo11m-seg": (YOLO, "yolo11m-seg.pt", "segmentation", "ultralytics"), + "yolo11l-seg": (YOLO, "yolo11l-seg.pt", "segmentation", "ultralytics"), } # general setup @@ -239,7 +245,7 @@ def handle_camera_image(self, image): vision_result = self.predict_ultralytics(image) # publish vision result to rviz - img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="rgb8") + img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="bgr8") img_msg.header = image.header # publish img to corresponding angle topic @@ -341,7 +347,7 @@ def predict_ultralytics(self, image): cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR) # run model prediction - output = self.model(cv_image, half=True, verbose=False) + output = self.model.track(cv_image, half=True, verbose=False, imgsz=640) # handle distance of objects @@ -349,6 +355,11 @@ def predict_ultralytics(self, image): distance_output = [] c_boxes = [] c_labels = [] + c_colors = [] + if hasattr(output[0], "masks") and output[0].masks is not None: + masks = output[0].masks.data + else: + masks 
= None boxes = output[0].boxes for box in boxes: @@ -358,72 +369,74 @@ def predict_ultralytics(self, image): # only run distance calc when dist_array is available # this if is needed because the lidar starts # publishing with a delay - if self.dist_arrays is not None: - - # crop bounding box area out of depth image - distances = np.asarray( - self.dist_arrays[ - int(pixels[1]) : int(pixels[3]) : 1, - int(pixels[0]) : int(pixels[2]) : 1, - ::, - ] - ) + if self.dist_arrays is None: + continue + + # crop bounding box area out of depth image + distances = np.asarray( + self.dist_arrays[ + int(pixels[1]) : int(pixels[3]) : 1, + int(pixels[0]) : int(pixels[2]) : 1, + ::, + ] + ) - # set all 0 (black) values to np.inf (necessary if - # you want to search for minimum) - # these are all pixels where there is no - # corresponding lidar point in the depth image - condition = distances[:, :, 0] != 0 - non_zero_filter = distances[condition] - distances_copy = distances.copy() - distances_copy[distances_copy == 0] = np.inf - - # only proceed if there is more than one lidar - # point in the bounding box - if len(non_zero_filter) > 0: - - """ - !Watch out: - The calculation of min x and min abs y is currently - only for center angle - For back, left and right the values are different in the - coordinate system of the lidar. - (Example: the closedt distance on the back view should the - max x since the back view is on the -x axis) - """ - - # copy actual lidar points - obj_dist_min_x = self.min_x(dist_array=distances_copy) - obj_dist_min_abs_y = self.min_abs_y(dist_array=distances_copy) - - # absolut distance to object for visualization - abs_distance = np.sqrt( - obj_dist_min_x[0] ** 2 - + obj_dist_min_x[1] ** 2 - + obj_dist_min_x[2] ** 2 - ) - - # append class index, min x and min abs y to output array - distance_output.append(float(cls)) - distance_output.append(float(obj_dist_min_x[0])) - distance_output.append(float(obj_dist_min_abs_y[1])) - - else: - # fallback values for bounding box if - # no lidar points where found - obj_dist_min_x = (np.inf, np.inf, np.inf) - obj_dist_min_abs_y = (np.inf, np.inf, np.inf) - abs_distance = np.inf - - # add values for visualization - c_boxes.append(torch.tensor(pixels)) - c_labels.append( - f"Class: {cls}," - f"Meters: {round(abs_distance, 2)}," - f"({round(float(obj_dist_min_x[0]), 2)}," - f"{round(float(obj_dist_min_abs_y[1]), 2)})" + # set all 0 (black) values to np.inf (necessary if + # you want to search for minimum) + # these are all pixels where there is no + # corresponding lidar point in the depth image + condition = distances[:, :, 0] != 0 + non_zero_filter = distances[condition] + distances_copy = distances.copy() + distances_copy[distances_copy == 0] = np.inf + + # only proceed if there is more than one lidar + # point in the bounding box + if len(non_zero_filter) > 0: + """ + !Watch out: + The calculation of min x and min abs y is currently + only for center angle + For back, left and right the values are different in the + coordinate system of the lidar. 
+ (Example: the closedt distance on the back view should the + max x since the back view is on the -x axis) + """ + + # copy actual lidar points + obj_dist_min_x = self.min_x(dist_array=distances_copy) + obj_dist_min_abs_y = self.min_abs_y(dist_array=distances_copy) + + # absolut distance to object for visualization + abs_distance = np.sqrt( + obj_dist_min_x[0] ** 2 + + obj_dist_min_x[1] ** 2 + + obj_dist_min_x[2] ** 2 ) + # append class index, min x and min abs y to output array + distance_output.append(float(cls)) + distance_output.append(float(obj_dist_min_x[0])) + distance_output.append(float(obj_dist_min_abs_y[1])) + + else: + # fallback values for bounding box if + # no lidar points where found + obj_dist_min_x = (np.inf, np.inf, np.inf) + obj_dist_min_abs_y = (np.inf, np.inf, np.inf) + abs_distance = np.inf + + # add values for visualization + c_boxes.append(torch.tensor(pixels)) + c_labels.append( + f"Class: {get_carla_class_name(cls)}, " + f"Meters: {round(abs_distance, 2)}, " + f"TrackingId: {int(box.id)}, " + f"({round(float(obj_dist_min_x[0]), 2)}, " + f"{round(float(obj_dist_min_abs_y[1]), 2)})", + ) + c_colors.append(get_carla_color(int(cls))) + # publish list of distances of objects for planning self.distance_publisher.publish(Float32MultiArray(data=distance_output)) @@ -437,7 +450,7 @@ def predict_ultralytics(self, image): # draw bounding boxes and distance values on image c_boxes = torch.stack(c_boxes) - box = draw_bounding_boxes( + drawn_images = draw_bounding_boxes( image_np_with_detections, c_boxes, c_labels, @@ -445,9 +458,21 @@ def predict_ultralytics(self, image): width=3, font_size=12, ) - np_box_img = np.transpose(box.detach().numpy(), (1, 2, 0)) - box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB) - return box_img + if masks is not None: + scaled_masks = np.squeeze( + scale_masks(masks.unsqueeze(1), cv_image.shape[:2], True).cpu().numpy(), + 1, + ) + + drawn_images = draw_segmentation_masks( + drawn_images, + torch.from_numpy(scaled_masks > 0), + alpha=0.6, + colors=c_colors, + ) + + np_image = np.transpose(drawn_images.detach().numpy(), (1, 2, 0)) + return cv2.cvtColor(np_image, cv2.COLOR_BGR2RGB) def min_x(self, dist_array): """ diff --git a/code/perception/src/vision_node_helper.py b/code/perception/src/vision_node_helper.py new file mode 100644 index 00000000..6778f7b6 --- /dev/null +++ b/code/perception/src/vision_node_helper.py @@ -0,0 +1,158 @@ +from typing import List, Union + + +# Carla-Farben +carla_colors = [ + [0, 0, 0], # 0: None + [70, 70, 70], # 1: Buildings + [190, 153, 153], # 2: Fences + [72, 0, 90], # 3: Other + [220, 20, 60], # 4: Pedestrians + [153, 153, 153], # 5: Poles + [157, 234, 50], # 6: RoadLines + [128, 64, 128], # 7: Roads + [244, 35, 232], # 8: Sidewalks + [107, 142, 35], # 9: Vegetation + [0, 0, 255], # 10: Vehicles + [102, 102, 156], # 11: Walls + [220, 220, 0], # 12: TrafficSigns +] + +carla_class_names = [ + "None", # 0 + "Buildings", # 1 + "Fences", # 2 + "Other", # 3 + "Pedestrians", # 4 + "Poles", # 5 + "RoadLines", # 6 + "Roads", # 7 + "Sidewalks", # 8 + "Vegetation", # 9 + "Vehicles", # 10 + "Walls", # 11 + "TrafficSigns", # 12 +] + +# COCO-Klassen → Carla-Klassen Mapping +coco_to_carla = [ + 4, # 0: Person -> Pedestrians + 10, # 1: Bicycle -> Vehicles + 10, # 2: Car -> Vehicles + 10, # 3: Motorbike -> Vehicles + 10, # 4: Airplane -> Vehicles + 10, # 5: Bus -> Vehicles + 10, # 6: Train -> Vehicles + 10, # 7: Truck -> Vehicles + 10, # 8: Boat -> Vehicles + 12, # 9: Traffic Light -> TrafficSigns + 3, # 10: Fire Hydrant -> 
Other + 12, # 11: Stop Sign -> TrafficSigns + 3, # 12: Parking Meter -> Other + 3, # 13: Bench -> Other + 3, # 14: Bird -> Other + 3, # 15: Cat -> Other + 3, # 16: Dog -> Other + 3, # 17: Horse -> Other + 3, # 18: Sheep -> Other + 3, # 19: Cow -> Other + 3, # 20: Elephant -> Other + 3, # 21: Bear -> Other + 3, # 22: Zebra -> Other + 3, # 23: Giraffe -> Other + 3, # 24: Backpack -> Other + 3, # 25: Umbrella -> Other + 3, # 26: Handbag -> Other + 3, # 27: Tie -> Other + 3, # 28: Suitcase -> Other + 3, # 29: Frisbee -> Other + 3, # 30: Skis -> Other + 3, # 31: Snowboard -> Other + 3, # 32: Sports Ball -> Other + 3, # 33: Kite -> Other + 3, # 34: Baseball Bat -> Other + 3, # 35: Baseball Glove -> Other + 3, # 36: Skateboard -> Other + 3, # 37: Surfboard -> Other + 3, # 38: Tennis Racket -> Other + 3, # 39: Bottle -> Other + 3, # 40: Wine Glass -> Other + 3, # 41: Cup -> Other + 3, # 42: Fork -> Other + 3, # 43: Knife -> Other + 3, # 44: Spoon -> Other + 3, # 45: Bowl -> Other + 3, # 46: Banana -> Other + 3, # 47: Apple -> Other + 3, # 48: Sandwich -> Other + 3, # 49: Orange -> Other + 3, # 50: Broccoli -> Other + 3, # 51: Carrot -> Other + 3, # 52: Hot Dog -> Other + 3, # 53: Pizza -> Other + 3, # 54: Donut -> Other + 3, # 55: Cake -> Other + 3, # 56: Chair -> Other + 3, # 57: Couch -> Other + 3, # 58: Potted Plant -> Other + 3, # 59: Bed -> Other + 3, # 60: Dining Table -> Other + 3, # 61: Toilet -> Other + 3, # 62: TV -> Other + 3, # 63: Laptop -> Other + 3, # 64: Mouse -> Other + 3, # 65: Remote -> Other + 3, # 66: Keyboard -> Other + 3, # 67: Cell Phone -> Other + 3, # 68: Microwave -> Other + 3, # 69: Oven -> Other + 3, # 70: Toaster -> Other + 3, # 71: Sink -> Other + 3, # 72: Refrigerator -> Other + 3, # 73: Book -> Other + 3, # 74: Clock -> Other + 3, # 75: Vase -> Other + 3, # 76: Scissors -> Other + 3, # 77: Teddy Bear -> Other + 3, # 78: Hair Drier -> Other + 3, # 79: Toothbrush -> Other +] + +COCO_CLASS_COUNT = 80 + + +def get_carla_color(coco_class: Union[int, float]) -> List[int]: + """Get the Carla color for a given COCO class. + Args: + coco_class: COCO class index (0-79) + + Returns: + RGB color values for the corresponding Carla class + + Raises: + ValueError: If coco_class is out of valid range + """ + coco_idx = int(coco_class) + if not 0 <= coco_idx < COCO_CLASS_COUNT: + raise ValueError(f"Invalid COCO class index: {coco_idx}") + carla_class = coco_to_carla[coco_idx] + return carla_colors[carla_class] + + +def get_carla_class_name(coco_class: Union[int, float]) -> str: + """Get the Carla class name for a given COCO class. + Args: + coco_class: COCO class index (0-79) + + Returns: + Name of the corresponding Carla class + + Raises: + ValueError: If coco_class is out of valid range + """ + coco_idx = int(coco_class) + + if not 0 <= coco_idx < COCO_CLASS_COUNT: + raise ValueError(f"Invalid COCO class index: {coco_idx}") + carla_class = coco_to_carla[coco_idx] + return carla_class_names[carla_class] diff --git a/code/planning/CMakeLists.txt b/code/planning/CMakeLists.txt index 34c3ccd0..c6288e03 100755 --- a/code/planning/CMakeLists.txt +++ b/code/planning/CMakeLists.txt @@ -13,8 +13,11 @@ project(planning) find_package(catkin REQUIRED COMPONENTS perception rospy + rosout roslaunch std_msgs + geometry_msgs + message_generation ) roslaunch_add_file_check(launch) @@ -48,9 +51,11 @@ catkin_python_setup() ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) 
## Generate messages in the 'msg' folder -# add_message_files( -# MinDistance.msg -# ) + add_message_files( + FILES + NavigationPoint.msg + Trajectory.msg + ) ## Generate services in the 'srv' folder # add_service_files( @@ -67,9 +72,12 @@ catkin_python_setup() # ) ## Generate added messages and services with any dependencies listed here -# generate_messages( -# perception -# ) + generate_messages( + DEPENDENCIES + std_msgs + perception + geometry_msgs + ) ################################################ ## Declare ROS dynamic reconfigure parameters ## @@ -105,7 +113,7 @@ catkin_package( # LIBRARIES planning # CATKIN_DEPENDS other_catkin_pkg # DEPENDS system_lib - CATKIN_DEPENDS perception rospy + CATKIN_DEPENDS perception rospy message_runtime ) ########### diff --git a/code/planning/msg/NavigationPoint.msg b/code/planning/msg/NavigationPoint.msg new file mode 100644 index 00000000..e9eb53c9 --- /dev/null +++ b/code/planning/msg/NavigationPoint.msg @@ -0,0 +1,3 @@ +geometry_msgs/Point position +float32 speed +uint8 behaviour \ No newline at end of file diff --git a/code/planning/msg/Trajectory.msg b/code/planning/msg/Trajectory.msg new file mode 100644 index 00000000..94060cdd --- /dev/null +++ b/code/planning/msg/Trajectory.msg @@ -0,0 +1,2 @@ +Header header +planning/NavigationPoint[] navigationPoints \ No newline at end of file diff --git a/code/planning/package.xml b/code/planning/package.xml index a321a109..daf36bc6 100755 --- a/code/planning/package.xml +++ b/code/planning/package.xml @@ -47,6 +47,9 @@ + message_generation + message_runtime + rospy roslaunch @@ -57,6 +60,7 @@ carla_msgs sensor_msgs std_msgs + geometry_msgs perception perception diff --git a/code/planning/src/BehaviourEnum.py b/code/planning/src/BehaviourEnum.py new file mode 100644 index 00000000..361b0b43 --- /dev/null +++ b/code/planning/src/BehaviourEnum.py @@ -0,0 +1,8 @@ +from enum import IntEnum + +# getattr(Behaviour, "name").value returns the ID +# Behaviour(id) returns Behaviour.NAME + + +class BehaviourEnum(IntEnum): + CRUISING = 0 diff --git a/code/requirements.txt b/code/requirements.txt index cbe57476..4f69a920 100644 --- a/code/requirements.txt +++ b/code/requirements.txt @@ -13,7 +13,7 @@ scipy==1.10.1 xmltodict==0.13.0 py-trees==2.2.3 numpy==1.23.5 -ultralytics==8.1.11 +ultralytics==8.3.32 scikit-learn>=0.18 pandas==2.0.3 debugpy==1.8.7 diff --git a/doc/assets/research_assets/ACC_FLC_Example_1.PNG b/doc/assets/research_assets/ACC_FLC_Example_1.PNG new file mode 100644 index 00000000..e6382981 Binary files /dev/null and b/doc/assets/research_assets/ACC_FLC_Example_1.PNG differ diff --git a/doc/development/project_management.md b/doc/development/project_management.md index ef5e45d7..ec772185 100644 --- a/doc/development/project_management.md +++ b/doc/development/project_management.md @@ -50,4 +50,4 @@ Merge the pull request after: After merging, remember to delete the source branch to keep the repository clean. ->[!INFO] For more information about the review process, see [Review process](./review_guideline.md). +>[!TIP] For more information about the review process, see [Review process](./review_guideline.md). 
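A minimal usage sketch for the new planning messages added above (assuming the generated Python classes `planning.msg.NavigationPoint` and `planning.msg.Trajectory` from the `add_message_files`/`generate_messages` changes; the node name, topic, and values are illustrative only):

```python
#!/usr/bin/env python
import rospy
from std_msgs.msg import Header
from geometry_msgs.msg import Point
from planning.msg import NavigationPoint, Trajectory
from BehaviourEnum import BehaviourEnum  # assumes BehaviourEnum.py is importable from the package path

if __name__ == "__main__":
    rospy.init_node("trajectory_publisher_example")
    pub = rospy.Publisher("/paf/hero/trajectory", Trajectory, queue_size=1)
    rospy.sleep(1.0)  # give subscribers time to connect

    msg = Trajectory()
    msg.header = Header(stamp=rospy.Time.now(), frame_id="hero")
    # every trajectory point carries its own position, target speed and behaviour ID
    msg.navigationPoints = [
        NavigationPoint(
            position=Point(x=float(i), y=0.0, z=0.0),
            speed=8.0,                              # m/s, placeholder target velocity
            behaviour=int(BehaviourEnum.CRUISING),
        )
        for i in range(10)
    ]
    pub.publish(msg)
```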
diff --git a/doc/development/review_guideline.md b/doc/development/review_guideline.md
index 840adb48..64a502c9 100644
--- a/doc/development/review_guideline.md
+++ b/doc/development/review_guideline.md
@@ -95,7 +95,7 @@ If a comment of a review was resolved by either, a new commit or a discussion be
 If a new commit took place it is encouraged to comment the commit SHA to have a connection between comment and resolving commit
 ![img.png](../assets/Resolve_conversation.png)

-> [!INFO] All conversations should be resolved before merging the pull request.
+> [!TIP] All conversations should be resolved before merging the pull request.

 ## 5. Merging a Pull Request

@@ -120,7 +120,7 @@ Long story short, the reviewer who approves the PR should merge. Only approve if
 Before merging a pull request, the request checks by the CI/CD pipeline should be successful. If the checks fail, the pull request should not be merged.

-> [!INFO] An exception can be made for a PR that only addresses the documentation and the `driving` check is not yet completed.
+> [!TIP] An exception can be made for a PR that only addresses the documentation and the `driving` check is not yet completed.

 ### 5.3. Deleting the branch

diff --git a/doc/research/paf24/planning/ACC.md b/doc/research/paf24/planning/ACC.md
new file mode 100644
index 00000000..366a02c5
--- /dev/null
+++ b/doc/research/paf24/planning/ACC.md
@@ -0,0 +1,115 @@
+# ACC
+
+This file gives a general overview of possible ACC implementations and reflects the current state of the ACC.
+
+## General
+
+The main goal of an ACC (Adaptive Cruise Control) is to follow the car driving in front while keeping a safe distance to it. This is achieved by adjusting the ego speed to the speed of the car in front.
+In general, classic ACC systems are designed for higher velocities (e.g. > 40 km/h). In addition, there are Stop & Go systems that support lower velocities (e.g. < 40 km/h). In our case, both functions might be needed, and it might be reasonable to develop two separate systems for ACC and Stop & Go.
+The threshold that distinguishes between the two systems has to be chosen reasonably.
+There are essentially three techniques that can be used to implement an ACC: PID Control, Model Predictive Control, and Fuzzy Logic Control. Another option is CACC (Cooperative Adaptive Cruise Control), but this is not relevant for our project since it requires communication between the vehicles.
+
+### PID Control
+
+The PID controller consists of three terms: the proportional, the integral, and the derivative term. One possible simple controller model looks as follows:
+
+$$ v_f(t) = v_f(t - t_s) + k_p e(t-t_s) + k_i \int_{0}^{t} e(\tau) d\tau + k_d \dot{e}(t - t_s) $$
+$$ e(t-t_s) = \Delta x(t - t_s) - t_{hw,d} v_f (t - t_s) $$
+
+- $v_f$: follower vehicle velocity (transmitted to the acting component)
+- $t_s$: sampling time
+- $k_p$, $k_i$, $k_d$: coefficients of the proportional, integral and derivative terms
+- $e$: distance error (difference between the actual distance $\Delta x$ and the desired distance $\Delta x_d$)
+- $t_{hw,d}$: desired time headway (duration between the arrival of the leading car at a certain waypoint and the arrival of the following car at the same waypoint)
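+
+As a quick illustration of the error term (example values only): assume a desired time headway of $t_{hw,d} = 1.8\,\mathrm{s}$ and a follower velocity of $v_f = 10\,\mathrm{m/s}$, so the desired gap is $18\,\mathrm{m}$. If the measured gap is $\Delta x = 15\,\mathrm{m}$, then
+
+$$ e = \Delta x - t_{hw,d} \, v_f = 15\,\mathrm{m} - 1.8\,\mathrm{s} \cdot 10\,\mathrm{m/s} = -3\,\mathrm{m} $$
+
+so the proportional term $k_p e$ alone lowers the commanded velocity, while a positive error (gap larger than desired) would raise it.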
+
+### Model Predictive Control (MPC)
+
+MPC calculates the current control action by solving an online, iterative, finite-horizon optimization of the model.
+Procedure:
+
+1. Prediction of future system states based on the current states
+2. Computation of the cost function over a finite time horizon in the future
+3. Implementation of the first step of the solved control sequence
+4. Application of the feedback control loop to compensate for the prediction error and model inaccuracy
+5. Sampling of new current states and repetition of the process
+
+### Fuzzy Logic Control (FLC)
+
+Provides a unified control framework that offers both functions: ACC and Stop & Go.
+
+![Example of a FLC control algorithm showing input variables (relative distance, host vehicle speed), fuzzy rules processing, and output variable](../../../assets/research_assets/ACC_FLC_Example_1.PNG)
+
+## ACC in our project
+
+### Current implementation
+
+- General behaviour:
+  - checks whether the obstacle distance < safety distance
+  - if so, calculates a new speed based on the obstacle distance
+  - else keeps the current speed
+- publishes the speed to acc_velocity
+- the safe distance calculation is currently not correct; it uses speed + (speed * 0.36)², which results in wrong distances
+- if the car in front of us ignores speed limits, we ignore them as well
+- some parts of the unstuck routine are in the ACC and need to be refactored
+- the same goes for publishing the current waypoint, which should not be in the ACC
+
+In summary, the current implementation is not sufficient and needs major refactoring.
+
+### Concept for new implementation
+
+The new concept for the ACC is to take the trajectory, look at all (or a limited subset) of the next points, and attach a target velocity and the current behaviour to each point.
+This way, Acting has more knowledge about what the car is doing and can adjust accordingly in a local manner.
+For this, a new trajectory message type was implemented in #511.
+
+![trajectoryMsg](https://github.com/user-attachments/assets/0b452f1a-4c60-45b2-882f-3a50118c9cb9)
+
+Since the behaviour is passed as an ID, a new enum for behaviours was implemented in utils.py as well.
+
+The general idea for speeds above the 40 km/h mark is to calculate a proper safety distance, a general target velocity, and per-point velocity targets based on PID. For speeds lower than that, a Stop & Go system can be discussed if it is really needed.
+
+For the safety distance we would like to calculate it like FLC, but that most likely needs some adjustments, as setting the distance to 100 m when there is no vehicle ahead seems unreasonable.
+In that case we can directly set the velocity to the speed limit.
+
+For the general speed target we take either the speed of the car in front or the speed limit, whichever is lower. In cases where the car in front is substantially slower than the speed limit, the ACC could initiate overtaking.
+
+Since we want to calculate the desired speed at each point of the trajectory, the way PID calculates velocity seems reasonable, since we can treat the trajectory points as different points in time for the sampling time.
+For example, if we sample every fifth point and calculate the velocity for it, we can interpolate every other point in between.
+
+$v_f(t - t_s)$ would then simply be the velocity of the fifth point before the current one. Theoretically, this also allows us to adjust the sampling time dynamically if needed.
+
+For the distance error we can use the safety distance as the desired distance. The distance at time $t$ is predicted from the (predicted) distance and the calculated speed at the prior sample point.
+
+We calculate velocities like that up to the point where the actual distance is within 5% of the optimal safety distance. For points further than that we simply use the desired general speed.
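+
+A rough sketch of this per-sample update is shown below (the function name, gains, and the gap-prediction step are placeholders for illustration, not the final implementation):
+
+```python
+import numpy as np
+
+SAMPLE_STEP = 5                  # PID is evaluated at every fifth trajectory point
+K_P, K_I, K_D = 0.5, 0.0, 0.1    # placeholder gains, to be tuned
+
+
+def velocities_along_trajectory(n_points, dt, ego_speed, lead_speed,
+                                gap, safety_dist, target_speed):
+    """Assign a target velocity to every trajectory point (illustrative only)."""
+    sample_idx = list(range(0, n_points, SAMPLE_STEP))
+    sampled_v = []
+    v, err_prev, err_sum = ego_speed, gap - safety_dist, 0.0
+    for _ in sample_idx:
+        if abs(gap - safety_dist) <= 0.05 * safety_dist:
+            v = target_speed         # close to the safety distance: hold the general target speed
+        else:
+            err = gap - safety_dist  # distance error against the desired safety distance
+            err_sum += err * dt
+            v += K_P * err + K_I * err_sum + K_D * (err - err_prev) / dt
+            v = float(np.clip(v, 0.0, target_speed))
+            err_prev = err
+        sampled_v.append(v)
+        gap += (lead_speed - v) * dt  # predict the gap at the next sample point
+    # interpolate the velocities of the points between two samples
+    return np.interp(np.arange(n_points), sample_idx, sampled_v)
+```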
+
+![accDiagram](https://github.com/user-attachments/assets/9a7b4572-f041-4da0-900c-51ab20d1904b)
+
+### Possible next steps
+
+A separate file for the new ACC should be created so the existing system is not disturbed.
+
+The parts that might get cut from the ACC, like the current waypoint and the unstuck routine, need to be evaluated for necessity and, if need be, moved somewhere more fitting.
+
+Implement a publisher for the new message type.
+
+Start implementing the safety distance and general target speed logic. The subscriber logic could be taken from the old implementation.
+
+Implement the PID logic.
+
+### Requirements
+
+- obstacle speed
+- obstacle distance
+
+## Discussion
+
+- How to test adaptations of the ACC? (Suggestion: Create test scenarios that represent different situations, such as driving straight behind a leading vehicle, no leading vehicle, sudden braking, etc.)
+- Which output should be transferred to the Acting component? (Suggestion: The desired speed is published in the new trajectory. No acceleration data is needed; acceleration is handled by the Acting component.)
+- Which input do we get from the Perception component? (Suggestion: The distance and velocity of the car in front would be really helpful.)
+
+## Sources
+
+He, Yinglong et al. (2019). Adaptive Cruise Control Strategies Implemented on Experimental Vehicles: A Review. IFAC-PapersOnLine. 52. 21-27. 10.1016/j.ifacol.2019.09.004.
+[Link](https://www.researchgate.net/publication/335934496_Adaptive_Cruise_Control_Strategies_Implemented_on_Experimental_Vehicles_A_Review)