538 Feature: update and use Ultralytics YOLOv11 for object detection #543

Merged
8 changes: 6 additions & 2 deletions code/perception/launch/perception.launch
@@ -58,10 +58,14 @@

Image-Segmentation:
- deeplabv3_resnet101
- yolov8x-seg
- yolo11n-seg
- yolo11s-seg
- yolo11m-seg
- yolo11l-seg
-->

<param name="model" value="rtdetr-l" />
<param name="model" value="yolo11n-seg" />
</node>

<node pkg="perception" type="traffic_light_node.py" name="TrafficLightNode" output="screen">
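The commented model list and the `model` param above are how a network is selected; the node resolves the configured string against a model registry in vision_node.py (shown below). A minimal sketch of reading such a param — the private-parameter name and default are assumptions, not taken from this PR:

import rospy

# assumed lookup; the node may equally use CompatibleNode's param helpers
model_name = rospy.get_param("~model", "yolo11n-seg")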
Empty file added code/perception/src/__init__.py
163 changes: 94 additions & 69 deletions code/perception/src/vision_node.py
@@ -15,6 +15,7 @@
)
import torchvision.transforms as t
import cv2
from vision_node_helper import get_carla_class_name, get_carla_color
from rospy.numpy_msg import numpy_msg
from sensor_msgs.msg import Image as ImageMsg
from std_msgs.msg import Header, Float32MultiArray
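The new vision_node_helper import supplies the per-class names and colors used for drawing later in this diff. The helper file itself is not part of this excerpt, so the following is a hypothetical sketch: the function names match the import, but the mappings are illustrative placeholders only.

# Hypothetical sketch of vision_node_helper; mappings are assumed, not from the PR.
carla_classes = {0: "person", 1: "bicycle", 2: "car"}  # assumed COCO-style ids
carla_colors = {0: (220, 20, 60), 1: (119, 11, 32), 2: (0, 0, 142)}

def get_carla_class_name(class_id) -> str:
    return carla_classes.get(int(class_id), "unknown")

def get_carla_color(class_id) -> tuple:
    return carla_colors.get(int(class_id), (128, 128, 128))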
@@ -24,6 +25,7 @@
from ultralytics import NAS, YOLO, RTDETR, SAM, FastSAM
import asyncio
import rospy
from ultralytics.utils.ops import scale_masks


class VisionNode(CompatibleNode):
@@ -77,6 +79,10 @@ def __init__(self, name, **kwargs):
"yolov8x-seg": (YOLO, "yolov8x-seg.pt", "segmentation", "ultralytics"),
"sam_l": (SAM, "sam_l.pt", "detection", "ultralytics"),
"FastSAM-x": (FastSAM, "FastSAM-x.pt", "detection", "ultralytics"),
"yolo11n-seg": (YOLO, "yolo11n-seg.pt", "segmentation", "ultralytics"),
"yolo11s-seg": (YOLO, "yolo11s-seg.pt", "segmentation", "ultralytics"),
"yolo11m-seg": (YOLO, "yolo11m-seg.pt", "segmentation", "ultralytics"),
"yolo11l-seg": (YOLO, "yolo11l-seg.pt", "segmentation", "ultralytics"),
}
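Each entry above is a (constructor, weights file, task, framework) tuple, so model selection reduces to a dictionary lookup and unpack. A minimal sketch of that pattern — the dictionary's real attribute name is outside this excerpt, so `registry` here is a stand-in:

from ultralytics import YOLO

registry = {
    "yolo11n-seg": (YOLO, "yolo11n-seg.pt", "segmentation", "ultralytics"),
}

model_cls, weights, task, framework = registry["yolo11n-seg"]
model = model_cls(weights)  # Ultralytics fetches the .pt weights on first use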

# general setup
@@ -239,7 +245,7 @@ def handle_camera_image(self, image):
vision_result = self.predict_ultralytics(image)

# publish vision result to rviz
img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="rgb8")
img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="bgr8")
img_msg.header = image.header

# publish img to corresponding angle topic
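The encoding change above (rgb8 to bgr8) matters because OpenCV arrays are BGR-ordered; declaring them as rgb8 makes consumers swap the red and blue channels. A small self-contained illustration of the corrected call:

import numpy as np
from cv_bridge import CvBridge

bridge = CvBridge()
frame = np.zeros((4, 4, 3), dtype=np.uint8)
frame[..., 2] = 255  # red in OpenCV's BGR channel order

msg = bridge.cv2_to_imgmsg(frame, encoding="bgr8")  # viewers now render red
# declared as "rgb8", the same buffer would display as blue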
@@ -341,14 +347,19 @@ def predict_ultralytics(self, image):
cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)

# run model prediction
output = self.model(cv_image, half=True, verbose=False)
output = self.model.track(cv_image, half=True, verbose=False, imgsz=640)

# handle distance of objects

# set up lists for visualization of distance outputs
distance_output = []
c_boxes = []
c_labels = []
c_colors = []
if hasattr(output[0], "masks") and output[0].masks is not None:
masks = output[0].masks.data
else:
masks = None

boxes = output[0].boxes
for box in boxes:
@@ -358,72 +369,74 @@
# only run distance calc when dist_array is available
# this check is needed because the lidar starts
# publishing with a delay
if self.dist_arrays is not None:

# crop bounding box area out of depth image
distances = np.asarray(
self.dist_arrays[
int(pixels[1]) : int(pixels[3]) : 1,
int(pixels[0]) : int(pixels[2]) : 1,
::,
]
)
if self.dist_arrays is None:
continue

# crop bounding box area out of depth image
distances = np.asarray(
self.dist_arrays[
int(pixels[1]) : int(pixels[3]) : 1,
int(pixels[0]) : int(pixels[2]) : 1,
::,
]
)

# set all 0 (black) values to np.inf (necessary if
# you want to search for minimum)
# these are all pixels where there is no
# corresponding lidar point in the depth image
condition = distances[:, :, 0] != 0
non_zero_filter = distances[condition]
distances_copy = distances.copy()
distances_copy[distances_copy == 0] = np.inf

# only proceed if there is at least one lidar
# point in the bounding box
if len(non_zero_filter) > 0:

"""
!Watch out:
The calculation of min x and min abs y is currently
only for center angle
For back, left and right the values are different in the
coordinate system of the lidar.
(Example: the closedt distance on the back view should the
max x since the back view is on the -x axis)
"""

# copy actual lidar points
obj_dist_min_x = self.min_x(dist_array=distances_copy)
obj_dist_min_abs_y = self.min_abs_y(dist_array=distances_copy)

# absolute distance to object for visualization
abs_distance = np.sqrt(
obj_dist_min_x[0] ** 2
+ obj_dist_min_x[1] ** 2
+ obj_dist_min_x[2] ** 2
)

# append class index, min x and min abs y to output array
distance_output.append(float(cls))
distance_output.append(float(obj_dist_min_x[0]))
distance_output.append(float(obj_dist_min_abs_y[1]))

else:
# fallback values for the bounding box if
# no lidar points were found
obj_dist_min_x = (np.inf, np.inf, np.inf)
obj_dist_min_abs_y = (np.inf, np.inf, np.inf)
abs_distance = np.inf

# add values for visualization
c_boxes.append(torch.tensor(pixels))
c_labels.append(
f"Class: {cls},"
f"Meters: {round(abs_distance, 2)},"
f"({round(float(obj_dist_min_x[0]), 2)},"
f"{round(float(obj_dist_min_abs_y[1]), 2)})"
# set all 0 (black) values to np.inf (necessary if
# you want to search for minimum)
# these are all pixels where there is no
# corresponding lidar point in the depth image
condition = distances[:, :, 0] != 0
non_zero_filter = distances[condition]
distances_copy = distances.copy()
distances_copy[distances_copy == 0] = np.inf

# only proceed if there is at least one lidar
# point in the bounding box
if len(non_zero_filter) > 0:
"""
!Watch out:
The calculation of min x and min abs y is currently
only for center angle
For back, left and right the values are different in the
coordinate system of the lidar.
(Example: the closedt distance on the back view should the
max x since the back view is on the -x axis)
"""

# copy actual lidar points
obj_dist_min_x = self.min_x(dist_array=distances_copy)
obj_dist_min_abs_y = self.min_abs_y(dist_array=distances_copy)

# absolute distance to object for visualization
abs_distance = np.sqrt(
obj_dist_min_x[0] ** 2
+ obj_dist_min_x[1] ** 2
+ obj_dist_min_x[2] ** 2
)

# append class index, min x and min abs y to output array
distance_output.append(float(cls))
distance_output.append(float(obj_dist_min_x[0]))
distance_output.append(float(obj_dist_min_abs_y[1]))

else:
# fallback values for the bounding box if
# no lidar points were found
obj_dist_min_x = (np.inf, np.inf, np.inf)
obj_dist_min_abs_y = (np.inf, np.inf, np.inf)
abs_distance = np.inf

# add values for visualization
c_boxes.append(torch.tensor(pixels))
c_labels.append(
f"Class: {get_carla_class_name(cls)}, "
f"Meters: {round(abs_distance, 2)}, "
f"TrackingId: {int(box.id)}, "
f"({round(float(obj_dist_min_x[0]), 2)}, "
f"{round(float(obj_dist_min_abs_y[1]), 2)})",
)
c_colors.append(get_carla_color(int(cls)))

# publish list of distances of objects for planning
self.distance_publisher.publish(Float32MultiArray(data=distance_output))
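The Float32MultiArray published above is a flat sequence of triples — class id, minimum x, and minimum absolute y — appended only for boxes that contained at least one lidar return (gaps were rewritten to np.inf so they cannot win the minimum search). A hypothetical consumer-side sketch; the topic name and callback wiring are not shown in this excerpt:

from std_msgs.msg import Float32MultiArray

def on_distances(msg: Float32MultiArray):
    data = msg.data
    for i in range(0, len(data), 3):  # one (cls, min_x, min_abs_y) triple per object
        cls_id, min_x, min_abs_y = data[i], data[i + 1], data[i + 2]
        print(f"class {int(cls_id)}: x={min_x:.2f} m, |y|={min_abs_y:.2f} m")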

@@ -437,17 +450,29 @@ def predict_ultralytics(self, image):

# draw bounding boxes and distance values on image
c_boxes = torch.stack(c_boxes)
box = draw_bounding_boxes(
drawn_images = draw_bounding_boxes(
image_np_with_detections,
c_boxes,
c_labels,
colors="blue",
width=3,
font_size=12,
)
np_box_img = np.transpose(box.detach().numpy(), (1, 2, 0))
box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)
return box_img
if masks is not None:
scaled_masks = np.squeeze(
scale_masks(masks.unsqueeze(1), cv_image.shape[:2], True).cpu().numpy(),
1,
)

drawn_images = draw_segmentation_masks(
drawn_images,
torch.from_numpy(scaled_masks > 0),
alpha=0.6,
colors=c_colors,
)

np_image = np.transpose(drawn_images.detach().numpy(), (1, 2, 0))
return cv2.cvtColor(np_image, cv2.COLOR_BGR2RGB)
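Shape bookkeeping drives the mask overlay above: ultralytics' scale_masks is fed (N, 1, H, W) via unsqueeze(1) and squeezed back afterwards, while torchvision's draw_segmentation_masks expects boolean (N, H, W) masks plus a uint8 CHW image. A standalone sketch of the drawing step, using dummy masks already scaled to the frame size:

import torch
from torchvision.utils import draw_segmentation_masks

image = torch.zeros((3, 480, 640), dtype=torch.uint8)  # uint8 CHW frame
masks = torch.zeros((2, 480, 640))                     # two instance masks, full size
masks[0, 100:200, 100:200] = 1.0

overlay = draw_segmentation_masks(
    image,
    masks > 0,  # boolean masks, one per instance
    alpha=0.6,
    colors=[(0, 0, 142), (220, 20, 60)],
)
print(overlay.shape)  # torch.Size([3, 480, 640])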

def min_x(self, dist_array):
"""