diff --git a/build/docker-compose.yml b/build/docker-compose.yml
index a68e97c4..1545f945 100644
--- a/build/docker-compose.yml
+++ b/build/docker-compose.yml
@@ -57,8 +57,8 @@ services:
tty: true
shm_size: 2gb
#command: bash -c "sleep 10 && python3 /opt/leaderboard/leaderboard/leaderboard_evaluator.py --debug=0 --routes=/opt/leaderboard/data/routes_devtest.xml --agent=/opt/leaderboard/leaderboard/autoagents/npc_agent.py --host=carla-simulator --track=SENSORS"
- #command: bash -c "sleep 10 && roslaunch agent/launch/dev.launch"
- command: bash -c "sleep 10 && sudo chown -R carla:carla ../code/ && sudo chmod -R a+w ../code/ && python3 /opt/leaderboard/leaderboard/leaderboard_evaluator.py --debug=0 --routes=/opt/leaderboard/data/routes_devtest.xml --agent=/workspace/code/agent/src/agent/agent.py --host=carla-simulator --track=MAP"
+ command: bash -c "sleep 10 && roslaunch agent/launch/dev.launch"
+ #command: bash -c "sleep 10 && sudo chown -R carla:carla ../code/ && sudo chmod -R a+w ../code/ && python3 /opt/leaderboard/leaderboard/leaderboard_evaluator.py --debug=0 --routes=/opt/leaderboard/data/routes_devtest.xml --agent=/workspace/code/agent/src/agent/agent.py --host=carla-simulator --track=MAP"
logging:
driver: "local"
environment:
diff --git a/code/agent/config/rviz_config.rviz b/code/agent/config/rviz_config.rviz
index 7064cc88..5c10eca9 100644
--- a/code/agent/config/rviz_config.rviz
+++ b/code/agent/config/rviz_config.rviz
@@ -63,11 +63,11 @@ Visualization Manager:
Unreliable: false
Value: true
Visibility:
- Grid: true
- Imu: true
- Path: true
- PointCloud2: true
- Value: true
+ Grid: false
+ Imu: false
+ Path: false
+ PointCloud2: false
+ Value: false
Zoom Factor: 1
- Class: rviz/Image
Enabled: true
@@ -327,4 +327,4 @@ Window Geometry:
collapsed: false
Width: 2488
X: 1992
- Y: 27
+ Y: 27
\ No newline at end of file
diff --git a/code/perception/launch/perception.launch b/code/perception/launch/perception.launch
index 0a24ba5e..3adc596e 100644
--- a/code/perception/launch/perception.launch
+++ b/code/perception/launch/perception.launch
@@ -33,15 +33,30 @@
-
-
+ - deeplabv3_resnet101
+ - yolov8x-seg
+ -->
+
+
diff --git a/code/perception/src/vision_node.py b/code/perception/src/vision_node.py
index 726ea4de..d736253a 100755
--- a/code/perception/src/vision_node.py
+++ b/code/perception/src/vision_node.py
@@ -19,6 +19,7 @@
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
import numpy as np
from time import perf_counter
+from ultralytics import NAS, YOLO, RTDETR, SAM, FastSAM
"""
VisionNode:
@@ -55,17 +56,32 @@ def __init__(self, name, **kwargs):
weights=DeepLabV3_ResNet101_Weights.DEFAULT),
DeepLabV3_ResNet101_Weights.DEFAULT,
"segmentation",
- "pyTorch")
+ "pyTorch"),
+ 'yolov8n': (YOLO, "yolov8n.pt", "detection", "ultralytics"),
+ 'yolov8s': (YOLO, "yolov8s.pt", "detection", "ultralytics"),
+ 'yolov8m': (YOLO, "yolov8m.pt", "detection", "ultralytics"),
+ 'yolov8l': (YOLO, "yolov8l.pt", "detection", "ultralytics"),
+ 'yolov8x': (YOLO, "yolov8x.pt", "detection", "ultralytics"),
+ 'yolo_nas_l': (NAS, "yolo_nas_l.pt", "detection", "ultralytics"),
+ 'yolo_nas_m': (NAS, "yolo_nas_m.pt", "detection", "ultralytics"),
+ 'yolo_nas_s': (NAS, "yolo_nas_s.pt", "detection", "ultralytics"),
+ 'rtdetr-l': (RTDETR, "rtdetr-l.pt", "detection", "ultralytics"),
+ 'rtdetr-x': (RTDETR, "rtdetr-x.pt", "detection", "ultralytics"),
+ 'yolov8x-seg': (YOLO, "yolov8x-seg.pt", "segmentation",
+ "ultralytics"),
+ 'sam_l': (SAM, "sam_l.pt", "detection", "ultralytics"),
+ 'FastSAM-x': (FastSAM, "FastSAM-x.pt", "detection", "ultralytics"),
+
}
+ print(torch.__version__)
+
# general setup
self.bridge = CvBridge()
self.role_name = self.get_param("role_name", "hero")
self.side = self.get_param("side", "Center")
- # self.device = torch.device("cuda"
- # if torch.cuda.is_available() else "cpu") Cuda Memory Issues
- self.device = torch.device("cpu")
- print("VisionNode working on: ", self.device)
+ self.device = torch.device("cuda"
+ if torch.cuda.is_available() else "cpu")
# publish / subscribe setup
self.setup_camera_subscriptions()
@@ -80,9 +96,22 @@ def __init__(self, name, **kwargs):
self.type = model_info[2]
self.framework = model_info[3]
print("Vision Node Configuration:")
+ print("Device -> ", self.device)
print(f"Model -> {self.get_param('model')},")
print(f"Type -> {self.type}, Framework -> {self.framework}")
- self.model.to(self.device)
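+ # cap this process at 10% of the GPU memory to work around the CUDA OOM issues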
+ torch.cuda.memory.set_per_process_memory_fraction(0.1)
+
+ # pyTorch and CUDA setup
+ if self.framework == "pyTorch":
+ for param in self.model.parameters():
+ param.requires_grad = False
+ self.model.to(self.device)
+
+ # ultralytics setup
+ if self.framework == "ultralytics":
+ self.model = self.model(self.weights)
+
+ # tensorflow setup
def setup_camera_subscriptions(self):
self.new_subscription(
@@ -101,6 +130,30 @@ def setup_camera_publishers(self):
def handle_camera_image(self, image):
startTime = perf_counter()
+
+ # free up cuda memory
+ if self.device == "cuda":
+ torch.cuda.empty_cache()
+
+ print("Before Model: ", perf_counter() - startTime)
+
+ if self.framework == "pyTorch":
+ vision_result = self.predict_torch(image)
+
+ if self.framework == "ultralytics":
+ vision_result = self.predict_ultralytics(image)
+
+ print("After Model: ", perf_counter() - startTime)
+
+ # publish image to rviz
+ img_msg = self.bridge.cv2_to_imgmsg(vision_result,
+ encoding="passthrough")
+ img_msg.header = image.header
+ self.publisher.publish(img_msg)
+
+ pass
+
+ def predict_torch(self, image):
self.model.eval()
cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
desired_encoding='passthrough')
@@ -114,39 +167,41 @@ def handle_camera_image(self, image):
input_image = preprocess(cv_image).unsqueeze(dim=0)
input_image = input_image.to(self.device)
- print("Before Model: ", perf_counter() - startTime)
prediction = self.model(input_image)
- print("After Model: ", perf_counter() - startTime)
+
if (self.type == "detection"):
vision_result = self.apply_bounding_boxes(cv_image, prediction[0])
if (self.type == "segmentation"):
vision_result = self.create_mask(cv_image, prediction['out'])
- img_msg = self.bridge.cv2_to_imgmsg(vision_result,
- encoding="passthrough")
- img_msg.header = image.header
+ return vision_result
- self.publisher.publish(img_msg)
- print("After Publish: ", perf_counter() - startTime)
+ def predict_ultralytics(self, image):
+ cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
+ desired_encoding='passthrough')
+ cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
+ print(cv_image.shape)
- pass
+ output = self.model(cv_image)
+
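+ # Results.plot() draws the predicted boxes/masks onto the frame
+ # and returns it as a numpy array (BGR)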
+ return output[0].plot()
def create_mask(self, input_image, model_output):
output_predictions = torch.argmax(model_output, dim=0)
-
for i in range(21):
output_predictions[i] = output_predictions[i] == i
output_predictions = output_predictions.to(dtype=torch.bool)
- input_image = t.ToTensor()(input_image)
- input_image = input_image.to(dtype=torch.uint8)
print(output_predictions.shape)
- print(input_image.shape)
- segmented_image = draw_segmentation_masks(input_image,
- output_predictions)
- cv_segmented = cv2.cvtColor(segmented_image.detach().numpy(),
- cv2.COLOR_BGR2RGB)
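+ # draw_segmentation_masks expects a CHW uint8 tensor, so convert the HWC numpy image first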
+ transposed_image = np.transpose(input_image, (2, 0, 1))
+ tensor_image = torch.tensor(transposed_image)
+ tensor_image = tensor_image.to(dtype=torch.uint8)
+ segmented_image = draw_segmentation_masks(tensor_image,
+ output_predictions,
+ alpha=0.6)
+ cv_segmented = segmented_image.detach().cpu().numpy()
+ cv_segmented = np.transpose(cv_segmented, (1, 2, 0))
return cv_segmented
def apply_bounding_boxes(self, input_image, model_output):
diff --git a/code/requirements.txt b/code/requirements.txt
index c15e4287..3b0948d0 100644
--- a/code/requirements.txt
+++ b/code/requirements.txt
@@ -11,3 +11,4 @@ scipy==1.10.0
xmltodict==0.13.0
py-trees==2.1.6
numpy==1.23.5
+ultralytics==8.0.220
\ No newline at end of file
diff --git a/doc/06_perception/07_vision_node.md b/doc/06_perception/07_vision_node.md
index 1b72ead4..2b9b7bd3 100644
--- a/doc/06_perception/07_vision_node.md
+++ b/doc/06_perception/07_vision_node.md
@@ -1,25 +1,39 @@
# Vision Node
-The Visison Node serves as a replacement for the previous segmentation-node.
-It provides an adaptive interface that is able to perform object-detection or image-segmentation
+The Vision Node provides an adaptive interface that can perform object detection and/or image segmentation
on several different models. The model can be specified as a parameter in the perception.launch file.
+The VisionNode currently uses the yolov8x-seg model.
+
## Usage
The following code shows how the Vision-Node is specified in perception.launch
`
-
+
-
+
`
@@ -31,19 +45,65 @@ The Vision-Node will automatically switch between object-detection, imagesegment
For now the Vision-Node only supports pyTorch models. Within the next sprint it should be able to
accept other frameworks as well. It should also be possible to run object-detection and image-segmentation at the same time.
+## Model overview
+
+| Model | Type | Stable | Comments |
+|---------------------------------------|--------------|--------|---------------------------------------|
+| fasterrcnn_resnet50_fpn_v2 | detection | no | CUDA-Problems |
+| fasterrcnn_mobilenet_v3_large_320_fpn | detection | no | CUDA-Problems |
+| yolov8n | detection | yes | |
+| yolov8s | detection | yes | |
+| yolov8m | detection | yes | |
+| yolov8l | detection | yes | |
+| yolov8x | detection | yes | |
+| yolo_nas_l | detection | no | Missing super_gradients package error |
+| yolo_nas_m | detection | no | Missing super_gradients package error |
+| yolo_nas_s | detection | no | Missing super_gradients package error |
+| rtdetr-l | detection | yes | |
+| rtdetr-x | detection | yes | |
+| sam_l | detection | no | Ultralytics Error |
+| FastSAM-x | detection | no | CUDA Problems |
+| deeplabv3_resnet101 | segmentation | no | CUDA Problems, Segmentation Problems |
+| yolov8x-seg | segmentation | yes | |
+
## How it works
### Initialization
The Vision-Node contains a dictionary with all its models. Depending on the model parameter it will initialize the correct model and weights.
-`
-self.model_dict = {
- "fasterrcnn_resnet50_fpn_v2": (fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT), FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, "detection", "pyTorch"),
- "fasterrcnn_mobilenet_v3_large_320_fpn": (fasterrcnn_mobilenet_v3_large_320_fpn(weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT), FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT, "detection", "pyTorch"),
- "deeplabv3_resnet101": (deeplabv3_resnet101(weights=DeepLabV3_ResNet101_Weights.DEFAULT), DeepLabV3_ResNet101_Weights.DEFAULT, "segmentation", "pyTorch")
- }
-`
+`
+self.model_dict = {
+ "fasterrcnn_resnet50_fpn_v2":
+ (fasterrcnn_resnet50_fpn_v2(
+ weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT),
+ FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT,
+ "detection",
+ "pyTorch"),
+ "fasterrcnn_mobilenet_v3_large_320_fpn":
+ (fasterrcnn_mobilenet_v3_large_320_fpn(
+ weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT),
+ FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT,
+ "detection",
+ "pyTorch"),
+ "deeplabv3_resnet101":
+ (deeplabv3_resnet101(
+ weights=DeepLabV3_ResNet101_Weights.DEFAULT),
+ DeepLabV3_ResNet101_Weights.DEFAULT,
+ "segmentation",
+ "pyTorch"),
+ 'yolov8n': (YOLO, "yolov8n.pt", "detection", "ultralytics"),
+ 'yolov8s': (YOLO, "yolov8s.pt", "detection", "ultralytics"),
+ 'yolov8m': (YOLO, "yolov8m.pt", "detection", "ultralytics"),
+ 'yolov8l': (YOLO, "yolov8l.pt", "detection", "ultralytics"),
+ 'yolov8x': (YOLO, "yolov8x.pt", "detection", "ultralytics"),
+ 'yolo_nas_l': (NAS, "yolo_nas_l.pt", "detection", "ultralytics"),
+ 'yolo_nas_m': (NAS, "yolo_nas_m.pt", "detection", "ultralytics"),
+ 'yolo_nas_s': (NAS, "yolo_nas_s.pt", "detection", "ultralytics"),
+ 'rtdetr-l': (RTDETR, "rtdetr-l.pt", "detection", "ultralytics"),
+ 'rtdetr-x': (RTDETR, "rtdetr-x.pt", "detection", "ultralytics"),
+ 'yolov8x-seg': (YOLO, "yolov8x-seg.pt", "segmentation", "ultralytics"),
+ 'sam_l': (SAM, "sam_l.pt", "detection", "ultralytics"),
+ 'FastSAM-x': (FastSAM, "FastSAM-x.pt", "detection", "ultralytics")}
+`
### Core
@@ -61,18 +121,21 @@ This function is automatically triggered by the Camera-Subscriber of the Vision-
## Visualization
-The Vision-Node implements an ImagePublisher under the topic: "/paf//Center/segmented_image"
+The Vision-Node implements an ImagePublisher under the topic: "/paf/hero/Center/segmented_image"
+
+The RViz configuration file has been changed accordingly to display the published images alongside the camera image.
-The Configuartion File of RViz has been changed accordingly to display the published images alongside with the Camera.
+The built-in visualization of the YOLO models works very well.
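+
+Besides RViz, the published topic can be inspected with rqt_image_view or a small standalone subscriber. The sketch below is a minimal, illustrative viewer (it assumes a sourced ROS1 workspace with cv_bridge and OpenCV available; the node name is made up and not part of this project):
+
+`
+#!/usr/bin/env python3
+# Minimal viewer sketch: subscribes to the VisionNode output and displays it with OpenCV.
+import rospy
+import cv2
+from sensor_msgs.msg import Image
+from cv_bridge import CvBridge
+
+bridge = CvBridge()
+
+def on_image(msg):
+    frame = bridge.imgmsg_to_cv2(msg, desired_encoding="passthrough")
+    cv2.imshow("segmented_image", frame)
+    cv2.waitKey(1)
+
+rospy.init_node("segmented_image_viewer")  # illustrative node name
+rospy.Subscriber("/paf/hero/Center/segmented_image", Image, on_image)
+rospy.spin()
+`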
## Known Issues
### Time
-First experiments showed that the handle_camera_image function is way to slow to be used reliably. It takes around 1.5 seconds to handle one image.
+When running YOLO models, the timing issue is resolved because ultralytics manages CUDA resources well internally.
-Right now the Vision-Node is not using cuda due to cuda-memory-issues that couldn't be fixed right away.
+When running other models, the CUDA error persists.
-The performance is expected to rise quite a bit when using cuda.
+## Segmentation
-Also their is lots more room for testing different models inside the Vision-Node to evualte their accuracy and time-performance.
+For some reason the create_mask function works in a standalone project, but not in the Vision-Node.
+I stopped debugging because the YOLO models work much better and provide a good, stable baseline.