diff --git a/build/docker-compose.yml b/build/docker-compose.yml index a68e97c4..1545f945 100644 --- a/build/docker-compose.yml +++ b/build/docker-compose.yml @@ -57,8 +57,8 @@ services: tty: true shm_size: 2gb #command: bash -c "sleep 10 && python3 /opt/leaderboard/leaderboard/leaderboard_evaluator.py --debug=0 --routes=/opt/leaderboard/data/routes_devtest.xml --agent=/opt/leaderboard/leaderboard/autoagents/npc_agent.py --host=carla-simulator --track=SENSORS" - #command: bash -c "sleep 10 && roslaunch agent/launch/dev.launch" - command: bash -c "sleep 10 && sudo chown -R carla:carla ../code/ && sudo chmod -R a+w ../code/ && python3 /opt/leaderboard/leaderboard/leaderboard_evaluator.py --debug=0 --routes=/opt/leaderboard/data/routes_devtest.xml --agent=/workspace/code/agent/src/agent/agent.py --host=carla-simulator --track=MAP" + command: bash -c "sleep 10 && roslaunch agent/launch/dev.launch" + #command: bash -c "sleep 10 && sudo chown -R carla:carla ../code/ && sudo chmod -R a+w ../code/ && python3 /opt/leaderboard/leaderboard/leaderboard_evaluator.py --debug=0 --routes=/opt/leaderboard/data/routes_devtest.xml --agent=/workspace/code/agent/src/agent/agent.py --host=carla-simulator --track=MAP" logging: driver: "local" environment: diff --git a/code/agent/config/rviz_config.rviz b/code/agent/config/rviz_config.rviz index 7064cc88..5c10eca9 100644 --- a/code/agent/config/rviz_config.rviz +++ b/code/agent/config/rviz_config.rviz @@ -63,11 +63,11 @@ Visualization Manager: Unreliable: false Value: true Visibility: - Grid: true - Imu: true - Path: true - PointCloud2: true - Value: true + Grid: false + Imu: false + Path: false + PointCloud2: false + Value: false Zoom Factor: 1 - Class: rviz/Image Enabled: true @@ -327,4 +327,4 @@ Window Geometry: collapsed: false Width: 2488 X: 1992 - Y: 27 + Y: 27 \ No newline at end of file diff --git a/code/perception/launch/perception.launch b/code/perception/launch/perception.launch index 0a24ba5e..3adc596e 100644 --- a/code/perception/launch/perception.launch +++ b/code/perception/launch/perception.launch @@ -33,15 +33,30 @@ - - + - deeplabv3_resnet101 + - yolov8x-seg + --> + + diff --git a/code/perception/src/vision_node.py b/code/perception/src/vision_node.py index 726ea4de..d736253a 100755 --- a/code/perception/src/vision_node.py +++ b/code/perception/src/vision_node.py @@ -19,6 +19,7 @@ from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks import numpy as np from time import perf_counter +from ultralytics import NAS, YOLO, RTDETR, SAM, FastSAM """ VisionNode: @@ -55,17 +56,32 @@ def __init__(self, name, **kwargs): weights=DeepLabV3_ResNet101_Weights.DEFAULT), DeepLabV3_ResNet101_Weights.DEFAULT, "segmentation", - "pyTorch") + "pyTorch"), + 'yolov8n': (YOLO, "yolov8n.pt", "detection", "ultralytics"), + 'yolov8s': (YOLO, "yolov8s.pt", "detection", "ultralytics"), + 'yolov8m': (YOLO, "yolov8m.pt", "detection", "ultralytics"), + 'yolov8l': (YOLO, "yolov8l.pt", "detection", "ultralytics"), + 'yolov8x': (YOLO, "yolov8x.pt", "detection", "ultralytics"), + 'yolo_nas_l': (NAS, "yolo_nas_l.pt", "detection", "ultralytics"), + 'yolo_nas_m': (NAS, "yolo_nas_m.pt", "detection", "ultralytics"), + 'yolo_nas_s': (NAS, "yolo_nas_s.pt", "detection", "ultralytics"), + 'rtdetr-l': (RTDETR, "rtdetr-l.pt", "detection", "ultralytics"), + 'rtdetr-x': (RTDETR, "rtdetr-x.pt", "detection", "ultralytics"), + 'yolov8x-seg': (YOLO, "yolov8x-seg.pt", "segmentation", + "ultralytics"), + 'sam_l': (SAM, "sam_l.pt", "detection", "ultralytics"), + 'FastSAM-x': 
(FastSAM, "FastSAM-x.pt", "detection", "ultralytics"), + } + print(torch.__version__) + # general setup self.bridge = CvBridge() self.role_name = self.get_param("role_name", "hero") self.side = self.get_param("side", "Center") - # self.device = torch.device("cuda" - # if torch.cuda.is_available() else "cpu") Cuda Memory Issues - self.device = torch.device("cpu") - print("VisionNode working on: ", self.device) + self.device = torch.device("cuda" + if torch.cuda.is_available() else "cpu") # publish / subscribe setup self.setup_camera_subscriptions() @@ -80,9 +96,22 @@ def __init__(self, name, **kwargs): self.type = model_info[2] self.framework = model_info[3] print("Vision Node Configuration:") + print("Device -> ", self.device) print(f"Model -> {self.get_param('model')},") print(f"Type -> {self.type}, Framework -> {self.framework}") - self.model.to(self.device) + torch.cuda.memory.set_per_process_memory_fraction(0.1) + + # pyTorch and CUDA setup + if self.framework == "pyTorch": + for param in self.model.parameters(): + param.requires_grad = False + self.model.to(self.device) + + # ultralytics setup + if self.framework == "ultralytics": + self.model = self.model(self.weights) + + # tensorflow setup def setup_camera_subscriptions(self): self.new_subscription( @@ -101,6 +130,30 @@ def setup_camera_publishers(self): def handle_camera_image(self, image): startTime = perf_counter() + + # free up cuda memory + if self.device == "cuda": + torch.cuda.empty_cache() + + print("Before Model: ", perf_counter() - startTime) + + if self.framework == "pyTorch": + vision_result = self.predict_torch(image) + + if self.framework == "ultralytics": + vision_result = self.predict_ultralytics(image) + + print("After Model: ", perf_counter() - startTime) + + # publish image to rviz + img_msg = self.bridge.cv2_to_imgmsg(vision_result, + encoding="passthrough") + img_msg.header = image.header + self.publisher.publish(img_msg) + + pass + + def predict_torch(self, image): self.model.eval() cv_image = self.bridge.imgmsg_to_cv2(img_msg=image, desired_encoding='passthrough') @@ -114,39 +167,41 @@ def handle_camera_image(self, image): input_image = preprocess(cv_image).unsqueeze(dim=0) input_image = input_image.to(self.device) - print("Before Model: ", perf_counter() - startTime) prediction = self.model(input_image) - print("After Model: ", perf_counter() - startTime) + if (self.type == "detection"): vision_result = self.apply_bounding_boxes(cv_image, prediction[0]) if (self.type == "segmentation"): vision_result = self.create_mask(cv_image, prediction['out']) - img_msg = self.bridge.cv2_to_imgmsg(vision_result, - encoding="passthrough") - img_msg.header = image.header + return vision_result - self.publisher.publish(img_msg) - print("After Publish: ", perf_counter() - startTime) + def predict_ultralytics(self, image): + cv_image = self.bridge.imgmsg_to_cv2(img_msg=image, + desired_encoding='passthrough') + cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR) + print(cv_image.shape) - pass + output = self.model(cv_image) + + return output[0].plot() def create_mask(self, input_image, model_output): output_predictions = torch.argmax(model_output, dim=0) - for i in range(21): output_predictions[i] = output_predictions[i] == i output_predictions = output_predictions.to(dtype=torch.bool) - input_image = t.ToTensor()(input_image) - input_image = input_image.to(dtype=torch.uint8) print(output_predictions.shape) - print(input_image.shape) - segmented_image = draw_segmentation_masks(input_image, - output_predictions) - 
cv_segmented = cv2.cvtColor(segmented_image.detach().numpy(), - cv2.COLOR_BGR2RGB) + transposed_image = np.transpose(input_image, (2, 0, 1)) + tensor_image = torch.tensor(transposed_image) + tensor_image = tensor_image.to(dtype=torch.uint8) + segmented_image = draw_segmentation_masks(tensor_image, + output_predictions, + alpha=0.6) + cv_segmented = segmented_image.detach().cpu().numpy() + cv_segmented = np.transpose(cv_segmented, (1, 2, 0)) return cv_segmented def apply_bounding_boxes(self, input_image, model_output): diff --git a/code/requirements.txt b/code/requirements.txt index c15e4287..3b0948d0 100644 --- a/code/requirements.txt +++ b/code/requirements.txt @@ -11,3 +11,4 @@ scipy==1.10.0 xmltodict==0.13.0 py-trees==2.1.6 numpy==1.23.5 +ultralytics==8.0.220 \ No newline at end of file diff --git a/doc/06_perception/07_vision_node.md b/doc/06_perception/07_vision_node.md index 1b72ead4..2b9b7bd3 100644 --- a/doc/06_perception/07_vision_node.md +++ b/doc/06_perception/07_vision_node.md @@ -1,25 +1,39 @@ # Vision Node -The Visison Node serves as a replacement for the previous segmentation-node. -It provides an adaptive interface that is able to perform object-detection or image-segmentation +The Vision Node provides an adaptive interface that is able to perform object-detection and/or image-segmentation on several different models. The model can be specified as a parameter in the perception.launch file. +The VisionNode is currently using the yolov8x-seg model. + ## Usage The following code shows how the Vision-Node is specified in perception.launch ` - +
- +
` @@ -31,19 +45,65 @@ The Vision-Node will automatically switch between object-detection, imagesegment For now the Vision-Node only supports pyTorch models. Within the next sprint it should be able to accept other frameworks aswell. It should also be possible to run object-detection and image-segmentation at the same time. +## Model overview + +| Model | Type | Stable | Comments | +|---------------------------------------|--------------|--------|---------------------------------------| +| fasterrcnn_resnet50_fpn_v2 | detection | no | CUDA-Problems | +| fasterrcnn_mobilenet_v3_large_320_fpn | detection | no | CUDA-Problems | +| yolov8n | detection | yes | | +| yolov8s | detection | yes | | +| yolov8m | detection | yes | | +| yolov8l | detection | yes | | +| yolov8x | detection | yes | | +| yolo_nas_l | detection | no | Missing super_gradients package error | +| yolo_nas_m | detection | no | Missing super_gradients package error | +| yolo_nas_s | detection | no | Missing super_gradients package error | +| rtdetr-l | detection | yes | | +| rtdetr-x | detection | yes | | +| sam_l | detection | no | Ultralytics Error | +| FastSAM-x | detection | no | CUDA Problems | +| deeplabv3_resnet101 | segmentation | no | CUDA Problems, Segmentation Problems | +| yolov8x-seg | segmentation | yes | | + ## How it works ### Initialization The Vision-Node contains a Dictionary with all it's models. Depending on the model parameter it will initialize the correct model and weights. -` -self.model_dict = { - "fasterrcnn_resnet50_fpn_v2": (fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT), FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, "detection", "pyTorch"), - "fasterrcnn_mobilenet_v3_large_320_fpn": (fasterrcnn_mobilenet_v3_large_320_fpn(weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT), FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT, "detection", "pyTorch"), - "deeplabv3_resnet101": (deeplabv3_resnet101(weights=DeepLabV3_ResNet101_Weights.DEFAULT), DeepLabV3_ResNet101_Weights.DEFAULT, "segmentation", "pyTorch") - } -` +`self.model_dict = { + "fasterrcnn_resnet50_fpn_v2": + (fasterrcnn_resnet50_fpn_v2( + weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT), + FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, + "detection", + "pyTorch"), + "fasterrcnn_mobilenet_v3_large_320_fpn": + (fasterrcnn_mobilenet_v3_large_320_fpn( + weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT), + FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT, + "detection", + "pyTorch"), + "deeplabv3_resnet101": + (deeplabv3_resnet101( + weights=DeepLabV3_ResNet101_Weights.DEFAULT), + DeepLabV3_ResNet101_Weights.DEFAULT, + "segmentation", + "pyTorch"), + 'yolov8n': (YOLO, "yolov8n.pt", "detection", "ultralytics"), + 'yolov8s': (YOLO, "yolov8s.pt", "detection", "ultralytics"), + 'yolov8m': (YOLO, "yolov8m.pt", "detection", "ultralytics"), + 'yolov8l': (YOLO, "yolov8l.pt", "detection", "ultralytics"), + 'yolov8x': (YOLO, "yolov8x.pt", "detection", "ultralytics"), + 'yolo_nas_l': (NAS, "yolo_nas_l.pt", "detection", "ultralytics"), + 'yolo_nas_m': (NAS, "yolo_nas_m.pt", "detection", "ultralytics"), + 'yolo_nas_s': (NAS, "yolo_nas_s.pt", "detection", "ultralytics"), + 'rtdetr-l': (RTDETR, "rtdetr-l.pt", "detection", "ultralytics"), + 'rtdetr-x': (RTDETR, "rtdetr-x.pt", "detection", "ultralytics"), + 'yolov8x-seg': (YOLO, "yolov8x-seg.pt", "segmentation", "ultralytics"), + 'sam_l': (SAM, "sam_l.pt", "detection", "ultralytics"), + 'FastSAM-x': (FastSAM, "FastSAM-x.pt", "detection", "ultralytics")}` 
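Depending on the framework entry of the selected tuple, the setup differs. The following standalone sketch mirrors the initialization logic of vision_node.py for one pyTorch and one ultralytics entry (shortened to two models; the variable names outside the node are chosen freely here):

`
import torch
from ultralytics import YOLO
from torchvision.models.detection import (fasterrcnn_resnet50_fpn_v2,
                                          FasterRCNN_ResNet50_FPN_V2_Weights)

# same tuple layout as in the node: (model or model class, weights, type, framework)
model_dict = {
    "fasterrcnn_resnet50_fpn_v2": (
        fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT),
        FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, "detection", "pyTorch"),
    "yolov8x-seg": (YOLO, "yolov8x-seg.pt", "segmentation", "ultralytics"),
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model, weights, model_type, framework = model_dict["yolov8x-seg"]

if framework == "pyTorch":
    # torchvision models are already instantiated: freeze them and move them to the device
    for param in model.parameters():
        param.requires_grad = False
    model.to(device)

if framework == "ultralytics":
    # ultralytics entries only store the class and the weight file, so instantiate here
    model = model(weights)
`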
### Core @@ -61,18 +121,21 @@ This function is automatically triggered by the Camera-Subscriber of the Vision- ## Visualization -The Vision-Node implements an ImagePublisher under the topic: "/paf//Center/segmented_image" +The Vision-Node implements an ImagePublisher under the topic: "/paf/hero/Center/segmented_image" + +The Configuration File of RViz has been changed accordingly to display the published images alongside the camera image. -The Configuartion File of RViz has been changed accordingly to display the published images alongside with the Camera. +The built-in visualization of the YOLO models works very well. ## Known Issues ### Time -First experiments showed that the handle_camera_image function is way to slow to be used reliably. It takes around 1.5 seconds to handle one image. +With the YOLO models the timing issue is fixed, because ultralytics seems to manage the CUDA resources very well. -Right now the Vision-Node is not using cuda due to cuda-memory-issues that couldn't be fixed right away. +When running the other models, the CUDA errors persist. -The performance is expected to rise quite a bit when using cuda. ## Segmentation -Also their is lots more room for testing different models inside the Vision-Node to evualte their accuracy and time-performance. +For some reason the create_mask function (which draws the segmentation masks) works in a standalone project, but not in the Vision-Node. +I stopped debugging, because the YOLO models work much better and provide a very good and stable baseline.
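To narrow such problems down, the models from the overview table can also be tried outside of ROS. A minimal standalone sketch that uses the same ultralytics calls as predict_ultralytics (the file names are only examples):

`
import cv2
from ultralytics import YOLO

# load one of the models from the model overview
model = YOLO("yolov8x-seg.pt")

# run it on a single image, just like predict_ultralytics does
image = cv2.imread("example_camera_frame.png")
results = model(image)

# plot() returns the annotated image as a numpy array
annotated = results[0].plot()
cv2.imwrite("example_result.png", annotated)
`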