
Merge branch 'main' into 130-feature-updating-the-perception-documentation-readmemd
robertik10 committed Jan 2, 2024
2 parents 46c15da + 1c9342e commit af917b3
Showing 47 changed files with 784 additions and 58 deletions.
4 changes: 2 additions & 2 deletions build/docker-compose.yml
@@ -57,8 +57,8 @@ services:
tty: true
shm_size: 2gb
#command: bash -c "sleep 10 && python3 /opt/leaderboard/leaderboard/leaderboard_evaluator.py --debug=0 --routes=/opt/leaderboard/data/routes_devtest.xml --agent=/opt/leaderboard/leaderboard/autoagents/npc_agent.py --host=carla-simulator --track=SENSORS"
#command: bash -c "sleep 10 && roslaunch agent/launch/dev.launch"
command: bash -c "sleep 10 && sudo chown -R carla:carla ../code/ && sudo chmod -R a+w ../code/ && python3 /opt/leaderboard/leaderboard/leaderboard_evaluator.py --debug=0 --routes=/opt/leaderboard/data/routes_devtest.xml --agent=/workspace/code/agent/src/agent/agent.py --host=carla-simulator --track=MAP"
command: bash -c "sleep 10 && roslaunch agent/launch/dev.launch"
#command: bash -c "sleep 10 && sudo chown -R carla:carla ../code/ && sudo chmod -R a+w ../code/ && python3 /opt/leaderboard/leaderboard/leaderboard_evaluator.py --debug=0 --routes=/opt/leaderboard/data/routes_devtest.xml --agent=/workspace/code/agent/src/agent/agent.py --host=carla-simulator --track=MAP"
logging:
driver: "local"
environment:
12 changes: 6 additions & 6 deletions code/agent/config/rviz_config.rviz
@@ -63,11 +63,11 @@ Visualization Manager:
Unreliable: false
Value: true
Visibility:
-          Grid: true
-          Imu: true
-          Path: true
-          PointCloud2: true
-          Value: true
+          Grid: false
+          Imu: false
+          Path: false
+          PointCloud2: false
+          Value: false
Zoom Factor: 1
- Class: rviz/Image
Enabled: true
@@ -327,4 +327,4 @@ Window Geometry:
collapsed: false
Width: 2488
X: 1992
-Y: 27
\ No newline at end of file
+Y: 27
23 changes: 19 additions & 4 deletions code/perception/launch/perception.launch
@@ -33,15 +33,30 @@
<node pkg="perception" type="vision_node.py" name="VisionNode" output="screen">
<param name="role_name" value="$(arg role_name)" />
<param name="side" value="Center" />
-    <!--

+    <!--
     Object-Detection:
      - fasterrcnn_resnet50_fpn_v2
+     - fasterrcnn_mobilenet_v3_large_320_fpn
+     - yolov8n
+     - yolov8s
+     - yolov8m
+     - yolov8l
+     - yolov8x
+     - yolo_nas_l
+     - yolo_nas_m
+     - yolo_nas_s
+     - rtdetr-l
+     - rtdetr-x
+     - sam_l
+     - FastSAM-x
     Image-Segmentation:
-     - deeplabv3_resnet101
-    -->
-    <param name="model" value="fasterrcnn_resnet50_fpn_v2" />
+     - deeplabv3_resnet101
+     - yolov8x-seg
+    -->
+
+    <param name="model" value="yolov8x-seg" />
</node>

<node pkg="perception" type="global_plan_distance_publisher.py" name="GlobalPlanDistance" output="screen">
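
For orientation, the model value set here is resolved by the VisionNode (vision_node.py, further down in this commit) through a dictionary that maps each name to a (model class, weights file, task, framework) tuple. A minimal sketch of that lookup with an abbreviated stand-in dictionary (it assumes the ultralytics package is installed):

    # Abbreviated stand-in for the model dictionary in vision_node.py below.
    from ultralytics import YOLO

    model_dict = {
        "yolov8x-seg": (YOLO, "yolov8x-seg.pt", "segmentation", "ultralytics"),
        "yolov8n": (YOLO, "yolov8n.pt", "detection", "ultralytics"),
    }

    model_cls, weights, task, framework = model_dict["yolov8x-seg"]
    if framework == "ultralytics":
        model = model_cls(weights)  # ultralytics fetches the .pt weights on first use
    print(task, framework)          # -> segmentation ultralytics
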
6 changes: 6 additions & 0 deletions code/perception/src/traffic_light_detection/dataset.dvc
@@ -0,0 +1,6 @@
outs:
- md5: 3a559397ebc58c1ecf142dea18d03367.dir
size: 13745063
nfiles: 2723
hash: md5
path: dataset
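
This new .dvc pointer puts the roughly 13 MB, 2723-file training dataset under DVC control instead of git. A sketch of resolving it from Python (hypothetical usage; it assumes the dvc package is installed and a DVC remote is configured for this repository):

    # Hypothetical: look up the remote storage URL behind the DVC-tracked dataset.
    import dvc.api

    url = dvc.api.get_url("code/perception/src/traffic_light_detection/dataset")
    print(url)  # URL of the directory object whose hash is listed in dataset.dvc
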
34 changes: 34 additions & 0 deletions code/perception/src/traffic_light_detection/dvc.lock
@@ -0,0 +1,34 @@
schema: '2.0'
stages:
train:
cmd: python src/traffic_light_detection/traffic_light_training.py
deps:
- path: dataset
md5: 3a559397ebc58c1ecf142dea18d03367.dir
size: 13745063
nfiles: 2723
- path: src
hash: md5
md5: b6c9cb867c89ad6e86403d9c33538136.dir
size: 23777
nfiles: 10
params:
params.yaml:
train:
epochs: 100
batch_size: 32
outs:
- path: dvclive/metrics.json
hash: md5
md5: af33de699558fbfd3edee1607ba88f81
size: 218
- path: dvclive/plots
hash: md5
md5: 774919de9e9d6820ac6821d0819829c1.dir
size: 8900
nfiles: 4
- path: models
hash: md5
md5: ee67bac2f189d2cc5a199d91ba3295ac.dir
size: 10815
nfiles: 1
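
The md5 values recorded here are plain content hashes; directories use DVC's aggregated .dir scheme, but a single output file can be checked by hand, as in this sketch:

    # Recompute the md5 content hash that dvc.lock stores for a single file.
    import hashlib

    def file_md5(path):
        h = hashlib.md5()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                h.update(chunk)
        return h.hexdigest()

    print(file_md5("dvclive/metrics.json"))  # compare with the value above
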
21 changes: 21 additions & 0 deletions code/perception/src/traffic_light_detection/dvc.yaml
@@ -0,0 +1,21 @@
stages:
train:
cmd: python src/traffic_light_detection/traffic_light_training.py
deps:
- dataset
- src
params:
- params.yaml:
outs:
- models
metrics:
- dvclive/metrics.json:
cache: false
plots:
- dvclive/plots:
cache: false
metrics:
- dvclive/metrics.json
plots:
- dvclive/plots/metrics:
x: step
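
With this pipeline file, dvc repro re-runs the train stage whenever the dataset, the source tree, or the parameters change, and DVCLive writes its summary to dvclive/metrics.json. A sketch of inspecting that file (the metric names depend on what traffic_light_training.py logs):

    # Read the DVCLive metrics summary produced by the train stage.
    import json

    with open("dvclive/metrics.json") as f:
        metrics = json.load(f)
    for name, value in metrics.items():
        print(name, value)
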
@@ -28,7 +28,7 @@ def __call__(self, image):
image = torchvision.transforms.Pad((0, pad))(image)
else:
image = torchvision.transforms.Pad((pad, 0))(image)
-        image = torchvision.transforms.Resize(self.size)(image)
+        image = torchvision.transforms.Resize(self.size, antialias=True)(image)
return image


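
Passing antialias=True makes tensor resizing match PIL-backed resizing and avoids torchvision's warning about the changing default. A small sketch of the pad-then-resize path above, using a dummy tensor in place of a camera image:

    # Pad the short side to a square, then resize with antialiasing.
    import torch
    import torchvision

    image = torch.zeros(3, 100, 60)  # C x H x W dummy image, taller than wide
    pad = (100 - 60) // 2
    image = torchvision.transforms.Pad((pad, 0))(image)  # pad left/right to square
    image = torchvision.transforms.Resize((48, 48), antialias=True)(image)
    print(image.shape)  # torch.Size([3, 48, 48])
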
@@ -59,10 +59,10 @@ def load_model(cfg):
if path is not None:
try:
state_dict = torch.load(path)
-            model.load_state_dict(state_dict).eval()
+            model.load_state_dict(state_dict)
print(f"Pretrained model loaded from {path}")
return model
-        except (Exception, ):
-            print(f"No pretrained model found at {path}. "
+        except Exception as e:
+            print(f"No pretrained model found at {path}: {e}\n"
f"Created new model with random weights.")
return model.eval()
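
The two fixes above matter because load_state_dict returns a named tuple of missing and unexpected keys rather than the module, so chaining .eval() onto it raises an AttributeError, and the bare `except (Exception, ):` silently discarded the error message. A minimal reproduction of the corrected pattern with a stand-in model:

    # load_state_dict returns _IncompatibleKeys, not the model itself.
    import torch
    import torch.nn as nn

    model = nn.Linear(4, 2)                     # stand-in for the classifier
    torch.save(model.state_dict(), "/tmp/weights.pt")

    state_dict = torch.load("/tmp/weights.pt")
    model.load_state_dict(state_dict)           # returns a key report, not the model
    model.eval()                                # so call eval() on the model itself
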
@@ -1,5 +1,4 @@
import argparse
-from pathlib import Path

import torch.cuda
import torchvision.transforms as t
@@ -24,6 +23,9 @@ def parse_args():
'model_acc_99.53_val_100.0.pt',
help='path to pretrained model',
type=str)
+    parser.add_argument('--image', default=None,
+                        help='/dataset/val/green/green_83.png',
+                        type=str)
return parser.parse_args()


@@ -66,8 +68,7 @@ def __call__(self, img):
# main function for testing purposes
if __name__ == '__main__':
args = parse_args()
-    image_path = str(Path(__file__).resolve().parents[2].resolve())
-    image_path += "/dataset/val/green/green_83.png"
+    image_path = args.image
image = load_image(image_path)
classifier = TrafficLightInference(args.model)
pred = classifier(image)
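
With the hard-coded path removed, the image is supplied on the command line; note that --image defaults to None, so omitting it will make load_image fail. A hypothetical invocation, reusing the old default path as the example:

    # Hypothetical command line (model path shortened for readability):
    #   python traffic_light_inference.py \
    #       --model models/model_acc_99.53_val_100.0.pt \
    #       --image dataset/val/green/green_83.png
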
99 changes: 77 additions & 22 deletions code/perception/src/vision_node.py
@@ -19,6 +19,7 @@
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
import numpy as np
from time import perf_counter
+from ultralytics import NAS, YOLO, RTDETR, SAM, FastSAM
"""
VisionNode:
@@ -55,17 +56,32 @@ def __init__(self, name, **kwargs):
weights=DeepLabV3_ResNet101_Weights.DEFAULT),
DeepLabV3_ResNet101_Weights.DEFAULT,
"segmentation",
"pyTorch")
"pyTorch"),
'yolov8n': (YOLO, "yolov8n.pt", "detection", "ultralytics"),
'yolov8s': (YOLO, "yolov8s.pt", "detection", "ultralytics"),
'yolov8m': (YOLO, "yolov8m.pt", "detection", "ultralytics"),
'yolov8l': (YOLO, "yolov8l.pt", "detection", "ultralytics"),
'yolov8x': (YOLO, "yolov8x.pt", "detection", "ultralytics"),
'yolo_nas_l': (NAS, "yolo_nas_l.pt", "detection", "ultralytics"),
'yolo_nas_m': (NAS, "yolo_nas_m.pt", "detection", "ultralytics"),
'yolo_nas_s': (NAS, "yolo_nas_s.pt", "detection", "ultralytics"),
'rtdetr-l': (RTDETR, "rtdetr-l.pt", "detection", "ultralytics"),
'rtdetr-x': (RTDETR, "rtdetr-x.pt", "detection", "ultralytics"),
'yolov8x-seg': (YOLO, "yolov8x-seg.pt", "segmentation",
"ultralytics"),
'sam_l': (SAM, "sam_l.pt", "detection", "ultralytics"),
'FastSAM-x': (FastSAM, "FastSAM-x.pt", "detection", "ultralytics"),

}

+        print(torch.__version__)

# general setup
self.bridge = CvBridge()
self.role_name = self.get_param("role_name", "hero")
self.side = self.get_param("side", "Center")
# self.device = torch.device("cuda"
# if torch.cuda.is_available() else "cpu") Cuda Memory Issues
self.device = torch.device("cpu")
print("VisionNode working on: ", self.device)
self.device = torch.device("cuda"
if torch.cuda.is_available() else "cpu")

# publish / subscribe setup
self.setup_camera_subscriptions()
@@ -80,9 +96,22 @@ def __init__(self, name, **kwargs):
self.type = model_info[2]
self.framework = model_info[3]
print("Vision Node Configuration:")
print("Device -> ", self.device)
print(f"Model -> {self.get_param('model')},")
print(f"Type -> {self.type}, Framework -> {self.framework}")
-        self.model.to(self.device)
+        torch.cuda.memory.set_per_process_memory_fraction(0.1)
+
+        # pyTorch and CUDA setup
+        if self.framework == "pyTorch":
+            for param in self.model.parameters():
+                param.requires_grad = False
+            self.model.to(self.device)
+
+        # ultralytics setup
+        if self.framework == "ultralytics":
+            self.model = self.model(self.weights)
+
+        # tensorflow setup

def setup_camera_subscriptions(self):
self.new_subscription(
@@ -101,6 +130,30 @@ def setup_camera_publishers(self):

def handle_camera_image(self, image):
startTime = perf_counter()

+        # free up cuda memory
+        if self.device == "cuda":
+            torch.cuda.empty_cache()
+
+        print("Before Model: ", perf_counter() - startTime)
+
+        if self.framework == "pyTorch":
+            vision_result = self.predict_torch(image)
+
+        if self.framework == "ultralytics":
+            vision_result = self.predict_ultralytics(image)
+
+        print("After Model: ", perf_counter() - startTime)
+
+        # publish image to rviz
+        img_msg = self.bridge.cv2_to_imgmsg(vision_result,
+                                            encoding="passthrough")
+        img_msg.header = image.header
+        self.publisher.publish(img_msg)
+
+        pass
+
+    def predict_torch(self, image):
+        self.model.eval()
cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
desired_encoding='passthrough')
@@ -114,39 +167,41 @@ def handle_camera_image(self, image):

input_image = preprocess(cv_image).unsqueeze(dim=0)
input_image = input_image.to(self.device)
print("Before Model: ", perf_counter() - startTime)
prediction = self.model(input_image)
print("After Model: ", perf_counter() - startTime)

if (self.type == "detection"):
vision_result = self.apply_bounding_boxes(cv_image, prediction[0])
if (self.type == "segmentation"):
vision_result = self.create_mask(cv_image, prediction['out'])

-        img_msg = self.bridge.cv2_to_imgmsg(vision_result,
-                                            encoding="passthrough")
-        img_msg.header = image.header
+        return vision_result

-        self.publisher.publish(img_msg)
-        print("After Publish: ", perf_counter() - startTime)
+    def predict_ultralytics(self, image):
+        cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
+                                             desired_encoding='passthrough')
+        cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
+        print(cv_image.shape)
+
-        pass
+        output = self.model(cv_image)
+
+        return output[0].plot()

def create_mask(self, input_image, model_output):
output_predictions = torch.argmax(model_output, dim=0)

for i in range(21):
output_predictions[i] = output_predictions[i] == i

output_predictions = output_predictions.to(dtype=torch.bool)

-        input_image = t.ToTensor()(input_image)
-        input_image = input_image.to(dtype=torch.uint8)
-        print(output_predictions.shape)
-        print(input_image.shape)
-        segmented_image = draw_segmentation_masks(input_image,
-                                                  output_predictions)
-        cv_segmented = cv2.cvtColor(segmented_image.detach().numpy(),
-                                    cv2.COLOR_BGR2RGB)
+        transposed_image = np.transpose(input_image, (2, 0, 1))
+        tensor_image = torch.tensor(transposed_image)
+        tensor_image = tensor_image.to(dtype=torch.uint8)
+        segmented_image = draw_segmentation_masks(tensor_image,
+                                                  output_predictions,
+                                                  alpha=0.6)
+        cv_segmented = segmented_image.detach().cpu().numpy()
+        cv_segmented = np.transpose(cv_segmented, (1, 2, 0))
return cv_segmented

def apply_bounding_boxes(self, input_image, model_output):
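
To make the new ultralytics path concrete: predict_ultralytics feeds the BGR frame directly to the model and publishes results[0].plot(), i.e. the frame with boxes or masks drawn on it. A standalone sketch of that round trip (it assumes the ultralytics package and an example.jpg on disk):

    # Standalone version of the ultralytics inference path used above.
    import cv2
    from ultralytics import YOLO

    model = YOLO("yolov8x-seg.pt")     # weights are downloaded on first use
    frame = cv2.imread("example.jpg")  # BGR image, like the converted camera frame
    results = model(frame)             # one Results object per input image
    annotated = results[0].plot()      # numpy array with detections drawn in
    cv2.imwrite("annotated.jpg", annotated)

The reworked create_mask follows the same idea on the pyTorch side: draw_segmentation_masks expects a CHW uint8 tensor, hence the np.transpose/torch.tensor conversion before drawing and the transpose back to HWC before publishing.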
