Integrate automated tablet placement (#78)
* [WIP] integrate show tablet into web app

* Got toggle body pose overlay checkbox working

* Web app UX is working, with dummy show tablet action

* Finalize integration

* Ignore stale body poses

* Reset to navigation mode after invoking autonomous clients

* Remove dummy action server

* update show tablet package name (#88)

---------

Co-authored-by: Matt Lamsey <[email protected]>
hello-amal and hello-lamsey authored Jul 31, 2024
1 parent 188ca9c commit e6847ec
Showing 14 changed files with 575 additions and 50 deletions.
10 changes: 5 additions & 5 deletions launch/multi_camera.launch.py
@@ -52,7 +52,7 @@
{'name': 'depth_module.infra1_format', 'default': 'Y8', 'description': 'infra1 stream format'},
{'name': 'depth_module.infra2_format', 'default': 'Y8', 'description': 'infra2 stream format'},
{'name': 'depth_module.color_profile', 'default': '0,0,0', 'description': 'infra2 stream format'},
-{'name': 'depth_module.exposure', 'default': '8500', 'description': 'Depth module manual exposure value'},
+{'name': 'depth_module.exposure', 'default': '4500', 'description': 'Depth module manual exposure value'},
{'name': 'depth_module.gain', 'default': '16', 'description': 'Depth module manual gain value'},
{'name': 'depth_module.hdr_enabled', 'default': 'false', 'description': 'Depth module hdr enablement flag. Used for hdr_merge filter'}, # noqa: E501
{'name': 'depth_module.enable_auto_exposure', 'default': 'true', 'description': 'enable/disable auto exposure for depth image'}, # noqa: E501
@@ -104,8 +104,8 @@
'rgb_camera.profile': D435_RESOLUTION,
'rgb_camera.color_profile': D435_RESOLUTION,
'pointcloud.enable': 'true',
-'pointcloud.stream_filter': '',
-'pointcloud.stream_index_filter': '',
+'pointcloud.stream_filter': '2',
+'pointcloud.stream_index_filter': '0',
'enable_sync': 'true',
'align_depth.enable': 'true',
'initial_reset': 'true',
@@ -121,8 +121,8 @@
'rgb_camera.profile': D405_RESOLUTION,
'depth_module.color_profile': D405_RESOLUTION,
'pointcloud.enable': 'true',
-'pointcloud.stream_filter': '',
-'pointcloud.stream_index_filter': '',
+'pointcloud.stream_filter': '2',
+'pointcloud.stream_index_filter': '0',
'enable_sync': 'true',
'align_depth.enable': 'false',
'initial_reset': 'true',
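
Both camera blocks now pin the pointcloud texture source explicitly instead of leaving it to the realsense2_camera defaults. For reference, the integers are assumed to index librealsense's rs2_stream enum (depth = 1, color = 2), so 'pointcloud.stream_filter': '2' with 'pointcloud.stream_index_filter': '0' would texture the pointcloud from the first color stream. A quick sanity check of that assumption, using the pyrealsense2 bindings (not part of this commit):

import pyrealsense2 as rs

# Assumed rs2_stream enum values; if these hold, stream_filter '2' selects
# the color stream and stream_index_filter '0' selects the first such stream.
assert int(rs.stream.depth) == 1
assert int(rs.stream.color) == 2
print(rs.stream(2))  # expected output: stream.color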
34 changes: 32 additions & 2 deletions launch/web_interface.launch.py
@@ -409,7 +409,12 @@ def generate_launch_description():
PythonLaunchDescriptionSource(
PathJoinSubstitution([core_package, "launch", "stretch_driver.launch.py"])
),
-launch_arguments={"broadcast_odom_tf": "True"}.items(),
+# TODO: The tablet_placement code should change the mode, not the launch file
+launch_arguments={
+"mode": "position",
+"broadcast_odom_tf": "True",
+"fail_out_of_range_goal": "False",
+}.items(),
)
ld.add_action(stretch_driver_launch)

@@ -545,7 +550,10 @@
# )

# Move To Pre-grasp Action Server
-if stretch_tool == "eoa_wrist_dw3_tool_sg3":
+if (
+stretch_tool == "eoa_wrist_dw3_tool_sg3"
+or stretch_tool == "tool_stretch_dex_wrist"
+):
move_to_pregrasp_node = Node(
package="stretch_web_teleop",
executable="move_to_pregrasp.py",
@@ -565,4 +573,26 @@
)
ld.add_action(text_to_speech_node)

if stretch_tool == "eoa_wrist_dw3_tool_tablet_12in":
detect_body_landmarks_node = Node(
package="stretch_show_tablet",
executable="detect_body_landmarks",
output="screen",
)
ld.add_action(detect_body_landmarks_node)

plan_tablet_pose_node = Node(
package="stretch_show_tablet",
executable="plan_tablet_pose_service",
output="screen",
)
ld.add_action(plan_tablet_pose_node)

show_tablet_node = Node(
package="stretch_show_tablet",
executable="show_tablet_server",
output="screen",
)
ld.add_action(show_tablet_node)

return ld
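
The TODO above flags that switching the driver into position mode belongs in the tablet-placement code rather than in the launch file, and the commit message notes the robot is reset to navigation mode after the autonomous clients finish. A rough sketch of that runtime switch, assuming stretch_driver exposes its usual std_srvs/srv/Trigger services (/switch_to_position_mode and /switch_to_navigation_mode, as in stretch_ros2):

import rclpy
from rclpy.node import Node
from std_srvs.srv import Trigger

def switch_mode(node: Node, service_name: str) -> bool:
    # Call one of stretch_driver's (assumed) Trigger-based mode services.
    client = node.create_client(Trigger, service_name)
    if not client.wait_for_service(timeout_sec=5.0):
        node.get_logger().error(f"{service_name} is unavailable")
        return False
    future = client.call_async(Trigger.Request())
    rclpy.spin_until_future_complete(node, future)
    result = future.result()
    return result is not None and result.success

rclpy.init()
node = Node("tablet_mode_switcher")  # hypothetical node name
switch_mode(node, "/switch_to_position_mode")    # before invoking the autonomous client
# ... run the show-tablet behavior here ...
switch_mode(node, "/switch_to_navigation_mode")  # restore the teleop default
rclpy.shutdown()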
179 changes: 147 additions & 32 deletions nodes/configure_video_streams.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python3

import json
import math
import sys
import threading
@@ -22,12 +23,14 @@
from rclpy.qos import QoSProfile, ReliabilityPolicy
from rclpy.time import Time
from sensor_msgs.msg import CameraInfo, CompressedImage, Image, JointState, PointCloud2
from std_msgs.msg import String
from std_srvs.srv import SetBool

from stretch_web_teleop_helpers.conversions import (
cv2_image_to_ros_msg,
deproject_pixel_to_pointcloud_point,
depth_img_to_pointcloud,
project_points_to_pixels,
ros_msg_to_cv2_image,
tf2_get_transform,
)
@@ -38,9 +41,11 @@
class ConfigureVideoStreams(Node):
# Define constant colors and alpha values for the depth AR overlay
BACKGROUND_COLOR = (200, 200, 200)
-REALSENSE_DEPTH_AR_COLOR = np.array((0, 191, 255), dtype=np.uint8)
+REALSENSE_DEPTH_AR_COLOR = np.array((0, 191, 255), dtype=np.uint8) # cyan
REALSENSE_DEPTH_AR_ALPHA = 0.6
-GRIPPER_DEPTH_AR_COLOR = np.array((255, 0, 191), dtype=np.uint8)
+REALSENSE_BODY_LANDMARKS_AR_COLOR = np.array((255, 0, 191), dtype=np.uint8) # pink
+REALSENSE_BODY_LANDMARKS_AR_ALPHA = 0.6
+GRIPPER_DEPTH_AR_COLOR = np.array((255, 0, 191), dtype=np.uint8) # pink
GRIPPER_DEPTH_AR_ALPHA = 0.6

# Define constants related to downsampling the pointcloud. Combine pixels
@@ -278,6 +283,18 @@ def __init__(
callback_group=MutuallyExclusiveCallbackGroup(),
)

# Also subscribe to body landmarks to overlay the user's pose
self.latest_body_landmarks_str = None
self.latest_body_landmarks_str_recv_time = None
self.latest_body_landmarks_str_lock = threading.Lock()
self.body_landmarks_subscriber = self.create_subscription(
String,
"/human_estimates/latest_body_pose",
self.realsense_body_landmarks_cb,
QoSProfile(depth=1, reliability=ReliabilityPolicy.BEST_EFFORT),
callback_group=MutuallyExclusiveCallbackGroup(),
)

# Default image perspectives
if self.use_overhead:
self.overhead_camera_perspective = (
@@ -296,6 +313,10 @@ def __init__(
SetBool, "realsense_depth_ar", self.realsense_depth_ar_callback
)
self.realsense_depth_ar = False
self.realsense_body_pose_ar_service = self.create_service(
SetBool, "realsense_body_pose_ar", self.realsense_body_pose_ar_callback
)
self.realsense_body_pose_ar = False
if self.use_gripper:
self.gripper_depth_ar_service = self.create_service(
SetBool, "gripper_depth_ar", self.gripper_depth_ar_callback
@@ -409,25 +430,17 @@ def overlay_realsense_depth_ar(
# Get filtered points in camera frame
pts_in_range = pcl_cloud_filtered.to_array()[filtered_indices, :]

-# Convert to homogeneous coordinates
-pts_in_range = np.hstack((pts_in_range, np.ones((pts_in_range.shape[0], 1))))

# Change color of pixels in robot's reach
if img is not None:
-# Get pixel coordinates
-coords = np.matmul(self.realsense_P, np.transpose(pts_in_range)) # 3 x N
-u_idx = (coords[0, :] / coords[2, :]).astype(int)
-v_idx = (coords[1, :] / coords[2, :]).astype(int)
-uv = np.vstack((u_idx, v_idx)).T # N x 2
-uv_dedup = np.unique(uv, axis=0)
-in_bounds_idx = np.where(
-(uv_dedup[:, 0] >= 0)
-& (uv_dedup[:, 0] < img.shape[1])
-& (uv_dedup[:, 1] >= 0)
-& (uv_dedup[:, 1] < img.shape[0])
-)
-u_mask = uv_dedup[in_bounds_idx][:, 0]
-v_mask = uv_dedup[in_bounds_idx][:, 1]
+uv_mask = project_points_to_pixels(
+pts_in_range,
+self.realsense_P,
+width=img.shape[1],
+height=img.shape[0],
+)
+u_mask = uv_mask[:, 0]
+v_mask = uv_mask[:, 1]

# Overlay the pixels in the robot's reach
overlay_mask = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
@@ -456,12 +469,105 @@
img = np.where(overlay_mask[:, :, None], overlaid_img, img)
return img

def overlay_realsense_body_pose_ar(
self,
body_landmarks_str: str,
body_landmarks_str_recv_time: Time,
img: npt.NDArray[np.uint8],
timeout_sec: float = 2.0,
) -> npt.NDArray[np.uint8]:
"""
Overlay the detected body landmarks on the image.

Parameters
----------
body_landmarks_str : str
The body landmarks as a JSON string.
body_landmarks_str_recv_time : Time
The time at which the body landmarks string was received.
img : np.ndarray
The image to overlay the body landmarks on.
timeout_sec : float, optional
The maximum time for which the body landmark string is not considered stale.

Returns
-------
np.ndarray
The image with the body landmarks overlaid.
"""
if self.realsense_P is None:
self.get_logger().warn(
"Camera projection matrix is not available. Skipping point cloud processing."
)
return img

if self.get_clock().now() - body_landmarks_str_recv_time > Duration(
seconds=timeout_sec
):
self.get_logger().debug(
"Body landmarks are stale. Skipping body pose AR overlay.",
throttle_duration_sec=1.0,
)
return img

try:
body_landmarks_xyz = json.loads(body_landmarks_str)
except json.JSONDecodeError as err:
self.get_logger().warn(
f"Could not decode body landmarks: {repr(body_landmarks_str)}. {err}"
)
return img
self.get_logger().debug(f"Body landmarks: {repr(body_landmarks_xyz)}")
if len(body_landmarks_xyz) == 0:
self.get_logger().warn(
"No body landmarks detected.", throttle_duration_sec=1.0
)
return img
body_landmark_order = list(body_landmarks_xyz.keys())

# Convert the 3D body landmarks to 2D pixel coordinates
body_landmarks_3d = np.array(
[list(body_landmarks_xyz[name]) for name in body_landmark_order]
) # N x 3
self.get_logger().debug(f"Body landmarks 3D: {body_landmarks_3d.shape}")
body_landmarks_2d = project_points_to_pixels(
body_landmarks_3d,
self.realsense_P,
width=img.shape[1],
height=img.shape[0],
)

# Overlay circles on the detected body landmarks
overlay_mask = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
radius = min(img.shape[0], img.shape[1]) // 40
for u, v in body_landmarks_2d:
overlay_mask = cv2.circle(overlay_mask, (u, v), radius, (255,), -1)
overlay_img = np.tile(
self.REALSENSE_BODY_LANDMARKS_AR_COLOR, (img.shape[0], img.shape[1], 1)
)
overlaid_img = cv2.addWeighted(
img,
1 - self.REALSENSE_BODY_LANDMARKS_AR_ALPHA,
overlay_img,
self.REALSENSE_BODY_LANDMARKS_AR_ALPHA,
0,
)
img = np.where(overlay_mask[:, :, None], overlaid_img, img)

return img

def realsense_depth_ar_callback(self, req, res):
self.get_logger().info(f"Realsense depth AR service: {req.data}")
self.realsense_depth_ar = req.data
res.success = True
return res

def realsense_body_pose_ar_callback(self, req, res):
self.get_logger().info(f"Realsense body pose AR service: {req.data}")
self.realsense_body_pose_ar = req.data
res.success = True
return res

def gripper_depth_ar_callback(self, req, res):
self.get_logger().info(f"Gripper depth AR service: {req.data}")
self.gripper_depth_ar = req.data
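
The overlay code above only requires that /human_estimates/latest_body_pose carry a JSON object mapping landmark names to [x, y, z] positions that project through the RealSense projection matrix, and it drops messages older than timeout_sec (2.0 s by default). A hand-rolled test publisher illustrating that contract; the landmark names are hypothetical (the real ones come from the detect_body_landmarks node):

import json
import time

import rclpy
from rclpy.node import Node
from rclpy.qos import QoSProfile, ReliabilityPolicy
from std_msgs.msg import String

rclpy.init()
node = Node("body_pose_test_publisher")  # hypothetical node name
publisher = node.create_publisher(
    String,
    "/human_estimates/latest_body_pose",
    QoSProfile(depth=1, reliability=ReliabilityPolicy.BEST_EFFORT),
)
time.sleep(1.0)  # crude wait for discovery, since the QoS is best-effort
msg = String()
# Hypothetical landmark names; positions are [x, y, z] in the camera frame.
msg.data = json.dumps({"nose": [0.02, -0.31, 1.40], "left_shoulder": [0.18, -0.12, 1.50]})
publisher.publish(msg)  # must be rendered within 2.0 s or it is treated as stale
rclpy.shutdown()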
@@ -611,6 +717,11 @@ def realsense_rgb_cb(
with self.latest_realsense_rgb_image_lock:
self.latest_realsense_rgb_image = rgb_ros_image

def realsense_body_landmarks_cb(self, msg):
with self.latest_body_landmarks_str_lock:
self.latest_body_landmarks_str_recv_time = self.get_clock().now()
self.latest_body_landmarks_str = msg.data

def process_realsense_image(
self,
rgb_ros_image: Union[CompressedImage, Image],
@@ -619,12 +730,22 @@ def process_realsense_image(
if isinstance(rgb_ros_image, CompressedImage):
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
for image_config_name in self.realsense_params:
-# Overlay the pointcloud *before* cropping/masking/rotating the image,
+# Perform overlays *before* cropping/masking/rotating the image,
# for consistent (de)projection and transformations
if self.realsense_depth_ar:
with self.latest_realsense_depth_image_lock:
depth_msg = self.latest_realsense_depth_image
image = self.overlay_realsense_depth_ar(depth_msg, image)
if self.realsense_body_pose_ar:
with self.latest_body_landmarks_str_lock:
body_landmarks_str = self.latest_body_landmarks_str
body_landmarks_str_recv_time = (
self.latest_body_landmarks_str_recv_time
)
if body_landmarks_str is not None and len(body_landmarks_str) > 0:
image = self.overlay_realsense_body_pose_ar(
body_landmarks_str, body_landmarks_str_recv_time, image
)
img = self.configure_images(image, self.realsense_params[image_config_name])
# if self.aruco_markers: img = self.aruco_markers_callback(marker_msg, img)
self.realsense_images[image_config_name] = img
@@ -838,20 +959,14 @@ def overlay_gripper_depth_ar(

# Convert filtered points to (u, v) indices
pts_in_range = pcl_cloud_filtered.to_array()
-pts_in_range = np.hstack((pts_in_range, np.ones((pts_in_range.shape[0], 1))))
-coords = np.matmul(self.gripper_P, np.transpose(pts_in_range)) # 3 x N
-u_idx = (coords[0, :] / coords[2, :]).astype(int)
-v_idx = (coords[1, :] / coords[2, :]).astype(int)
-uv = np.vstack((u_idx, v_idx)).T # N x 2
-uv_dedup = np.unique(uv, axis=0)
-in_bounds_idx = np.where(
-(uv_dedup[:, 0] >= 0)
-& (uv_dedup[:, 0] < image.shape[1])
-& (uv_dedup[:, 1] >= 0)
-& (uv_dedup[:, 1] < image.shape[0])
-)
+uv_mask = project_points_to_pixels(
+pts_in_range,
+self.gripper_P,
+width=image.shape[1],
+height=image.shape[0],
+)
-u_mask = uv_dedup[in_bounds_idx][:, 0]
-v_mask = uv_dedup[in_bounds_idx][:, 1]
+u_mask = uv_mask[:, 0]
+v_mask = uv_mask[:, 1]

# Overlay the pixels in the robot's reach
overlay_mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)
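
Both overlays now delegate the projection boilerplate to project_points_to_pixels from stretch_web_teleop_helpers.conversions, whose body is not part of this diff. A sketch of what the helper plausibly does, reconstructed from the inline code it replaces (the real implementation may differ in detail):

import numpy as np
import numpy.typing as npt

def project_points_to_pixels(
    points: npt.NDArray[np.float64],  # N x 3 points in the camera frame
    P: npt.NDArray[np.float64],       # 3 x 4 camera projection matrix
    width: int,
    height: int,
) -> npt.NDArray[np.int_]:
    # Convert to homogeneous coordinates: N x 3 -> N x 4
    points_h = np.hstack((points, np.ones((points.shape[0], 1))))
    coords = P @ points_h.T  # 3 x N
    u = (coords[0, :] / coords[2, :]).astype(int)
    v = (coords[1, :] / coords[2, :]).astype(int)
    uv = np.unique(np.vstack((u, v)).T, axis=0)  # N x 2, deduplicated
    in_bounds = (
        (uv[:, 0] >= 0) & (uv[:, 0] < width) & (uv[:, 1] >= 0) & (uv[:, 1] < height)
    )
    return uv[in_bounds]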
(The diffs for the remaining changed files did not load and are not shown here.)