Integrate automated tablet placement (#78)
* [WIP] integrate show tablet into web app

* Got toggle body pose overlay checkbox working

* Web app UX is working, with dummy show tablet action

* Finalize integration

* Ignore stale body poses

* Reset to navigation mode after invoking autonomous clients

* Remove dummy action server

* update show tablet package name (#88)

---------

Co-authored-by: Matt Lamsey <[email protected]>
hello-amal and hello-lamsey authored Jul 31, 2024
1 parent 188ca9c commit e6847ec
Showing 14 changed files with 575 additions and 50 deletions.
10 changes: 5 additions & 5 deletions launch/multi_camera.launch.py
@@ -52,7 +52,7 @@
{'name': 'depth_module.infra1_format', 'default': 'Y8', 'description': 'infra1 stream format'},
{'name': 'depth_module.infra2_format', 'default': 'Y8', 'description': 'infra2 stream format'},
{'name': 'depth_module.color_profile', 'default': '0,0,0', 'description': 'infra2 stream format'},
-{'name': 'depth_module.exposure', 'default': '8500', 'description': 'Depth module manual exposure value'},
+{'name': 'depth_module.exposure', 'default': '4500', 'description': 'Depth module manual exposure value'},
{'name': 'depth_module.gain', 'default': '16', 'description': 'Depth module manual gain value'},
{'name': 'depth_module.hdr_enabled', 'default': 'false', 'description': 'Depth module hdr enablement flag. Used for hdr_merge filter'}, # noqa: E501
{'name': 'depth_module.enable_auto_exposure', 'default': 'true', 'description': 'enable/disable auto exposure for depth image'}, # noqa: E501
@@ -104,8 +104,8 @@
'rgb_camera.profile': D435_RESOLUTION,
'rgb_camera.color_profile': D435_RESOLUTION,
'pointcloud.enable': 'true',
-'pointcloud.stream_filter': '',
-'pointcloud.stream_index_filter': '',
+'pointcloud.stream_filter': '2',
+'pointcloud.stream_index_filter': '0',
'enable_sync': 'true',
'align_depth.enable': 'true',
'initial_reset': 'true',
@@ -121,8 +121,8 @@
'rgb_camera.profile': D405_RESOLUTION,
'depth_module.color_profile': D405_RESOLUTION,
'pointcloud.enable': 'true',
-'pointcloud.stream_filter': '',
-'pointcloud.stream_index_filter': '',
+'pointcloud.stream_filter': '2',
+'pointcloud.stream_index_filter': '0',
'enable_sync': 'true',
'align_depth.enable': 'false',
'initial_reset': 'true',
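
Both camera blocks now pin the pointcloud texture source explicitly instead of leaving it to the realsense2_camera defaults. For reference, the integers are assumed to index librealsense's rs2_stream enum (depth = 1, color = 2), so 'pointcloud.stream_filter': '2' with 'pointcloud.stream_index_filter': '0' would texture the pointcloud from the first color stream. A quick sanity check of that assumption, using the pyrealsense2 bindings (not part of this commit):

import pyrealsense2 as rs

# Assumed rs2_stream enum values; if these hold, stream_filter '2' selects
# the color stream and stream_index_filter '0' selects the first such stream.
assert int(rs.stream.depth) == 1
assert int(rs.stream.color) == 2
print(rs.stream(2))  # expected output: stream.color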
34 changes: 32 additions & 2 deletions launch/web_interface.launch.py
@@ -409,7 +409,12 @@ def generate_launch_description():
PythonLaunchDescriptionSource(
PathJoinSubstitution([core_package, "launch", "stretch_driver.launch.py"])
),
-launch_arguments={"broadcast_odom_tf": "True"}.items(),
+# TODO: The tablet_placement code should change the mode, not the launch file
+launch_arguments={
+"mode": "position",
+"broadcast_odom_tf": "True",
+"fail_out_of_range_goal": "False",
+}.items(),
)
ld.add_action(stretch_driver_launch)

@@ -545,7 +550,10 @@
# )

# Move To Pre-grasp Action Server
-if stretch_tool == "eoa_wrist_dw3_tool_sg3":
+if (
+stretch_tool == "eoa_wrist_dw3_tool_sg3"
+or stretch_tool == "tool_stretch_dex_wrist"
+):
move_to_pregrasp_node = Node(
package="stretch_web_teleop",
executable="move_to_pregrasp.py",
@@ -565,4 +573,26 @@
)
ld.add_action(text_to_speech_node)

if stretch_tool == "eoa_wrist_dw3_tool_tablet_12in":
detect_body_landmarks_node = Node(
package="stretch_show_tablet",
executable="detect_body_landmarks",
output="screen",
)
ld.add_action(detect_body_landmarks_node)

plan_tablet_pose_node = Node(
package="stretch_show_tablet",
executable="plan_tablet_pose_service",
output="screen",
)
ld.add_action(plan_tablet_pose_node)

show_tablet_node = Node(
package="stretch_show_tablet",
executable="show_tablet_server",
output="screen",
)
ld.add_action(show_tablet_node)

return ld
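
The TODO above flags that switching the driver into position mode belongs in the tablet-placement code rather than in the launch file, and the commit message notes the robot is reset to navigation mode after the autonomous clients finish. A rough sketch of that runtime switch, assuming stretch_driver exposes its usual std_srvs/srv/Trigger services (/switch_to_position_mode and /switch_to_navigation_mode, as in stretch_ros2):

import rclpy
from rclpy.node import Node
from std_srvs.srv import Trigger

def switch_mode(node: Node, service_name: str) -> bool:
    # Call one of stretch_driver's (assumed) Trigger-based mode services.
    client = node.create_client(Trigger, service_name)
    if not client.wait_for_service(timeout_sec=5.0):
        node.get_logger().error(f"{service_name} is unavailable")
        return False
    future = client.call_async(Trigger.Request())
    rclpy.spin_until_future_complete(node, future)
    result = future.result()
    return result is not None and result.success

rclpy.init()
node = Node("tablet_mode_switcher")  # hypothetical node name
switch_mode(node, "/switch_to_position_mode")    # before invoking the autonomous client
# ... run the show-tablet behavior here ...
switch_mode(node, "/switch_to_navigation_mode")  # restore the teleop default
rclpy.shutdown()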
179 changes: 147 additions & 32 deletions nodes/configure_video_streams.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python3

import json
import math
import sys
import threading
@@ -22,12 +23,14 @@
from rclpy.qos import QoSProfile, ReliabilityPolicy
from rclpy.time import Time
from sensor_msgs.msg import CameraInfo, CompressedImage, Image, JointState, PointCloud2
from std_msgs.msg import String
from std_srvs.srv import SetBool

from stretch_web_teleop_helpers.conversions import (
cv2_image_to_ros_msg,
deproject_pixel_to_pointcloud_point,
depth_img_to_pointcloud,
project_points_to_pixels,
ros_msg_to_cv2_image,
tf2_get_transform,
)
@@ -38,9 +41,11 @@
class ConfigureVideoStreams(Node):
# Define constant colors and alpha values for the depth AR overlay
BACKGROUND_COLOR = (200, 200, 200)
-REALSENSE_DEPTH_AR_COLOR = np.array((0, 191, 255), dtype=np.uint8)
+REALSENSE_DEPTH_AR_COLOR = np.array((0, 191, 255), dtype=np.uint8) # cyan
REALSENSE_DEPTH_AR_ALPHA = 0.6
-GRIPPER_DEPTH_AR_COLOR = np.array((255, 0, 191), dtype=np.uint8)
+REALSENSE_BODY_LANDMARKS_AR_COLOR = np.array((255, 0, 191), dtype=np.uint8) # pink
+REALSENSE_BODY_LANDMARKS_AR_ALPHA = 0.6
+GRIPPER_DEPTH_AR_COLOR = np.array((255, 0, 191), dtype=np.uint8) # pink
GRIPPER_DEPTH_AR_ALPHA = 0.6

# Define constants related to downsampling the pointcloud. Combine pixels
@@ -278,6 +283,18 @@ def __init__(
callback_group=MutuallyExclusiveCallbackGroup(),
)

# Also subscribe to body landmarks to overlay the user's pose
self.latest_body_landmarks_str = None
self.latest_body_landmarks_str_recv_time = None
self.latest_body_landmarks_str_lock = threading.Lock()
self.body_landmarks_subscriber = self.create_subscription(
String,
"/human_estimates/latest_body_pose",
self.realsense_body_landmarks_cb,
QoSProfile(depth=1, reliability=ReliabilityPolicy.BEST_EFFORT),
callback_group=MutuallyExclusiveCallbackGroup(),
)

# Default image perspectives
if self.use_overhead:
self.overhead_camera_perspective = (
@@ -296,6 +313,10 @@ def __init__(
SetBool, "realsense_depth_ar", self.realsense_depth_ar_callback
)
self.realsense_depth_ar = False
self.realsense_body_pose_ar_service = self.create_service(
SetBool, "realsense_body_pose_ar", self.realsense_body_pose_ar_callback
)
self.realsense_body_pose_ar = False
if self.use_gripper:
self.gripper_depth_ar_service = self.create_service(
SetBool, "gripper_depth_ar", self.gripper_depth_ar_callback
@@ -409,25 +430,17 @@ def overlay_realsense_depth_ar(
# Get filtered points in camera frame
pts_in_range = pcl_cloud_filtered.to_array()[filtered_indices, :]

-# Convert to homogeneous coordinates
-pts_in_range = np.hstack((pts_in_range, np.ones((pts_in_range.shape[0], 1))))

# Change color of pixels in robot's reach
if img is not None:
-# Get pixel coordinates
-coords = np.matmul(self.realsense_P, np.transpose(pts_in_range)) # 3 x N
-u_idx = (coords[0, :] / coords[2, :]).astype(int)
-v_idx = (coords[1, :] / coords[2, :]).astype(int)
-uv = np.vstack((u_idx, v_idx)).T # N x 2
-uv_dedup = np.unique(uv, axis=0)
-in_bounds_idx = np.where(
-(uv_dedup[:, 0] >= 0)
-& (uv_dedup[:, 0] < img.shape[1])
-& (uv_dedup[:, 1] >= 0)
-& (uv_dedup[:, 1] < img.shape[0])
-)
-u_mask = uv_dedup[in_bounds_idx][:, 0]
-v_mask = uv_dedup[in_bounds_idx][:, 1]
+uv_mask = project_points_to_pixels(
+pts_in_range,
+self.realsense_P,
+width=img.shape[1],
+height=img.shape[0],
+)
+u_mask = uv_mask[:, 0]
+v_mask = uv_mask[:, 1]

# Overlay the pixels in the robot's reach
overlay_mask = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
@@ -456,12 +469,105 @@
img = np.where(overlay_mask[:, :, None], overlaid_img, img)
return img

def overlay_realsense_body_pose_ar(
self,
body_landmarks_str: str,
body_landmarks_str_recv_time: Time,
img: npt.NDArray[np.uint8],
timeout_sec: float = 2.0,
) -> npt.NDArray[np.uint8]:
"""
Overlay the detected body landmarks on the image.

Parameters
----------
body_landmarks_str : str
The body landmarks as a JSON string.
body_landmarks_str_recv_time : Time
The time at which the body landmarks string was received.
img : np.ndarray
The image to overlay the body landmarks on.
timeout_sec : float, optional
The maximum time for which the body landmark string is not considered stale.

Returns
-------
np.ndarray
The image with the body landmarks overlaid.
"""
if self.realsense_P is None:
self.get_logger().warn(
"Camera projection matrix is not available. Skipping point cloud processing."
)
return img

if self.get_clock().now() - body_landmarks_str_recv_time > Duration(
seconds=timeout_sec
):
self.get_logger().debug(
"Body landmarks are stale. Skipping body pose AR overlay.",
throttle_duration_sec=1.0,
)
return img

try:
body_landmarks_xyz = json.loads(body_landmarks_str)
except json.JSONDecodeError as err:
self.get_logger().warn(
f"Could not decode body landmarks: {repr(body_landmarks_str)}. {err}"
)
return img
self.get_logger().debug(f"Body landmarks: {repr(body_landmarks_xyz)}")
if len(body_landmarks_xyz) == 0:
self.get_logger().warn(
"No body landmarks detected.", throttle_duration_sec=1.0
)
return img
body_landmark_order = list(body_landmarks_xyz.keys())

# Convert the 3D body landmarks to 2D pixel coordinates
body_landmarks_3d = np.array(
[list(body_landmarks_xyz[name]) for name in body_landmark_order]
) # N x 3
self.get_logger().debug(f"Body landmarks 3D: {body_landmarks_3d.shape}")
body_landmarks_2d = project_points_to_pixels(
body_landmarks_3d,
self.realsense_P,
width=img.shape[1],
height=img.shape[0],
)

# Overlay circles on the detected body landmarks
overlay_mask = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
radius = min(img.shape[0], img.shape[1]) // 40
for u, v in body_landmarks_2d:
overlay_mask = cv2.circle(overlay_mask, (u, v), radius, (255,), -1)
overlay_img = np.tile(
self.REALSENSE_BODY_LANDMARKS_AR_COLOR, (img.shape[0], img.shape[1], 1)
)
overlaid_img = cv2.addWeighted(
img,
1 - self.REALSENSE_BODY_LANDMARKS_AR_ALPHA,
overlay_img,
self.REALSENSE_BODY_LANDMARKS_AR_ALPHA,
0,
)
img = np.where(overlay_mask[:, :, None], overlaid_img, img)

return img

def realsense_depth_ar_callback(self, req, res):
self.get_logger().info(f"Realsense depth AR service: {req.data}")
self.realsense_depth_ar = req.data
res.success = True
return res

def realsense_body_pose_ar_callback(self, req, res):
self.get_logger().info(f"Realsense body pose AR service: {req.data}")
self.realsense_body_pose_ar = req.data
res.success = True
return res

def gripper_depth_ar_callback(self, req, res):
self.get_logger().info(f"Gripper depth AR service: {req.data}")
self.gripper_depth_ar = req.data
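
The overlay code above only requires that /human_estimates/latest_body_pose carry a JSON object mapping landmark names to [x, y, z] positions that project through the RealSense projection matrix, and it drops messages older than timeout_sec (2.0 s by default). A hand-rolled test publisher illustrating that contract; the landmark names are hypothetical (the real ones come from the detect_body_landmarks node):

import json
import time

import rclpy
from rclpy.node import Node
from rclpy.qos import QoSProfile, ReliabilityPolicy
from std_msgs.msg import String

rclpy.init()
node = Node("body_pose_test_publisher")  # hypothetical node name
publisher = node.create_publisher(
    String,
    "/human_estimates/latest_body_pose",
    QoSProfile(depth=1, reliability=ReliabilityPolicy.BEST_EFFORT),
)
time.sleep(1.0)  # crude wait for discovery, since the QoS is best-effort
msg = String()
# Hypothetical landmark names; positions are [x, y, z] in the camera frame.
msg.data = json.dumps({"nose": [0.02, -0.31, 1.40], "left_shoulder": [0.18, -0.12, 1.50]})
publisher.publish(msg)  # must be rendered within 2.0 s or it is treated as stale
rclpy.shutdown()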
@@ -611,6 +717,11 @@ def realsense_rgb_cb(
with self.latest_realsense_rgb_image_lock:
self.latest_realsense_rgb_image = rgb_ros_image

def realsense_body_landmarks_cb(self, msg):
with self.latest_body_landmarks_str_lock:
self.latest_body_landmarks_str_recv_time = self.get_clock().now()
self.latest_body_landmarks_str = msg.data

def process_realsense_image(
self,
rgb_ros_image: Union[CompressedImage, Image],
@@ -619,12 +730,22 @@ def process_realsense_image(
if isinstance(rgb_ros_image, CompressedImage):
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
for image_config_name in self.realsense_params:
-# Overlay the pointcloud *before* cropping/masking/rotating the image,
+# Perform overlays *before* cropping/masking/rotating the image,
# for consistent (de)projection and transformations
if self.realsense_depth_ar:
with self.latest_realsense_depth_image_lock:
depth_msg = self.latest_realsense_depth_image
image = self.overlay_realsense_depth_ar(depth_msg, image)
if self.realsense_body_pose_ar:
with self.latest_body_landmarks_str_lock:
body_landmarks_str = self.latest_body_landmarks_str
body_landmarks_str_recv_time = (
self.latest_body_landmarks_str_recv_time
)
if body_landmarks_str is not None and len(body_landmarks_str) > 0:
image = self.overlay_realsense_body_pose_ar(
body_landmarks_str, body_landmarks_str_recv_time, image
)
img = self.configure_images(image, self.realsense_params[image_config_name])
# if self.aruco_markers: img = self.aruco_markers_callback(marker_msg, img)
self.realsense_images[image_config_name] = img
@@ -838,20 +959,14 @@ def overlay_gripper_depth_ar(

# Convert filtered points to (u, v) indices
pts_in_range = pcl_cloud_filtered.to_array()
-pts_in_range = np.hstack((pts_in_range, np.ones((pts_in_range.shape[0], 1))))
-coords = np.matmul(self.gripper_P, np.transpose(pts_in_range)) # 3 x N
-u_idx = (coords[0, :] / coords[2, :]).astype(int)
-v_idx = (coords[1, :] / coords[2, :]).astype(int)
-uv = np.vstack((u_idx, v_idx)).T # N x 2
-uv_dedup = np.unique(uv, axis=0)
-in_bounds_idx = np.where(
-(uv_dedup[:, 0] >= 0)
-& (uv_dedup[:, 0] < image.shape[1])
-& (uv_dedup[:, 1] >= 0)
-& (uv_dedup[:, 1] < image.shape[0])
-)
+uv_mask = project_points_to_pixels(
+pts_in_range,
+self.gripper_P,
+width=image.shape[1],
+height=image.shape[0],
+)
-u_mask = uv_dedup[in_bounds_idx][:, 0]
-v_mask = uv_dedup[in_bounds_idx][:, 1]
+u_mask = uv_mask[:, 0]
+v_mask = uv_mask[:, 1]

# Overlay the pixels in the robot's reach
overlay_mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)
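
Both overlays now delegate the projection boilerplate to project_points_to_pixels from stretch_web_teleop_helpers.conversions, whose body is not part of this diff. A sketch of what the helper plausibly does, reconstructed from the inline code it replaces (the real implementation may differ in detail):

import numpy as np
import numpy.typing as npt

def project_points_to_pixels(
    points: npt.NDArray[np.float64],  # N x 3 points in the camera frame
    P: npt.NDArray[np.float64],       # 3 x 4 camera projection matrix
    width: int,
    height: int,
) -> npt.NDArray[np.int_]:
    # Convert to homogeneous coordinates: N x 3 -> N x 4
    points_h = np.hstack((points, np.ones((points.shape[0], 1))))
    coords = P @ points_h.T  # 3 x N
    u = (coords[0, :] / coords[2, :]).astype(int)
    v = (coords[1, :] / coords[2, :]).astype(int)
    uv = np.unique(np.vstack((u, v)).T, axis=0)  # N x 2, deduplicated
    in_bounds = (
        (uv[:, 0] >= 0) & (uv[:, 0] < width) & (uv[:, 1] >= 0) & (uv[:, 1] < height)
    )
    return uv[in_bounds]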
(The diffs for the remaining changed files did not load and are not shown here.)