From 5979f5facd1119acde769f7668798d4e0ec747c2 Mon Sep 17 00:00:00 2001
From: Gary Lvov <glvov@theaiinstitute.com>
Date: Fri, 13 Sep 2024 15:06:58 -0400
Subject: [PATCH] Add camera to robot calibration support

---
 spot_wrapper/calibration/README.md            |  16 +-
 ...te_multistereo_cameras_with_charuco_cli.py |   9 +-
 .../calibrate_spot_hand_camera_cli.py         | 146 ++++++++++--------
 spot_wrapper/calibration/calibration_util.py  | 134 ++++++++++++----
 .../spot_in_hand_camera_calibration.py        |   5 +-
 5 files changed, 205 insertions(+), 105 deletions(-)

diff --git a/spot_wrapper/calibration/README.md b/spot_wrapper/calibration/README.md
index 2208ef0..361bbff 100644
--- a/spot_wrapper/calibration/README.md
+++ b/spot_wrapper/calibration/README.md
@@ -27,7 +27,11 @@ camera calibration to **solve for the intrinsic and extrinsic parameters for two
 cameras mounted in a fixed pose relative to each other on a robot**
 based off of moving the robot to view a Charuco target from different poses. If you already 
 have an existing dataset of synchronized stereo (or multi-stereo) photos of a Charuco target from different viewpoints, 
-you can use the CLI tool to compute the intrinsic/extrinsic parameters. Additionally,
+you can use the CLI tool to compute the intrinsic/extrinsic parameters. 
+
+Additionally, if you have saved the poses the images are taken at, you can also calibrate
+the camera to robot extrinsic (eye-to-hand registration).
+
 the CLI tool's saving capability allows to store multiple unique calibration runs in one configuration file
 with calibration metadata, to document related runs with different setups or parameters.
 
@@ -205,8 +209,15 @@ existing_dataset/
 │   ├── 0.png # taken at viewpoint 0
 │   ├── 1.png
 │   └── 2.png
+├── poses/ # optional, for camera to robot cal
+│   ├── 0.npy # base to planning frame 4x4 homgenous transform at viewpoint 0
+│   ├── 1.npy
+│   └── 2.npy
 ```
 
+Optionally, you can also include pose information, to find the camera to robot extrinsic.
+
+
 To see all possible arguments for calibration, please run  ```python3 calibrate_multistereo_cameras_with_charuco_cli.py -h```.
     Many parameters such as board proportions and Agmruco dictionary are customizable.
 
@@ -240,6 +251,9 @@ default:
       0:  # reference camera index, second sublevel
         R: flattened_3x3_rotation_matrix_from_primary_to_reference_camera_now_9x1
         T: 3x1_translation_matrix_from_primary_to_reference_camera
+      planning_frame: # the planning frame (in Spot's case, the hand)
+       R: flattened_3x3_rotation_matrix_from_primary_to_robot_planning_frame_now_9x1
+       T: 3x1_translation_matrix_from_primary_to_robot_planning_frame
   run_param:
     num_images: 729
     timestamp: '2024-08-19 03:43:06'
diff --git a/spot_wrapper/calibration/calibrate_multistereo_cameras_with_charuco_cli.py b/spot_wrapper/calibration/calibrate_multistereo_cameras_with_charuco_cli.py
index 9e1314b..82452d9 100644
--- a/spot_wrapper/calibration/calibrate_multistereo_cameras_with_charuco_cli.py
+++ b/spot_wrapper/calibration/calibrate_multistereo_cameras_with_charuco_cli.py
@@ -13,7 +13,7 @@
     create_charuco_board,
     create_ideal_charuco_image,
     detect_charuco_corners,
-    load_images_from_path,
+    load_dataset_from_path,
     multistereo_calibration_charuco,
     save_calibration_parameters,
 )
@@ -64,6 +64,7 @@ def calibration_helper(
     args: argparse.Namespace,
     charuco: cv2.aruco_CharucoBoard,
     aruco_dict: cv2.aruco_Dictionary,
+    poses: np.ndarray
 ):
     logger.warning(
         f"Calibrating from {len(images)} images.. for every "
@@ -84,6 +85,7 @@ def calibration_helper(
         desired_stereo_pairs=args.stereo_pairs,
         charuco_board=charuco,
         aruco_dict=aruco_dict,
+        poses=poses
     )
     logger.info(f"Finished script, obtained {calibration}")
     logger.info("Saving calibration param")
@@ -115,8 +117,9 @@ def main():
     logger.info(f"Loading images from {args.data_path}")
 
     if args.data_path is not None:
-        images = load_images_from_path(args.data_path)
-        calibration_helper(images=images, args=args, charuco=charuco, aruco_dict=aruco_dict)
+        images, poses = load_dataset_from_path(args.data_path)
+        calibration_helper(images=images, args=args, charuco=charuco, aruco_dict=aruco_dict,
+                           poses=poses)
     else:
         logger.warning("Could not load any images to calibrate from, supply --data_path")
 
diff --git a/spot_wrapper/calibration/calibrate_spot_hand_camera_cli.py b/spot_wrapper/calibration/calibrate_spot_hand_camera_cli.py
index 4803396..f0b2553 100644
--- a/spot_wrapper/calibration/calibrate_spot_hand_camera_cli.py
+++ b/spot_wrapper/calibration/calibrate_spot_hand_camera_cli.py
@@ -13,7 +13,7 @@
 )
 from spot_wrapper.calibration.calibration_util import (
     get_multiple_perspective_camera_calibration_dataset,
-    load_images_from_path,
+    load_dataset_from_path,
 )
 from spot_wrapper.calibration.spot_in_hand_camera_calibration import (
     SpotInHandCalibration,
@@ -25,9 +25,29 @@
 
 logger = logging.getLogger(__name__)
 
+def create_robot(args, charuco, aruco_dict): # Replace with your AutomaticCameraCalibrationRobot
+    in_hand_bot = SpotInHandCalibration(args.ip, args.username, args.password)
+    in_hand_bot._set_localization_param(
+        charuco_board=charuco,
+        aruco_dict=aruco_dict,
+        resolution=(
+            args.spot_rgb_photo_width,
+            args.spot_rgb_photo_height,
+        ),
+    )
+    try:
+        args.robot_name = in_hand_bot.robot.get_cached_robot_id().nickname
+    except Exception:
+        logger.warning("Could not determine cached robot nickname, saving name as unknown")
+        args.robot_name = "unknown"
+    return in_hand_bot, args
+
+def create_robot_parser():
+    parser = calibrate_robot_cli()
+    return spot_cli(parser=parser) # Replace with robot specific parsing
 
 def spot_main() -> None:
-    parser = spot_cli(calibrator_cli())
+    parser = create_robot_parser()
     args, aruco_dict, charuco = setup_calibration_param(parser)
 
     if not args.from_data:
@@ -35,25 +55,11 @@ def spot_main() -> None:
         logger.warning("HOLD Ctrl + C NOW TO CANCEL")
         logger.warning("The calibration board should be about a meter away with nothing within a meter of the robot.")
         logger.warning("The robot should NOT be docked, and nobody should have robot control")
-        sleep(5)
-
-        in_hand_bot = SpotInHandCalibration(args.ip, args.username, args.password)
-        in_hand_bot._set_localization_param(
-            charuco_board=charuco,
-            aruco_dict=aruco_dict,
-            resolution=(
-                args.spot_rgb_photo_width,
-                args.spot_rgb_photo_height,
-            ),
-        )
+        #sleep(5)
 
-        try:
-            args.robot_name = in_hand_bot.robot.get_cached_robot_id().nickname
-        except Exception:
-            logger.warning("Could not determine cached robot nickname, saving name as unknown")
-            args.robot_name = "unknown"
+        in_hand_bot, args = create_robot(args, charuco=charuco, aruco_dict=aruco_dict)
 
-        images = get_multiple_perspective_camera_calibration_dataset(
+        images, poses = get_multiple_perspective_camera_calibration_dataset(
             auto_cam_cal_robot=in_hand_bot,
             max_num_images=args.max_num_images,
             distances=np.arange(*args.dist_from_board_viewpoint_range),
@@ -67,42 +73,16 @@ def spot_main() -> None:
         )
     else:
         logger.info(f"Loading images from {args.data_path}")
-        images = load_images_from_path(args.data_path)
+        images, poses = load_dataset_from_path(args.data_path)
 
-    calibration_helper(images=images, args=args, charuco=charuco, aruco_dict=aruco_dict)
+    calibration_helper(images=images, args=args, charuco=charuco, aruco_dict=aruco_dict,
+                       poses=poses)
 
 
-def spot_cli(parser=argparse.ArgumentParser) -> argparse.ArgumentParser:
+def calibrate_robot_cli(parser:argparse.ArgumentParser = None) -> argparse.ArgumentParser:
     if parser is None:
         parser = calibrator_cli()
-    parser.add_argument(
-        "--ip",
-        "-i",
-        "-ip",
-        dest="ip",
-        type=str,
-        help="The IP address of the Robot to calibrate",
-        required=True,
-    )
-    parser.add_argument(
-        "--user",
-        "-u",
-        "--username",
-        dest="username",
-        type=str,
-        help="Robot Username",
-        required=True,
-    )
-    parser.add_argument(
-        "--pass",
-        "-pw",
-        "--password",
-        dest="password",
-        type=str,
-        help="Robot Password",
-        required=True,
-    )
-
+   
     parser.add_argument(
         "--dist_from_board_viewpoint_range",
         "-dfbvr",
@@ -167,22 +147,6 @@ def spot_cli(parser=argparse.ArgumentParser) -> argparse.ArgumentParser:
         help="How long to wait after movement to take a picture; don't want motion blur",
     )
 
-    parser.add_argument(
-        "--spot_rgb_photo_width",
-        "-dpw",
-        type=int,
-        default=640,
-        dest="spot_rgb_photo_width",
-        help="What resolution use for Spot's RGB Hand Camera (width). Currently, only 640 and 1920 are supported",
-    )
-
-    parser.add_argument(
-        "--spot_rgb_photo_height",
-        "-dph",
-        type=int,
-        default=480,
-        help="What resolution use for Spot's RGB Hand Camera (width). Currently, only 480 and 1080 are supported",
-    )
     # path things
     parser.add_argument(
         "--save_data",
@@ -202,6 +166,56 @@ def spot_cli(parser=argparse.ArgumentParser) -> argparse.ArgumentParser:
 
     return parser
 
+def spot_cli(parser: argparse.ArgumentParser = None) -> argparse.ArgumentParser:
+    if parser is None:
+        parser = calibrate_robot_cli()
+
+    parser.add_argument(
+        "--ip",
+        "-i",
+        "-ip",
+        dest="ip",
+        type=str,
+        help="The IP address of the Robot to calibrate",
+        required=True,
+    )
+    parser.add_argument(
+        "--user",
+        "-u",
+        "--username",
+        dest="username",
+        type=str,
+        help="Robot Username",
+        required=True,
+    )
+    parser.add_argument(
+        "--pass",
+        "-pw",
+        "--password",
+        dest="password",
+        type=str,
+        help="Robot Password",
+        required=True,
+    )
+
+    parser.add_argument(
+        "--spot_rgb_photo_width",
+        "-dpw",
+        type=int,
+        default=640,
+        dest="spot_rgb_photo_width",
+        help="What resolution use for Spot's RGB Hand Camera (width). Currently, only 640 and 1920 are supported",
+    )
+
+    parser.add_argument(
+        "--spot_rgb_photo_height",
+        "-dph",
+        type=int,
+        default=480,
+        help="What resolution use for Spot's RGB Hand Camera (width). Currently, only 480 and 1080 are supported",
+    )
+    return parser
+
 
 if __name__ == "__main__":
     spot_main()
diff --git a/spot_wrapper/calibration/calibration_util.py b/spot_wrapper/calibration/calibration_util.py
index 19951c1..d12de07 100644
--- a/spot_wrapper/calibration/calibration_util.py
+++ b/spot_wrapper/calibration/calibration_util.py
@@ -110,7 +110,7 @@ def get_multiple_perspective_camera_calibration_dataset(
     settle_time: float = 0.1,
     data_path: str = os.path.expanduser("~"),
     save_data: Optional[bool] = True,
-) -> np.ndarray:
+) -> Tuple[np.ndarray, np.ndarray]:
     """
     Move the robot around to look at the calibration target from different viewpoints,
     capturing time-synchronized
@@ -168,6 +168,7 @@ def get_multiple_perspective_camera_calibration_dataset(
     calibration_images = []
     logger.info("Beginning Calibration")
     idx = 0
+    poses = []
     while idx < max_num_images and idx < len(viewpoints):
         logger.info(f"Visiting viewpoint {idx} of {min(len(viewpoints), max_num_images)}")
         viewpoint = viewpoints[idx]
@@ -176,6 +177,10 @@ def get_multiple_perspective_camera_calibration_dataset(
             origin_t_planning_frame=primed_pose,
             duration_sec=0.1,
         )
+        # base_to_hand1 = initial_pose
+        # base_to_hand2 = new_pose
+        # hand1_to_hand2 = np.linalg.inv(base_to_hand1) * base_to_hand2
+        poses.append(new_pose)
         logger.info("At viewpoint, waiting to settle")
         sleep(settle_time)
         images = auto_cam_cal_robot.capture_images()
@@ -191,9 +196,10 @@ def get_multiple_perspective_camera_calibration_dataset(
                     os.path.join(data_path, str(jdx), f"{idx}.png"),
                     image,
                 )
+            np.save(os.path.join(data_path, "poses", f"{idx}.npy"), new_pose)
     calibration_images = np.array(calibration_images, dtype=object)
     auto_cam_cal_robot.shutdown()
-    return np.array(calibration_images, dtype=object)
+    return (np.array(calibration_images, dtype=object), poses)
 
 
 def multistereo_calibration_charuco(
@@ -203,6 +209,7 @@ def multistereo_calibration_charuco(
     aruco_dict: cv2.aruco_Dictionary = SPOT_DEFAULT_ARUCO_DICT,
     camera_matrices: Optional[Dict] = {},
     dist_coeffs: Optional[Dict] = {},
+    poses: Union[np.ndarray, None] = None
 ) -> Dict:
     """
     Calibrates the intrinsic and extrinsic parameters for multiple stereo camera pairs
@@ -318,6 +325,7 @@ def multistereo_calibration_charuco(
                     dist_coeffs_origin=dist_coeffs[origin_camera_idx],
                     camera_matrix_reference=camera_matrices[reference_camera_idx],
                     dist_coeffs_reference=dist_coeffs[reference_camera_idx],
+                    poses=poses
                 )
                 camera_matrices[origin_camera_idx] = stereo_dict["camera_matrix_origin"]
                 dist_coeffs[origin_camera_idx] = stereo_dict["dist_coeffs_origin"]
@@ -536,7 +544,7 @@ def calibrate_single_camera_charuco(
     charuco_board: cv2.aruco_CharucoBoard = SPOT_DEFAULT_CHARUCO,
     aruco_dict: cv2.aruco_Dictionary = SPOT_DEFAULT_ARUCO_DICT,
     debug_str: str = "",
-) -> Tuple[np.ndarray, np.ndarray]:
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
     """
     Calibrate a monocular camera from a charuco board.
 
@@ -554,12 +562,13 @@ def calibrate_single_camera_charuco(
         ValueError: Not enough data to calibrate
 
     Returns:
-        Tuple[np.ndarray, np.ndarray]: the camera matrix and distortion coefficients
+        Tuple[np.ndarray, np.ndarray]: the camera matrix, distortion coefficients,
+        rotation matrices, tvecs
     """
     all_corners = []
     all_ids = []
     img_size = None
-
+    #used_image_ids = []
     for idx, img in enumerate(images):
         if img_size is None:
             img_size = img.shape[:2][::-1]
@@ -569,19 +578,21 @@ def calibrate_single_camera_charuco(
         if charuco_corners is not None and len(charuco_corners) >= 8:
             all_corners.append(charuco_corners)
             all_ids.append(charuco_ids)
+            #used_image_ids.append(idx)
         else:
             logger.warning(f"Not enough corners detected in image index {idx} {debug_str}; ignoring")
 
     if len(all_corners) > 1:
         obj_points_all = []
         img_points = []
+        ids = []
         for corners, ids in zip(all_corners, all_ids):
             obj_points = get_charuco_board_object_points(charuco_board, ids)
             obj_points_all.append(obj_points)
             img_points.append(corners)
         logger.info(f"About to process {len(obj_points_all)} points for single camera cal | {debug_str}")
         # Long term improvement: could pull tvec for use to localize camera relative to robot?
-        _, camera_matrix, dist_coeffs, _, _ = cv2.calibrateCamera(  # use LU flag critical for speed
+        _, camera_matrix, dist_coeffs, rvecs, tvecs = cv2.calibrateCamera(  # use LU flag critical for speed
             obj_points_all,
             img_points,
             img_size,
@@ -589,8 +600,8 @@ def calibrate_single_camera_charuco(
             None,
             flags=cv2.CALIB_USE_LU,
         )
-
-        return camera_matrix, dist_coeffs
+        rmats = np.array([cv2.Rodrigues(rvec)[0] for rvec in rvecs])
+        return camera_matrix, dist_coeffs, rmats, tvecs
     else:
         raise ValueError(f"Not enough valid points to individually calibrate {debug_str}")
 
@@ -604,6 +615,7 @@ def stereo_calibration_charuco(
     dist_coeffs_origin: Optional[np.ndarray] = None,
     camera_matrix_reference: Optional[np.ndarray] = None,
     dist_coeffs_reference: Optional[np.ndarray] = None,
+    poses: Union[np.ndarray, None] = None
 ) -> Dict:
     """
     Perform a stereo calibration from a set of synchronized stereo images of a charuco calibration
@@ -635,20 +647,24 @@ def stereo_calibration_charuco(
     """
     if camera_matrix_origin is None or dist_coeffs_origin is None:
         logger.info("Calibrating Origin Camera individually")
-        camera_matrix_origin, dist_coeffs_origin = calibrate_single_camera_charuco(
+        (camera_matrix_origin, dist_coeffs_origin, 
+         rmats_origin, tvecs_origin) = (
+            calibrate_single_camera_charuco(
             images=origin_images,
             charuco_board=charuco_board,
             aruco_dict=aruco_dict,
             debug_str="for origin camera",
-        )
+        ))
     if camera_matrix_reference is None or dist_coeffs_origin is None:
         logger.info("Calibrating reference Camera individually ")
-        camera_matrix_reference, dist_coeffs_reference = calibrate_single_camera_charuco(
+        (camera_matrix_reference, dist_coeffs_reference, 
+         rmats_reference, tvecs_reference) = (
+            calibrate_single_camera_charuco(
             images=reference_images,
             charuco_board=charuco_board,
             aruco_dict=aruco_dict,
             debug_str="for reference camera",
-        )
+        ))
 
     if camera_matrix_origin is None or camera_matrix_reference is None:
         raise ValueError("Could not calibrate one of the cameras as desired")
@@ -658,8 +674,14 @@ def stereo_calibration_charuco(
     all_ids_origin = []
     all_ids_reference = []
     img_size = None
+    
+    no_poses = poses is None
+    if no_poses:
+        poses = [x for x in range(len(origin_images))]
+    else:
+        filtered_poses = []
 
-    for origin_img, reference_img in zip(origin_images, reference_images):
+    for (origin_img, reference_img, pose) in zip(origin_images, reference_images, poses):
         if img_size is None:
             img_size = origin_img.shape[:2][::-1]
 
@@ -667,7 +689,9 @@ def stereo_calibration_charuco(
         reference_charuco_corners, reference_charuco_ids = detect_charuco_corners(
             reference_img, charuco_board, aruco_dict
         )
-
+        
+        if not no_poses:
+            filtered_poses.append(pose)
         if origin_charuco_corners is not None and reference_charuco_corners is not None:
             all_corners_origin.append(origin_charuco_corners)
             all_corners_reference.append(reference_charuco_corners)
@@ -698,7 +722,6 @@ def stereo_calibration_charuco(
 
         if len(obj_points_all) > 0:
             logger.info(f"Collected {len(obj_points_all)} shared point sets for stereo calibration.")
-            # logger.info(f"{np.array(obj_points_all).shape = } {np.array()}")
             _, _, _, _, _, R, T, _, _ = cv2.stereoCalibrate(
                 obj_points_all,
                 img_points_origin,
@@ -716,8 +739,7 @@ def stereo_calibration_charuco(
                 flags=cv2.CALIB_USE_LU,
             )
             logger.info("Stereo calibration completed.")
-
-            return {
+            result_dict = {
                 "dist_coeffs_origin": dist_coeffs_origin,
                 "camera_matrix_origin": camera_matrix_origin,
                 "image_dim_origin": np.array(origin_images[0].shape[:2]),
@@ -727,6 +749,40 @@ def stereo_calibration_charuco(
                 "R": R,
                 "T": T,
             }
+
+            if not no_poses:
+                logger.info("Attempting hand-eye calibation....")
+                #filtered_poses = np.array([np.linalg.inv(pose) for pose in filtered_poses])
+                filtered_poses = np.array(filtered_poses)
+                # Use the filtered poses for the target-to-camera transformation
+                R_gripper2base = np.array([pose[:3, :3] for pose in filtered_poses])
+                t_gripper2base = np.array([pose[:3, 3] for pose in filtered_poses])
+
+                R_handeye, T_handeye = cv2.calibrateHandEye(
+                    R_gripper2base=R_gripper2base,
+                    t_gripper2base=t_gripper2base,
+                    R_target2cam=rmats_origin,
+                    t_target2cam=tvecs_origin,
+                    #method=cv2.CALIB_HAND_EYE_DANIILIDIS
+                    method=cv2.CALIB_HAND_EYE_HORAUD
+                )
+                robot_to_cam = np.eye(4)  # Initialize 4x4 identity matrix
+                robot_to_cam[:3, :3] = R_handeye  # Populate rotation
+                robot_to_cam[:3, 3] = T_handeye.flatten()  # Populate translation
+
+                # Invert the homogeneous matrix
+                cam_to_robot = np.linalg.inv(robot_to_cam)
+
+                # Extract rotation and translation from the inverted matrix
+                camera_to_robot_R = cam_to_robot[:3, :3]  # Extract rotation
+                camera_to_robot_T = cam_to_robot[:3, 3]  # Extract translation
+                logger.info("Hand-eye calibration completed.")
+
+                # Add the hand-eye calibration results to the final result dictionary
+                result_dict["R_handeye"] = camera_to_robot_R
+                result_dict["T_handeye"] = camera_to_robot_T
+            
+            return result_dict
         else:
             raise ValueError("Not enough valid points for stereo calibration.")
     else:
@@ -942,13 +998,16 @@ def create_calibration_save_folders(path: str, num_folders: int) -> None:
 
             logger.info(f"Creating image folder at {cam_path}")
             os.makedirs(cam_path, exist_ok=True)
+        os.makedirs(os.path.join(path, "poses"))
         logger.info(f"Done creating {num_folders} folders.")
 
 
-def load_images_from_path(path: str) -> np.ndarray:
+def load_dataset_from_path(path: str) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
     """
     Load image dataset from path in a way that's compatible with multistereo_calibration_charuco.
 
+    Also, load the poses if they are available.
+
     See Using the CLI Tool To Calibrate On an Existing Dataset section in the README
     to see the expected folder/data structure for this method to work
 
@@ -961,34 +1020,38 @@ def load_images_from_path(path: str) -> np.ndarray:
     Returns:
         np.ndarray: The image dataset
     """
+    alpha_numeric = lambda x: re.search(r"(\d+)(?=\D*$)", x).group() if \
+        re.search(r"(\d+)(?=\D*$)", x) else x
+
     # List all directories within the given path and sort them
     dirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
     if len(dirs) == 0:
         raise ValueError("No sub-dirs found in datapath from which to load images.")
-    dirs = sorted(dirs, key=lambda x: int(x))  # Assuming dir names are integers like "0", "1", etc.
+    dirs = sorted(dirs, key=alpha_numeric)  # Assuming dir names are integers like "0", "1", etc.
 
     # Initialize an empty list to store images
     images = []
-
+    poses = None
+   
     for dir_name in dirs:
-        # Construct the glob pattern for the images in the current directory
-        pic_path_match = os.path.join(path, dir_name, "*")
-        # Get a sorted list of image files based on the numerical order in their filenames
-        image_files = sorted(
-            glob(pic_path_match),
-            key=lambda x: int(re.search(r"(\d+)(?=\D*$)", x).group()),
-        )
-        # Read images and store them
-        images.append([cv2.imread(fn) for fn in image_files])
+        path_match = os.path.join(path, dir_name, "*")
+        files = sorted(
+                glob(path_match),
+                key=alpha_numeric,
+            )
+        if dir_name != "poses":
+            # Read images and store them
+            images.append([cv2.imread(fn) for fn in files])
+        else:
+            poses = np.array([np.load(fn) for fn in files])
 
     # Convert the list of lists into a NumPy array
     # The array shape will be (number_of_images, number_of_directories)
     images = np.array(images, dtype=object)
-
     # Transpose the array so that you can access it as images[:, axis]
     images = np.transpose(images, (1, 0))
 
-    return images
+    return images, poses
 
 
 def save_calibration_parameters(
@@ -1042,7 +1105,7 @@ def process_data_with_nested_dictionaries(
                     "image_dim": flatten_matrix(value["image_dim_reference"]),
                 }
 
-            # Store the rotation and translation in a nested dictionary structure
+            # Store the stereo calibration rotation and translation
             if origin_cam not in relations:
                 relations[origin_cam] = {}
             relations[origin_cam][reference_cam] = {
@@ -1050,6 +1113,13 @@ def process_data_with_nested_dictionaries(
                 "T": flatten_matrix(value["T"]),
             }
 
+            # Now add R_handeye and T_handeye if they exist in the data
+            if "R_handeye" in value and "T_handeye" in value:
+                relations[origin_cam]["planning_frame"] = {
+                    "R": flatten_matrix(value["R_handeye"]),
+                    "T": flatten_matrix(value["T_handeye"]),
+                }
+
         return cameras, relations
 
     # Handle empty or missing tag
diff --git a/spot_wrapper/calibration/spot_in_hand_camera_calibration.py b/spot_wrapper/calibration/spot_in_hand_camera_calibration.py
index 0adc7d8..d6e1ed4 100644
--- a/spot_wrapper/calibration/spot_in_hand_camera_calibration.py
+++ b/spot_wrapper/calibration/spot_in_hand_camera_calibration.py
@@ -42,7 +42,7 @@
     convert_camera_t_viewpoint_to_origin_t_planning_frame,
     est_camera_t_charuco_board_center,
 )
-
+from time import sleep
 logging.basicConfig(
     level=logging.INFO,
 )
@@ -230,8 +230,7 @@ def grab_state_as_transform() -> np.ndarray:
             command_id,
             timeout_sec=duration_sec * 2,
         )
-
-        return (initial_pose, new_pose)  # second value is new value
+        return (initial_pose, grab_state_as_transform())  # second value is new value
 
     def shutdown(self) -> None:
         stow_arm_command = RobotCommandBuilder.arm_stow_command()