How do you calculate the optical flow from depth and pose? I read the code you mentioned in data_type.md, but I always get an error. Here is my code:
```python
import os

import cv2
import numpy as np
import torch
from PIL import Image
from scipy.spatial.transform import Rotation

import vis

# Rotations between the camera frame (EDN: x-right, y-down, z-forward)
# and the NED motion frame (x-forward, y-right, z-down).
T_EDN2NED = np.array(
    [
        [0.0, 0.0, 1.0],
        [1.0, 0.0, 0.0],
        [0.0, 1.0, 0.0],
    ],
    dtype=float,
)
T_NED2EDN = np.array(
    [
        [0.0, 1.0, 0.0],
        [0.0, 0.0, 1.0],
        [1.0, 0.0, 0.0],
    ],
    dtype=float,
)


def convert_depth_image_to_camera_coordinates(depth: np.ndarray, intrinsic: np.ndarray):
    """Back-project a depth image into 3D camera coordinates.

    Returns a 3xN array of camera-frame points and the 2xHxW pixel grid.
    """
    wIdx = np.linspace(0, depth.shape[1] - 1, depth.shape[1])
    hIdx = np.linspace(0, depth.shape[0] - 1, depth.shape[0])
    u, v = np.meshgrid(wIdx, hIdx)
    u = u.astype(float)
    v = v.astype(float)
    focal_x = intrinsic[0, 0]
    focal_y = intrinsic[1, 1]
    x = (u - intrinsic[0, 2]) * depth / focal_x
    y = (v - intrinsic[1, 2]) * depth / focal_y
    coor = np.zeros((3, depth.size), dtype=float)
    coor[0, :] = x.reshape((1, -1))
    coor[1, :] = y.reshape((1, -1))
    coor[2, :] = depth.reshape((1, -1))
    # Rearrange u and v into a two-channel matrix.
    uv = np.stack([u, v], axis=0)
    return coor, uv


def convert_camera_coordinates_to_image_coordinates(
    coor: np.ndarray, intrinsics: np.ndarray
):
    """Project 3D camera-frame points onto the image plane.

    coor: A 3xN numpy array of camera-frame points.
    return: A 2xN numpy array of pixel coordinates.
    """
    x = intrinsics.dot(coor)
    x = x / x[2, :]
    return x[0:2, :]


def calculate_optical_flow_from_depth_and_camera(
    depth_j: np.ndarray,
    pose_j2i: np.ndarray,
    intrinsic: np.ndarray,
    R_cam2motion: np.ndarray = np.eye(3),
):
    """
    Calculate the optical flow of image 1 with respect to image 0, i.e., how
    the pixels of image 1 move when they are projected onto the image plane
    of image 0.

    depth_j: Depth image of camera 1.
    pose_j2i: The pose of camera 1 measured in frame 0 (camera 0's reference
        frame). A 4x4 numpy matrix.
    intrinsic: A 3x3 numpy matrix.

    NOTE: The 3D reference frames of cameras 0 and 1 have their z-axis
    pointing forward. This is NOT the same way in which AirSim represents a
    3D point and orientation with respect to its global frame, which in turn
    has its z-axis pointing downwards.
    """
    R_motion2cam = np.linalg.inv(R_cam2motion)
    # Calculate the 3D coordinates of the points in reference frame 1.
    coor_j, uv_j = convert_depth_image_to_camera_coordinates(depth_j, intrinsic)
    # Transform the points into reference frame 0.
    R = pose_j2i[:3, :3]
    T = pose_j2i[:3, 3:4]
    coor_j_ned = R_cam2motion.dot(coor_j)
    coor_i_ned = R.dot(coor_j_ned) + T
    coor_i = R_motion2cam.dot(coor_i_ned)
    # Re-project into image coordinates and take the pixel displacement.
    uv_i = convert_camera_coordinates_to_image_coordinates(coor_i, intrinsic)
    uv_i = uv_i.reshape((2, depth_j.shape[0], depth_j.shape[1]))
    dudv = uv_j - uv_i
    return dudv


def create_transform_from_xyzq(t, q):
    """Build a 4x4 homogeneous transform from a translation and an xyzw quaternion."""
    R = Rotation.from_quat(q, scalar_first=False).as_matrix()
    T = np.eye(4)
    T[:3, :3] = R
    T[:3, 3] = t
    return T


CAMERA_K = np.array([[320, 0, 320], [0, 320, 240], [0, 0, 1]])
data_root = "/home/liucy/CMIM/data_gen/neighborhood_sample_P002/P002"


def run(i, j, check_gt=False):
    depth_i = np.load(os.path.join(data_root, "depth_left", f"{i:06d}_left_depth.npy"))
    depth_j = np.load(os.path.join(data_root, "depth_left", f"{j:06d}_left_depth.npy"))
    image_i = cv2.imread(os.path.join(data_root, "image_left", f"{i:06d}_left.png"))
    image_i = cv2.cvtColor(image_i, cv2.COLOR_BGR2RGB)
    image_j = cv2.imread(os.path.join(data_root, "image_left", f"{j:06d}_left.png"))
    image_j = cv2.cvtColor(image_j, cv2.COLOR_BGR2RGB)

    # Build the relative pose between the two frames from the per-frame poses.
    pose_data = np.loadtxt(os.path.join(data_root, "pose_left.txt"))
    pose_i = pose_data[i]
    pose_j = pose_data[j]
    pose_i = create_transform_from_xyzq(pose_i[:3], pose_i[3:])
    pose_j = create_transform_from_xyzq(pose_j[:3], pose_j[3:])
    pose_j2i = np.matmul(np.linalg.inv(pose_i), pose_j)
    pose_i2j = np.matmul(np.linalg.inv(pose_j), pose_i)
    # Alternative composition I also tried:
    """
    pose_j2i = np.matmul(pose_i, np.linalg.inv(pose_j))
    pose_i2j = np.matmul(pose_j, np.linalg.inv(pose_i))
    """

    # NOTE: passing depth_i here although the function expects the depth of
    # the second frame (depth_j).
    flow = calculate_optical_flow_from_depth_and_camera(
        depth_i, pose_j2i, CAMERA_K, R_cam2motion=T_EDN2NED
    )
    flow = flow.transpose(1, 2, 0)

    if check_gt:
        # Compare against the ground-truth flow shipped with the dataset.
        flow_gt = np.load(os.path.join(data_root, "flow", f"{i:06d}_{j:06d}_flow.npy"))
        delta_flow = flow_gt - flow
        print(f"delta_flow mean: {delta_flow.mean()}")
        print(f"delta_flow max: {delta_flow.max()}")
        print(f"delta_flow min: {delta_flow.min()}")
        print(f"delta_flow std: {delta_flow.std()}")
        print(f"delta_flow median: {np.median(delta_flow)}")
        print("------------------------------------------")
        return

    # Visualize the flow and warp both images with it for a qualitative check.
    flow = torch.from_numpy(flow).unsqueeze(0).float()
    flow = vis.convert_uv_to_norm_grid(flow, align_corners=False)
    flow_img = vis.visualize_flow_as_vector_field(flow[0], step=24)
    flow_img.save(f"img/flow_{i}_to_{j}.png")

    _image_i = torch.from_numpy(image_i).permute(2, 0, 1).unsqueeze(0).float()
    warped_image_i = vis.apply_flow(_image_i, flow, align_corners=False, output_pil=True)
    warped_image_i = warped_image_i[0]
    _image_j = torch.from_numpy(image_j).permute(2, 0, 1).unsqueeze(0).float()
    warped_image_j = vis.apply_flow(_image_j, flow, align_corners=False, output_pil=True)
    warped_image_j = warped_image_j[0]
    # warped_image_i[0].save(f"img/warped_image_{i}_to_{j}.png")

    image_i_pil = Image.fromarray(image_i)
    image_j_pil = Image.fromarray(image_j)
    images = [image_i_pil, image_j_pil, warped_image_j, warped_image_i]
    _images = vis.make_grid(images, rows=2, cols=2)
    _images.save(f"img/warped_image_{i}_to_{j}.png")


for i in [100, 150, 200, 250, 300]:
    for j in [i + 10, i + 20, i + 30, i + 40, i + 50]:
        run(i, j)
    run(i, i + 1, check_gt=True)
```
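For reference, my understanding of the computation I am trying to reproduce (the standard "rigid flow" reprojection) is: back-project each pixel with its depth, move the resulting 3D point by the relative camera pose, re-project it with the intrinsics, and take the pixel displacement. With intrinsics $K$, depth $d_j(u_j, v_j)$ of frame $j$, and relative pose $(R, t)$ from frame $j$ to frame $i$ expressed in the camera frame:

$$
X_j = d_j(u_j, v_j)\,K^{-1}\begin{bmatrix} u_j \\ v_j \\ 1 \end{bmatrix},\qquad
X_i = R\,X_j + t,\qquad
\begin{bmatrix} u_i \\ v_i \\ 1 \end{bmatrix} \sim K X_i ,
$$

and the flow is the displacement between the matched pixel coordinates. One sanity check I can run (hypothetical, not part of the script above): with an identity relative pose the reprojected pixels must coincide with the originals, so the computed flow should be exactly zero regardless of depth. If this passes but the comparison against the ground-truth `*_flow.npy` files still fails, the problem is probably the frame/sign convention of the relative pose rather than the projection itself.

```python
# Hypothetical zero-motion check, reusing the functions defined above.
import numpy as np

depth = np.full((480, 640), 5.0)   # synthetic flat depth plane, 5 m away
identity_pose = np.eye(4)          # no camera motion between the two frames

flow = calculate_optical_flow_from_depth_and_camera(
    depth, identity_pose, CAMERA_K, R_cam2motion=T_EDN2NED
)
print(np.abs(flow).max())  # expected ~0; anything large indicates a projection bug
```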