Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

About optical ground truth #55

Open
boilcy opened this issue Sep 20, 2024 · 0 comments
Open

About optical ground truth #55

boilcy opened this issue Sep 20, 2024 · 0 comments

Comments

@boilcy
Copy link

boilcy commented Sep 20, 2024

how do you calculate the optical flow from depth and pose? I read the code you metioned in data_type.md, but I always get error and here is my code

import os
from PIL import Image
import cv2
import numpy as np
from scipy.spatial.transform import Rotation
import torch
import vis as vis

T_EDN2NED = np.array(
    [
        [0.0, 0.0, 1.0],
        [1.0, 0.0, 0.0],
        [0.0, 1.0, 0.0],
    ],
    dtype=float,
)

T_NED2EDN = np.array(
    [
        [0.0, 1.0, 0.0],
        [0.0, 0.0, 1.0],
        [1.0, 0.0, 0.0],
    ],
    dtype=float,
)


def convert_depth_image_to_camera_coordinates(depth: np.ndarray, intrinsic: np.ndarray):
    wIdx = np.linspace(0, depth.shape[1] - 1, depth.shape[1])
    hIdx = np.linspace(0, depth.shape[0] - 1, depth.shape[0])

    u, v = np.meshgrid(wIdx, hIdx)

    u = u.astype(float)
    v = v.astype(float)

    focal_x = intrinsic[0, 0]
    focal_y = intrinsic[1, 1]

    x = (u - intrinsic[0, 2]) * depth / focal_x
    y = (v - intrinsic[1, 2]) * depth / focal_y

    coor = np.zeros((3, depth.size), dtype=float)
    coor[0, :] = x.reshape((1, -1))
    coor[1, :] = y.reshape((1, -1))
    coor[2, :] = depth.reshape((1, -1))

    # Rearrange u and v into a two-channel matrix.
    uv = np.stack([u, v], axis=0)

    return coor, uv


def convert_camera_coordinates_to_image_coordinates(
    coor: np.ndarray, intrinsics: np.ndarray
):
    """
    coor: A numpy column vector, 3x1.
    return: A numpy column vector, 2x1.
    """
    x = intrinsics.dot(coor)
    x = x / x[2, :]

    return x[0:2, :]


def calculate_optical_flow_from_depth_and_camera(
    depth_j: np.ndarray,
    pose_j2i: np.ndarray,
    intrinsic: np.ndarray,
    R_cam2motion: np.ndarray = np.eye(3),
):
    """
        Calculate the optical flow of image 1 with respect to image 0. Thus, how the pixel of image
        1 moves if they are projected onto the image plane of image 1.

    depth: Depth image of camera 1.
    pose: The camera pose measured in frame_0 (camera 0's reference frame). A 4x4 numpy matrix.
    intrinsics: A 3x3 numpy matrix.

    NOTE: The 3D reference frames of camera 0 and 1 have their z-axis pointing forward. This is NOT
    the same way in which AirSim represents a 3D point and orientation with respect to its global
    frame, which in turn has its z-axis pointing downwards.
    """

    R_motion2cam = np.linalg.inv(R_cam2motion)

    # Calculate the 3D coordinates of the points in reference frame 1.
    coor_j, uv_j = convert_depth_image_to_camera_coordinates(depth_j, intrinsic)

    # Transform.
    R = pose_j2i[:3, :3]
    T = pose_j2i[:3, 3:4]

    coor_j_ned = R_cam2motion.dot(coor_j)

    coor_i_ned = R.dot(coor_j_ned) + T

    coor_i = R_motion2cam.dot(coor_i_ned)

    uv_i = convert_camera_coordinates_to_image_coordinates(coor_i, intrinsic)

    uv_i = uv_i.reshape((2, depth_j.shape[0], depth_j.shape[1]))

    dudv = uv_j - uv_i

    return dudv


def create_transform_from_xyzq(t, q):
    R = Rotation.from_quat(q, scalar_first=False).as_matrix()
    T = np.eye(4)
    T[:3, :3] = R
    T[:3, 3] = t
    return T

CAMERA_K = np.array([[320, 0, 320], [0, 320, 240], [0, 0, 1]])

data_root = "/home/liucy/CMIM/data_gen/neighborhood_sample_P002/P002"

def run(i, j, check_gt = False):
    depth_i = np.load(os.path.join(data_root, "depth_left", f"{i:06d}_left_depth.npy"))
    depth_j = np.load(os.path.join(data_root, "depth_left", f"{j:06d}_left_depth.npy"))

    image_i = cv2.imread(os.path.join(data_root, "image_left", f"{i:06d}_left.png"))
    image_i = cv2.cvtColor(image_i, cv2.COLOR_BGR2RGB)
    image_j = cv2.imread(os.path.join(data_root, "image_left", f"{j:06d}_left.png"))
    image_j = cv2.cvtColor(image_j, cv2.COLOR_BGR2RGB)

    pose_data = np.loadtxt(os.path.join(data_root, "pose_left.txt"))
    pose_i = pose_data[i]
    pose_j = pose_data[j]
    pose_i = create_transform_from_xyzq(pose_i[:3], pose_i[3:])
    pose_j = create_transform_from_xyzq(pose_j[:3], pose_j[3:])



    pose_j2i = np.matmul(np.linalg.inv(pose_i), pose_j)
    pose_i2j = np.matmul(np.linalg.inv(pose_j), pose_i) 


    
    """ pose_j2i = np.matmul(pose_i, np.linalg.inv(pose_j))
    pose_i2j = np.matmul(pose_j, np.linalg.inv(pose_i)) """
   

    flow = calculate_optical_flow_from_depth_and_camera(
        depth_i, pose_j2i, CAMERA_K, R_cam2motion=T_EDN2NED
    )
    flow = flow.transpose(1, 2, 0)

    if check_gt:
        flow_gt = np.load(os.path.join(data_root, "flow", f"{i:06d}_{j:06d}_flow.npy"))
        delta_flow = flow_gt - flow
        print(f"delta_flow mean: {delta_flow.mean()}")
        print(f"delta_flow max: {delta_flow.max()}")
        print(f"delta_flow min: {delta_flow.min()}")
        print(f"delta_flow std: {delta_flow.std()}")
        print(f"delta_flow median: {np.median(delta_flow)}")
        print(f"------------------------------------------")
        return

    flow = torch.from_numpy(flow).unsqueeze(0).float()

    flow = vis.convert_uv_to_norm_grid(flow, align_corners=False)

    flow_img = vis.visualize_flow_as_vector_field(flow[0], step=24)
    flow_img.save(f"img/flow_{i}_to_{j}.png")

    _image_i = torch.from_numpy(image_i).permute(2, 0, 1).unsqueeze(0).float()
    warped_image_i = vis.apply_flow(_image_i, flow, align_corners=False, output_pil=True)
    warped_image_i = warped_image_i[0]
    _image_j = torch.from_numpy(image_j).permute(2, 0, 1).unsqueeze(0).float()
    warped_image_j = vis.apply_flow(_image_j, flow, align_corners=False, output_pil=True)
    warped_image_j = warped_image_j[0]
    # warped_image_i[0].save(f"img/warped_image_{i}_to_{j}.png")

    image_i_pil = Image.fromarray(image_i)
    image_j_pil = Image.fromarray(image_j)


    images = [image_i_pil, image_j_pil, warped_image_j, warped_image_i]

    _images = vis.make_grid(images, rows=2, cols=2)
    _images.save(f"img/warped_image_{i}_to_{j}.png")

for i in [100, 150, 200, 250, 300]:
    for j in [i+10, i+20, i+30, i+40, i+50]:
        run(i, j)
    run(i, i+1, check_gt=True)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant