From 4a64aacaec578523189710685820a229ecf1e5f3 Mon Sep 17 00:00:00 2001
From: Yixing Lao <yixing.lao@gmail.com>
Date: Sun, 13 Oct 2024 11:57:54 -0700
Subject: [PATCH] feat: standardized depth image and distance image terminology

---
 README.md            |   4 +-
 camtools/colormap.py |  20 ------
 camtools/convert.py  | 110 ++++++++++++++++++++++++++++++++
 camtools/project.py  |  31 ++++++---
 camtools/raycast.py  | 145 +++++++++++++++++++++++++++++++------------
 camtools/solver.py   |  26 ++++++++
 test/test_backend.py |   4 +-
 test/test_convert.py |  29 +++++++++
 test/test_raycast.py |  46 ++++++++++++++
 9 files changed, 342 insertions(+), 73 deletions(-)
 create mode 100644 test/test_raycast.py

diff --git a/README.md b/README.md
index 435dccf..7ce8cd3 100644
--- a/README.md
+++ b/README.md
@@ -60,8 +60,8 @@ clear and easy-to-use APIs.
    # Back-project depth image to 3D points.
    points = ct.project.im_depth_to_points(im_depth, K, T)
 
-   # Ray cast a triangle mesh to depth image.
-   im_depth = ct.raycast.mesh_to_depths(mesh, Ks, Ts, height, width)
+   # Ray cast a triangle mesh to depth image given the camera parameters.
+   im_depth = ct.raycast.mesh_to_im_depth(mesh, K, T, height, width)
 
    # And more...
    ```
diff --git a/camtools/colormap.py b/camtools/colormap.py
index 6c35cab..9786f88 100644
--- a/camtools/colormap.py
+++ b/camtools/colormap.py
@@ -50,23 +50,3 @@ def normalize(array, vmin=0.0, vmax=1.0, clip=False):
         array = (array - amin) / (amax - amin) * (vmax - vmin) + vmin
 
     return array
-
-
-def main():
-    """
-    Test create color map image.
-    """
-    height = 200
-    width = 1600
-
-    colors = query(np.linspace(0, 1, num=width))
-    im = np.zeros((height, width, 3), dtype=np.float32)
-    for i in range(width):
-        im[:, i : i + 1, :] = colors[i]
-
-    im_path = "colormap.png"
-    io.imwrite(im_path, im)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/camtools/convert.py b/camtools/convert.py
index 32a7fe8..fed149f 100644
--- a/camtools/convert.py
+++ b/camtools/convert.py
@@ -1,5 +1,8 @@
 import cv2
 import numpy as np
+import open3d as o3d
+from jaxtyping import Float
+from typing import Optional
 
 from . import sanity
 from . import convert
@@ -505,3 +508,110 @@ def spherical_to_T_towards_origin(radius, theta, phi):
     T = pose_to_T(pose)
 
     return T
+
+
+def mesh_to_lineset(
+    mesh: o3d.geometry.TriangleMesh,
+    color: Optional[Float[np.ndarray, "3"]] = None,
+) -> o3d.geometry.LineSet:
+    """
+    Convert Open3D mesh to Open3D lineset.
+    """
+    if not isinstance(mesh, o3d.geometry.TriangleMesh):
+        raise ValueError(f"Expected Open3D mesh, but got {type(mesh)}.")
+
+    vertices = np.asarray(mesh.vertices)
+    triangles = np.asarray(mesh.triangles)
+
+    edges = set()
+    for triangle in triangles:
+        edges.add(tuple(sorted([triangle[0], triangle[1]])))
+        edges.add(tuple(sorted([triangle[1], triangle[2]])))
+        edges.add(tuple(sorted([triangle[2], triangle[0]])))
+
+    edges = np.array(list(edges))
+
+    lineset = o3d.geometry.LineSet()
+    lineset.points = o3d.utility.Vector3dVector(vertices)
+    lineset.lines = o3d.utility.Vector2iVector(edges)
+
+    if color is not None:
+        if len(color) != 3:
+            raise ValueError(f"Expected color of shape (3,), but got {color.shape}.")
+        lineset.paint_uniform_color(color)
+
+    return lineset
+
+
+def im_distance_to_im_depth(
+    im_distance: Float[np.ndarray, "h w"],
+    K: Float[np.ndarray, "3 3"],
+) -> Float[np.ndarray, "h w"]:
+    """
+    Convert distance image to depth image.
+
+    Args:
+        im_distance: Distance image (H, W), float.
+        K: Camera intrinsic matrix (3, 3).
+
+    Returns:
+        Depth image (H, W), float.
+    """
+    if not im_distance.ndim == 2:
+        raise ValueError(
+            f"Expected im_distance of shape (H, W), but got {im_distance.shape}."
+        )
+    sanity.assert_K(K)
+    height, width = im_distance.shape
+    fx, fy = K[0, 0], K[1, 1]
+    cx, cy = K[0, 2], K[1, 2]
+    dtype = im_distance.dtype
+
+    u = np.arange(width)
+    v = np.arange(height)
+    u_grid, v_grid = np.meshgrid(u, v)
+
+    u_norm = (u_grid - cx) / fx
+    v_norm = (v_grid - cy) / fy
+    norm_square = u_norm**2 + v_norm**2
+    im_depth = im_distance / np.sqrt(norm_square + 1)
+    im_depth = im_depth.astype(dtype)
+
+    return im_depth
+
+
+def im_depth_to_im_distance(
+    im_depth: Float[np.ndarray, "h w"],
+    K: Float[np.ndarray, "3 3"],
+) -> Float[np.ndarray, "h w"]:
+    """
+    Convert depth image to distance image.
+
+    Args:
+        im_depth: Depth image (H, W), float.
+        K: Camera intrinsic matrix (3, 3).
+
+    Returns:
+        Distance image (H, W), float.
+    """
+    if not im_depth.ndim == 2:
+        raise ValueError(
+            f"Expected im_depth of shape (H, W), but got {im_depth.shape}."
+        )
+    sanity.assert_K(K)
+    height, width = im_depth.shape
+    fx, fy = K[0, 0], K[1, 1]
+    cx, cy = K[0, 2], K[1, 2]
+    dtype = im_depth.dtype
+
+    u = np.arange(width)
+    v = np.arange(height)
+    u_grid, v_grid = np.meshgrid(u, v)
+
+    u_norm = (u_grid - cx) / fx
+    v_norm = (v_grid - cy) / fy
+    norm_square = u_norm**2 + v_norm**2
+    im_distance = im_depth * np.sqrt(norm_square + 1)
+    im_distance = im_distance.astype(dtype)
+
+    return im_distance
diff --git a/camtools/project.py b/camtools/project.py
index 5fced7c..1559fa4 100644
--- a/camtools/project.py
+++ b/camtools/project.py
@@ -4,11 +4,17 @@
 
 import cv2
 import numpy as np
+from jaxtyping import Float
+from typing import Union, Tuple, Optional
 
 from . import convert, image, sanity
 
 
-def point_cloud_to_pixel(points, K, T):
+def points_to_pixels(
+    points: Float[np.ndarray, "n 3"],
+    K: Float[np.ndarray, "3 3"],
+    T: Float[np.ndarray, "4 4"],
+) -> Float[np.ndarray, "N 2"]:
     """
     Project points in world coordinates to pixel coordinates.
 
@@ -52,22 +58,27 @@ def point_cloud_to_pixel(points, K, T):
     return pixels
 
 
-def depth_to_point_cloud(
-    im_depth: np.ndarray,
-    K: np.ndarray,
-    T: np.ndarray,
-    im_color: np.ndarray = None,
+def im_depth_to_point_cloud(
+    im_depth: Float[np.ndarray, "h w"],
+    K: Float[np.ndarray, "3 3"],
+    T: Float[np.ndarray, "4 4"],
+    im_color: Optional[Float[np.ndarray, "h w 3"]] = None,
     to_image: bool = False,
     ignore_invalid: bool = True,
     scale_factor: float = 1.0,
-):
+) -> Union[
+    Float[np.ndarray, "n 3"],
+    Float[np.ndarray, "h w 3"],
+    Tuple[Float[np.ndarray, "n 3"], Float[np.ndarray, "n 3"]],
+    Tuple[Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w 3"]],
+]:
     """
     Convert a depth image to a point cloud, optionally including color information.
     Can return either a sparse (N, 3) point cloud or a dense one with the image
     shape (H, W, 3).
 
     Args:
-        im_depth: Depth image (H, W), float32, in world scale.
+        im_depth: Depth image (H, W), float32 or float64, in world scale.
         K: Intrinsics matrix (3, 3).
         T: Extrinsics matrix (4, 4).
         im_color: Color image (H, W, 3), float32/float64, range [0, 1].
@@ -95,8 +106,8 @@ def depth_to_point_cloud(
     sanity.assert_T(T)
     if not isinstance(im_depth, np.ndarray):
         raise TypeError("im_depth must be a numpy array")
-    if im_depth.dtype != np.float32:
-        raise TypeError("im_depth must be of type float32")
+    if im_depth.dtype not in [np.float32, np.float64]:
+        raise TypeError("im_depth must be of type float32 or float64")
     if im_depth.ndim != 2:
         raise ValueError("im_depth must be a 2D array")
     if im_color is not None:
diff --git a/camtools/raycast.py b/camtools/raycast.py
index 274fa73..b9304ae 100644
--- a/camtools/raycast.py
+++ b/camtools/raycast.py
@@ -1,5 +1,6 @@
 import numpy as np
 import open3d as o3d
+from jaxtyping import Float
 
 from . import sanity
 from . import convert
@@ -39,53 +40,56 @@ def gen_rays(K, T, pixels):
     return centers, dirs
 
 
-def mesh_to_depth(mesh, K, T, height, width):
+def mesh_to_im_distance(
+    mesh: o3d.geometry.TriangleMesh,
+    K: Float[np.ndarray, "3 3"],
+    T: Float[np.ndarray, "4 4"],
+    height: int,
+    width: int,
+):
     """
-    Cast mesh to depth image given camera parameters and image dimensions.
+    Cast mesh to a distance image given camera parameters and image dimensions.
+    Each pixel contains the distance between camera center to the mesh surface.
+    Invalid distances are set to np.inf.
 
     Args:
         mesh: Open3D mesh.
         K: (3, 3) array, camera intrinsic matrix.
-        T: (4, 4) array, camera extrinsic matrix, [R | t] with [0, 0, 0, 1].
+        T: (4, 4) array, camera extrinsic matrix.
         height: int, image height.
         width: int, image width.
 
     Return:
-        (height, width) array, float32, representing depth image. Invalid depths
-        are set to np.inf.
+        (height, width) array, float32, representing the distance image. The
+        distance is the distance between camera center to the mesh surface.
+        Invalid distances are set to np.inf.
 
-    Note: this is not meant to be used repeatedly with the same mesh. If you
-    need to perform ray casting of the same mesh multiple times, you should
-    create the scene object manually to perform ray casting.
+    Note: to cast the same mesh to multiple set of camera parameters, use
+    mesh_to_im_distances for higher efficiency.
     """
-    sanity.assert_K(K)
-    sanity.assert_T(T)
-
-    t_mesh = o3d.t.geometry.TriangleMesh(
-        vertex_positions=np.asarray(mesh.vertices).astype(np.float32),
-        triangle_indices=np.asarray(mesh.triangles),
-    )
-    scene = o3d.t.geometry.RaycastingScene()
-    scene.add_triangles(t_mesh)
-
-    rays = o3d.t.geometry.RaycastingScene.create_rays_pinhole(
-        intrinsic_matrix=K,
-        extrinsic_matrix=T,
-        width_px=width,
-        height_px=height,
+    im_distances = mesh_to_im_distances(
+        mesh=mesh,
+        Ks=[K],
+        Ts=[T],
+        height=height,
+        width=width,
     )
-    ray_lengths = np.linalg.norm(rays[:, :, 3:].numpy(), axis=2)
+    im_distance = im_distances[0]
 
-    ans = scene.cast_rays(rays)
-    im_depth = ans["t_hit"].numpy() * ray_lengths
-
-    return im_depth
+    return im_distance
 
 
-def mesh_to_depths(mesh, Ks, Ts, height, width):
+def mesh_to_im_distances(
+    mesh: o3d.geometry.TriangleMesh,
+    Ks: Float[np.ndarray, "n 3 3"],
+    Ts: Float[np.ndarray, "n 4 4"],
+    height: int,
+    width: int,
+):
     """
-    Cast mesh to depth image given camera parameters and image dimensions.
-    Same as mesh_to_depth, but for multiple cameras.
+    Cast mesh to distance images given multiple camera parameters and image
+    dimensions. Each distance image contains the distance between camera center
+    to the mesh surface. Invalid distances are set to np.inf.
 
     Args:
         mesh: Open3D mesh.
@@ -95,8 +99,9 @@ def mesh_to_depths(mesh, Ks, Ts, height, width):
         width: int, image width.
 
     Return:
-        (N, height, width) array, float32, representing depth image. Invalid
-        depths are set to np.inf.
+        (N, height, width) array, float32, representing the distance images. The
+        distance is the distance between camera center to the mesh surface.
+        Invalid distances are set to np.inf.
     """
     for K in Ks:
         sanity.assert_K(K)
@@ -110,7 +115,7 @@ def mesh_to_depths(mesh, Ks, Ts, height, width):
     scene = o3d.t.geometry.RaycastingScene()
     scene.add_triangles(t_mesh)
 
-    im_depths = []
+    im_distances = []
     for K, T in zip(Ks, Ts):
         rays = o3d.t.geometry.RaycastingScene.create_rays_pinhole(
             intrinsic_matrix=K,
@@ -120,10 +125,72 @@ def mesh_to_depths(mesh, Ks, Ts, height, width):
         )
         ray_lengths = np.linalg.norm(rays[:, :, 3:].numpy(), axis=2)
         ans = scene.cast_rays(rays)
-        im_depth = ans["t_hit"].numpy() * ray_lengths
-        im_depths.append(im_depth)
-    im_depths = np.stack(im_depths, axis=0)
+        im_distance = ans["t_hit"].numpy() * ray_lengths
+        im_distances.append(im_distance)
+    im_distances = np.stack(im_distances, axis=0)
+
+    return im_distances
 
+
+def mesh_to_im_depth(
+    mesh: o3d.geometry.TriangleMesh,
+    K: Float[np.ndarray, "3 3"],
+    T: Float[np.ndarray, "4 4"],
+    height: int,
+    width: int,
+) -> Float[np.ndarray, "h w"]:
+    """
+    Cast mesh to a depth image given camera parameters and image dimensions.
+    Each pixel contains the depth (z-coordinate) of the mesh surface in the
+    camera frame. Invalid depths are set to np.inf.
+
+    Args:
+        mesh: Open3D mesh.
+        K: (3, 3) array, camera intrinsic matrix.
+        T: (4, 4) array, camera extrinsic matrix.
+        height: int, image height.
+        width: int, image width.
+
+    Return:
+        (height, width) array, float32, representing the depth image.
+        Invalid depths are set to np.inf.
+    """
+    im_distance = mesh_to_im_distance(mesh, K, T, height, width)
+    im_depth = convert.im_distance_to_im_depth(im_distance, K)
+    return im_depth
+
+
+def mesh_to_im_depths(
+    mesh: o3d.geometry.TriangleMesh,
+    Ks: Float[np.ndarray, "n 3 3"],
+    Ts: Float[np.ndarray, "n 4 4"],
+    height: int,
+    width: int,
+) -> Float[np.ndarray, "n h w"]:
+    """
+    Cast mesh to depth images given multiple camera parameters and image
+    dimensions. Each depth image contains the depth (z-coordinate) of the mesh
+    surface in the camera frame. Invalid depths are set to np.inf.
+
+    Args:
+        mesh: Open3D mesh.
+        Ks: (N, 3, 3) array, camera intrinsic matrices.
+        Ts: (N, 4, 4) array, camera extrinsic matrices.
+        height: int, image height.
+        width: int, image width.
+
+    Return:
+        (N, height, width) array, float32, representing the depth images.
+        Invalid depths are set to np.inf.
+    """
+    im_distances = mesh_to_im_distances(mesh, Ks, Ts, height, width)
+    im_depths = np.stack(
+        [
+            convert.im_distance_to_im_depth(im_distance, K)
+            for im_distance, K in zip(im_distances, Ks)
+        ],
+        axis=0,
+    )
     return im_depths
 
 
@@ -146,7 +213,7 @@ def mesh_to_mask(mesh, K, T, height, width):
     need to perform ray casting of the same mesh multiple times, you should
     create the scene object manually to perform ray casting.
     """
-    im_depth = mesh_to_depth(mesh, K, T, height, width)
-    im_mask = (im_depth != np.inf).astype(np.float32)
+    im_distance = mesh_to_im_distance(mesh, K, T, height, width)
+    im_mask = (im_distance != np.inf).astype(np.float32)
 
     return im_mask
diff --git a/camtools/solver.py b/camtools/solver.py
index e2b7777..510e176 100644
--- a/camtools/solver.py
+++ b/camtools/solver.py
@@ -1,6 +1,8 @@
 import numpy as np
 
 from camtools import sanity
+from jaxtyping import Float
+import open3d as o3d
 
 
 def line_intersection_3d(src_points=None, dst_points=None):
@@ -173,3 +175,27 @@ def point_plane_distance_three_points(point, plane_points):
     # Ref: https://mathworld.wolfram.com/Point-PlaneDistance.html
     distance = np.abs(np.dot(plane_n, point - plane_a))
     return distance
+
+
+def points_to_mesh_distances(
+    points: Float[np.ndarray, "n 3"],
+    mesh: o3d.geometry.TriangleMesh,
+) -> Float[np.ndarray, "n"]:
+    """
+    Compute the distance from points to a mesh surface.
+
+    Args:
+        points (np.ndarray): Array of points with shape (N, 3).
+        mesh (o3d.geometry.TriangleMesh): The input mesh.
+
+    Returns:
+        np.ndarray: Array of distances with shape (N,).
+    """
+    if not points.ndim == 2 or points.shape[1] != 3:
+        raise ValueError(f"Expected points of shape (N, 3), but got {points.shape}.")
+    mesh_t = o3d.t.geometry.TriangleMesh.from_legacy(mesh)
+    scene = o3d.t.geometry.RaycastingScene()
+    _ = scene.add_triangles(mesh_t)
+    points_tensor = o3d.core.Tensor(points, dtype=o3d.core.Dtype.Float32)
+    distances = scene.compute_distance(points_tensor)
+    return distances.numpy()
diff --git a/test/test_backend.py b/test/test_backend.py
index e76bd43..07dd34f 100644
--- a/test/test_backend.py
+++ b/test/test_backend.py
@@ -513,8 +513,8 @@ def test_union_of_hints():
     def identity(
         arr: Union[
             Float[Tensor, "3 4"],
-            Float[Tensor, "N 3 4"],
-            Int[Tensor, "N 3 4"],
+            Float[Tensor, "n 3 4"],
+            Int[Tensor, "n 3 4"],
         ]
     ):
         return arr
diff --git a/test/test_convert.py b/test/test_convert.py
index 2d28ce0..b5c96de 100644
--- a/test/test_convert.py
+++ b/test/test_convert.py
@@ -1,6 +1,7 @@
 import numpy as np
 import camtools as ct
 import pytest
+import open3d as o3d
 
 np.set_printoptions(formatter={"float": "{: 0.2f}".format})
 
@@ -285,3 +286,31 @@ def test_from_homo():
     with pytest.raises(ValueError) as _:
         src = np.array([[1]])
         ct.convert.from_homo(src)
+
+
+def test_im_depth_im_distance_convert():
+    # Geometries
+    sphere = o3d.geometry.TriangleMesh.create_sphere(radius=1.0)
+    sphere = sphere.translate([0, 0, 4])
+    box = o3d.geometry.TriangleMesh.create_box(width=1.5, height=1.5, depth=1.5)
+    box = box.translate([0, 0, 4])
+    mesh = sphere + box
+
+    # Camera
+    width, height = 640, 480
+    fx, fy = 500, 500
+    cx, cy = width / 2, height / 2
+    K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+    T = np.eye(4)
+
+    # Generate depth image
+    im_depth = ct.raycast.mesh_to_im_depth(mesh, K, T, height, width)
+
+    # Convert depth to distance
+    im_distance = ct.convert.im_depth_to_im_distance(im_depth, K)
+
+    # Convert distance back to depth
+    im_depth_reconstructed = ct.convert.im_distance_to_im_depth(im_distance, K)
+
+    # Assert that the reconstructed depth is close to the original
+    np.testing.assert_allclose(im_depth, im_depth_reconstructed, rtol=1e-5, atol=1e-5)
diff --git a/test/test_raycast.py b/test/test_raycast.py
new file mode 100644
index 0000000..1212052
--- /dev/null
+++ b/test/test_raycast.py
@@ -0,0 +1,46 @@
+import numpy as np
+import open3d as o3d
+import camtools as ct
+
+
+def test_mesh_to_depth():
+    # Geometries
+    sphere = o3d.geometry.TriangleMesh.create_sphere(radius=1.0)
+    sphere = sphere.translate([0, 0, 4])
+    box = o3d.geometry.TriangleMesh.create_box(width=1.5, height=1.5, depth=1.5)
+    box = box.translate([0, 0, 4])
+    mesh = sphere + box
+    lineset = ct.convert.mesh_to_lineset(mesh)
+
+    # Camera
+    width, height = 640, 480
+    fx, fy = 500, 500
+    cx, cy = width / 2, height / 2
+    K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+    T = np.eye(4)
+    camera_frustum = ct.camera.create_camera_frustums([K], [T], size=1)
+
+    # 3D -> 2D -> 3D projection (mesh -> depth -> points)
+    im_depth = ct.raycast.mesh_to_im_depth(mesh, K, T, height, width)
+    points = ct.project.im_depth_to_point_cloud(im_depth, K, T)
+
+    # Verify points are on the mesh surface
+    points_to_mesh_distances = ct.solver.points_to_mesh_distances(points, mesh)
+    assert np.max(points_to_mesh_distances) < 5e-3
+
+    visualize = False
+    if visualize:
+        import matplotlib.pyplot as plt
+
+        pcd = o3d.geometry.PointCloud()
+        pcd.points = o3d.utility.Vector3dVector(points)
+        axes = o3d.geometry.TriangleMesh.create_coordinate_frame(size=1)
+        o3d.visualization.draw_geometries([pcd, lineset, camera_frustum, axes])
+
+        plt.figure(figsize=(10, 7.5))
+        plt.imshow(im_depth, cmap="viridis")
+        plt.colorbar(label="Depth")
+        plt.title("Depth Image")
+        plt.xlabel("Pixel X")
+        plt.ylabel("Pixel Y")
+        plt.show()