From 4a64aacaec578523189710685820a229ecf1e5f3 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 13 Oct 2024 11:57:54 -0700 Subject: [PATCH] feat: standardized depth image and distance image terminology --- README.md | 4 +- camtools/colormap.py | 20 ------ camtools/convert.py | 110 ++++++++++++++++++++++++++++++++ camtools/project.py | 31 ++++++--- camtools/raycast.py | 145 +++++++++++++++++++++++++++++++------------ camtools/solver.py | 26 ++++++++ test/test_backend.py | 4 +- test/test_convert.py | 29 +++++++++ test/test_raycast.py | 46 ++++++++++++++ 9 files changed, 342 insertions(+), 73 deletions(-) create mode 100644 test/test_raycast.py diff --git a/README.md b/README.md index 435dccf..7ce8cd3 100644 --- a/README.md +++ b/README.md @@ -60,8 +60,8 @@ clear and easy-to-use APIs. # Back-project depth image to 3D points. points = ct.project.im_depth_to_points(im_depth, K, T) - # Ray cast a triangle mesh to depth image. - im_depth = ct.raycast.mesh_to_depths(mesh, Ks, Ts, height, width) + # Ray cast a triangle mesh to depth image given the camera parameters. + im_depth = ct.raycast.mesh_to_im_depth(mesh, K, T, height, width) # And more... ``` diff --git a/camtools/colormap.py b/camtools/colormap.py index 6c35cab..9786f88 100644 --- a/camtools/colormap.py +++ b/camtools/colormap.py @@ -50,23 +50,3 @@ def normalize(array, vmin=0.0, vmax=1.0, clip=False): array = (array - amin) / (amax - amin) * (vmax - vmin) + vmin return array - - -def main(): - """ - Test create color map image. - """ - height = 200 - width = 1600 - - colors = query(np.linspace(0, 1, num=width)) - im = np.zeros((height, width, 3), dtype=np.float32) - for i in range(width): - im[:, i : i + 1, :] = colors[i] - - im_path = "colormap.png" - io.imwrite(im_path, im) - - -if __name__ == "__main__": - main() diff --git a/camtools/convert.py b/camtools/convert.py index 32a7fe8..fed149f 100644 --- a/camtools/convert.py +++ b/camtools/convert.py @@ -1,5 +1,8 @@ import cv2 import numpy as np +import open3d as o3d +from jaxtyping import Float +from typing import Optional from . import sanity from . import convert @@ -505,3 +508,110 @@ def spherical_to_T_towards_origin(radius, theta, phi): T = pose_to_T(pose) return T + + +def mesh_to_lineset( + mesh: o3d.geometry.TriangleMesh, + color: Optional[Float[np.ndarray, "3"]] = None, +) -> o3d.geometry.LineSet: + """ + Convert Open3D mesh to Open3D lineset. + """ + if not isinstance(mesh, o3d.geometry.TriangleMesh): + raise ValueError(f"Expected Open3D mesh, but got {type(mesh)}.") + + vertices = np.asarray(mesh.vertices) + triangles = np.asarray(mesh.triangles) + + edges = set() + for triangle in triangles: + edges.add(tuple(sorted([triangle[0], triangle[1]]))) + edges.add(tuple(sorted([triangle[1], triangle[2]]))) + edges.add(tuple(sorted([triangle[2], triangle[0]]))) + + edges = np.array(list(edges)) + + lineset = o3d.geometry.LineSet() + lineset.points = o3d.utility.Vector3dVector(vertices) + lineset.lines = o3d.utility.Vector2iVector(edges) + + if color is not None: + if len(color) != 3: + raise ValueError(f"Expected color of shape (3,), but got {color.shape}.") + lineset.paint_uniform_color(color) + + return lineset + + +def im_distance_to_im_depth( + im_distance: Float[np.ndarray, "h w"], + K: Float[np.ndarray, "3 3"], +) -> Float[np.ndarray, "h w"]: + """ + Convert distance image to depth image. + + Args: + im_distance: Distance image (H, W), float. + K: Camera intrinsic matrix (3, 3). + + Returns: + Depth image (H, W), float. + """ + if not im_distance.ndim == 2: + raise ValueError( + f"Expected im_distance of shape (H, W), but got {im_distance.shape}." + ) + sanity.assert_K(K) + height, width = im_distance.shape + fx, fy = K[0, 0], K[1, 1] + cx, cy = K[0, 2], K[1, 2] + dtype = im_distance.dtype + + u = np.arange(width) + v = np.arange(height) + u_grid, v_grid = np.meshgrid(u, v) + + u_norm = (u_grid - cx) / fx + v_norm = (v_grid - cy) / fy + norm_square = u_norm**2 + v_norm**2 + im_depth = im_distance / np.sqrt(norm_square + 1) + im_depth = im_depth.astype(dtype) + + return im_depth + + +def im_depth_to_im_distance( + im_depth: Float[np.ndarray, "h w"], + K: Float[np.ndarray, "3 3"], +) -> Float[np.ndarray, "h w"]: + """ + Convert depth image to distance image. + + Args: + im_depth: Depth image (H, W), float. + K: Camera intrinsic matrix (3, 3). + + Returns: + Distance image (H, W), float. + """ + if not im_depth.ndim == 2: + raise ValueError( + f"Expected im_depth of shape (H, W), but got {im_depth.shape}." + ) + sanity.assert_K(K) + height, width = im_depth.shape + fx, fy = K[0, 0], K[1, 1] + cx, cy = K[0, 2], K[1, 2] + dtype = im_depth.dtype + + u = np.arange(width) + v = np.arange(height) + u_grid, v_grid = np.meshgrid(u, v) + + u_norm = (u_grid - cx) / fx + v_norm = (v_grid - cy) / fy + norm_square = u_norm**2 + v_norm**2 + im_distance = im_depth * np.sqrt(norm_square + 1) + im_distance = im_distance.astype(dtype) + + return im_distance diff --git a/camtools/project.py b/camtools/project.py index 5fced7c..1559fa4 100644 --- a/camtools/project.py +++ b/camtools/project.py @@ -4,11 +4,17 @@ import cv2 import numpy as np +from jaxtyping import Float +from typing import Union, Tuple, Optional from . import convert, image, sanity -def point_cloud_to_pixel(points, K, T): +def points_to_pixels( + points: Float[np.ndarray, "n 3"], + K: Float[np.ndarray, "3 3"], + T: Float[np.ndarray, "4 4"], +) -> Float[np.ndarray, "N 2"]: """ Project points in world coordinates to pixel coordinates. @@ -52,22 +58,27 @@ def point_cloud_to_pixel(points, K, T): return pixels -def depth_to_point_cloud( - im_depth: np.ndarray, - K: np.ndarray, - T: np.ndarray, - im_color: np.ndarray = None, +def im_depth_to_point_cloud( + im_depth: Float[np.ndarray, "h w"], + K: Float[np.ndarray, "3 3"], + T: Float[np.ndarray, "4 4"], + im_color: Optional[Float[np.ndarray, "h w 3"]] = None, to_image: bool = False, ignore_invalid: bool = True, scale_factor: float = 1.0, -): +) -> Union[ + Float[np.ndarray, "n 3"], + Float[np.ndarray, "h w 3"], + Tuple[Float[np.ndarray, "n 3"], Float[np.ndarray, "n 3"]], + Tuple[Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w 3"]], +]: """ Convert a depth image to a point cloud, optionally including color information. Can return either a sparse (N, 3) point cloud or a dense one with the image shape (H, W, 3). Args: - im_depth: Depth image (H, W), float32, in world scale. + im_depth: Depth image (H, W), float32 or float64, in world scale. K: Intrinsics matrix (3, 3). T: Extrinsics matrix (4, 4). im_color: Color image (H, W, 3), float32/float64, range [0, 1]. @@ -95,8 +106,8 @@ def depth_to_point_cloud( sanity.assert_T(T) if not isinstance(im_depth, np.ndarray): raise TypeError("im_depth must be a numpy array") - if im_depth.dtype != np.float32: - raise TypeError("im_depth must be of type float32") + if im_depth.dtype not in [np.float32, np.float64]: + raise TypeError("im_depth must be of type float32 or float64") if im_depth.ndim != 2: raise ValueError("im_depth must be a 2D array") if im_color is not None: diff --git a/camtools/raycast.py b/camtools/raycast.py index 274fa73..b9304ae 100644 --- a/camtools/raycast.py +++ b/camtools/raycast.py @@ -1,5 +1,6 @@ import numpy as np import open3d as o3d +from jaxtyping import Float from . import sanity from . import convert @@ -39,53 +40,56 @@ def gen_rays(K, T, pixels): return centers, dirs -def mesh_to_depth(mesh, K, T, height, width): +def mesh_to_im_distance( + mesh: o3d.geometry.TriangleMesh, + K: Float[np.ndarray, "3 3"], + T: Float[np.ndarray, "4 4"], + height: int, + width: int, +): """ - Cast mesh to depth image given camera parameters and image dimensions. + Cast mesh to a distance image given camera parameters and image dimensions. + Each pixel contains the distance between camera center to the mesh surface. + Invalid distances are set to np.inf. Args: mesh: Open3D mesh. K: (3, 3) array, camera intrinsic matrix. - T: (4, 4) array, camera extrinsic matrix, [R | t] with [0, 0, 0, 1]. + T: (4, 4) array, camera extrinsic matrix. height: int, image height. width: int, image width. Return: - (height, width) array, float32, representing depth image. Invalid depths - are set to np.inf. + (height, width) array, float32, representing the distance image. The + distance is the distance between camera center to the mesh surface. + Invalid distances are set to np.inf. - Note: this is not meant to be used repeatedly with the same mesh. If you - need to perform ray casting of the same mesh multiple times, you should - create the scene object manually to perform ray casting. + Note: to cast the same mesh to multiple set of camera parameters, use + mesh_to_im_distances for higher efficiency. """ - sanity.assert_K(K) - sanity.assert_T(T) - - t_mesh = o3d.t.geometry.TriangleMesh( - vertex_positions=np.asarray(mesh.vertices).astype(np.float32), - triangle_indices=np.asarray(mesh.triangles), - ) - scene = o3d.t.geometry.RaycastingScene() - scene.add_triangles(t_mesh) - - rays = o3d.t.geometry.RaycastingScene.create_rays_pinhole( - intrinsic_matrix=K, - extrinsic_matrix=T, - width_px=width, - height_px=height, + im_distances = mesh_to_im_distances( + mesh=mesh, + Ks=[K], + Ts=[T], + height=height, + width=width, ) - ray_lengths = np.linalg.norm(rays[:, :, 3:].numpy(), axis=2) + im_distance = im_distances[0] - ans = scene.cast_rays(rays) - im_depth = ans["t_hit"].numpy() * ray_lengths - - return im_depth + return im_distance -def mesh_to_depths(mesh, Ks, Ts, height, width): +def mesh_to_im_distances( + mesh: o3d.geometry.TriangleMesh, + Ks: Float[np.ndarray, "n 3 3"], + Ts: Float[np.ndarray, "n 4 4"], + height: int, + width: int, +): """ - Cast mesh to depth image given camera parameters and image dimensions. - Same as mesh_to_depth, but for multiple cameras. + Cast mesh to distance images given multiple camera parameters and image + dimensions. Each distance image contains the distance between camera center + to the mesh surface. Invalid distances are set to np.inf. Args: mesh: Open3D mesh. @@ -95,8 +99,9 @@ def mesh_to_depths(mesh, Ks, Ts, height, width): width: int, image width. Return: - (N, height, width) array, float32, representing depth image. Invalid - depths are set to np.inf. + (N, height, width) array, float32, representing the distance images. The + distance is the distance between camera center to the mesh surface. + Invalid distances are set to np.inf. """ for K in Ks: sanity.assert_K(K) @@ -110,7 +115,7 @@ def mesh_to_depths(mesh, Ks, Ts, height, width): scene = o3d.t.geometry.RaycastingScene() scene.add_triangles(t_mesh) - im_depths = [] + im_distances = [] for K, T in zip(Ks, Ts): rays = o3d.t.geometry.RaycastingScene.create_rays_pinhole( intrinsic_matrix=K, @@ -120,10 +125,72 @@ def mesh_to_depths(mesh, Ks, Ts, height, width): ) ray_lengths = np.linalg.norm(rays[:, :, 3:].numpy(), axis=2) ans = scene.cast_rays(rays) - im_depth = ans["t_hit"].numpy() * ray_lengths - im_depths.append(im_depth) - im_depths = np.stack(im_depths, axis=0) + im_distance = ans["t_hit"].numpy() * ray_lengths + im_distances.append(im_distance) + im_distances = np.stack(im_distances, axis=0) + + return im_distances + +def mesh_to_im_depth( + mesh: o3d.geometry.TriangleMesh, + K: Float[np.ndarray, "3 3"], + T: Float[np.ndarray, "4 4"], + height: int, + width: int, +) -> Float[np.ndarray, "h w"]: + """ + Cast mesh to a depth image given camera parameters and image dimensions. + Each pixel contains the depth (z-coordinate) of the mesh surface in the + camera frame. Invalid depths are set to np.inf. + + Args: + mesh: Open3D mesh. + K: (3, 3) array, camera intrinsic matrix. + T: (4, 4) array, camera extrinsic matrix. + height: int, image height. + width: int, image width. + + Return: + (height, width) array, float32, representing the depth image. + Invalid depths are set to np.inf. + """ + im_distance = mesh_to_im_distance(mesh, K, T, height, width) + im_depth = convert.im_distance_to_im_depth(im_distance, K) + return im_depth + + +def mesh_to_im_depths( + mesh: o3d.geometry.TriangleMesh, + Ks: Float[np.ndarray, "n 3 3"], + Ts: Float[np.ndarray, "n 4 4"], + height: int, + width: int, +) -> Float[np.ndarray, "n h w"]: + """ + Cast mesh to depth images given multiple camera parameters and image + dimensions. Each depth image contains the depth (z-coordinate) of the mesh + surface in the camera frame. Invalid depths are set to np.inf. + + Args: + mesh: Open3D mesh. + Ks: (N, 3, 3) array, camera intrinsic matrices. + Ts: (N, 4, 4) array, camera extrinsic matrices. + height: int, image height. + width: int, image width. + + Return: + (N, height, width) array, float32, representing the depth images. + Invalid depths are set to np.inf. + """ + im_distances = mesh_to_im_distances(mesh, Ks, Ts, height, width) + im_depths = np.stack( + [ + convert.im_distance_to_im_depth(im_distance, K) + for im_distance, K in zip(im_distances, Ks) + ], + axis=0, + ) return im_depths @@ -146,7 +213,7 @@ def mesh_to_mask(mesh, K, T, height, width): need to perform ray casting of the same mesh multiple times, you should create the scene object manually to perform ray casting. """ - im_depth = mesh_to_depth(mesh, K, T, height, width) - im_mask = (im_depth != np.inf).astype(np.float32) + im_distance = mesh_to_im_distance(mesh, K, T, height, width) + im_mask = (im_distance != np.inf).astype(np.float32) return im_mask diff --git a/camtools/solver.py b/camtools/solver.py index e2b7777..510e176 100644 --- a/camtools/solver.py +++ b/camtools/solver.py @@ -1,6 +1,8 @@ import numpy as np from camtools import sanity +from jaxtyping import Float +import open3d as o3d def line_intersection_3d(src_points=None, dst_points=None): @@ -173,3 +175,27 @@ def point_plane_distance_three_points(point, plane_points): # Ref: https://mathworld.wolfram.com/Point-PlaneDistance.html distance = np.abs(np.dot(plane_n, point - plane_a)) return distance + + +def points_to_mesh_distances( + points: Float[np.ndarray, "n 3"], + mesh: o3d.geometry.TriangleMesh, +) -> Float[np.ndarray, "n"]: + """ + Compute the distance from points to a mesh surface. + + Args: + points (np.ndarray): Array of points with shape (N, 3). + mesh (o3d.geometry.TriangleMesh): The input mesh. + + Returns: + np.ndarray: Array of distances with shape (N,). + """ + if not points.ndim == 2 or points.shape[1] != 3: + raise ValueError(f"Expected points of shape (N, 3), but got {points.shape}.") + mesh_t = o3d.t.geometry.TriangleMesh.from_legacy(mesh) + scene = o3d.t.geometry.RaycastingScene() + _ = scene.add_triangles(mesh_t) + points_tensor = o3d.core.Tensor(points, dtype=o3d.core.Dtype.Float32) + distances = scene.compute_distance(points_tensor) + return distances.numpy() diff --git a/test/test_backend.py b/test/test_backend.py index e76bd43..07dd34f 100644 --- a/test/test_backend.py +++ b/test/test_backend.py @@ -513,8 +513,8 @@ def test_union_of_hints(): def identity( arr: Union[ Float[Tensor, "3 4"], - Float[Tensor, "N 3 4"], - Int[Tensor, "N 3 4"], + Float[Tensor, "n 3 4"], + Int[Tensor, "n 3 4"], ] ): return arr diff --git a/test/test_convert.py b/test/test_convert.py index 2d28ce0..b5c96de 100644 --- a/test/test_convert.py +++ b/test/test_convert.py @@ -1,6 +1,7 @@ import numpy as np import camtools as ct import pytest +import open3d as o3d np.set_printoptions(formatter={"float": "{: 0.2f}".format}) @@ -285,3 +286,31 @@ def test_from_homo(): with pytest.raises(ValueError) as _: src = np.array([[1]]) ct.convert.from_homo(src) + + +def test_im_depth_im_distance_convert(): + # Geometries + sphere = o3d.geometry.TriangleMesh.create_sphere(radius=1.0) + sphere = sphere.translate([0, 0, 4]) + box = o3d.geometry.TriangleMesh.create_box(width=1.5, height=1.5, depth=1.5) + box = box.translate([0, 0, 4]) + mesh = sphere + box + + # Camera + width, height = 640, 480 + fx, fy = 500, 500 + cx, cy = width / 2, height / 2 + K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) + T = np.eye(4) + + # Generate depth image + im_depth = ct.raycast.mesh_to_im_depth(mesh, K, T, height, width) + + # Convert depth to distance + im_distance = ct.convert.im_depth_to_im_distance(im_depth, K) + + # Convert distance back to depth + im_depth_reconstructed = ct.convert.im_distance_to_im_depth(im_distance, K) + + # Assert that the reconstructed depth is close to the original + np.testing.assert_allclose(im_depth, im_depth_reconstructed, rtol=1e-5, atol=1e-5) diff --git a/test/test_raycast.py b/test/test_raycast.py new file mode 100644 index 0000000..1212052 --- /dev/null +++ b/test/test_raycast.py @@ -0,0 +1,46 @@ +import numpy as np +import open3d as o3d +import camtools as ct + + +def test_mesh_to_depth(): + # Geometries + sphere = o3d.geometry.TriangleMesh.create_sphere(radius=1.0) + sphere = sphere.translate([0, 0, 4]) + box = o3d.geometry.TriangleMesh.create_box(width=1.5, height=1.5, depth=1.5) + box = box.translate([0, 0, 4]) + mesh = sphere + box + lineset = ct.convert.mesh_to_lineset(mesh) + + # Camera + width, height = 640, 480 + fx, fy = 500, 500 + cx, cy = width / 2, height / 2 + K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) + T = np.eye(4) + camera_frustum = ct.camera.create_camera_frustums([K], [T], size=1) + + # 3D -> 2D -> 3D projection (mesh -> depth -> points) + im_depth = ct.raycast.mesh_to_im_depth(mesh, K, T, height, width) + points = ct.project.im_depth_to_point_cloud(im_depth, K, T) + + # Verify points are on the mesh surface + points_to_mesh_distances = ct.solver.points_to_mesh_distances(points, mesh) + assert np.max(points_to_mesh_distances) < 5e-3 + + visualize = False + if visualize: + import matplotlib.pyplot as plt + + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(points) + axes = o3d.geometry.TriangleMesh.create_coordinate_frame(size=1) + o3d.visualization.draw_geometries([pcd, lineset, camera_frustum, axes]) + + plt.figure(figsize=(10, 7.5)) + plt.imshow(im_depth, cmap="viridis") + plt.colorbar(label="Depth") + plt.title("Depth Image") + plt.xlabel("Pixel X") + plt.ylabel("Pixel Y") + plt.show()