diff --git a/configs/nerf-hmvs-wbg.yaml b/configs/nerf-hmvs-wbg.yaml
index 8fd7900..9f528e1 100644
--- a/configs/nerf-hmvs-wbg.yaml
+++ b/configs/nerf-hmvs-wbg.yaml
@@ -5,17 +5,23 @@ seed: 42
 dataset:
   name: hmvs
   root_dir: /home/yidaw/Documents/datasets/Buildings
+  pose_file: extrinsics.npy # Tile_+002_+002_AT.bin
+  pcd_file: pointcloud.npz # false # /home/yidaw/Documents/datasets/Buildings/Building/building_interest.ply # Bridge/Tile_+004_+008_AT.ply
   mesh_outpath: false
+  img_wh:
+    - 960
+    - 540
   img_downscale: 4 # specify training image size by either img_wh or img_downscale
-  cam_downscale: 75 # if false, to rescale the cameras/pts positions automatically
+  cam_downscale: false # 75 # if false, to rescale the cameras/pts positions automatically
   repose: true # false
   up_est_method: no-change # if true, use estimated ground plane normal direction as up direction
-  center_est_method: point # camera/point
+  center_est_method: camera # camera/point
   n_test_traj_steps: 16
   apply_mask: false
   apply_depth: false
   load_data_on_gpu: false
-  max_imgs: 400
+  max_imgs: 1200
+  preprocess_only: false
 
 model:
   name: nerf
@@ -55,7 +61,7 @@ model:
     n_neurons: 64
     n_hidden_layers: 1
     max_imgs: ${dataset.max_imgs}
-    use_appearance_embedding: false
+    use_appearance_embedding: true # false
     use_average_appearance_embedding: true
     appearance_embedding_dim: 16
     use_distortion: false
@@ -73,7 +79,7 @@ model:
     n_neurons: 64
     n_hidden_layers: 2
     max_imgs: ${dataset.max_imgs}
-    use_appearance_embedding: false
+    use_appearance_embedding: true # false
     use_average_appearance_embedding: true
     appearance_embedding_dim: 16
 # background model configurations
@@ -112,7 +118,7 @@ model:
     n_hidden_layers: 2
     color_activation: sigmoid
     max_imgs: ${dataset.max_imgs}
-    use_appearance_embedding: false
+    use_appearance_embedding: true # false
     use_average_appearance_embedding: true
     appearance_embedding_dim: 8
 
diff --git a/configs/neuralangelo-hmvs-360.yaml b/configs/neuralangelo-hmvs-360.yaml
index 7f9ef16..3b88320 100644
--- a/configs/neuralangelo-hmvs-360.yaml
+++ b/configs/neuralangelo-hmvs-360.yaml
@@ -5,8 +5,8 @@ seed: 42
 dataset:
   name: hmvs
   root_dir: /home/yidaw/Documents/datasets/DJI/DJI_010
-  pose_path: /home/yidaw/Documents/datasets/DJI/DJI_010/Mesh/ATOrigin.bin
-  pcd_path: /home/yidaw/Documents/datasets/DJI/DJI_010/Mesh/TexturedModel/Tile_+000_+000_0/building_interest.ply
+  pose_file: Mesh/ATOrigin.bin
+  pcd_file: Mesh/TexturedModel/Tile_+000_+000_0/building_interest.ply
   mesh_exported_path: false
   img_downscale: 4 # better with more than 1000 x 1000 resolution
   cam_downscale: 17 # 2.5 # if false, to rescale the cameras/pts positions automatically
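Note: the absolute `pose_path`/`pcd_path` entries become `pose_file`/`pcd_file`, resolved against `root_dir`, so a dataset directory can be moved without editing its config. A minimal sketch of the resolution logic these configs assume (`resolve_dataset_paths` is a hypothetical helper; the real logic lands in datasets/hmvs.py below):

```python
import os

def resolve_dataset_paths(root_dir: str, pose_file: str, pcd_file=None):
    """Join config-relative entries onto root_dir and sanity-check them."""
    pose_path = os.path.join(root_dir, pose_file)
    assert os.path.isfile(pose_path), f"missing pose file: {pose_path}"
    pcd_path = os.path.join(root_dir, pcd_file) if pcd_file else None
    return pose_path, pcd_path
```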
diff --git a/configs/neuralangelo-hmvs-eagle-wbg.yaml b/configs/neuralangelo-hmvs-eagle-wbg.yaml
index 54fd118..0f1af9f 100644
--- a/configs/neuralangelo-hmvs-eagle-wbg.yaml
+++ b/configs/neuralangelo-hmvs-eagle-wbg.yaml
@@ -5,13 +5,13 @@ seed: 42
 dataset:
   name: hmvs
   root_dir: /home/yidaw/Documents/datasets/Buildings
-  pose_path: /home/yidaw/Documents/datasets/Buildings/Building/Tile_+002_+002_AT.bin
-  pcd_path: /home/yidaw/Documents/datasets/Buildings/Building/building_interest.ply
+  pose_file: extrinsics.npy # Tile_+002_+002_AT.bin
+  pcd_file: pointcloud.npz # false # /home/yidaw/Documents/datasets/Buildings/Building/building_interest.ply # Bridge/Tile_+004_+008_AT.ply
   mesh_outpath: false
   img_wh:
     - 960
     - 540
-  img_downscale: 2 # 4 # better with more than 1000 x 1000 resolution
+  img_downscale: 4 # better with more than 1000 x 1000 resolution
   cam_downscale: 200 # if false, to rescale the cameras/pts positions automatically
   repose: true # false
   up_est_method: ground # no-change/camera/ground/z-axis
@@ -20,7 +20,8 @@ dataset:
   apply_mask: false
   apply_depth: false
   load_data_on_gpu: false
-  max_imgs: 1600
+  max_imgs: 1200
+  preprocess_only: false
 
 model:
   name: neus
@@ -200,8 +201,8 @@ trainer:
   max_steps: 6e4
   log_every_n_steps: 500
   num_sanity_val_steps: 0
-  val_check_interval: 2e4
+  val_check_interval: 3e4
   limit_train_batches: 1.0
-  limit_val_batches: 6 # 3
+  limit_val_batches: 7 # 3
   enable_progress_bar: true
   precision: 16
diff --git a/configs/neuralangelo-hmvs-eagle.yaml b/configs/neuralangelo-hmvs-eagle.yaml
index d2bdfb6..c49c449 100644
--- a/configs/neuralangelo-hmvs-eagle.yaml
+++ b/configs/neuralangelo-hmvs-eagle.yaml
@@ -5,8 +5,8 @@ seed: 42
 dataset:
   name: hmvs
   root_dir: /home/yidaw/Documents/datasets/Buildings
-  pose_path: /home/yidaw/Documents/datasets/Buildings/Building/Tile_+002_+002_AT.bin
-  pcd_path: /home/yidaw/Documents/datasets/Buildings/Building/building_interest.ply
+  pose_file: Building/Tile_+002_+002_AT.bin
+  pcd_file: Building/building_interest.ply
   mesh_outpath: false
   img_downscale: 4 # better with more than 1000 x 1000 resolution
   cam_downscale: 200 # if false, to rescale the cameras/pts positions automatically
diff --git a/configs/neus-hmvs-360-wbg.yaml b/configs/neus-hmvs-360-wbg.yaml
index bec2e0b..c581e6b 100644
--- a/configs/neus-hmvs-360-wbg.yaml
+++ b/configs/neus-hmvs-360-wbg.yaml
@@ -5,8 +5,8 @@ seed: 42
 dataset:
   name: hmvs
   root_dir: /home/yidaw/Documents/datasets/Buildings
-  pose_path: /home/yidaw/Documents/datasets/Buildings/Building/Tile_+002_+002_AT.bin
-  pcd_path: /home/yidaw/Documents/datasets/Buildings/Building/building_interest.ply # Bridge/Tile_+004_+008_AT.ply
+  pose_file: Tile_+002_+002_AT.bin
+  pcd_file: Building/building_interest.ply # Bridge/Tile_+004_+008_AT.ply
   mesh_exported_path: false
   img_downscale: 4 # better with more than 1000 x 1000 resolution
   cam_downscale: 17 # if false, to rescale the cameras/pts positions automatically
diff --git a/configs/neus-hmvs-eagle-wbg.yaml b/configs/neus-hmvs-eagle-wbg.yaml
index 1119bac..2e8d2b2 100644
--- a/configs/neus-hmvs-eagle-wbg.yaml
+++ b/configs/neus-hmvs-eagle-wbg.yaml
@@ -5,8 +5,8 @@ seed: 42
 dataset:
   name: hmvs
   root_dir: /home/yidaw/Documents/datasets/Buildings
-  pose_path: /home/yidaw/Documents/datasets/Buildings/Building/Tile_+002_+002_AT.bin
-  pcd_path: false # /home/yidaw/Documents/datasets/Buildings/Building/building_interest.ply # Bridge/Tile_+004_+008_AT.ply
+  pose_file: extrinsics.npy # Tile_+002_+002_AT.bin
+  pcd_file: pointcloud.npz # false # /home/yidaw/Documents/datasets/Buildings/Building/building_interest.ply # Bridge/Tile_+004_+008_AT.ply
   mesh_outpath: false
   img_wh:
     - 960
@@ -20,7 +20,8 @@ dataset:
   apply_mask: false
   apply_depth: true
   load_data_on_gpu: false
-  max_imgs: 1000
+  max_imgs: 1200
+  preprocess_only: false
 
 model:
   name: neus
@@ -198,8 +199,8 @@ trainer:
   max_steps: 6e4
   log_every_n_steps: 500
   num_sanity_val_steps: 0
-  val_check_interval: 1e4 # 6e4
+  val_check_interval: 3e4 # 6e4
   limit_train_batches: 1.0
-  limit_val_batches: 6
+  limit_val_batches: 7
   enable_progress_bar: true
   precision: 16
diff --git a/configs/neus-hmvs-eagle.yaml b/configs/neus-hmvs-eagle.yaml
index 9fe6acc..aa8d150 100644
--- a/configs/neus-hmvs-eagle.yaml
+++ b/configs/neus-hmvs-eagle.yaml
@@ -5,7 +5,7 @@ seed: 42
 dataset:
   name: hmvs
   root_dir: /home/yidaw/Documents/datasets/Buildings
-  pose_path: /home/yidaw/Documents/datasets/Buildings/Building/Tile_+002_+002_AT.bin
+  pose_file: extrinsics.npy # Tile_+002_+002_AT.bin
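Note: every updated config also gains a `preprocess_only` switch so the expensive mask/depth/point-cloud preprocessing can be run once without starting training. A minimal sketch of the intended pattern (`maybe_stop_after_preprocess` is a hypothetical helper; the actual hook is the `exit()` call added in datasets/hmvs.py below):

```python
import sys

def maybe_stop_after_preprocess(config):
    """Stop after the dataset cache has been written, if the config asks for it."""
    if getattr(config, 'preprocess_only', False):
        print('Finished preprocessing; exiting before training.')
        sys.exit(0)
```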
   pcd_path: /home/yidaw/Documents/datasets/Buildings/Building/building_interest.ply # Bridge/Tile_+004_+008_AT.ply
   mesh_outpath: false
   img_downscale: 4 # better with more than 1000 x 1000 resolution
diff --git a/datasets/colmap.py b/datasets/colmap.py
index 60b3f03..a978a28 100644
--- a/datasets/colmap.py
+++ b/datasets/colmap.py
@@ -88,6 +88,7 @@ def setup(self, config, split):
         all_c2w, all_images, all_fg_masks, all_depths, all_depth_masks, all_vis_masks = [], [], [], [], [], []
 
         pts3d = read_points3d_binary(os.path.join(self.config.root_dir, 'sparse/0/points3D.bin'))
+        pts3d_rgb = np.array([pts3d[k].rgb for k in pts3d])
         pts3d = np.array([pts3d[k].xyz for k in pts3d])
 
         for i, d in enumerate(imdata.values()):
@@ -100,7 +101,7 @@ def setup(self, config, split):
             if not os.path.exists(img_path):
                 os.makedirs(os.path.join(self.config.root_dir, f"images_{self.config.img_downscale}"), exist_ok=True)
                 img_path = os.path.join(self.config.root_dir, 'images', d.name)
-                img = Image.open(img_path)
+                img = Image.open(img_path.replace('JPG', 'jpg'))
                 if img.size[0] != w or img.size[1] != h:
                     img = img.resize(img_wh, Image.BICUBIC)
                 img.save(os.path.join(self.config.root_dir, f"images_{self.config.img_downscale}", d.name))
@@ -135,7 +136,7 @@ def setup(self, config, split):
             import open3d as o3d
             # NOTE: save the point cloud using point cloud
             pcd = o3d.geometry.PointCloud()
-            mesh_init_path = os.path.join(self.config.root_dir, 'sparse/0/points3D.obj')
+            mesh_init_path = os.path.join(self.config.root_dir, 'sparse/0/points3D_mesh.ply')
             if os.path.exists(mesh_init_path):
                 mesh_o3d = o3d.t.geometry.TriangleMesh.from_legacy(o3d.io.read_triangle_mesh(mesh_init_path))
             else:
@@ -143,6 +144,7 @@ def setup(self, config, split):
                 os.makedirs(mesh_dir, exist_ok=True)
                 if not os.path.exists(os.path.join(mesh_dir, 'layout_pcd_gt.ply')):
                     pcd.points = o3d.utility.Vector3dVector(pts3d)
+                    pcd.colors = o3d.utility.Vector3dVector(pts3d_rgb)
                 else:
                     pcd = o3d.io.read_point_cloud(os.path.join(mesh_dir, 'layout_pcd_gt.ply'))
             # pts3d = torch.from_numpy(pts3d).float()
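Note: COLMAP's `pts3d[k].rgb` is uint8 in [0, 255], while values assigned to `pcd.colors` via `o3d.utility.Vector3dVector` are interpreted as float RGB in [0, 1], so the saved cloud will likely render near-white unless the values are rescaled. A hedged sketch of the normalization this assignment presumably wants (synthetic stand-in arrays):

```python
import numpy as np
import open3d as o3d

pts3d = np.random.rand(100, 3)                    # stand-in for COLMAP xyz
pts3d_rgb = np.random.randint(0, 256, (100, 3))   # stand-in for COLMAP rgb (uint8)

pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(pts3d)
# Open3D expects colors as floats in [0, 1]; divide the uint8 values by 255.
pcd.colors = o3d.utility.Vector3dVector(pts3d_rgb.astype(np.float64) / 255.0)
```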
diff --git a/datasets/hmvs.py b/datasets/hmvs.py
index 83b6f8a..33d3a58 100644
--- a/datasets/hmvs.py
+++ b/datasets/hmvs.py
@@ -16,7 +16,7 @@ read_cameras_binary, read_images_binary, read_points3d_binary
 from models.ray_utils import get_ray_directions
 from utils.misc import get_rank
-from utils.pose_utils import align_global, get_center, normalize_poses, create_spheric_poses
+from utils.pose_utils import align_global, get_center, normalize_poses, create_spheric_poses, bin2cams
 import os
 import copy
@@ -25,50 +25,6 @@
 import argparse
 
 
-def bin2camera(work_space, bin_file):
-    """convert r3d bin data to camera ex/intrinsics"""
-    try:
-        cam_intrinsics, cam_rotations, cam_centers, resolutions, fns = R3DParser.LoadR3DBinDataset(work_space, bin_file)
-    except:
-        cam_intrinsics, cam_rotations, cam_centers, resolutions, fns = R3DUtil.LoadR3DBinDataset(work_space, bin_file)
-    cam_intrinsics = cam_intrinsics.reshape(-1, 3, 3)
-    resolutions = resolutions.reshape(-1, 2)
-    cam_rotations = cam_rotations.reshape(-1, 3, 3)
-    cam_centers = cam_centers.reshape(-1, 3)
-    extrinsics = np.zeros((len(fns), 4, 4)).astype("float32")
-    extrinsics[:, :3, :3] = cam_rotations
-    extrinsics[:, :3, 3] = cam_centers
-    extrinsics[:, 3, 3] = 1
-
-    intrinsics = []
-    for i in range(len(fns)):
-        intrinsic = {
-            'width': 0,
-            'height': 0,
-            'f': 0,
-            'cx': 0,
-            'cy': 0,
-            'b1': 0,
-            'b2': 0,
-            'k1': 0,
-            'k2': 0,
-            'k3': 0,
-            'k4': 0,
-            'p1': 0,
-            'p2': 0,
-            'p3': 0,
-            'p4': 0,
-        }
-        cam_intrinsic = cam_intrinsics[i]
-        intrinsic["width"] = resolutions[i][0]
-        intrinsic["height"] = resolutions[i][1]
-        intrinsic["cx"] = cam_intrinsic[0, 2]  # - resolutions[i][0] / 2
-        intrinsic["cy"] = cam_intrinsic[1, 2]  # - resolutions[i][1] / 2
-        intrinsic["f"] = cam_intrinsic[0, 0]
-        intrinsics.append(copy.deepcopy(intrinsic))
-    return extrinsics, intrinsics, fns
-
-
 class ColmapDatasetBase():
     # the data only has to be processed once
     initialized = False
@@ -80,43 +36,59 @@ def setup(self, config, split):
         self.rank = get_rank()
 
         if not ColmapDatasetBase.initialized:
-            try:
-                try:
-                    import R3DParser
-                except:
-                    import R3DUtil
-                assert os.path.isfile(self.config.pose_path)
-                all_c2w, intrinsics, fns = bin2camera(self.config.root_dir,
-                                                      self.config.pose_path)
-            except:
-                all_c2w, intrinsics = np.load(os.path.join(self.config.root_dir,
-                                              'extrinsics.npy'), allow_pickle=True), np.load(os.path.join(self.config.root_dir,
-                                              'intrinsics.npy'), allow_pickle=True)
+            self.pose_path = os.path.join(self.config.root_dir,
+                                          self.config.pose_file)
+            assert os.path.isfile(self.pose_path)
+            if self.config.pose_file.endswith('.bin'):
+                all_c2w, intrinsics, fns = bin2cams(self.config.root_dir,
+                                                    self.config.pose_file)
+            elif self.config.pose_file.endswith('.npy'):
+                all_c2w, intrinsics = np.load(self.pose_path,
+                                              allow_pickle=True), np.load(
+                                                  os.path.join(
+                                                      self.config.root_dir,
+                                                      'intrinsics.npy'),
+                                                  allow_pickle=True)
             try:
+                # Waymo perception dataset
+                print(all_c2w[()]['FRONT'].shape[0], 'images')
                 try:
-                    all_c2w = np.concatenate((all_c2w[()]['FRONT'], all_c2w[()]['FRONT_LEFT'], all_c2w[()]['FRONT_RIGHT']), axis=0)
-                    intrinsics = np.concatenate((intrinsics[()]['FRONT'], intrinsics[()]['FRONT_LEFT'], intrinsics[()]['FRONT_RIGHT']), axis=0)
+                    all_c2w = np.concatenate(
+                        (all_c2w[()]['FRONT'], all_c2w[()]['FRONT_LEFT'],
+                         all_c2w[()]['FRONT_RIGHT']),
+                        axis=0)
+                    intrinsics = np.concatenate(
+                        (intrinsics[()]['FRONT'],
+                         intrinsics[()]['FRONT_LEFT'],
+                         intrinsics[()]['FRONT_RIGHT']),
+                        axis=0)
                     print('Using', colored('front 3', 'blue'), 'cameras')
                 except:
                     all_c2w = all_c2w[()]['FRONT']
                     intrinsics = intrinsics[()]['FRONT']
-                    print('Using', colored('front', 'blue'), 'camera alone')
+                    print('Using', colored('front', 'blue'),
+                          'camera alone')
             except:
+                # LiAuto dataset
                 print('Using', colored('all', 'blue'), 'available cameras')
 
-            fns = sorted(os.listdir(os.path.join(self.config.root_dir,
-                                                 'images')))
-            fns = [os.path.join(self.config.root_dir, 'images', fn) for fn in fns]
+            fns = sorted(
+                os.listdir(os.path.join(self.config.root_dir, 'images')))
+            fns = [
+                os.path.join(self.config.root_dir, 'images', fn)
                for fn in fns
+            ]
 
             mask_ori_dir = os.path.join(self.config.root_dir, 'sky_mask')
             mask_dir = os.path.join(self.config.root_dir,
                                     f'sky_mask_{self.config.img_downscale}')
             # masks labeling invalid regions
-            vis_mask_ori_dir = os.path.join(self.config.root_dir, f'vis_mask')
+            vis_mask_ori_dir = os.path.join(self.config.root_dir, f'masks')
             vis_mask_dir = os.path.join(self.config.root_dir,
-                                        f'vis_mask_{self.config.img_downscale}')
+                                        f'masks_{self.config.img_downscale}')
             # masks labeling dynamic objects
-            dynamic_mask_ori_dir = os.path.join(self.config.root_dir, f'moving_vehicle_bound')
-            dynamic_mask_dir = os.path.join(self.config.root_dir,
-                                            f'moving_vehicle_bound_{self.config.img_downscale}')
+            dyn_mask_ori_dir = os.path.join(self.config.root_dir,
+                                            f'moving_vehicle_bound')
+            dyn_mask_dir = os.path.join(
+                self.config.root_dir,
+                f'moving_vehicle_bound_{self.config.img_downscale}')
             # mesh folder
             mesh_dir = os.path.join(self.config.root_dir, 'meshes')
             os.makedirs(mesh_dir, exist_ok=True)
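Note: the `.npy` branch loads `extrinsics.npy`/`intrinsics.npy` with `allow_pickle=True`, and the later `all_c2w[()]['FRONT']` access shows they may be 0-d object arrays holding per-camera dicts (Waymo-style) rather than plain arrays. A hedged sketch of writing files in that assumed layout:

```python
import numpy as np

# Per-camera dicts keyed by camera name, as the Waymo branch of the loader expects.
extrinsics = {'FRONT': np.eye(4)[None].repeat(10, axis=0),        # (N, 4, 4) c2w
              'FRONT_LEFT': np.eye(4)[None].repeat(10, axis=0),
              'FRONT_RIGHT': np.eye(4)[None].repeat(10, axis=0)}
intrinsics = {k: np.eye(3)[None].repeat(10, axis=0) for k in extrinsics}  # (N, 3, 3)
np.save('extrinsics.npy', extrinsics)   # a dict pickles into a 0-d object array
np.save('intrinsics.npy', intrinsics)   # read back via np.load(...)[()]['FRONT']
```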
@@ -154,9 +126,9 @@ def setup(self, config, split):
                 w, h = int(W * self.factor), int(H * self.factor)
                 img_wh = (w, h)
                 intrinsic = intrinsics[i]
-                intrinsic[:2,:] *= self.factor
-                fx = intrinsic[0, 0] # camdata[1].params[0] * self.factor
-                fy = intrinsic[1, 1] # camdata[1].params[0] * self.factor
+                intrinsic[:2, :] *= self.factor
+                fx = intrinsic[0, 0]  # camdata[1].params[0] * self.factor
+                fy = intrinsic[1, 1]  # camdata[1].params[0] * self.factor
                 cx = intrinsic[0, 2]
                 cy = intrinsic[1, 2]
             else:
@@ -168,10 +140,13 @@ def setup(self, config, split):
                     w, h = self.config.img_wh
                 elif 'img_downscale' in self.config:
                     w, h = int(W / self.config.img_downscale +
-                               0.5), int(H / self.config.img_downscale + 0.5)
+                               0.5), int(H /
+                                         self.config.img_downscale +
+                                         0.5)
                 else:
                     raise KeyError(
-                        "Either img_wh or img_downscale should be specified.")
+                        "Either img_wh or img_downscale should be specified."
+                    )
                 img_wh = (w, h)
                 self.factor = w / W
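Note: when images are resized by `self.factor`, only the first two rows of the pinhole matrix scale, which is exactly what `intrinsic[:2, :] *= self.factor` does. A small sketch of the equivalent explicit update for a zero-skew K (hypothetical helper, shown for clarity):

```python
import numpy as np

def scale_intrinsics(K: np.ndarray, factor: float) -> np.ndarray:
    """Rescale a 3x3 pinhole matrix for an image resized by `factor`."""
    K = K.copy()
    K[0, 0] *= factor  # fx
    K[1, 1] *= factor  # fy
    K[0, 2] *= factor  # cx
    K[1, 2] *= factor  # cy
    return K
```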
@@ -193,52 +168,75 @@ def setup(self, config, split):
                     img.save(img_path)
                 # notice that to_tensor rescales the input in range [0, 1]
                 # img = TF.to_tensor(img).permute(1, 2, 0)[..., :3]
-                img = TF.pil_to_tensor(img).permute(1, 2, 0)[..., :3] / 255.0 # (4, h, w) => (h, w, 4) and normalize it
+                img = TF.pil_to_tensor(img).permute(
+                    1, 2, 0)[..., :3] / 255.0  # (4, h, w) => (h, w, 4) and normalize it
                 img = img.to(
                     self.rank
                 ) if self.config.load_data_on_gpu else img.cpu()
 
-                # NOTE: Visual masks
+                # NOTE: Visual masks
                 vis_mask_path = os.path.join(
-                    vis_mask_dir, 'mask' + fns[i].split("/")[-1][6:-3] + 'png')
-                if not os.path.exists(vis_mask_path) and os.path.exists(vis_mask_path.replace(f"{vis_mask_dir}",f"{vis_mask_ori_dir}")):
-                    os.makedirs(os.path.dirname(vis_mask_path), exist_ok=True)
+                    vis_mask_dir,
+                    'mask' + fns[i].split("/")[-1][6:-3] + 'png')
+                if not os.path.exists(vis_mask_path) and os.path.exists(
+                        vis_mask_path.replace(f"{vis_mask_dir}",
+                                              f"{vis_mask_ori_dir}")):
+                    os.makedirs(os.path.dirname(vis_mask_path),
+                                exist_ok=True)
                     vis_mask_path = vis_mask_path.replace(
-                        f"{vis_mask_dir}",
-                        f"{vis_mask_ori_dir}")
+                        f"{vis_mask_dir}", f"{vis_mask_ori_dir}")
                 if os.path.exists(vis_mask_path):
-                    self.num_cams = np.max([self.num_cams, int(fns[i].split("/")[-1][7:-4])+1])
+                    self.num_cams = np.max([
+                        self.num_cams,
+                        int(fns[i].split("/")[-1][7:-4]) + 1
+                    ])
                     vis_mask = Image.open(vis_mask_path)
                     if vis_mask.size[0] != w or vis_mask.size[1] != h:
                         vis_mask = vis_mask.resize(img_wh, Image.NEAREST)
                         vis_mask_path = vis_mask_path.replace(
-                            f"{vis_mask_ori_dir}",
-                            f"{vis_mask_dir}")
+                            f"{vis_mask_ori_dir}", f"{vis_mask_dir}")
                         vis_mask.save(vis_mask_path)
                     vis_mask = TF.to_tensor(vis_mask)[0]
                 else:
-                    vis_mask = torch.ones_like(img[..., 0], device=img.device)
+                    vis_mask = torch.ones_like(img[..., 0],
+                                               device=img.device)
 
                 # NOTE: dynamic object mask
-                dynamic_mask_path = os.path.join(
-                    dynamic_mask_dir, fns[i].split("/")[-1][:-3] + 'png')
-                if not os.path.exists(dynamic_mask_path):
-                    os.makedirs(os.path.dirname(dynamic_mask_path), exist_ok=True)
-                    dynamic_mask_path = dynamic_mask_path.replace(
-                        f"{dynamic_mask_dir}",
-                        f"{dynamic_mask_ori_dir}")
-                if os.path.exists(dynamic_mask_path):
-                    dynamic_mask = Image.open(dynamic_mask_path)
-                    if dynamic_mask.size[0] != w or dynamic_mask.size[1] != h:
-                        dynamic_mask = dynamic_mask.resize(img_wh, Image.NEAREST)
-                        dynamic_mask_path = dynamic_mask_path.replace(
-                            f"{dynamic_mask_ori_dir}",
-                            f"{dynamic_mask_dir}")
-                        dynamic_mask.save(dynamic_mask_path)
-                    vis_mask *= (1 - TF.to_tensor(dynamic_mask)[0])
+                dyn_mask_path = os.path.join(
+                    dyn_mask_dir, fns[i].split("/")[-1][:-3] + 'png')
+                if not os.path.exists(dyn_mask_path):
+                    os.makedirs(os.path.dirname(dyn_mask_path),
+                                exist_ok=True)
+                    dyn_mask_path = dyn_mask_path.replace(
+                        f"{dyn_mask_dir}", f"{dyn_mask_ori_dir}")
+                if os.path.exists(dyn_mask_path):
+                    dyn_mask = Image.open(dyn_mask_path)
+                    if dyn_mask.size[0] != w or dyn_mask.size[1] != h:
+                        dyn_mask = dyn_mask.resize(img_wh, Image.NEAREST)
+                        dyn_mask_path = dyn_mask_path.replace(
+                            f"{dyn_mask_ori_dir}",
+                            f"{dyn_mask_dir}")
+                        dyn_mask.save(dyn_mask_path)
+                    vis_mask *= (1 - TF.to_tensor(dyn_mask)[0])
+
+                # save masks for nerfstudio training
+                valid_mask_dir = 'valid_mask'
+                os.makedirs(os.path.join(self.config.root_dir,
+                                         valid_mask_dir),
+                            exist_ok=True)
+                # keep the tensor intact; build a separate PIL image for saving
+                vis_mask_img = Image.fromarray(
+                    vis_mask.cpu().detach().numpy().astype(np.uint8) * 255)
+                vis_mask_img.save(fns[i].replace(f"{img_folder}",
+                                                 f"/{valid_mask_dir}"))
 
                 depth_folder = 'lidar_depth'
-                if self.config.apply_depth and os.path.exists(fns[i].replace(f"{img_folder}", f"/{depth_folder}")):
-                    depth_format = '.npy' # 'tif' 'npy' 'pth' 'png'
+                if self.config.apply_depth and os.path.exists(
+                        os.path.dirname(fns[i].replace(
+                            f"{img_folder}", f"/{depth_folder}"))):
+                    depth_format = '.npy'  # 'tif' 'npy' 'pth' 'png'
                     depth_path = fns[i].replace(
                         f"{img_folder}",
                         f"/{depth_folder}_{self.config.img_downscale}"
@@ -247,16 +245,18 @@ def setup(self, config, split):
                         os.makedirs(os.path.dirname(depth_path),
                                     exist_ok=True)
                         depth_path = fns[i].replace(
-                            f"{img_folder}",
-                            f"/{depth_folder}").replace(".png", depth_format)
+                            f"{img_folder}", f"/{depth_folder}").replace(
+                                ".png", depth_format)
                         # loading depth
                         if depth_format == '.tiff' or depth_format == '.png':
                             depth = Image.open(depth_path)
                         elif depth_format == '.npy':
                             depth = np.load(depth_path)
+                            Image.fromarray(depth.astype(np.uint8)).save(
+                                depth_path.replace('.npy', '.png'))
                             depth = Image.fromarray(depth)
                         elif depth_path == '.pth':
-                            depth = torch.load(depth_path)[...,3]
+                            depth = torch.load(depth_path)[..., 3]
                             depth = Image.fromarray(depth.numpy())
                         depth_path = fns[i].replace(
                             f"{img_folder}",
@@ -270,49 +270,67 @@ def setup(self, config, split):
                         elif depth_format == '.npy':
                             np.save(depth_path, depth)
                     depth = TF.pil_to_tensor(depth).permute(
-                        1, 2, 0) # / self.config.cam_downscale
+                        1, 2, 0)  # / self.config.cam_downscale
                     depth = depth.to(
                         self.rank
                     ) if self.config.load_data_on_gpu else depth.cpu()
                 elif self.config.apply_depth:
-                    print(colored(fns[i].replace(f"{img_folder}", f"/{depth_folder}") + ' does not exist', 'red'))
-                    depth = torch.zeros_like(img[..., 0], device=img.device) # (h, w)
+                    print(
+                        colored(
+                            fns[i].replace(f"{img_folder}",
+                                           f"/{depth_folder}") +
+                            ' does not exist', 'red'))
+                    depth = torch.zeros_like(img[..., 0],
+                                             device=img.device)  # (h, w)
                 else:
                     depth = torch.zeros_like(img[..., 0],
                                              device=img.device)  # (h, w)
                 depth_mask = (depth > 0.0).to(bool)
 
                 # saving point cloud from depth
-                dep_max = 300.0 # lidar limit
+                dep_max = 300.0  # lidar limit
                 import open3d as o3d
                 if depth.max() != 0.0:
                     # NOTE: removing dynamic objects
                     inf_mask = (depth == float("Inf"))
                     depth[inf_mask] = 0
-                    depth_o3d = o3d.geometry.Image(depth.numpy() * (1-(np.array(dynamic_mask) / 255.0).astype(np.float32)[..., np.newaxis]))
+                    depth_o3d = o3d.geometry.Image(
+                        depth.numpy() *
+                        (1 - (np.array(dyn_mask) / 255.0).astype(
+                            np.float32)[..., np.newaxis]))
                     img_o3d = o3d.geometry.Image(img.numpy())
                     rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
                         img_o3d,
                         depth_o3d,
-                        depth_trunc=dep_max-0.01,
+                        depth_trunc=dep_max - 0.01,
                         depth_scale=1,
                         convert_rgb_to_intensity=False)
                     intrin_o3d = o3d.camera.PinholeCameraIntrinsic(
                         w, h, fx, fy, cx, cy)
                     # Open3D uses world-to-camera extrinsics
                     pts_frm = o3d.geometry.PointCloud.create_from_depth_image(
-                        depth_o3d, intrinsic=intrin_o3d, extrinsic=np.linalg.inv(all_c2w[i]), depth_scale=1)
-                    o3d.io.write_point_cloud(os.path.join(mesh_dir, f"layout_depth_frame_{str(i + 1)}.ply"), pts_frm)
+                        depth_o3d,
+                        intrinsic=intrin_o3d,
+                        extrinsic=np.linalg.inv(all_c2w[i]),
+                        depth_scale=1)
+                    o3d.io.write_point_cloud(
+                        os.path.join(
+                            mesh_dir,
+                            f"layout_depth_frame_{str(i + 1)}.ply"),
+                        pts_frm)
                     pts_clt.points = (o3d.utility.Vector3dVector(
-                        np.concatenate((np.array(pts_clt.points), np.array(pts_frm.points)),
+                        np.concatenate((np.array(
+                            pts_clt.points), np.array(pts_frm.points)),
                                        axis=0)))
                 all_vis_masks.append(vis_mask)
                 all_images.append(img)
-            print('Training with', colored(self.num_imgs, 'blue'), 'shot by', colored(self.num_cams, 'blue'), 'cameras')
+            print('Training with', colored(self.num_imgs, 'blue'), 'shot by',
+                  colored(self.num_cams, 'blue'), 'cameras')
             # pts_clt = pts_clt.voxel_down_sample(voxel_size=0.01)
-            o3d.io.write_point_cloud(os.path.join(mesh_dir, 'layout_depth_clt.ply'), pts_clt)
+            o3d.io.write_point_cloud(
+                os.path.join(mesh_dir, 'layout_depth_clt.ply'), pts_clt)
 
             # ICP registration on purpose of coordinates alignment
             path_pts_target = os.path.join(mesh_dir, 'dlo_map.ply')
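Note: Open3D's `create_from_depth_image` takes a world-to-camera matrix, hence the `np.linalg.inv(all_c2w[i])` above; the per-frame clouds are then concatenated into `pts_clt`. A minimal standalone sketch of that convention, using synthetic inputs rather than the repo's data:

```python
import numpy as np
import open3d as o3d

h, w = 540, 960
fx = fy = 500.0
cx, cy = w / 2, h / 2
depth = o3d.geometry.Image(np.full((h, w), 10.0, dtype=np.float32))
intrin = o3d.camera.PinholeCameraIntrinsic(w, h, fx, fy, cx, cy)
c2w = np.eye(4)  # camera-to-world pose; Open3D wants its inverse
pts = o3d.geometry.PointCloud.create_from_depth_image(
    depth, intrinsic=intrin, extrinsic=np.linalg.inv(c2w), depth_scale=1)
```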
@@ -321,33 +339,42 @@ def setup(self, config, split):
                 if not os.path.exists(os.path.join(mesh_dir, 'pose_crt.npy')):
                     reg_p2p_trans = align_global(mesh_dir, path_pts_target)
                 else:
-                    reg_p2p_trans = np.load(os.path.join(mesh_dir, 'pose_crt.npy'))
+                    reg_p2p_trans = np.load(
+                        os.path.join(mesh_dir, 'pose_crt.npy'))
                 all_c2w = np.matmul(reg_p2p_trans, all_c2w)
             else:
-                print(colored('Skip aligning to a given coordinates, continue', 'green'))
+                print(
+                    colored('Skip aligning to other given coordinates, continue',
+                            'green'))
 
             # Load point cloud which might be scanned
             if self.config.apply_depth:
-                print(colored('point cloud fused by per-frame depth', 'green'))
+                print(colored('Point cloud fused by per-frame depth', 'green'))
                 pts3d = np.array(pts_clt.points)
-            elif self.config.pcd_path:
-                assert os.path.isfile(self.config.pcd_path)
-                print(colored('point cloud loaded with a given fused cloud', 'green'))
-                if self.config.pcd_path.endswith('.npz'):
+            elif self.config.pcd_file:
+                self.pcd_path = os.path.join(self.config.root_dir,
+                                             self.config.pcd_file)
+                assert os.path.isfile(self.pcd_path)
+                print(
+                    colored('Point cloud loaded with a given fused cloud',
+                            'green'))
+                if self.config.pcd_file.endswith('.npz'):
                     pts3d = []
-                    pts3d_frames = np.load(self.config.pcd_path, allow_pickle=True)['pointcloud'].item()
+                    pts3d_frames = np.load(
+                        self.pcd_path,
+                        allow_pickle=True)['pointcloud'].item()
                     for id_pts, pts_frame in pts3d_frames.items():
                         pts3d.append(np.array(pts_frame))
                     pts3d = np.vstack(pts3d)[:, :3]
-                elif self.config.pcd_path.endswith('.ply'):
+                elif self.config.pcd_file.endswith('.ply'):
                     pts3d = np.asarray(
-                        o3d.io.read_point_cloud(self.config.pcd_path).points)
+                        o3d.io.read_point_cloud(self.pcd_path).points)
             else:
-                print(colored('point cloud supervision not given', 'red'))
-                pts3d = None # []
+                print(colored('Point cloud supervision not given', 'red'))
+                pts3d = None  # []
             """
             pts3d = np.asarray(
-                o3d.io.read_point_cloud(self.config.pcd_path).points)
+                o3d.io.read_point_cloud(self.pcd_path).points)
             """
 
             if pts3d is not None:
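Note: for `pcd_file: pointcloud.npz` the loader does `np.load(...)['pointcloud'].item()` and stacks the per-frame arrays, keeping only xyz. A hedged sketch of producing a compatible file (synthetic frame data; exact column layout beyond the first three is an assumption):

```python
import numpy as np

frames = {0: np.random.rand(1000, 4),   # per-frame points; first 3 columns are xyz
          1: np.random.rand(800, 4)}
np.savez('pointcloud.npz', pointcloud=frames)  # dict pickles into a 0-d object array

loaded = np.load('pointcloud.npz', allow_pickle=True)['pointcloud'].item()
pts3d = np.vstack([np.array(p) for p in loaded.values()])[:, :3]
```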
@@ -356,73 +383,64 @@ def setup(self, config, split):
                 pcd.points = o3d.utility.Vector3dVector(pts3d)
                 pts3d = torch.from_numpy(pts3d).float()
                 """
-                if not os.path.exists(os.path.join(mesh_dir, 'layout_pcd_gt.ply')):
+                if not os.path.exists(
+                        os.path.join(mesh_dir, 'layout_pcd_gt.ply')):
                     # pcd = pcd.voxel_down_sample(voxel_size=0.002)
                     pcd.estimate_normals(
-                        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.05, max_nn=20))
-                    o3d.io.write_point_cloud(os.path.join(mesh_dir, 'layout_pcd_gt.ply'), pcd)
+                        search_param=o3d.geometry.KDTreeSearchParamHybrid(
+                            radius=0.05, max_nn=20))
+                    o3d.io.write_point_cloud(
+                        os.path.join(mesh_dir, 'layout_pcd_gt.ply'), pcd)
                 else:
-                    pcd.normals = o3d.io.read_point_cloud(os.path.join(mesh_dir, 'layout_pcd_gt.ply')).normals
+                    pcd.normals = o3d.io.read_point_cloud(
+                        os.path.join(mesh_dir, 'layout_pcd_gt.ply')).normals
                 """
 
             # Poisson surface on top of given GT points
             if os.path.exists(path_pts_target):
-                print(colored('Rasterizing on mesh presented in target coordinates', 'green'))
-                if os.path.exists(os.path.join(mesh_dir, 'layout_depth_clt_transform.ply')):
-                    pcd = o3d.io.read_point_cloud(os.path.join(mesh_dir, 'layout_depth_clt_transform.ply'))
-                    mesh_poisson_path = os.path.join(mesh_dir, 'layout_mesh_ps_transform.ply')
+                print(
+                    colored(
+                        'Rasterizing on mesh presented in target coordinates',
+                        'green'))
+                if os.path.exists(
+                        os.path.join(mesh_dir,
+                                     'layout_depth_clt_transform.ply')):
+                    pcd = o3d.io.read_point_cloud(
+                        os.path.join(mesh_dir,
+                                     'layout_depth_clt_transform.ply'))
+                    mesh_poisson_path = os.path.join(
+                        mesh_dir, 'layout_mesh_ps_transform.ply')
                 else:
                     pcd = o3d.io.read_point_cloud(path_pts_target)
-                    mesh_poisson_path = os.path.join(mesh_dir, 'layout_mesh_ps_gt.ply')
+                    mesh_poisson_path = os.path.join(
+                        mesh_dir, 'layout_mesh_ps_gt.ply')
             else:
-                mesh_poisson_path = os.path.join(mesh_dir, 'layout_mesh_ps.ply')
+                mesh_poisson_path = os.path.join(mesh_dir,
+                                                 'layout_mesh_ps.ply')
                 # mesh_poisson_path = os.path.join(mesh_dir, 'layout_mesh_nksr.ply')
-                print(colored('Rasterizing on mesh presented in original coordinates', 'green'))
-                pcd = o3d.io.read_point_cloud(os.path.join(mesh_dir, 'layout_depth_clt.ply'))
+                print(
+                    colored(
+                        'Rasterizing on mesh presented in original coordinates',
+                        'green'))
+                pcd = o3d.io.read_point_cloud(
+                    os.path.join(mesh_dir, 'layout_depth_clt.ply'))
             if not os.path.exists(mesh_poisson_path):
                 pcd.estimate_normals(
-                    search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.02, max_nn=20))
+                    search_param=o3d.geometry.KDTreeSearchParamHybrid(
+                        radius=0.02, max_nn=20))
                 # pcd.orient_normals_towards_camera_location(camera_location=np.mean(all_c2w[:,:3,3] + np.array([0,0,100]), axis=0))
-                print('Extracting', colored(
-                    'Poisson surface',
-                    'blue'), 'on top of given GT points')
+                print('Extracting', colored('Poisson surface', 'blue'),
+                      'on top of given GT points')
                 with o3d.utility.VerbosityContextManager(
                         o3d.utility.VerbosityLevel.Debug) as cm:
                     # Poisson surface
                     mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
-                        pcd, depth=11, linear_fit=True) # safe with 10
+                        pcd, depth=11, linear_fit=True)  # safe with 10
                     densities = np.asarray(densities)
-                    vertices_to_remove = densities < np.quantile(densities, 0.02)
+                    vertices_to_remove = densities < np.quantile(
+                        densities, 0.02)
                     mesh.remove_vertices_by_mask(vertices_to_remove)
-                """
-                print('Extracting', colored(
-                    'NKSR surface',
-                    'blue'), 'on top of given GT points')
-                import nksr
-                device = torch.device("cuda:0")
-                reconstructor = nksr.Reconstructor(device)
-                reconstructor.chunk_tmp_device = torch.device("cpu")
-
-                input_xyz = torch.from_numpy(np.asarray(pcd.points)).float().to(device)
-                input_sensor = torch.from_numpy(np.asarray(pcd.normals)).float().to(device)
-
-                field = reconstructor.reconstruct(
-                    input_xyz, sensor=input_sensor, detail_level=None,
-                    # Minor configs for better efficiency (not necessary)
-                    approx_kernel_grad=True, solver_tol=1e-4, fused_mode=True,
-                    # Chunked reconstruction (if OOM)
-                    # chunk_size=51.2,
-                    preprocess_fn=nksr.get_estimate_normal_preprocess_fn(64, 85.0)
-                )
-
-                # (Optional) Convert to CPU for mesh extraction
-                # field.to_("cpu")
-                # reconstructor.network.to("cpu")
-
-                mesh_nksr = field.extract_dual_mesh(mise_iter=1)
-                mesh = o3d.geometry.TriangleMesh(mesh_nksr.v, mesh_nksr.f)
-                """
             else:
                 mesh = o3d.io.read_triangle_mesh(mesh_poisson_path)
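Note: `create_from_point_cloud_poisson` returns per-vertex densities; trimming the lowest 2% quantile removes the low-support geometry Poisson tends to hallucinate far from the input points. The recipe in isolation (hypothetical helper; assumes the cloud already has normals):

```python
import numpy as np
import open3d as o3d

def poisson_trimmed(pcd: o3d.geometry.PointCloud, depth: int = 11, q: float = 0.02):
    """Poisson reconstruction with low-density vertices removed (pcd needs normals)."""
    mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
        pcd, depth=depth, linear_fit=True)
    densities = np.asarray(densities)
    mesh.remove_vertices_by_mask(densities < np.quantile(densities, q))
    return mesh
```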
@@ -437,25 +455,30 @@ def setup(self, config, split):
             for i, d in enumerate(all_c2w):
                 if self.split in ['train', 'val']:
                     if self.config.apply_depth:
-                        pts_clt = o3d.t.geometry.PointCloud.from_legacy(o3d.io.read_point_cloud(os.path.join(mesh_dir, 'layout_depth_clt.ply')))
-                        depth_reproj = pts_clt.project_to_depth_image(w,
-                                                                      h,
-                                                                      intrinsics[i],
-                                                                      np.linalg.inv(all_c2w[i]),
-                                                                      depth_scale=1.0,
-                                                                      depth_max=dep_max)
-                        depth_reproj = Image.fromarray(np.asarray(depth_reproj.to_legacy()))
+                        pts_clt = o3d.t.geometry.PointCloud.from_legacy(
+                            o3d.io.read_point_cloud(
+                                os.path.join(mesh_dir,
+                                             'layout_depth_clt.ply')))
+                        depth_reproj = pts_clt.project_to_depth_image(
+                            w,
+                            h,
+                            intrinsics[i],
+                            np.linalg.inv(all_c2w[i]),
+                            depth_scale=1.0,
+                            depth_max=dep_max)
+                        depth_reproj = Image.fromarray(
+                            np.asarray(depth_reproj.to_legacy()))
 
                         # save reprojected depth
                         depth_reproj_path = os.path.join(
-                             self.config.root_dir,
-                             f"depth_reproj_{self.config.img_downscale}")
+                            self.config.root_dir,
+                            f"depth_reproj_{self.config.img_downscale}")
                         os.makedirs(depth_reproj_path, exist_ok=True)
                         # visualize the hit distance (depth)
                         depth_reproj.save(
                             os.path.join(depth_reproj_path,
-                                        fns[i].split("/")[-1][:-3] + 'tiff'))
+                                         fns[i].split("/")[-1][:-3] + 'tiff'))
                         depth_reproj = TF.pil_to_tensor(depth_reproj).permute(
                             1, 2, 0)
@@ -465,7 +488,11 @@ def setup(self, config, split):
                         # Rays are 6D vectors with origin and ray direction.
                         # Here we use a helper function to create rays
-                        rays_mesh = scene.create_rays_pinhole(intrinsic_matrix=intrinsic, extrinsic_matrix=np.linalg.inv(all_c2w[i]), width_px=w, height_px=h)
+                        rays_mesh = scene.create_rays_pinhole(
+                            intrinsic_matrix=intrinsic,
+                            extrinsic_matrix=np.linalg.inv(all_c2w[i]),
+                            width_px=w,
+                            height_px=h)
                         # Compute the ray intersections.
                         rays_rast = scene.cast_rays(rays_mesh)
@@ -473,16 +500,13 @@ def setup(self, config, split):
                         norm_rast = rays_rast['primitive_normals'].numpy()
 
                         # add more confident depth values from the scanners
-                        lidar_mask = (depth_reproj_mask != 0)[...,0]
+                        lidar_mask = (depth_reproj_mask != 0)[..., 0]
                         # depth_rast = np.where(lidar_mask, depth_reproj[...,0], depth_rast)
-
-                    # NOTE: foreground masks
+                    # NOTE: foreground masks
                     if self.config.apply_mask:
                         mask_path = os.path.join(
                             mask_dir, fns[i].split("/")[-1][:-3] + 'png')
-                        # mask_paths = list(filter(os.path.exists, mask_paths))
-                        # assert len(mask_paths) == 1
                         if not os.path.exists(mask_path):
                             os.makedirs(os.path.join(
                                 self.config.root_dir,
@@ -492,7 +516,7 @@ def setup(self, config, split):
                                 mask_ori_dir, fns[i].split("/")[-1][:-3] + 'png')
                         if os.path.exists(mask_path):
-                            mask = Image.open(mask_path) # (H, W, 1)
+                            mask = Image.open(mask_path)  # (H, W, 1)
                             if mask.size[0] != w or mask.size[1] != h:
                                 mask = mask.resize(img_wh, Image.NEAREST)
                                 mask.save(
@@ -500,14 +524,22 @@ def setup(self, config, split):
                                         self.config.root_dir,
                                         f"sky_mask_{self.config.img_downscale}",
                                         fns[i].split("/")[-1][:-3] + 'png'))
-                            mask = TF.to_tensor(mask)[0] # TF.pil_to_tensor does not rescale the input PIL mask
+                            mask = TF.to_tensor(
+                                mask
+                            )[0]  # TF.pil_to_tensor does not rescale the input PIL mask
                             mask_fg = torch.ones_like(mask, device=img.device)
-                            mask_fg[mask == 1] = 0 # check whether we use 1 or 255
+                            mask_fg[mask ==
+                                    1] = 0  # check whether we use 1 or 255
                            mask = mask_fg
                        elif inf_mask is not None:
                            mask = (1 - inf_mask.to(int)).to(bool)
                        else:
-                            print(colored('Foreground mask not available, need to disable apply_mask', 'red'))
+                            print(
+                                colored(
+                                    'Foreground mask not available, need to disable apply_mask',
+                                    'red'))
+                        if mask.shape[-1] == 1:
+                            mask = mask[..., 0]
                     else:
                         mask = torch.ones_like(img[..., 0], device=img.device)
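Note: the sky masks mark background, so the loader inverts them into foreground masks, and the comment above flags the open question of whether sky pixels are coded as 1 or 255. A hedged sketch that tolerates both conventions (hypothetical helper, not the repo's code):

```python
import torch

def sky_to_fg_mask(sky_mask: torch.Tensor) -> torch.Tensor:
    """Invert a sky mask (sky pixels marked 1 or 255) into a foreground mask."""
    sky = sky_mask.float()
    if sky.max() > 1.0:          # 0/255-coded masks
        sky = sky / 255.0
    return (sky < 0.5).float()   # 1 where the pixel is not sky
```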
@@ -520,31 +552,45 @@ def setup(self, config, split):
 
                         # save rasterized depth
                         depth_rast_path = os.path.join(
-                             self.config.root_dir,
-                             f"depth_rast_{self.config.img_downscale}")
+                            self.config.root_dir,
+                            f"depth_rast_{self.config.img_downscale}")
                         os.makedirs(depth_rast_path, exist_ok=True)
-                        np.save(os.path.join(depth_rast_path, fns[i].split("/")[-1][:-3] + 'npy'), depth_rast)
+                        np.save(
+                            os.path.join(depth_rast_path,
+                                         fns[i].split("/")[-1][:-3] + 'npy'),
+                            depth_rast)
 
                         # save rasterized norm
                         norm_rast_path = os.path.join(
-                             self.config.root_dir,
-                             f"norm_rast_{self.config.img_downscale}")
+                            self.config.root_dir,
+                            f"norm_rast_{self.config.img_downscale}")
                         os.makedirs(norm_rast_path, exist_ok=True)
-                        np.save(os.path.join(norm_rast_path, fns[i].split("/")[-1][:-3] + 'npy'), norm_rast)
+                        np.save(
+                            os.path.join(norm_rast_path,
+                                         fns[i].split("/")[-1][:-3] + 'npy'),
+                            norm_rast)
 
                         save_norm_dep_vis = True
+                        depth_rast_quantize = Image.fromarray(
+                            depth_rast.astype(np.uint16))
                         depth_rast = Image.fromarray(depth_rast)
                         if save_norm_dep_vis:
                             # visualize the hit distance (depth)
                             depth_rast.save(
-                                os.path.join(depth_rast_path,
+                                os.path.join(
+                                    depth_rast_path,
                                     fns[i].split("/")[-1][:-3] + 'tiff'))
+                            depth_rast_quantize.save(
+                                os.path.join(
+                                    depth_rast_path,
+                                    fns[i].split("/")[-1][:-3] + 'png'))
                             # save rasterized norm
-                            norm_rast = Image.fromarray(((norm_rast + 1) * 128).astype(np.uint8))
+                            norm_rast = Image.fromarray(
+                                ((norm_rast + 1) * 128).astype(np.uint8))
                             norm_rast_path = os.path.join(
-                                 self.config.root_dir,
-                                 f"norm_rast_{self.config.img_downscale}")
+                                self.config.root_dir,
+                                f"norm_rast_{self.config.img_downscale}")
                             os.makedirs(norm_rast_path,
                                         exist_ok=True)
                             norm_rast.save(
                                 os.path.join(
@@ -553,17 +599,24 @@ def setup(self, config, split):
                         # convert depth into tensor
                         depth_rast = TF.pil_to_tensor(depth_rast).permute(
-                            1, 2, 0) # / self.config.cam_downscale
+                            1, 2, 0)
                         depth_rast = depth_rast.to(
                             self.rank
-                        ) if self.config.load_data_on_gpu else depth_rast.cpu()
+                        ) if self.config.load_data_on_gpu else depth_rast.cpu()
-                        depth_rast_mask = (depth_rast > 0.0).to(bool) # trim points outside the contraction box off
+                        depth_rast_mask = (depth_rast > 0.0).to(
+                            bool)  # trim points outside the contraction box off
                         depth_rast_mask *= mask[..., None].to(bool)
-                        all_depths.append(depth_rast) # (h, w)
+                        all_depths.append(depth_rast)  # (h, w)
                         all_depth_masks.append(depth_rast_mask)
-                all_fg_masks.append(mask) # (h, w)
+                all_fg_masks.append(mask)  # (h, w)
+
+            print(colored('Finish preprocessing.', 'green'))
+            if self.config.preprocess_only:
+                exit()
 
             directions = torch.stack(directions, dim=0)
             all_c2w = torch.tensor(all_c2w)[:, :3].float()
@@ -590,7 +643,7 @@ def setup(self, config, split):
                 t = -get_center(pts3d)
             cam_downscale = self.config.cam_downscale
-        all_depths = [dep/cam_downscale for dep in all_depths]
+        all_depths = [dep / cam_downscale for dep in all_depths]
 
         ColmapDatasetBase.properties = {
             'num_imgs': self.num_imgs,
@@ -626,7 +679,8 @@ def setup(self, config, split):
                 self.all_c2w[:, :, 3], n_steps=self.config.n_test_traj_steps)
             """
             n_steps = torch.arange(self.num_imgs // self.num_cams)
-            self.all_c2w = self.all_c2w[n_steps*self.num_cams] # front alone when there is 6 cameras
+            self.all_c2w = self.all_c2w[
+                n_steps * self.num_cams]  # front alone when there are 6 cameras
             self.config.n_test_traj_steps = int(self.num_imgs // self.num_cams)
             self.all_images = torch.zeros(
                 (self.config.n_test_traj_steps, self.h, self.w, 3),
@@ -644,13 +698,12 @@ def setup(self, config, split):
                 (self.config.n_test_traj_steps, self.h, self.w),
                 dtype=torch.bool)
         else:
-            self.all_images, self.all_vis_masks, self.all_fg_masks, self.all_depths, self.all_depth_masks = torch.stack(
+            self.all_images, self.all_vis_masks, self.all_fg_masks = torch.stack(
                 self.all_images, dim=0).float(), torch.stack(
                     self.all_vis_masks, dim=0).float(), torch.stack(
-                        self.all_fg_masks, dim=0).float(), torch.stack(
-                            self.all_depths,
-                            dim=0).float(), torch.stack(self.all_depth_masks,
-                                                        dim=0).bool()
+                        self.all_fg_masks, dim=0).float()
+            if len(self.all_depths) > 0:
+                self.all_depths, self.all_depth_masks = torch.stack(
+                    self.all_depths, dim=0).float(), torch.stack(
+                        self.all_depth_masks, dim=0).bool()
 
         """
         # for debug use
         from models.ray_utils import get_rays
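Note: depth tensors are now stacked only when depth supervision was actually loaded, so configs with `apply_depth: false` no longer hit `torch.stack` on an empty list. The guard pattern in isolation (a sketch; variable names are illustrative):

```python
import torch

all_depths, all_depth_masks = [], []  # stays empty when apply_depth is false
if len(all_depths) > 0:
    depths = torch.stack(all_depths, dim=0).float()
    depth_masks = torch.stack(all_depth_masks, dim=0).bool()
else:
    depths, depth_masks = None, None  # consumers must check before indexing
```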
diff --git a/systems/nerf.py b/systems/nerf.py
index f0a168c..2d6cb59 100644
--- a/systems/nerf.py
+++ b/systems/nerf.py
@@ -53,8 +53,10 @@ def preprocess_data(self, batch, stage):
             rays_o, rays_d = get_rays(directions, c2w)
             rgb = self.dataset.all_images[index, y, x].view(-1, self.dataset.all_images.shape[-1]).to(self.rank)
             fg_mask = self.dataset.all_fg_masks[index, y, x].view(-1).to(self.rank)
-            depth = self.dataset.all_depths[index, y, x].view(-1).to(self.rank)
-            depth_mask = self.dataset.all_depth_masks[index, y, x].view(-1).to(self.rank)
+            vis_mask = self.dataset.all_vis_masks[index, y, x].view(-1).to(self.rank)
+            if self.dataset.apply_depth:
+                depth = self.dataset.all_depths[index, y, x].view(-1).to(self.rank)
+                depth_mask = self.dataset.all_depth_masks[index, y, x].view(-1).to(self.rank)
         else:
             c2w = self.dataset.all_c2w[index][0]
             if self.dataset.directions.ndim == 3: # (H, W, 3)
@@ -64,8 +66,10 @@ def preprocess_data(self, batch, stage):
             rays_o, rays_d = get_rays(directions, c2w)
             rgb = self.dataset.all_images[index].view(-1, self.dataset.all_images.shape[-1]).to(self.rank)
             fg_mask = self.dataset.all_fg_masks[index].view(-1).to(self.rank)
-            depth = self.dataset.all_depths[index].view(-1).to(self.rank)
-            depth_mask = self.dataset.all_depth_masks[index].view(-1).to(self.rank)
+            vis_mask = self.dataset.all_vis_masks[index].view(-1).to(self.rank)
+            if self.dataset.apply_depth:
+                depth = self.dataset.all_depths[index].view(-1).to(self.rank)
+                depth_mask = self.dataset.all_depth_masks[index].view(-1).to(self.rank)
 
         rays = torch.cat([rays_o, F.normalize(rays_d, p=2, dim=-1)], dim=-1)
 
@@ -86,10 +90,14 @@ def preprocess_data(self, batch, stage):
             'rays': rays,
             'rgb': rgb,
             'fg_mask': fg_mask,
-            'depth': depth,
-            'depth_mask': depth_mask,
+            'vis_mask': vis_mask,
             'camera_indices': index
         })
+        if self.dataset.apply_depth:
+            batch.update({
+                'depth': depth,
+                'depth_mask': depth_mask,
+            })
 
 def training_step(self, batch, batch_idx):
     out = self(batch)
@@ -101,7 +109,8 @@ def training_step(self, batch, batch_idx):
             train_num_rays = int(self.train_num_rays * (self.train_num_samples / out['num_samples'].sum().item()))
             self.train_num_rays = min(int(self.train_num_rays * 0.9 + train_num_rays * 0.1), self.config.model.max_train_num_rays)
 
-        loss_rgb = F.smooth_l1_loss(out['comp_rgb'][out['rays_valid'][...,0]], batch['rgb'][out['rays_valid'][...,0]])
+        vis_mask = batch['vis_mask'][out['rays_valid'][...,0]][...,None]
+        loss_rgb = F.smooth_l1_loss(out['comp_rgb'][out['rays_valid'][...,0]] * vis_mask, batch['rgb'][out['rays_valid'][...,0]] * vis_mask)
         self.log('train/loss_rgb', loss_rgb)
         loss += loss_rgb * self.C(self.config.system.loss.lambda_rgb)
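Note: multiplying both prediction and target by `vis_mask` zeroes the residual on masked-out rays, but smooth-L1 with the default mean reduction still divides by the total ray count, so the loss scale shrinks as more rays are masked. A sketch of an alternative that averages only over visible rays (hypothetical helper, not the repo's code):

```python
import torch
import torch.nn.functional as F

def masked_smooth_l1(pred: torch.Tensor, target: torch.Tensor,
                     mask: torch.Tensor) -> torch.Tensor:
    """Smooth-L1 averaged only over rays where mask > 0."""
    per_ray = F.smooth_l1_loss(pred, target, reduction='none').mean(-1)
    mask = mask.float()
    return (per_ray * mask).sum() / mask.sum().clamp(min=1.0)
```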
diff --git a/utils/pose_utils.py b/utils/pose_utils.py
index ff5f5a4..efd77d5 100644
--- a/utils/pose_utils.py
+++ b/utils/pose_utils.py
@@ -8,6 +8,7 @@ import torch.nn.functional as F
 
 import math
 
+
 # get center
 def get_center(pts):
     center = pts.mean(0)
@@ -111,7 +112,7 @@ def normalize_poses(poses,
     if center_est_method == 'point':
         # rotation
         if up_est_method == 'z-axis':
-            Rc = R_z[:3, :3].T
+            Rc = R_z[:3, :3].T
         elif up_est_method == 'no-change':
             Rc = R_z
         else:
@@ -176,7 +177,7 @@ def normalize_poses(poses,
     else:
         # rotation and translation
         if up_est_method == 'z-axis':
-            Rc = R_z[:3, :3].T
+            Rc = R_z[:3, :3].T
         elif up_est_method == 'no-change':
             Rc = R_z
         else:
@@ -217,6 +218,7 @@ def normalize_poses(poses,
 
     return poses_norm, pts, R, t, cam_downscale
 
+
 def create_spheric_poses(cameras, n_steps=120):
     center = torch.as_tensor([0., 0., 0.],
                              dtype=cameras.dtype,
@@ -243,10 +245,12 @@ def create_spheric_poses(cameras, n_steps=120):
     return all_c2w
 
 
+# global point cloud registration
 def align_global(mesh_dir, path_pts_target):
     # ICP registration on purpose of coordinates alignment
-    pts_clt = o3d.io.read_point_cloud(os.path.join(mesh_dir, 'layout_depth_clt.ply'))
-
+    pts_clt = o3d.io.read_point_cloud(
+        os.path.join(mesh_dir, 'layout_depth_clt.ply'))
+    print(colored('Fused point cloud from depth images LOADED', 'blue'))
     pts_clt_transform = o3d.geometry.PointCloud()
 
     # GT lidar pcd
@@ -254,22 +258,74 @@ def align_global(mesh_dir, path_pts_target):
     print(colored('Target point cloud LOADED', 'blue'))
     # global pts alignment
     threshold = 0.1
-    trans_init = np.asarray([[1., 0., 0., 0.],
-                             [0., 1., 0., 0.],
-                             [0., 0., 1., 0.],
-                             [0., 0., 0., 1.]])
+    trans_init = np.asarray([[1., 0., 0., 0.], [0., 1., 0., 0.],
+                             [0., 0., 1., 0.], [0., 0., 0., 1.]])
     reg_p2p = o3d.pipelines.registration.registration_icp(
         pts_clt, pts_gt, threshold, trans_init,
         o3d.pipelines.registration.TransformationEstimationPointToPoint(),
         o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=2000))
     print(colored('ICP registration done', 'blue'))
-    pts_clt_transform = copy.deepcopy(pts_clt).transform(reg_p2p.transformation)
-    np.save(os.path.join(mesh_dir, 'pose_crt.npy'), np.asarray(reg_p2p.transformation))
-    o3d.io.write_point_cloud(os.path.join(mesh_dir, 'layout_depth_clt_transform.ply'), pts_clt_transform)
+    pts_clt_transform = copy.deepcopy(pts_clt).transform(
+        reg_p2p.transformation)
+    np.save(os.path.join(mesh_dir, 'pose_crt.npy'),
+            np.asarray(reg_p2p.transformation))
+    o3d.io.write_point_cloud(
+        os.path.join(mesh_dir, 'layout_depth_clt_transform.ply'),
+        pts_clt_transform)
     return np.asarray(reg_p2p.transformation)
 
+
 if __name__ == "__main__":
     root_dir = '/lpai/volumes/perception/sunhaiyang/world_model_dataset/liauto/easy_truth_240305/0501697614537981_linre_0318_6views_optim'
     mesh_dir = os.path.join(root_dir, 'meshes')
     path_pts_target = os.path.join(mesh_dir, 'dlo_map.ply')
     align_global(mesh_dir, path_pts_target)
+
+
+# parsing camera parameters from a binary file
+def bin2cams(work_space, bin_file):
+    """Convert R3D bin data to camera extrinsics/intrinsics."""
+    try:
+        import R3DParser
+        cam_intrinsics, cam_rotations, cam_centers, resolutions, fns = R3DParser.LoadR3DBinDataset(
+            work_space, bin_file)
+    except:
+        import R3DUtil
+        cam_intrinsics, cam_rotations, cam_centers, resolutions, fns = R3DUtil.LoadR3DBinDataset(
+            work_space, bin_file)
+    cam_intrinsics = cam_intrinsics.reshape(-1, 3, 3)
+    resolutions = resolutions.reshape(-1, 2)
+    cam_rotations = cam_rotations.reshape(-1, 3, 3)
+    cam_centers = cam_centers.reshape(-1, 3)
+    extrinsics = np.zeros((len(fns), 4, 4)).astype("float32")
+    extrinsics[:, :3, :3] = cam_rotations
+    extrinsics[:, :3, 3] = cam_centers
+    extrinsics[:, 3, 3] = 1
+
+    intrinsics = []
+    for i in range(len(fns)):
+        intrinsic = {
+            'width': 0,
+            'height': 0,
+            'f': 0,
+            'cx': 0,
+            'cy': 0,
+            'b1': 0,
+            'b2': 0,
+            'k1': 0,
+            'k2': 0,
+            'k3': 0,
+            'k4': 0,
+            'p1': 0,
+            'p2': 0,
+            'p3': 0,
+            'p4': 0,
+        }
+        cam_intrinsic = cam_intrinsics[i]
+        intrinsic["width"] = resolutions[i][0]
+        intrinsic["height"] = resolutions[i][1]
+        intrinsic["cx"] = cam_intrinsic[0, 2]  # - resolutions[i][0] / 2
+        intrinsic["cy"] = cam_intrinsic[1, 2]  # - resolutions[i][1] / 2
+        intrinsic["f"] = cam_intrinsic[0, 0]
+        intrinsics.append(copy.deepcopy(intrinsic))
+    return extrinsics, intrinsics, fns
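Note: `bin2cams` returns intrinsics as flat dicts (`f`, `cx`, `cy`, plus unused distortion slots) rather than matrices. A hedged helper for consumers that need a 3x3 K (hypothetical, not part of this diff; assumes square pixels, as only one focal length is stored):

```python
import numpy as np

def intrinsic_dict_to_K(intrinsic: dict) -> np.ndarray:
    """Assemble a pinhole K from the dict layout produced by bin2cams."""
    return np.array([[intrinsic['f'], 0.0, intrinsic['cx']],
                     [0.0, intrinsic['f'], intrinsic['cy']],
                     [0.0, 0.0, 1.0]])
```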