diff --git a/configs/3DGS.yaml b/configs/3DGS.yaml
new file mode 100644
index 0000000..aca57dc
--- /dev/null
+++ b/configs/3DGS.yaml
@@ -0,0 +1,157 @@
+seed: 0
+dataset: waymo/5cams
+
+# ------------- Trainer ------------ #
+trainer:
+  type: models.trainers.SingleTrainer
+  optim:
+    num_iters: 30000
+    use_grad_scaler: false
+    cache_buffer_freq: -1 # if > 0, use an error-based image sampler for training
+  render:
+    near_plane: 0.1 # near plane for rendering
+    far_plane: 10000000000.0 # far plane for rendering
+    antialiased: false # whether to use antialiasing for gaussian rendering, supported by gsplat kernel
+    packed: false # whether to use packed rendering, supported by gsplat kernel
+    absgrad: false # whether to use absolute gradient for rendering, supported by gsplat kernel
+    sparse_grad: false # whether to use sparse gradient for rendering, supported by gsplat kernel
+    batch_size: 1 # batch size for rendering, currently only 1 is supported
+  losses:
+    rgb:
+      w: 0.8
+    ssim:
+      w: 0.2
+    mask:
+      w: 1.0
+      opacity_loss_type: bce # choose from [bce, safe_bce]
+      # bce: standard binary cross-entropy
+      # safe_bce: uses clamp_min_ and conditional clipping to improve numerical stability, since a
+      #   direct log(x) or log(1 - x) can break down as x approaches 0 or 1; implemented as a custom
+      #   PyTorch autograd Function, which raises the compute cost considerably
+    depth:
+      w: 0.1 # weight of depth loss
+      inverse_depth: False # whether to use inverse depth; NOTE that when set to True, normalize must also be True
+      # depth-map smoothness is computed with kornia.losses.inverse_depth_smoothness_loss
+      normalize: False # whether to normalize depth loss
+      loss_type: l1 # choose from ["l1", "l2"]
+      # reduction: mean_on_hw # choose from ["mean_on_hit", "mean_on_hw", "sum", "none"]
+      # refer to the pvg codebase
+      # Reduces the loss according to the selected reduction option; supports several dtypes and
+      # reduction modes, with an optional mask to weight the loss or restrict where the reduction applies.
+      # mean: without a mask, the mean over all elements; with a mask, the weighted mean
+      #   (the mean of loss * mask).
+      # mean_in_mask: averages only where the mask is set, i.e. sum(loss * mask) / sum(mask),
+      #   with the denominator clamped to avoid division by zero.
+      # sum: like mean, but returns the total instead.
+      # max/min: the maximum or minimum loss value; with a mask, only positions where the mask
+      #   is non-zero are considered.
+      # none: the raw loss, or loss * mask if a mask is given.
+      # weight_decay: 1.0
+      # scales the L2 contribution to the loss
+    # opacity_entropy:
+    #   w: 0.05
+    # inverse_depth_smoothness:
+    #   w: 0.001
+  res_schedule:
+    double_steps: 3000 # training starts at 1/d resolution, every n steps this is doubled
+    downscale_times: 3 # at the beginning, resolution is 1/2^d, where d is this number
+  gaussian_optim_general_cfg:
+    xyz:
+      lr: 1.6e-04
+      lr_final: 1.6e-06
+      scale_factor: scene_radius # str or float; if "scene_scale", scale the learning rate by the scene scale
+    sh_dc:
+      lr: 0.0025
+    sh_rest:
+      lr: 0.000125
+    opacity:
+      lr: 0.005
+    scaling:
+      lr: 0.005
+    rotation:
+      lr: 0.001
+  gaussian_ctrl_general_cfg:
+    warmup_steps: 500 # warmup steps for alpha
+    reset_alpha_interval: 3000 # reset alpha every n steps
+    refine_interval: 100 # refine gaussians every n steps
+    sh_degree_interval: 2000 # every n intervals turn on another sh degree
+    n_split_samples: 2 # number of samples to split gaussians into
+    # may differ in different models
+    reset_alpha_value: 0.01 # reset alpha to this value
+    densify_grad_thresh: 0.0005 # above this grad, gaussians are densified
+    densify_size_thresh: 0.003 # below this size, gaussians are *duplicated*, otherwise split
+    cull_alpha_thresh: 0.005 # threshold of opacity for culling gaussians
+    cull_scale_thresh: 0.5 # threshold of scale for culling gaussians
+    cull_screen_size: 0.15 # if a gaussian is more than this percent of screen space, cull it
+    split_screen_size: 0.05 # if a gaussian is more than this percent of screen space, split it
+    stop_screen_size_at: 4000 # stop culling/splitting gaussians w.r.t. screen size after this step
+    stop_split_at: 20000 # stop splitting at this step
+  sh_degree: 3 # sh degree for gaussians
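Read together, the `gaussian_ctrl_general_cfg` thresholds describe the usual 3DGS refine/cull schedule documented in the comments above: a high view-space gradient triggers densification (duplicate if small, split if large), low opacity or excessive world/screen size triggers culling, and the screen-size rules switch off after `stop_screen_size_at`. The sketch below only illustrates how those thresholds combine; the helper and tensor names are hypothetical, not the trainer's actual refinement code.

```python
import torch

def refine_masks(grads_2d, scales_3d, opacities, screen_sizes, step, cfg):
    """Hypothetical sketch: combine the documented thresholds into refine/cull masks."""
    max_scale = scales_3d.max(dim=-1).values
    wants_densify = grads_2d > cfg["densify_grad_thresh"]   # high image-space gradient
    is_small = max_scale < cfg["densify_size_thresh"]
    duplicate_mask = wants_densify & is_small                # small gaussians are duplicated
    split_mask = wants_densify & ~is_small                   # large ones are split into n_split_samples
    cull_mask = opacities < cfg["cull_alpha_thresh"]         # nearly transparent
    cull_mask |= max_scale > cfg["cull_scale_thresh"]        # overly large in world space
    if step < cfg["stop_screen_size_at"]:                    # screen-size rules only early on
        split_mask |= screen_sizes > cfg["split_screen_size"]
        cull_mask |= screen_sizes > cfg["cull_screen_size"]
    if step >= cfg["stop_split_at"]:                         # no more splitting late in training
        split_mask[:] = False
    return duplicate_mask, split_mask, cull_mask
```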
+
+# ------------- Model ------------ #
+model:
+  Background:
+    type: models.gaussians.VanillaGaussians
+    init:
+      from_lidar:
+        num_samples: 800_000
+        return_color: True
+      near_randoms: 100_000
+      far_randoms: 100_000
+    reg:
+      sharp_shape_reg:
+        w: 1.
+        step_interval: 10
+        max_gauss_ratio: 10. # threshold on the ratio of a gaussian's max to min scale before the regularization loss from the PhysGaussian paper is applied
+    # ctrl:
+    #   # pvg specific
+    #   cycle_length: 0.2
+    #   time_interval: 0.02
+    #   enable_temporal_smoothing: True
+    #   smooth_probability: 0.5
+    #   distribution_span: 1.5 # unit: frame interval
+    #   betas_init: 0.1
+    #   densify_until_num_points: 3_000_000
+    #   densify_t_grad_thresh: 0.002
+    #   densify_t_size_thresh: 0.01
+    #   no_time_split: true
+  Sky:
+    type: models.modules.EnvLight
+    params:
+      resolution: 1024
+    optim:
+      all:
+        lr: 0.01
+  Affine:
+    type: models.modules.AffineTransform
+    params:
+      embedding_dim: 4
+      base_mlp_layer_width: 64
+      pixel_affine: False
+    optim:
+      all:
+        lr: 1.0e-5
+        weight_decay: 1.0e-6
+  CamPose:
+    type: models.modules.CameraOptModule
+    optim:
+      all:
+        lr: 1.0e-5
+        weight_decay: 1.0e-6
+
+# ------------- render ------------ #
+render:
+  fps: 10 # frames per second for the main rendered output
+  render_full: True # whether to render full-resolution videos
+  render_test: True # whether to render the test set
+  render_novel:
+    traj_types:
+      #- front_center_interp # type of trajectory for novel view synthesis, see utils/camera.py
+      - s_curve
+      #- three_key_poses
+    fps: 15 # frames per second for novel view rendering
+  vis_lidar: False # whether to visualize lidar points on ground truth images
+  vis_sky: False # whether to include "rgb_sky" and "rgb_sky_blend" in rendered keys
+  vis_error: False # whether to include "rgb_error_map" in rendered keys
+
+# ------------- logging ------------ #
+logging:
+  vis_freq: 2000 # how often to visualize training stats
+  print_freq: 500 # how often to print training stats
+  saveckpt_freq: 15000 # how often to save checkpoints
+  save_seperate_video: True # whether to save separate videos for each scene
+  export_freq: 10000 # how often to export ply files
\ No newline at end of file
diff --git a/configs/omnire.yaml b/configs/omnire.yaml
index 33dd7bc..a62b497 100644
--- a/configs/omnire.yaml
+++ b/configs/omnire.yaml
@@ -275,4 +275,5 @@ logging:
   vis_freq: 2000 # how often to visualize training stats
   print_freq: 500 # how often to print training stats
   saveckpt_freq: 15000 # how often to save checkpoints
-  save_seperate_video: True # whether to save seperate videos for each scene
\ No newline at end of file
+  save_seperate_video: True # whether to save separate videos for each scene
+  export_freq: 10000 # how often to export ply files
\ No newline at end of file
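The `vanilla.py` hunks below expose the raw, pre-activation parameters (`means`, `scales`, `opacities`, `quats`) alongside the existing activated accessors (`get_scaling`, `get_opacity`, `get_quats`). This is what the PLY exporter needs: the de-facto 3DGS PLY layout stores log-scales, logit opacities, and unnormalized quaternions, leaving the activations to the viewer. A minimal sketch of the correspondence, using only the activation forms visible in the diff:

```python
import torch

def activated_views(model):
    """Sketch: how the new raw-parameter properties relate to the activated accessors.

    The exporter reads model.scales / model.opacities / model.quats (raw values),
    because 3DGS PLY consumers apply exp / sigmoid / normalization themselves.
    """
    return {
        "scaling": torch.exp(model.scales),                             # == model.get_scaling
        "opacity": torch.sigmoid(model.opacities),                      # == model.get_opacity
        "quats": model.quats / model.quats.norm(dim=-1, keepdim=True),  # == model.get_quats
    }
```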
diff --git a/models/gaussians/vanilla.py b/models/gaussians/vanilla.py
index d59ff14..d94eeda 100644
--- a/models/gaussians/vanilla.py
+++ b/models/gaussians/vanilla.py
@@ -111,6 +111,9 @@ def colors(self):
         else:
             return torch.sigmoid(self._features_dc)
     @property
+    def means(self):
+        return self._means
+    @property
     def shs_0(self):
         return self._features_dc
     @property
@@ -136,11 +139,20 @@ def get_scaling(self):
         else:
             return torch.exp(self._scales)
     @property
+    def scales(self):
+        return self._scales
+    @property
     def get_opacity(self):
         return torch.sigmoid(self._opacities)
     @property
+    def opacities(self):
+        return self._opacities
+    @property
     def get_quats(self):
         return self.quat_act(self._quats)
+    @property
+    def quats(self):
+        return self._quats
     def quat_act(self, x: torch.Tensor) -> torch.Tensor:
         return x / x.norm(dim=-1, keepdim=True)
@@ -462,7 +474,10 @@ def load_state_dict(self, state_dict: Dict, **kwargs) -> str:
         msg = super().load_state_dict(state_dict, **kwargs)
         return msg
+
+
     def export_gaussians_to_ply(self, alpha_thresh: float) -> Dict:
+        import open3d as o3d
         means = self._means
         direct_color = self.colors
@@ -471,4 +486,9 @@ def export_gaussians_to_ply(self, alpha_thresh: float) -> Dict:
         return {
             "positions": means[mask],
             "colors": direct_color[mask],
-        }
\ No newline at end of file
+        }
+        # pcd = o3d.geometry.PointCloud()
+        # pcd.points = o3d.utility.Vector3dVector(means[mask])
+        # pcd.colors = o3d.utility.Vector3dVector(direct_color[mask])
+        # return pcd
+
diff --git a/tools/train.py b/tools/train.py
index f86411a..b7aa3f6 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -10,7 +10,7 @@ import torch
 from tools.eval import do_evaluation
-from utils.misc import import_str
+from utils.misc import import_str, export_gaussians_to_ply
 from utils.backup import backup_project
 from utils.logging import MetricLogger, setup_logging
 from models.video_utils import render_images, save_videos
@@ -300,6 +300,16 @@ def main(args):
             is_final=step == trainer.num_iters,
         )
+        do_save_ply = step > 0 and (
+            (step % cfg.logging.export_freq == 0) or (step == trainer.num_iters)
+        ) and (args.resume_from is None)
+        if do_save_ply:
+            # for class_name, model in trainer.models.items():
+            export_gaussians_to_ply(trainer.models["Background"], cfg.log_dir, f"{args.run_name}_{step}_Background.ply")
+            # pcd = trainer.models["Background"].export_gaussians_to_ply(alpha_thresh=0)
+            # file_name = f"{args.run_name}_{step}_Background.ply"
+            # file_path = f"{cfg.log_dir}/{file_name}"
+            # o3d.io.write_point_cloud(file_path, pcd)
         #----------------------------------------------------------------------------
         #------------------------ Cache Image Error ---------------------------
         if (
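The commented-out loop above (`for class_name, model in trainer.models.items():`) hints at exporting every model, but in this config only `Background` holds gaussians; `Sky`, `Affine`, and `CamPose` are auxiliary modules with no `means`. A hedged sketch of how that loop could be generalized; the `hasattr` guard is an assumption, not code from this PR:

```python
from utils.misc import export_gaussians_to_ply

def export_all_gaussian_models(trainer, cfg, run_name, step):
    # Sketch only: export every registered model that exposes the raw-gaussian
    # properties the PLY writer reads; non-gaussian modules are skipped.
    for class_name, model in trainer.models.items():
        if not hasattr(model, "means"):
            continue
        export_gaussians_to_ply(model, cfg.log_dir, f"{run_name}_{step}_{class_name}.ply")
```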
diff --git a/utils/misc.py b/utils/misc.py
index 7d54513..a00f774 100644
--- a/utils/misc.py
+++ b/utils/misc.py
@@ -47,55 +47,157 @@ def export_points_to_ply(
     pcd.colors = o3d.utility.Vector3dVector(colors)
     o3d.io.write_point_cloud(save_path, pcd)
 
-def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None):
-    model.eval()
-    filename = os.path.join(path, name)
-    map_to_tensors = {}
-
-    with torch.no_grad():
-        positions = model.means
-        if aabb is not None:
-            aabb = aabb.to(positions.device)
-            aabb_min, aabb_max = aabb[:3], aabb[3:]
-            aabb_center = (aabb_min + aabb_max) / 2
-            aabb_sacle_max = (aabb_max - aabb_min).max() / 2 * 1.1
-            vis_mask = torch.logical_and(positions >= aabb_min, positions < aabb_max).all(-1)
-        else:
-            aabb_center = positions.mean(0)
-            aabb_sacle_max = (positions - aabb_center).abs().max() * 1.1
-            vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool)
+# def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None):
+#     model.eval()
+#     filename = os.path.join(path, name)
+#     from collections import OrderedDict
+#     map_to_tensors = OrderedDict()
+
+#     with torch.no_grad():
+#         positions = model.means
+#         if aabb is not None:
+#             aabb = aabb.to(positions.device)
+#             aabb_min, aabb_max = aabb[:3], aabb[3:]
+#             aabb_center = (aabb_min + aabb_max) / 2
+#             aabb_sacle_max = (aabb_max - aabb_min).max() / 2 * 1.1
+#             vis_mask = torch.logical_and(positions >= aabb_min, positions < aabb_max).all(-1)
+#         else:
+#             aabb_center = positions.mean(0)
+#             aabb_sacle_max = (positions - aabb_center).abs().max() * 1.1
+#             vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool)
 
-        positions = ((positions[vis_mask] - aabb_center) / aabb_sacle_max).cpu().numpy()
-        map_to_tensors["positions"] = o3d.core.Tensor(positions, o3d.core.float32)
-        map_to_tensors["normals"] = o3d.core.Tensor(np.zeros_like(positions), o3d.core.float32)
+#         positions = ((positions[vis_mask] - aabb_center) / aabb_sacle_max).cpu().numpy()
+#         map_to_tensors["positions"] = o3d.core.Tensor(positions, o3d.core.float32)
+#         map_to_tensors["normals"] = o3d.core.Tensor(np.zeros_like(positions), o3d.core.float32)
 
-        colors = model.colors[vis_mask].data.cpu().numpy()
-        map_to_tensors["colors"] = (colors * 255).astype(np.uint8)
-        for i in range(colors.shape[1]):
-            map_to_tensors[f"f_dc_{i}"] = colors[:, i : i + 1]
+#         colors = model.colors[vis_mask].data.cpu().numpy()
+#         # map_to_tensors["colors"] = (colors * 255).astype(np.uint8)
+#         for i in range(colors.shape[1]):
+#             map_to_tensors[f"f_dc_{i}"] = colors[:, i : i + 1]
 
-        shs = model.shs_rest[vis_mask].data.cpu().numpy()
-        if model.config.sh_degree > 0:
-            shs = shs.reshape((colors.shape[0], -1, 1))
-            for i in range(shs.shape[-1]):
-                map_to_tensors[f"f_rest_{i}"] = shs[:, i]
+#         shs = model.shs_rest[vis_mask].data.cpu().numpy()
+#         # if model.sh_degree > 0:
+#         if 3 > 0:
+#             shs = shs.reshape((colors.shape[0], -1, 1))
+#             for i in range(shs.shape[-2]):
+#                 map_to_tensors[f"f_rest_{i}"] = shs[:, i]
 
-        map_to_tensors["opacity"] = model.opacities[vis_mask].data.cpu().numpy()
+#         opacity = model.get_opacity
+#         map_to_tensors["opacity"] = opacity[vis_mask].data.cpu().numpy()
 
-        scales = model.scales[vis_mask].data.cpu().unsqueeze(-1).numpy()
-        for i in range(3):
-            map_to_tensors[f"scale_{i}"] = scales[:, i]
+#         scales = model.get_scaling
+#         scales = scales[vis_mask].data.cpu().unsqueeze(-1).numpy()
+#         for i in range(3):
+#             map_to_tensors[f"scale_{i}"] = scales[:, i]
 
-        quats = model.quats[vis_mask].data.cpu().unsqueeze(-1).numpy()
+#         quats = model.get_quats
+#         quats = quats[vis_mask].data.cpu().unsqueeze(-1).numpy()
 
-        for i in range(4):
-            map_to_tensors[f"rot_{i}"] = quats[:, i]
+#         for i in range(4):
+#             map_to_tensors[f"rot_{i}"] = quats[:, i]
+
+#     # pcd = o3d.t.geometry.PointCloud(map_to_tensors)
+#     # o3d.t.io.write_point_cloud(str(filename), pcd)
+
+#     logger.info(f"Exported point cloud to {filename}, containing {vis_mask.sum().item()} points.")
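One detail worth noting before the rewritten exporter: with `sh_degree: 3`, `shs_rest` carries (3+1)^2 - 1 = 15 higher-order SH coefficients per color channel, so the flatten below produces 45 `f_rest_*` properties. If `shs_rest` is stored as (N, 15, 3), a plain reshape interleaves the channels, whereas the reference INRIA exporter transposes to (N, 3, 15) first (channel-major order); viewers expecting the original layout may misread interleaved coefficients. A small numpy illustration of the two orderings; the shapes are assumptions:

```python
import numpy as np

sh_degree = 3
n_rest = (sh_degree + 1) ** 2 - 1                       # 15 higher-order coefficients
shs = np.random.rand(8, n_rest, 3).astype(np.float32)  # assumed (N, 15, 3) storage

interleaved = shs.reshape(shs.shape[0], -1)             # what the exporter below writes
channel_major = shs.transpose(0, 2, 1).reshape(shs.shape[0], -1)  # INRIA-style f_rest order
assert interleaved.shape == channel_major.shape == (8, 45)
```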
+def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None):
+    model.eval()
+    filename = os.path.join(path, name)
+
+    with torch.no_grad():
+        positions = model.means
+        # if aabb is not None:
+        #     aabb = aabb.to(positions.device)
+        #     aabb_min, aabb_max = aabb[:3], aabb[3:]
+        #     aabb_center = (aabb_min + aabb_max) / 2
+        #     aabb_scale_max = (aabb_max - aabb_min).max() / 2 * 1.1
+        #     vis_mask = torch.logical_and(positions >= aabb_min, positions < aabb_max).all(-1)
+        # else:
+        #     aabb_center = positions.mean(0)
+        #     aabb_scale_max = (positions - aabb_center).abs().max() * 1.1
+        #     vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool)
+
+        # positions = ((positions[vis_mask] - aabb_center) / aabb_scale_max).cpu().numpy()
+        vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool)
+        positions = positions[vis_mask].cpu().numpy()
+
+        normals = np.zeros_like(positions)  # normals are written as zeros
+
+        # colors = model.colors[vis_mask].data.cpu().numpy()
+        colors = model.shs_0[vis_mask].data.cpu().numpy()  # raw SH DC coefficients
+
+        shs = model.shs_rest[vis_mask].data.cpu().numpy()
+        shs = shs.reshape((colors.shape[0], -1))
+
+        # raw (pre-activation) values, as expected by the 3DGS PLY layout:
+        # opacity = model.get_opacity[vis_mask].data.cpu().numpy()
+        opacity = model.opacities[vis_mask].data.cpu().numpy()
+        # scales = model.get_scaling[vis_mask].data.cpu().numpy()
+        scales = model.scales[vis_mask].data.cpu().numpy()
+        # quats = model.get_quats[vis_mask].data.cpu().numpy()
+        quats = model.quats[vis_mask].data.cpu().numpy()
+        num_points = positions.shape[0]
+
+    with open(filename, 'wb') as f:
+        f.write(b"ply\n")
+        f.write(b"format binary_little_endian 1.0\n")
+        f.write(b"comment Generated by opensplat\n")
+        f.write(f"element vertex {num_points}\n".encode())
+        f.write(b"property float x\n")
+        f.write(b"property float y\n")
+        f.write(b"property float z\n")
+        f.write(b"property float nx\n")
+        f.write(b"property float ny\n")
+        f.write(b"property float nz\n")
+
+        for i in range(colors.shape[1]):
+            f.write(f"property float f_dc_{i}\n".encode())
+
+        for i in range(shs.shape[1]):
+            f.write(f"property float f_rest_{i}\n".encode())
+
+        f.write(b"property float opacity\n")
+        f.write(b"property float scale_0\n")
+        f.write(b"property float scale_1\n")
+        f.write(b"property float scale_2\n")
+        f.write(b"property float rot_0\n")
+        f.write(b"property float rot_1\n")
+        f.write(b"property float rot_2\n")
+        f.write(b"property float rot_3\n")
+        f.write(b"end_header\n")
+
+        data_list = [
+            positions[:, 0], positions[:, 1], positions[:, 2],
+            normals[:, 0], normals[:, 1], normals[:, 2],
+        ]
+
+        for i in range(colors.shape[1]):
+            data_list.append(colors[:, i])
+
+        for i in range(shs.shape[1]):
+            data_list.append(shs[:, i])
+
+        data_list.append(opacity)
+
+        data_list.append(scales[:, 0])
+        data_list.append(scales[:, 1])
+        data_list.append(scales[:, 2])
+
+        data_list.append(quats[:, 0])
+        data_list.append(quats[:, 1])
+        data_list.append(quats[:, 2])
+        data_list.append(quats[:, 3])
+
+        # stack per-vertex columns and write the binary payload in one shot
+        data = np.column_stack(data_list).astype(np.float32)
+        data.tofile(f)
-        pcd = o3d.t.geometry.PointCloud(map_to_tensors)
-        o3d.t.io.write_point_cloud(str(filename), pcd)
-    logger.info(f"Exported point cloud to {filename}, containing {vis_mask.sum().item()} points.")
+    logger.info(f"Exported point cloud to {filename}, containing {vis_mask.sum().item()} points.")
 
 def is_enabled() -> bool:
     """
     Returns:
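To sanity-check an exported file, read it back and inspect the header fields; this sketch assumes the third-party plyfile package (not a dependency of this repo) and a hypothetical output path:

```python
import numpy as np
from plyfile import PlyData  # assumption: `pip install plyfile`

ply = PlyData.read("output/run_30000_Background.ply")  # hypothetical path
vertex = ply["vertex"]
names = vertex.data.dtype.names
print(vertex.count, "gaussians with fields:", names)

# Expect the layout written above: xyz, zero normals, f_dc_*, f_rest_*, opacity, scale_*, rot_*
assert {"x", "y", "z", "opacity", "scale_0", "rot_0"} <= set(names)
assert np.allclose(vertex["nx"], 0.0) and np.allclose(vertex["nz"], 0.0)
print(sum(n.startswith("f_rest_") for n in names), "f_rest coefficients")  # 45 for sh_degree 3
```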