From 5fcba3858baeaee8bf1d4186f19ac76e8d4f6f5a Mon Sep 17 00:00:00 2001 From: Eisoc Date: Wed, 11 Sep 2024 14:37:11 +0800 Subject: [PATCH 1/5] init --- .gitmodules | 6 +- configs/3DGS.yaml | 278 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 281 insertions(+), 3 deletions(-) create mode 100644 configs/3DGS.yaml diff --git a/.gitmodules b/.gitmodules index 66d0dc0..b91d861 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "third_party/Humans4D"] - path = third_party/Humans4D - url = https://github.com/shubham-goel/4D-Humans.git +# [submodule "third_party/Humans4D"] +# path = third_party/Humans4D +# url = https://github.com/shubham-goel/4D-Humans.git diff --git a/configs/3DGS.yaml b/configs/3DGS.yaml new file mode 100644 index 0000000..33dd7bc --- /dev/null +++ b/configs/3DGS.yaml @@ -0,0 +1,278 @@ +seed: 0 +dataset: waymo/3cams + +# ------------- Trainer ------------ # +trainer: + type: models.trainers.MultiTrainer + optim: + num_iters: 30000 + use_grad_scaler: false + cache_buffer_freq: -1 # if > 0, use error based image sampler for training + render: + near_plane: 0.1 # near plane for rendering + far_plane: 10000000000.0 # far plane for rendering + antialiased: false # whether to use antialiasing for gaussian rendering, supported by gsplat kernel + packed: false # whether to use packed rendering, supported by gsplat kernel + absgrad: true # whether to use absolute gradient for rendering, supported by gsplat kernel + sparse_grad: false # whether to use sparse gradient for rendering, supported by gsplat kernel + batch_size: 1 # batch size for rendering, currently only support 1 + losses: + rgb: + w: 0.8 + ssim: + w: 0.2 + mask: + w: 0.05 + opacity_loss_type: bce # choose from [bce, safe_bce] + depth: + w: 0.01 # weight of depth loss + inverse_depth: False # whether to use inverse depth, NOTE that when set to True, must normalize=True + normalize: False # whether to normalize depth loss + loss_type: l1 # choose from ["l1", "l2"] + affine: + w: 0.00001 # weight of affine regularization + res_schedule: + double_steps: 250 # training starts at 1/d resolution, every n steps this is doubled + downscale_times: 2 # at the beginning, resolution is 1/2^d, where d is this number + gaussian_optim_general_cfg: + xyz: + lr: 1.6e-04 + lr_final: 1.6e-06 + scale_factor: scene_radius # str or float, if "scene_scale", scale the learning rate by the scene scale + sh_dc: + lr: 0.0025 + sh_rest: + lr: 0.000125 + opacity: + lr: 0.05 + scaling: + lr: 0.005 + rotation: + lr: 0.001 + gaussian_ctrl_general_cfg: + warmup_steps: 500 # warmup steps for alpha + reset_alpha_interval: 3000 # reset alpha every n steps + refine_interval: 100 # refine gaussians every n steps + sh_degree_interval: 1000 # every n intervals turn on another sh degree + n_split_samples: 2 # number of samples to split gaussians into + # may differ in different models + reset_alpha_value: 0.01 # reset alpha to this value + densify_grad_thresh: 0.0005 # above this grad, gaussians are densified + densify_size_thresh: 0.003 # below this size, gaussians are *duplicated*, otherwise split + cull_alpha_thresh: 0.005 # threshold of opacity for culling gaussians + cull_scale_thresh: 0.5 # threshold of scale for culling gaussians + cull_screen_size: 0.15 # if a gaussian is more than this percent of screen space, cull it + split_screen_size: 0.05 # if a gaussian is more than this percent of screen space, split it + stop_screen_size_at: 4000 # stop culling/splitting at this step WRT screen size of gaussians + stop_split_at: 
15000 # stop splitting at this step + sh_degree: 3 # sh degree for gaussians + +# ------------- Model ------------ # +model: + Background: + type: models.gaussians.VanillaGaussians + init: + from_lidar: + num_samples: 800_000 + return_color: True + near_randoms: 100_000 + far_randoms: 100_000 + reg: + sharp_shape_reg: + w: 1. + step_interval: 10 + max_gauss_ratio: 10. # threshold of ratio of gaussian max to min scale before applying regularization loss from the PhysGaussian paper + RigidNodes: + type: models.nodes.RigidNodes + init: + instance_max_pts: 5000 # max initial points for each instance + only_moving: true # only optimize moving instances + traj_length_thres: 1.0 # threshold of trajectory length for moving instances + ctrl: + cull_scale_thresh: 0.1 + stop_screen_size_at: 30000 + stop_split_at: 20000 + cull_out_of_bound: true + reg: + sharp_shape_reg: + w: 1. + step_interval: 10 + max_gauss_ratio: 10. + temporal_smooth_reg: + trans: + w: 0.01 # optimal value may vary + smooth_range: 5 # no ablation + optim: + ins_rotation: + lr: 0.00001 + lr_final: 0.000005 + ins_translation: + lr: 0.0005 + lr_final: 0.0001 + DeformableNodes: + type: models.nodes.DeformableNodes + init: + instance_max_pts: 5000 + only_moving: true + traj_length_thres: 0.5 + networks: + D: 8 + W: 256 + embed_dim: 16 + x_multires: 10 # default 10 + t_multires: 10 # default 10 + deform_quat: True + deform_scale: False + ctrl: + reset_alpha_value: 0.1 + cull_scale_thresh: 0.1 + stop_screen_size_at: 30000 + stop_split_at: 20000 + sh_degree: 3 + cull_out_of_bound: false + use_deformgs_for_nonrigid: True # default True + use_deformgs_after: 3000 # default 3000 + stop_optimizing_canonical_xyz: True + reg: + sharp_shape_reg: + w: 1. + step_interval: 10 + max_gauss_ratio: 10. + out_of_bound_loss: + w: 1. + stop_after: 30000 + temporal_smooth_reg: + trans: + w: 0.01 # optimal value may vary + smooth_range: 5 # optimal value may vary + optim: + xyz: + lr: 1.6e-04 + lr_final: 1.6e-06 + scale_factor: 2. + embedding: + lr: 0.001 + lr_final: 0.0001 + deform_network: + lr: 1.6e-03 + lr_final: 1.6e-04 + scale_factor: 5. + ins_rotation: + lr: 0.00001 + lr_final: 0.000005 + ins_translation: + lr: 0.0005 + lr_final: 0.0001 + SMPLNodes: + type: models.nodes.SMPLNodes + init: + only_moving: true + traj_length_thres: 0.5 + reg: + sharp_shape_reg: + w: 1. + step_interval: 10 + max_gauss_ratio: 10. 
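+        # a rough sketch (assumption, not a config key) of the anisotropy term this
+        # block configures, after the PhysGaussian-style regularizer, applied every
+        # step_interval steps:
+        #   ratio = scales.max(dim=-1).values / scales.min(dim=-1).values
+        #   loss += w * torch.relu(ratio - max_gauss_ratio).mean()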
+ temporal_smooth_reg: + trans: + w: 0.01 # optimal value may vary + smooth_range: 5 # range of frames to consider for smoothing + # joint_smooth: + # w: 0.01 # optimal value may vary + knn_reg: + # lambda_std_x: 0.0001 + lambda_std_q: 0.001 + lambda_std_s: 0.001 + lambda_std_o: 0.001 + lambda_std_shs_dc: 0.001 + lambda_std_shs_rest: 0.001 + max_s_square_reg: + w: 0.05 + x_offset: + w: 0.2 + voxel_deformer_reg: + lambda_std_w: 0.6 + lambda_std_w_rest: 0.5 + lambda_w_norm: 0.6 + lambda_w_rest_norm: 0.3 + optim: + xyz: + lr: 1.6e-04 + lr_final: 1.6e-06 + scale_factor: 3.0 + rotation: + lr: 0.005 + sh_dc: + lr: 0.005 + sh_rest: + lr: 0.00025 + ins_rotation: + lr: 0.00001 + lr_final: 0.000005 + ins_translation: + lr: 0.0005 + lr_final: 0.0001 + smpl_rotation: + lr: 0.00005 + lr_final: 0.00001 + w_dc_vox: + lr: 0.0001 + lr_final: 0.00001 + opt_after: 10000 + ctrl: + sh_degree: 1 + opacity_init_value: 0.99 + ball_gaussians: false + constrain_xyz_offset: false + knn_update_interval: 100 + knn_neighbors: 3 + use_voxel_deformer: true + freeze_x: false + freeze_o: false + freeze_q: false + freeze_s: false + freeze_shs_dc: false + freeze_shs_rest: false + Sky: + type: models.modules.EnvLight + params: + resolution: 1024 + optim: + all: + lr: 0.01 + Affine: + type: models.modules.AffineTransform + params: + embedding_dim: 4 + base_mlp_layer_width: 64 + pixel_affine: False + optim: + all: + lr: 1.0e-5 + weight_decay: 1.0e-6 + CamPose: + type: models.modules.CameraOptModule + optim: + all: + lr: 1.0e-5 + weight_decay: 1.0e-6 + +# ------------- render ------------ # +render: + fps: 10 # frames per second for the main rendered output + render_full: True # whether to render full resolution videos + render_test: True # whether to render test set + render_novel: + traj_types: + - front_center_interp # type of trajectory for novel view synthesis + fps: 24 # frames per second for novel view rendering + vis_lidar: False # whether to visualize lidar points on ground truth images + vis_sky: False # whether to include "rgb_sky" and "rgb_sky_blend" in rendered keys + vis_error: False # whether to include "rgb_error_map" in rendered keys + +# ------------- logging ------------ # +logging: + vis_freq: 2000 # how often to visualize training stats + print_freq: 500 # how often to print training stats + saveckpt_freq: 15000 # how often to save checkpoints + save_seperate_video: True # whether to save seperate videos for each scene \ No newline at end of file From ca1ca71ab6ca1f57f892d9933929cbc8589f4468 Mon Sep 17 00:00:00 2001 From: Eisoc Date: Wed, 18 Sep 2024 11:18:00 +0800 Subject: [PATCH 2/5] export background vanilla gaussian to ply --- **Note**: | 0 configs/3DGS.yaml | 214 +++++++----------------------- configs/datasets/waymo/5cams.yaml | 11 +- configs/omnire.yaml | 2 +- datasets/driving_dataset.py | 3 +- models/gaussians/vanilla.py | 22 ++- models/trainers/base.py | 3 +- models/trainers/single.py | 6 +- models/video_utils.py | 10 +- plyreader.py | 15 +++ tools/eval.py | 7 +- tools/train.py | 51 ++++++- utils/camera.py | 10 ++ utils/misc.py | 182 +++++++++++++++++++------ 14 files changed, 304 insertions(+), 232 deletions(-) create mode 100644 **Note**: create mode 100644 plyreader.py diff --git a/**Note**: b/**Note**: new file mode 100644 index 0000000..e69de29 diff --git a/configs/3DGS.yaml b/configs/3DGS.yaml index 33dd7bc..8f152f9 100644 --- a/configs/3DGS.yaml +++ b/configs/3DGS.yaml @@ -1,9 +1,9 @@ seed: 0 -dataset: waymo/3cams +dataset: waymo/5cams # ------------- Trainer ------------ # trainer: - type: 
models.trainers.MultiTrainer
+  type: models.trainers.SingleTrainer
   optim:
     num_iters: 30000
     use_grad_scaler: false
     cache_buffer_freq: -1 # if > 0, use error based image sampler for training
@@ -13,7 +13,7 @@ trainer:
     far_plane: 10000000000.0 # far plane for rendering
     antialiased: false # whether to use antialiasing for gaussian rendering, supported by gsplat kernel
     packed: false # whether to use packed rendering, supported by gsplat kernel
-    absgrad: true # whether to use absolute gradient for rendering, supported by gsplat kernel
+    absgrad: false # whether to use absolute gradient for rendering, supported by gsplat kernel
     sparse_grad: false # whether to use sparse gradient for rendering, supported by gsplat kernel
     batch_size: 1 # batch size for rendering, currently only support 1
   losses:
@@ -22,18 +22,33 @@ trainer:
     ssim:
       w: 0.2
     mask:
-      w: 0.05
+      w: 1.0
       opacity_loss_type: bce # choose from [bce, safe_bce]
+      # bce: plain binary cross-entropy
+      # safe bce: uses clamp_min_ and conditional clipping for numerical stability, since a direct log(x) or log(1 - x) can break down as x approaches 0 or 1; implemented as a custom PyTorch autograd Function, which raises the compute cost considerably
     depth:
-      w: 0.01 # weight of depth loss
+      w: 0.1 # weight of depth loss
       inverse_depth: False # whether to use inverse depth, NOTE that when set to True, must normalize=True
+      # smoothness loss on the depth map, computed with kornia.losses.inverse_depth_smoothness_loss
       normalize: False # whether to normalize depth loss
       loss_type: l1 # choose from ["l1", "l2"]
-    affine:
-      w: 0.00001 # weight of affine regularization
+      # reduction: mean_on_hw # choose from ["mean_on_hit", "mean_on_hw", "sum", "none"]
+      # refer to pvg codebase
+      # applies the chosen reduction to the loss; supports several dtypes and reduction modes, with an optional mask to weight or restrict the reduction:
+      # mean: without a mask, the plain mean over all elements; with a mask, the weighted mean (mean of loss * mask).
+      # mean_in_mask: the mean over masked positions only, i.e. sum(loss * mask) / sum(mask), with the denominator clamped so division by zero is avoided.
+      # sum: like mean, but returns the total instead.
+      # max/min: the maximum or minimum loss value; with a mask, only positions where the mask is non-zero are considered.
+      # none: the raw loss, or loss * mask if a mask is given.
+      # weight_decay: 1.0
+      # scales the L2 contribution to the loss
+    # opacity_entropy:
+    #   w: 0.05
+    # inverse_depth_smoothness:
+    #   w: 0.001
   res_schedule:
-    double_steps: 250 # training starts at 1/d resolution, every n steps this is doubled
-    downscale_times: 2 # at the beginning, resolution is 1/2^d, where d is this number
+    double_steps: 3000 # training starts at 1/d resolution, every n steps this is doubled
+    downscale_times: 3 # at the beginning, resolution is 1/2^d, where d is this number
   gaussian_optim_general_cfg:
     xyz:
       lr: 1.6e-04
@@ -44,7 +59,7 @@ trainer:
     sh_rest:
       lr: 0.000125
     opacity:
-      lr: 0.05
+      lr: 0.005
     scaling:
       lr: 0.005
     rotation:
@@ -53,18 +68,18 @@ trainer:
     warmup_steps: 500 # warmup steps for alpha
     reset_alpha_interval: 3000 # reset alpha every n steps
     refine_interval: 100 # refine gaussians every n steps
-    sh_degree_interval: 1000 # every n intervals turn on another sh degree
+    sh_degree_interval: 2000 # every n intervals turn on another sh degree
     n_split_samples: 2 # number of samples to split gaussians into
     # may differ in different models
     reset_alpha_value: 0.01 # reset alpha to this value
-    densify_grad_thresh: 0.0005 # above this grad, gaussians are densified
-    densify_size_thresh: 0.003 # below this size, gaussians are *duplicated*, otherwise split
-    cull_alpha_thresh: 0.005 # threshold of opacity for culling gaussians
+    densify_grad_thresh: 0.0002 # above this grad, gaussians are densified
+    densify_size_thresh: 0.01 # below this size, gaussians are *duplicated*, otherwise split
+    cull_alpha_thresh: 0.005 # threshold of opacity for culling gaussians
     cull_scale_thresh: 0.5 # threshold of scale for culling gaussians
     cull_screen_size: 0.15 # if a gaussian is more than this percent of screen space, cull it
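+    # illustration only (assumption, not a config key) of how the thresholds above
+    # interact on each refine step:
+    #   if grad > densify_grad_thresh:
+    #       duplicate the gaussian if its scale < densify_size_thresh, else split it into n_split_samples
+    #   cull it when opacity < cull_alpha_thresh or scale > cull_scale_thresh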
split_screen_size: 0.05 # if a gaussian is more than this percent of screen space, split it stop_screen_size_at: 4000 # stop culling/splitting at this step WRT screen size of gaussians - stop_split_at: 15000 # stop splitting at this step + stop_split_at: 20000 # stop splitting at this step sh_degree: 3 # sh degree for gaussians # ------------- Model ------------ # @@ -82,157 +97,18 @@ model: w: 1. step_interval: 10 max_gauss_ratio: 10. # threshold of ratio of gaussian max to min scale before applying regularization loss from the PhysGaussian paper - RigidNodes: - type: models.nodes.RigidNodes - init: - instance_max_pts: 5000 # max initial points for each instance - only_moving: true # only optimize moving instances - traj_length_thres: 1.0 # threshold of trajectory length for moving instances - ctrl: - cull_scale_thresh: 0.1 - stop_screen_size_at: 30000 - stop_split_at: 20000 - cull_out_of_bound: true - reg: - sharp_shape_reg: - w: 1. - step_interval: 10 - max_gauss_ratio: 10. - temporal_smooth_reg: - trans: - w: 0.01 # optimal value may vary - smooth_range: 5 # no ablation - optim: - ins_rotation: - lr: 0.00001 - lr_final: 0.000005 - ins_translation: - lr: 0.0005 - lr_final: 0.0001 - DeformableNodes: - type: models.nodes.DeformableNodes - init: - instance_max_pts: 5000 - only_moving: true - traj_length_thres: 0.5 - networks: - D: 8 - W: 256 - embed_dim: 16 - x_multires: 10 # default 10 - t_multires: 10 # default 10 - deform_quat: True - deform_scale: False - ctrl: - reset_alpha_value: 0.1 - cull_scale_thresh: 0.1 - stop_screen_size_at: 30000 - stop_split_at: 20000 - sh_degree: 3 - cull_out_of_bound: false - use_deformgs_for_nonrigid: True # default True - use_deformgs_after: 3000 # default 3000 - stop_optimizing_canonical_xyz: True - reg: - sharp_shape_reg: - w: 1. - step_interval: 10 - max_gauss_ratio: 10. - out_of_bound_loss: - w: 1. - stop_after: 30000 - temporal_smooth_reg: - trans: - w: 0.01 # optimal value may vary - smooth_range: 5 # optimal value may vary - optim: - xyz: - lr: 1.6e-04 - lr_final: 1.6e-06 - scale_factor: 2. - embedding: - lr: 0.001 - lr_final: 0.0001 - deform_network: - lr: 1.6e-03 - lr_final: 1.6e-04 - scale_factor: 5. - ins_rotation: - lr: 0.00001 - lr_final: 0.000005 - ins_translation: - lr: 0.0005 - lr_final: 0.0001 - SMPLNodes: - type: models.nodes.SMPLNodes - init: - only_moving: true - traj_length_thres: 0.5 - reg: - sharp_shape_reg: - w: 1. - step_interval: 10 - max_gauss_ratio: 10. 
- temporal_smooth_reg: - trans: - w: 0.01 # optimal value may vary - smooth_range: 5 # range of frames to consider for smoothing - # joint_smooth: - # w: 0.01 # optimal value may vary - knn_reg: - # lambda_std_x: 0.0001 - lambda_std_q: 0.001 - lambda_std_s: 0.001 - lambda_std_o: 0.001 - lambda_std_shs_dc: 0.001 - lambda_std_shs_rest: 0.001 - max_s_square_reg: - w: 0.05 - x_offset: - w: 0.2 - voxel_deformer_reg: - lambda_std_w: 0.6 - lambda_std_w_rest: 0.5 - lambda_w_norm: 0.6 - lambda_w_rest_norm: 0.3 - optim: - xyz: - lr: 1.6e-04 - lr_final: 1.6e-06 - scale_factor: 3.0 - rotation: - lr: 0.005 - sh_dc: - lr: 0.005 - sh_rest: - lr: 0.00025 - ins_rotation: - lr: 0.00001 - lr_final: 0.000005 - ins_translation: - lr: 0.0005 - lr_final: 0.0001 - smpl_rotation: - lr: 0.00005 - lr_final: 0.00001 - w_dc_vox: - lr: 0.0001 - lr_final: 0.00001 - opt_after: 10000 - ctrl: - sh_degree: 1 - opacity_init_value: 0.99 - ball_gaussians: false - constrain_xyz_offset: false - knn_update_interval: 100 - knn_neighbors: 3 - use_voxel_deformer: true - freeze_x: false - freeze_o: false - freeze_q: false - freeze_s: false - freeze_shs_dc: false - freeze_shs_rest: false + # ctrl: + # # pvg specific + # cycle_length: 0.2 + # time_interval: 0.02 + # enable_temporal_smoothing: True + # smooth_probability: 0.5 + # distribution_span: 1.5 # unit: frame interval + # betas_init: 0.1 + # densify_until_num_points: 3_000_000 + # densify_t_grad_thresh: 0.002 + # densify_t_size_thresh: 0.01 + # no_time_split: true Sky: type: models.modules.EnvLight params: @@ -264,8 +140,10 @@ render: render_test: True # whether to render test set render_novel: traj_types: - - front_center_interp # type of trajectory for novel view synthesis - fps: 24 # frames per second for novel view rendering + #- front_center_interp # type of trajectory for novel view synthesis, /data/bing.han/drivestudio/utils/camera.py + - s_curve + #- three_key_poses + fps: 15 # frames per second for novel view rendering vis_lidar: False # whether to visualize lidar points on ground truth images vis_sky: False # whether to include "rgb_sky" and "rgb_sky_blend" in rendered keys vis_error: False # whether to include "rgb_error_map" in rendered keys diff --git a/configs/datasets/waymo/5cams.yaml b/configs/datasets/waymo/5cams.yaml index c791f63..8a71ece 100644 --- a/configs/datasets/waymo/5cams.yaml +++ b/configs/datasets/waymo/5cams.yaml @@ -10,21 +10,22 @@ data: data_root: data/waymo/processed/training # data root for the dataset dataset: waymo - scene_idx: 0 # which scene to use, [0, 798] for waymo's training set and [0, 849] for nuscenes's train/val sets, inclusive + scene_idx: 3 # which scene to use, [0, 798] for waymo's training set and [0, 849] for nuscenes's train/val sets, inclusive start_timestep: 0 # which timestep to start from - end_timestep: -1 # which timestep to end at, -1 means the last timestep + end_timestep: 50 # which timestep to end at, -1 means the last timestep preload_device: cpu # choose from ["cpu", "cuda"], cache the data on this device. pixel_source: # image source and object annotations type: datasets.waymo.waymo_sourceloader.WaymoPixelSource cameras: [0, 1, 2, 3, 4] # which cameras to use downscale_when_loading: [2, 2, 2, 2, 2] # the size of the images to load downscale: 1 # downscale factor wrt to the downscale_when_loading - undistort: True # whether to undistort the images + undistort: False # whether to undistort the images test_image_stride: 0 # use every Nth timestep for the test set. 
if 0, use all images for training and none for testing
     load_sky_mask: True # whether to load sky mask
     load_dynamic_mask: True # whether to load dynamic mask
-    load_objects: True # whether to load object bounding boxes
-    load_smpl: True # whether to load SMPL template for pedestrians
+    load_objects: False # whether to load object bounding boxes
+    load_smpl: False # whether to load SMPL template for pedestrians
+    # NOTE: SMPL human poses are ignored here
   sampler: # error based image sampler
     buffer_downscale: 8 # downscale factor for the buffer wrt load_size
     buffer_ratio: 0.5 # the percentage of images sampled according to the error buffer
diff --git a/configs/omnire.yaml b/configs/omnire.yaml
index 33dd7bc..2c0647e 100644
--- a/configs/omnire.yaml
+++ b/configs/omnire.yaml
@@ -1,5 +1,5 @@
 seed: 0
-dataset: waymo/3cams
+dataset: waymo/5cams
 
 # ------------- Trainer ------------ #
 trainer:
diff --git a/datasets/driving_dataset.py b/datasets/driving_dataset.py
index 38b3e07..7a83398 100644
--- a/datasets/driving_dataset.py
+++ b/datasets/driving_dataset.py
@@ -732,7 +732,8 @@ def get_novel_render_traj(
 
     novel_trajs = {}
     for traj_type in traj_types:
-        novel_trajs[traj_type] = get_interp_novel_trajectories(
+        # very important: this generates the novel-view camera paths
+        novel_trajs[traj_type] = get_interp_novel_trajectories(
             self.type,
             self.scene_idx,
             per_cam_poses,
diff --git a/models/gaussians/vanilla.py b/models/gaussians/vanilla.py
index d59ff14..e30b080 100644
--- a/models/gaussians/vanilla.py
+++ b/models/gaussians/vanilla.py
@@ -111,6 +111,9 @@ def colors(self):
         else:
             return torch.sigmoid(self._features_dc)
     @property
+    def means(self):
+        return self._means
+    @property
     def shs_0(self):
         return self._features_dc
     @property
@@ -136,11 +139,20 @@ def get_scaling(self):
         else:
             return torch.exp(self._scales)
     @property
+    def pure_scaling(self):
+        return self._scales
+    @property
     def get_opacity(self):
         return torch.sigmoid(self._opacities)
     @property
+    def pure_opacity(self):
+        return self._opacities
+    @property
     def get_quats(self):
         return self.quat_act(self._quats)
+    @property
+    def pure_quats(self):
+        return self._quats
 
     def quat_act(self, x: torch.Tensor) -> torch.Tensor:
         return x / x.norm(dim=-1, keepdim=True)
@@ -462,7 +474,10 @@ def load_state_dict(self, state_dict: Dict, **kwargs) -> str:
         msg = super().load_state_dict(state_dict, **kwargs)
         return msg
 
+
+    def export_gaussians_to_ply(self, alpha_thresh: float) -> Dict:
+        import open3d as o3d
         means = self._means
         direct_color = self.colors
@@ -471,4 +486,9 @@
         return {
             "positions": means[mask],
             "colors": direct_color[mask],
-        }
\ No newline at end of file
+        }
+        # pcd = o3d.geometry.PointCloud()
+        # pcd.points = o3d.utility.Vector3dVector(means[mask])
+        # pcd.colors = o3d.utility.Vector3dVector(direct_color[mask])
+        # return pcd
+
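[editor's note] The commented-out Open3D path above hands torch tensors straight to open3d, which expects (N, 3) float64 NumPy arrays. A minimal usage sketch for the dict this method returns (threshold value and output filename here are illustrative, not from the repo):

import numpy as np
import open3d as o3d

out = model.export_gaussians_to_ply(alpha_thresh=0.05)  # {"positions": ..., "colors": ...}
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(out["positions"].detach().cpu().numpy().astype(np.float64))
pcd.colors = o3d.utility.Vector3dVector(out["colors"].detach().cpu().numpy().clip(0.0, 1.0).astype(np.float64))
o3d.io.write_point_cloud("background_preview.ply", pcd)
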
diff --git a/models/trainers/base.py b/models/trainers/base.py
index 2f959bf..9145920 100644
--- a/models/trainers/base.py
+++ b/models/trainers/base.py
@@ -490,7 +490,8 @@ def forward(
             # render sky
             sky_model = self.models['Sky']
             outputs["rgb_sky"] = sky_model(image_infos)
-            outputs["rgb_sky_blend"] = outputs["rgb_sky"] * (1.0 - outputs["opacity"])
+            outputs["rgb_sky_blend"] = outputs["rgb_sky"] * (1.0 - outputs["opacity"])
+            # (1 - alpha) is the transparency: the more transparent the gaussians are, the more weight the sky gets and the more background is blended in
 
         # affine transformation
         outputs["rgb"] = self.affine_transformation(
diff --git a/models/trainers/single.py b/models/trainers/single.py
index 3e07274..a59a3b2 100644
--- a/models/trainers/single.py
+++ b/models/trainers/single.py
@@ -52,7 +52,8 @@ def _init_models(self):
                 device=self.device
             )
 
-        if class_name in self.misc_classes_keys:
+        if class_name in self.misc_classes_keys:
+            # ['Sky', 'Affine', 'CamPose', 'CamPosePerturb']
             model = import_str(model_cfg.type)(
                 class_name=class_name,
                 **model_cfg.get('params', {}),
@@ -169,6 +170,7 @@ def forward(
             cam=processed_cam,
             image_ids=image_infos["img_idx"].flatten()[0]
         )
+        # important: this yields the gaussians, merged across all model classes
 
         # render gaussians
         outputs, _ = self.render_gaussians(
@@ -190,7 +192,7 @@ def forward(
             outputs["rgb_gaussians"] + outputs["rgb_sky"] * (1.0 - outputs["opacity"]),
             image_infos
         )
-        return outputs
+        return outputs, gs
 
     def compute_losses(
         self,
diff --git a/models/video_utils.py b/models/video_utils.py
index b3eae4b..45701e6 100644
--- a/models/video_utils.py
+++ b/models/video_utils.py
@@ -59,7 +59,7 @@ def render_images(
         vis_indices (Optional[List[int]], optional): Indices to visualize. Defaults to None.
     """
     trainer.set_eval()
-    render_results = render(
+    render_results, gs_collection = render(
         dataset,
         trainer=trainer,
         compute_metrics=compute_metrics,
@@ -81,7 +81,7 @@ def render_images(
         logger.info(f"\tVehicle-Only PSNR: {render_results['vehicle_psnr']:.4f}")
         logger.info(f"\tVehicle-Only SSIM: {render_results['vehicle_ssim']:.4f}")
 
-    return render_results
+    return render_results, gs_collection
 
 
 def render(
@@ -137,7 +137,7 @@ def render(
             if isinstance(v, Tensor):
                 cam_infos[k] = v.cuda(non_blocking=True)
         # render the image
-        results = trainer(image_infos, cam_infos)
+        results, gs_collection = trainer(image_infos, cam_infos)  # invoke the trainer defined earlier
 
         # ------------- clip rgb ------------- #
         for k, v in results.items():
@@ -378,7 +378,7 @@ def render(
         results_dict["SMPLNodes_opacities"] = SMPLNodes_opacities
     if len(Dynamic_opacities) > 0:
         results_dict["Dynamic_opacities"] = Dynamic_opacities
-    return results_dict
+    return results_dict, gs_collection
 
 
 def save_videos(
@@ -443,7 +443,7 @@ def render_novel_views(trainer, render_data: list, save_path: str, fps: int = 30
             frame_data["image_infos"][key] = value.cuda(non_blocking=True)
 
         # Perform rendering
-        outputs = trainer(
+        outputs, _ = trainer(
             image_infos=frame_data["image_infos"],
             camera_infos=frame_data["cam_infos"],
             novel_view=True
diff --git a/plyreader.py b/plyreader.py
new file mode 100644
index 0000000..2e2bd8d
--- /dev/null
+++ b/plyreader.py
@@ -0,0 +1,15 @@
+import numpy as np
+
+def verify_ply(filename):
+    with open(filename, 'rb') as f:
+        # skip the header
+        while True:
+            line = f.readline()
+            if line.strip() == b"end_header":
+                break
+
+        # read and parse the payload
+        data = np.fromfile(f, dtype=np.float32)  # pick the dtype to match what was actually written
+        print(data[:])
+
+verify_ply('/data/bing.han/Omni_Outputs/drivestudio/test/test_10000_Background.ply')
\ No newline at end of file
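[editor's note] Since every property written by export_gaussians_to_ply is a float32, the flat stream that verify_ply prints can be regrouped into one row per gaussian by counting the properties declared in the header. A sketch under that assumption (the helper name is not from the repo):

import numpy as np

def read_ply_vertices(filename):
    num_props, num_verts = 0, 0
    with open(filename, 'rb') as f:
        # count vertices and float properties while walking the header
        while True:
            line = f.readline().strip()
            if line.startswith(b"element vertex"):
                num_verts = int(line.split()[-1])
            elif line.startswith(b"property float"):
                num_props += 1
            elif line == b"end_header":
                break
        data = np.fromfile(f, dtype=np.float32)
    return data.reshape(num_verts, num_props)  # one row per gaussian
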
diff --git a/tools/eval.py b/tools/eval.py
index df1e9f1..3c903b5 100644
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -36,7 +36,7 @@ def do_evaluation(
     logger.info("Evaluating Pixels...")
     if dataset.test_image_set is not None and cfg.render.render_test:
         logger.info("Evaluating Test Set Pixels...")
-        render_results = render_images(
+        render_results, _ = render_images(
             trainer=trainer,
             dataset=dataset.test_image_set,
             compute_metrics=True,
@@ -93,7 +93,7 @@ def do_evaluation(
 
     if cfg.render.render_full:
         logger.info("Evaluating Full Set...")
-        render_results = render_images(
+        render_results, _ = render_images(
             trainer=trainer,
             dataset=dataset.full_image_set,
             compute_metrics=True,
@@ -147,9 +147,11 @@ def do_evaluation(
         del render_results, vis_frame_dict
         torch.cuda.empty_cache()
 
+    # render novel-view images
     render_novel_cfg = cfg.render.get("render_novel", None)
     if render_novel_cfg is not None:
         logger.info("Rendering novel views...")
+        # traj defines the motion path, including the camera-to-world (c2w) poses
         render_traj = dataset.get_novel_render_traj(
             traj_types=render_novel_cfg.traj_types,
             target_frames=render_novel_cfg.get("frames", dataset.frame_num),
@@ -160,6 +162,7 @@ def do_evaluation(
 
         for traj_type, traj in render_traj.items():
             # Prepare rendering data
+            # pass in traj; the dataset's own camera intrinsics are used
             render_data = dataset.prepare_novel_view_render_data(traj)
 
             # Render and save video
diff --git a/tools/train.py b/tools/train.py
index f86411a..53dcf08 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -10,12 +10,14 @@
 import torch
 
 from tools.eval import do_evaluation
-from utils.misc import import_str
+from utils.misc import import_str, export_gaussians_to_ply
 from utils.backup import backup_project
 from utils.logging import MetricLogger, setup_logging
 from models.video_utils import render_images, save_videos
 from datasets.driving_dataset import DrivingDataset
 
+import open3d as o3d
+
 logger = logging.getLogger()
 current_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
@@ -107,11 +109,16 @@ def main(args):
     cfg = setup(args)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+    import time
+    start_time_train = time.time()
+    time_last = time.time()
+
     # build dataset
     dataset = DrivingDataset(data_cfg=cfg.data)
 
     # setup trainer
-    trainer = import_str(cfg.trainer.type)(
+    trainer = import_str(cfg.trainer.type)(
+        # the trainer type is chosen via cfg and defined in models/trainers: a base parent class plus two child flavors, MultiTrainer and SingleTrainer (vanilla, pvg, deformable)
         **cfg.trainer,
         num_timesteps=dataset.num_img_timesteps,
         model_config=cfg.model,
@@ -185,6 +192,22 @@ def main(args):
     # )
 
     for step in metric_logger.log_every(all_iters, cfg.logging.print_freq):
+        # timing module
+        if step > 0 and step % 5000 == 0:
+            time_now = time.time()
+            total_train_time = time_now - start_time_train
+            last5k_time = time_now - time_last
+            time_last = time_now
+
+            minutes = int(last5k_time // 60)
+            seconds = int(last5k_time % 60)
+            print(f"Time of last 5k steps: {minutes} min {seconds} s")
+
+            minutes = int(total_train_time // 60)
+            seconds = int(total_train_time % 60)
+            print(f"Total train time until now: {minutes} min {seconds} s")
+
+
         #----------------------------------------------------------------------------
         #----------------------------     Validate     ------------------------------
         if step % cfg.logging.vis_freq == 0 and cfg.logging.vis_freq > 0:
@@ -197,7 +220,8 @@ def main(args):
                 dtype=int,
             )[step // cfg.logging.vis_freq]
             with torch.no_grad():
-                render_results = render_images(
+                render_results, gs_collection = render_images(
+                    # entry point: rendering starts here
                     trainer=trainer,
                     dataset=dataset.full_image_set,
                     compute_metrics=True,
                     vis_indices=[
                         vis_timestep * dataset.pixel_source.num_cams + i
                         for i in range(dataset.pixel_source.num_cams)
                     ],
                 )
+
             if args.enable_wandb:
                 wandb.log(
                     {
@@ -254,7 +279,18 @@ def main(args):
                 cam_infos[k] = v.cuda(non_blocking=True)
 
         # forward & backward
-        outputs = trainer(image_infos, cam_infos)
+        outputs, gs_collection = trainer(image_infos, cam_infos)
+
+        # if step > 0 and step % args.save_ply == 0:
+        if step > 0 and step % 10000 == 0:
+            # for class_name, model in trainer.models.items():
+            export_gaussians_to_ply(trainer.models["Background"], cfg.log_dir, f"{args.run_name}_{step}_Background.ply")
+            # pcd = trainer.models["Background"].export_gaussians_to_ply(alpha_thresh=0)
+            # file_name = f"{args.run_name}_{step}_Background.ply"
+            # file_path = f"{cfg.log_dir}/{file_name}"
+            # o3d.io.write_point_cloud(file_path, pcd)
+            print(f"{args.run_name}_{step}_Background.ply stored in {cfg.log_dir}")
{cfg.log_dir}") + trainer.update_visibility_filter() loss_dict = trainer.compute_losses( @@ -312,7 +348,7 @@ def main(args): dataset.pixel_source.update_downscale_factor( 1 / dataset.pixel_source.buffer_downscale ) - render_results = render_images( + render_results, _ = render_images( trainer=trainer, dataset=dataset.full_image_set, ) @@ -363,7 +399,7 @@ def main(args): parser.add_argument("--enable_wandb", action="store_true", help="enable wandb logging") parser.add_argument("--entity", default="ziyc", type=str, help="wandb entity name") parser.add_argument("--project", default="drivestudio", type=str, help="wandb project name, also used to enhance log_dir") - parser.add_argument("--run_name", default="omnire", type=str, help="wandb run name, also used to enhance log_dir") + parser.add_argument("--run_name", default="test", type=str, help="wandb run name, also used to enhance log_dir") # viewer parser.add_argument("--enable_viewer", action="store_true", help="enable viewer") @@ -371,6 +407,7 @@ def main(args): # misc parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) - + parser.add_argument("--save_ply", default=2000, type=int, help="save ply every n steps", ) + args = parser.parse_args() final_step = main(args) diff --git a/utils/camera.py b/utils/camera.py index 0a31160..7ca4efb 100644 --- a/utils/camera.py +++ b/utils/camera.py @@ -64,6 +64,7 @@ def get_interp_novel_trajectories( ) -> torch.Tensor: original_frames = per_cam_poses[list(per_cam_poses.keys())[0]].shape[0] + # 在这里定义新轨迹! trajectory_generators = { "front_center_interp": front_center_interp, "s_curve": s_curve, @@ -87,6 +88,11 @@ def s_curve( dataset_type: str, per_cam_poses: Dict[int, torch.Tensor], original_frames: int, target_frames: int ) -> torch.Tensor: """Create an S-shaped trajectory using the front three cameras.""" + # ID 0选取第一帧。 + # 从第二个摄像机(ID 1)选取第一四分之一位置的帧。 + # 再次从前置中心摄像机(ID 0)选取中间帧(一半位置)。 + # 从第三个摄像机(ID 2)选取第三四分之一位置的帧。 + # 最后,再从前置中心摄像机(ID 0)选取最后一帧。 assert all(cam in per_cam_poses.keys() for cam in [0, 1, 2]), "Front three cameras (IDs 0, 1, 2) are required for s_curve" key_poses = torch.cat([ per_cam_poses[0][0:1], @@ -103,6 +109,10 @@ def three_key_poses_trajectory( original_frames: int, target_frames: int ) -> torch.Tensor: + # 第一个关键姿态:前置中心摄像机的第一帧 + # 中间关键姿态:随机选择摄像机1或2,获取该摄像机中间帧的姿态。 + # 使用插值方法(球面线性插值,SLERP)在起始姿态和中间姿态之间插值,生成中间关键姿态。插值仅应用于旋转部分,平移部分直接使用中间帧的数据。 + # 第三个关键姿态:前置中心摄像机的最后一帧。 """ Create a trajectory using three key poses: 1. 
First frame of front center camera diff --git a/utils/misc.py b/utils/misc.py index 7d54513..f995a66 100644 --- a/utils/misc.py +++ b/utils/misc.py @@ -47,55 +47,159 @@ def export_points_to_ply( pcd.colors = o3d.utility.Vector3dVector(colors) o3d.io.write_point_cloud(save_path, pcd) -def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None): - model.eval() - filename = os.path.join(path, name) - map_to_tensors = {} - - with torch.no_grad(): - positions = model.means - if aabb is not None: - aabb = aabb.to(positions.device) - aabb_min, aabb_max = aabb[:3], aabb[3:] - aabb_center = (aabb_min + aabb_max) / 2 - aabb_sacle_max = (aabb_max - aabb_min).max() / 2 * 1.1 - vis_mask = torch.logical_and(positions >= aabb_min, positions < aabb_max).all(-1) - else: - aabb_center = positions.mean(0) - aabb_sacle_max = (positions - aabb_center).abs().max() * 1.1 - vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool) +# def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None): +# model.eval() +# filename = os.path.join(path, name) +# from collections import OrderedDict +# map_to_tensors = OrderedDict() + +# with torch.no_grad(): +# positions = model.means +# if aabb is not None: +# aabb = aabb.to(positions.device) +# aabb_min, aabb_max = aabb[:3], aabb[3:] +# aabb_center = (aabb_min + aabb_max) / 2 +# aabb_sacle_max = (aabb_max - aabb_min).max() / 2 * 1.1 +# vis_mask = torch.logical_and(positions >= aabb_min, positions < aabb_max).all(-1) +# else: +# aabb_center = positions.mean(0) +# aabb_sacle_max = (positions - aabb_center).abs().max() * 1.1 +# vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool) - positions = ((positions[vis_mask] - aabb_center) / aabb_sacle_max).cpu().numpy() - map_to_tensors["positions"] = o3d.core.Tensor(positions, o3d.core.float32) - map_to_tensors["normals"] = o3d.core.Tensor(np.zeros_like(positions), o3d.core.float32) +# positions = ((positions[vis_mask] - aabb_center) / aabb_sacle_max).cpu().numpy() +# map_to_tensors["positions"] = o3d.core.Tensor(positions, o3d.core.float32) +# map_to_tensors["normals"] = o3d.core.Tensor(np.zeros_like(positions), o3d.core.float32) - colors = model.colors[vis_mask].data.cpu().numpy() - map_to_tensors["colors"] = (colors * 255).astype(np.uint8) - for i in range(colors.shape[1]): - map_to_tensors[f"f_dc_{i}"] = colors[:, i : i + 1] +# colors = model.colors[vis_mask].data.cpu().numpy() +# # map_to_tensors["colors"] = (colors * 255).astype(np.uint8) +# for i in range(colors.shape[1]): +# map_to_tensors[f"f_dc_{i}"] = colors[:, i : i + 1] - shs = model.shs_rest[vis_mask].data.cpu().numpy() - if model.config.sh_degree > 0: - shs = shs.reshape((colors.shape[0], -1, 1)) - for i in range(shs.shape[-1]): - map_to_tensors[f"f_rest_{i}"] = shs[:, i] +# shs = model.shs_rest[vis_mask].data.cpu().numpy() +# # if model.sh_degree > 0: +# if 3 > 0: +# shs = shs.reshape((colors.shape[0], -1, 1)) +# for i in range(shs.shape[-2]): +# map_to_tensors[f"f_rest_{i}"] = shs[:, i] - map_to_tensors["opacity"] = model.opacities[vis_mask].data.cpu().numpy() +# opacity = model.get_opacity +# map_to_tensors["opacity"] = opacity[vis_mask].data.cpu().numpy() - scales = model.scales[vis_mask].data.cpu().unsqueeze(-1).numpy() - for i in range(3): - map_to_tensors[f"scale_{i}"] = scales[:, i] +# scales = model.get_scaling +# scales = scales[vis_mask].data.cpu().unsqueeze(-1).numpy() +# for i in range(3): +# map_to_tensors[f"scale_{i}"] = scales[:, i] - quats = model.quats[vis_mask].data.cpu().unsqueeze(-1).numpy() +# quats = 
model.get_quats
+#     quats = quats[vis_mask].data.cpu().unsqueeze(-1).numpy()
 
-    for i in range(4):
-        map_to_tensors[f"rot_{i}"] = quats[:, i]
+
+#     for i in range(4):
+#         map_to_tensors[f"rot_{i}"] = quats[:, i]
+
+#     # pcd = o3d.t.geometry.PointCloud(map_to_tensors)
+#     # o3d.t.io.write_point_cloud(str(filename), pcd)
+
+#     logger.info(f"Exported point cloud to {filename}, containing {vis_mask.sum().item()} points.")
+
+def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None):
+    model.eval()
+    filename = os.path.join(path, name)
+
+    with torch.no_grad():
+        positions = model.means
+        # if aabb is not None:
+        #     aabb = aabb.to(positions.device)
+        #     aabb_min, aabb_max = aabb[:3], aabb[3:]
+        #     aabb_center = (aabb_min + aabb_max) / 2
+        #     aabb_scale_max = (aabb_max - aabb_min).max() / 2 * 1.1
+        #     vis_mask = torch.logical_and(positions >= aabb_min, positions < aabb_max).all(-1)
+        # else:
+        #     aabb_center = positions.mean(0)
+        #     aabb_scale_max = (positions - aabb_center).abs().max() * 1.1
+        #     vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool)
+
+        # positions = ((positions[vis_mask] - aabb_center) / aabb_scale_max).cpu().numpy()
+        vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool)
+        positions = positions[vis_mask].cpu().numpy()
+        positions = positions
+
+        normals = np.zeros_like(positions)  # normals: 0
+
+        # colors = model.colors[vis_mask].data.cpu().numpy()
+        colors = model.shs_0[vis_mask].data.cpu().numpy()
+        for i in range(colors.shape[1]):
+            pass
+
+        shs = model.shs_rest[vis_mask].data.cpu().numpy()
+        shs = shs.reshape((colors.shape[0], -1))
+
+        # opacity = model.get_opacity[vis_mask].data.cpu().numpy()
+        opacity = model.pure_opacity[vis_mask].data.cpu().numpy()
+        # scales = model.get_scaling[vis_mask].data.cpu().numpy()
+        scales = model.pure_scaling[vis_mask].data.cpu().numpy()
+        # quats = model.get_quats[vis_mask].data.cpu().numpy()
+        quats = model.pure_quats[vis_mask].data.cpu().numpy()
+        num_points = positions.shape[0]
+
+    with open(filename, 'wb') as f:
+        # write the PLY header
+        f.write(b"ply\n")
+        f.write(b"format binary_little_endian 1.0\n")
+        f.write(b"comment Generated by opensplat\n")
+        f.write(f"element vertex {num_points}\n".encode())
+        f.write(b"property float x\n")
+        f.write(b"property float y\n")
+        f.write(b"property float z\n")
+        f.write(b"property float nx\n")
+        f.write(b"property float ny\n")
+        f.write(b"property float nz\n")
+
+        for i in range(colors.shape[1]):
+            f.write(f"property float f_dc_{i}\n".encode())
+
+        for i in range(shs.shape[1]):
+            f.write(f"property float f_rest_{i}\n".encode())
+
+        f.write(b"property float opacity\n")
+        f.write(b"property float scale_0\n")
+        f.write(b"property float scale_1\n")
+        f.write(b"property float scale_2\n")
+        f.write(b"property float rot_0\n")
+        f.write(b"property float rot_1\n")
+        f.write(b"property float rot_2\n")
+        f.write(b"property float rot_3\n")
+        f.write(b"end_header\n")
+
+        # prepare the data
+        data_list = [
+            positions[:, 0], positions[:, 1], positions[:, 2],
+            normals[:, 0], normals[:, 1], normals[:, 2]
+        ]
+
+        for i in range(colors.shape[1]):
+            data_list.append(colors[:, i])
+
+        for i in range(shs.shape[1]):
+            data_list.append(shs[:, i])
+
+        data_list.append(opacity)
+
+        data_list.append(scales[:, 0])
+        data_list.append(scales[:, 1])
+        data_list.append(scales[:, 2])
+
+        data_list.append(quats[:, 0])
+        data_list.append(quats[:, 1])
+        data_list.append(quats[:, 2])
+        data_list.append(quats[:, 3])
+
+        # stack the data and write it to the file
+        data = np.column_stack(data_list).astype(np.float32)
+        data.tofile(f)
 
-    pcd = o3d.t.geometry.PointCloud(map_to_tensors)
-    o3d.t.io.write_point_cloud(str(filename), pcd)
 
     logger.info(f"Exported point cloud to {filename}, containing {vis_mask.sum().item()} points.")
 
+
+
 def is_enabled() -> bool:
     """
     Returns:
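[editor's note] The f_dc_* fields above hold the raw SH DC coefficients (model.shs_0), not display colors; 3DGS tools typically convert them with the band-0 spherical-harmonics constant. A sketch, assuming the column layout the header above declares (x, y, z, then three zero normals, then f_dc_0..2):

import numpy as np

verts = read_ply_vertices("test_10000_Background.ply")  # helper sketched earlier
xyz = verts[:, 0:3]
f_dc = verts[:, 6:9]  # right after the three zero normals
rgb = np.clip(0.5 + 0.28209479177 * f_dc, 0.0, 1.0)  # usual SH-DC to RGB convention (assumption)
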
From 1a8e290b74ef675a57ad372b33d16d804bc3a685 Mon Sep 17 00:00:00 2001
From: Eisoc
Date: Fri, 20 Sep 2024 10:25:01 +0800
Subject: [PATCH 3/5] cfg changes

---
 configs/3DGS.yaml                 |   4 +-
 configs/3DGS_test.yaml            | 156 ++++++++++++++++++++++++++++++
 configs/datasets/waymo/5cams.yaml |   4 +-
 tools/eval.py                     |  24 ++---
 4 files changed, 172 insertions(+), 16 deletions(-)
 create mode 100644 configs/3DGS_test.yaml

diff --git a/configs/3DGS.yaml b/configs/3DGS.yaml
index 8f152f9..1acaa01 100644
--- a/configs/3DGS.yaml
+++ b/configs/3DGS.yaml
@@ -72,8 +72,8 @@ trainer:
     n_split_samples: 2 # number of samples to split gaussians into
     # may differ in different models
     reset_alpha_value: 0.01 # reset alpha to this value
-    densify_grad_thresh: 0.0002 # above this grad, gaussians are densified
-    densify_size_thresh: 0.01 # below this size, gaussians are *duplicated*, otherwise split
+    densify_grad_thresh: 0.0005 # above this grad, gaussians are densified
+    densify_size_thresh: 0.003 # below this size, gaussians are *duplicated*, otherwise split
     cull_alpha_thresh: 0.005 # threshold of opacity for culling gaussians
     cull_scale_thresh: 0.5 # threshold of scale for culling gaussians
     cull_screen_size: 0.15 # if a gaussian is more than this percent of screen space, cull it
diff --git a/configs/3DGS_test.yaml b/configs/3DGS_test.yaml
new file mode 100644
index 0000000..1acaa01
--- /dev/null
+++ b/configs/3DGS_test.yaml
@@ -0,0 +1,156 @@
+seed: 0
+dataset: waymo/5cams
+
+# ------------- Trainer ------------ #
+trainer:
+  type: models.trainers.SingleTrainer
+  optim:
+    num_iters: 30000
+    use_grad_scaler: false
+    cache_buffer_freq: -1 # if > 0, use error based image sampler for training
+  render:
+    near_plane: 0.1 # near plane for rendering
+    far_plane: 10000000000.0 # far plane for rendering
+    antialiased: false # whether to use antialiasing for gaussian rendering, supported by gsplat kernel
+    packed: false # whether to use packed rendering, supported by gsplat kernel
+    absgrad: false # whether to use absolute gradient for rendering, supported by gsplat kernel
+    sparse_grad: false # whether to use sparse gradient for rendering, supported by gsplat kernel
+    batch_size: 1 # batch size for rendering, currently only support 1
+  losses:
+    rgb:
+      w: 0.8
+    ssim:
+      w: 0.2
+    mask:
+      w: 1.0
+      opacity_loss_type: bce # choose from [bce, safe_bce]
+      # bce: plain binary cross-entropy
+      # safe bce: uses clamp_min_ and conditional clipping for numerical stability, since a direct log(x) or log(1 - x) can break down as x approaches 0 or 1; implemented as a custom PyTorch autograd Function, which raises the compute cost considerably
+    depth:
+      w: 0.1 # weight of depth loss
+      inverse_depth: False # whether to use inverse depth, NOTE that when set to True, must normalize=True
+      # smoothness loss on the depth map, computed with kornia.losses.inverse_depth_smoothness_loss
+      normalize: False # whether to normalize depth loss
+      loss_type: l1 # choose from ["l1", "l2"]
+      # reduction: mean_on_hw # choose from ["mean_on_hit", "mean_on_hw", "sum", "none"]
+      # refer to pvg codebase
+      # applies the chosen reduction to the loss; supports several dtypes and reduction modes, with an optional mask to weight or restrict the reduction:
+      # mean: without a mask, the plain mean over all elements; with a mask, the weighted mean (mean of loss * mask).
+      # mean_in_mask: the mean over masked positions only, i.e. sum(loss * mask) / sum(mask), with the denominator clamped so division by zero is avoided.
+      # sum: like mean, but returns the total instead.
+      # max/min: the maximum or minimum loss value; with a mask, only positions where the mask is non-zero are considered.
+      # none: the raw loss, or loss * mask if a mask is given.
+      # weight_decay: 1.0
+      # scales the L2 contribution to the loss
+    # opacity_entropy:
+    #   w: 0.05
+    # 
inverse_depth_smoothness: + # w: 0.001 + res_schedule: + double_steps: 3000 # training starts at 1/d resolution, every n steps this is doubled + downscale_times: 3 # at the beginning, resolution is 1/2^d, where d is this number + gaussian_optim_general_cfg: + xyz: + lr: 1.6e-04 + lr_final: 1.6e-06 + scale_factor: scene_radius # str or float, if "scene_scale", scale the learning rate by the scene scale + sh_dc: + lr: 0.0025 + sh_rest: + lr: 0.000125 + opacity: + lr: 0.005 + scaling: + lr: 0.005 + rotation: + lr: 0.001 + gaussian_ctrl_general_cfg: + warmup_steps: 500 # warmup steps for alpha + reset_alpha_interval: 3000 # reset alpha every n steps + refine_interval: 100 # refine gaussians every n steps + sh_degree_interval: 2000 # every n intervals turn on another sh degree + n_split_samples: 2 # number of samples to split gaussians into + # may differ in different models + reset_alpha_value: 0.01 # reset alpha to this value + densify_grad_thresh: 0.0005 # above this grad, gaussians are densified + densify_size_thresh: 0.003 # below this size, gaussians are *duplicated*, otherwise split + cull_alpha_thresh: 0.005 # threshold of opacity for culling gaussians + cull_scale_thresh: 0.5 # threshold of scale for culling gaussians + cull_screen_size: 0.15 # if a gaussian is more than this percent of screen space, cull it + split_screen_size: 0.05 # if a gaussian is more than this percent of screen space, split it + stop_screen_size_at: 4000 # stop culling/splitting at this step WRT screen size of gaussians + stop_split_at: 20000 # stop splitting at this step + sh_degree: 3 # sh degree for gaussians + +# ------------- Model ------------ # +model: + Background: + type: models.gaussians.VanillaGaussians + init: + from_lidar: + num_samples: 800_000 + return_color: True + near_randoms: 100_000 + far_randoms: 100_000 + reg: + sharp_shape_reg: + w: 1. + step_interval: 10 + max_gauss_ratio: 10. 
# threshold of ratio of gaussian max to min scale before applying regularization loss from the PhysGaussian paper + # ctrl: + # # pvg specific + # cycle_length: 0.2 + # time_interval: 0.02 + # enable_temporal_smoothing: True + # smooth_probability: 0.5 + # distribution_span: 1.5 # unit: frame interval + # betas_init: 0.1 + # densify_until_num_points: 3_000_000 + # densify_t_grad_thresh: 0.002 + # densify_t_size_thresh: 0.01 + # no_time_split: true + Sky: + type: models.modules.EnvLight + params: + resolution: 1024 + optim: + all: + lr: 0.01 + Affine: + type: models.modules.AffineTransform + params: + embedding_dim: 4 + base_mlp_layer_width: 64 + pixel_affine: False + optim: + all: + lr: 1.0e-5 + weight_decay: 1.0e-6 + CamPose: + type: models.modules.CameraOptModule + optim: + all: + lr: 1.0e-5 + weight_decay: 1.0e-6 + +# ------------- render ------------ # +render: + fps: 10 # frames per second for the main rendered output + render_full: True # whether to render full resolution videos + render_test: True # whether to render test set + render_novel: + traj_types: + #- front_center_interp # type of trajectory for novel view synthesis, /data/bing.han/drivestudio/utils/camera.py + - s_curve + #- three_key_poses + fps: 15 # frames per second for novel view rendering + vis_lidar: False # whether to visualize lidar points on ground truth images + vis_sky: False # whether to include "rgb_sky" and "rgb_sky_blend" in rendered keys + vis_error: False # whether to include "rgb_error_map" in rendered keys + +# ------------- logging ------------ # +logging: + vis_freq: 2000 # how often to visualize training stats + print_freq: 500 # how often to print training stats + saveckpt_freq: 15000 # how often to save checkpoints + save_seperate_video: True # whether to save seperate videos for each scene \ No newline at end of file diff --git a/configs/datasets/waymo/5cams.yaml b/configs/datasets/waymo/5cams.yaml index 8a71ece..abffca5 100644 --- a/configs/datasets/waymo/5cams.yaml +++ b/configs/datasets/waymo/5cams.yaml @@ -10,9 +10,9 @@ data: data_root: data/waymo/processed/training # data root for the dataset dataset: waymo - scene_idx: 3 # which scene to use, [0, 798] for waymo's training set and [0, 849] for nuscenes's train/val sets, inclusive + scene_idx: 16 # which scene to use, [0, 798] for waymo's training set and [0, 849] for nuscenes's train/val sets, inclusive start_timestep: 0 # which timestep to start from - end_timestep: 50 # which timestep to end at, -1 means the last timestep + end_timestep: -1 # which timestep to end at, -1 means the last timestep preload_device: cpu # choose from ["cpu", "cuda"], cache the data on this device. 
pixel_source: # image source and object annotations type: datasets.waymo.waymo_sourceloader.WaymoPixelSource diff --git a/tools/eval.py b/tools/eval.py index 3c903b5..a114481 100644 --- a/tools/eval.py +++ b/tools/eval.py @@ -125,10 +125,10 @@ def do_evaluation( logger.info(f"Image evaluation metrics saved to {full_metrics_file}") if args.render_video_postfix is None: - video_output_pth = f"{cfg.log_dir}/videos{post_fix}/full_set_{step}.mp4" + video_output_pth = f"{cfg.log_dir}/videos{post_fix}/full_set_{args.run_name}_{step}.mp4" else: video_output_pth = ( - f"{cfg.log_dir}/videos{post_fix}/full_set_{step}_{args.render_video_postfix}.mp4" + f"{cfg.log_dir}/videos{post_fix}/full_set_{args.run_name}_{step}_{args.render_video_postfix}.mp4" ) vis_frame_dict = save_videos( render_results, @@ -166,7 +166,7 @@ def do_evaluation( render_data = dataset.prepare_novel_view_render_data(traj) # Render and save video - save_path = os.path.join(video_output_dir, f"{traj_type}.mp4") + save_path = os.path.join(video_output_dir, f"{args.run_name}_{traj_type}.mp4") render_novel_views( trainer, render_data, save_path, fps=render_novel_cfg.get("fps", cfg.render.fps) @@ -212,18 +212,18 @@ def main(args): # define render keys render_keys = [ - "gt_rgbs", - "rgbs", - "Background_rgbs", - "RigidNodes_rgbs", - "DeformableNodes_rgbs", - "SMPLNodes_rgbs", - # "depths", + # "gt_rgbs", + # "rgbs", + # "Background_rgbs", + # "RigidNodes_rgbs", + # "DeformableNodes_rgbs", + # "SMPLNodes_rgbs", + "depths", # "Background_depths", # "RigidNodes_depths", # "DeformableNodes_depths", # "SMPLNodes_depths", - # "mask" + "mask" ] if cfg.render.vis_lidar: render_keys.insert(0, "lidar_on_images") @@ -255,7 +255,7 @@ def main(args): parser.add_argument("--resume_from", default=None, help="path to checkpoint to resume from", type=str, required=True) parser.add_argument("--render_video_postfix", type=str, default=None, help="an optional postfix for video") parser.add_argument("--save_catted_videos", type=bool, default=False, help="visualize lidar on image") - + parser.add_argument("--run_name", default="test", type=str, help="wandb run name, also used to enhance log_dir") # viewer parser.add_argument("--enable_viewer", action="store_true", help="enable viewer") parser.add_argument("--viewer_port", type=int, default=8080, help="viewer port") From eb46f45bc35bec4f6bbd68e71ff8da09a17b581d Mon Sep 17 00:00:00 2001 From: Eisoc Date: Fri, 20 Sep 2024 14:51:29 +0800 Subject: [PATCH 4/5] code-adapting changes --- configs/3DGS.yaml | 3 ++- configs/omnire.yaml | 3 ++- models/gaussians/vanilla.py | 6 +++--- tools/train.py | 23 ++++++++++++----------- utils/misc.py | 10 ++++------ 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/configs/3DGS.yaml b/configs/3DGS.yaml index 1acaa01..aca57dc 100644 --- a/configs/3DGS.yaml +++ b/configs/3DGS.yaml @@ -153,4 +153,5 @@ logging: vis_freq: 2000 # how often to visualize training stats print_freq: 500 # how often to print training stats saveckpt_freq: 15000 # how often to save checkpoints - save_seperate_video: True # whether to save seperate videos for each scene \ No newline at end of file + save_seperate_video: True # whether to save seperate videos for each scene + export_freq: 10000 # how often to export ply files \ No newline at end of file diff --git a/configs/omnire.yaml b/configs/omnire.yaml index 2c0647e..fa3256e 100644 --- a/configs/omnire.yaml +++ b/configs/omnire.yaml @@ -275,4 +275,5 @@ logging: vis_freq: 2000 # how often to visualize training stats print_freq: 500 # how often 
to print training stats
   saveckpt_freq: 15000 # how often to save checkpoints
-  save_seperate_video: True # whether to save seperate videos for each scene
\ No newline at end of file
+  save_seperate_video: True # whether to save seperate videos for each scene
+  export_freq: 10000 # how often to export ply files
\ No newline at end of file
diff --git a/configs/omnire.yaml b/configs/omnire.yaml
index 2c0647e..fa3256e 100644
--- a/configs/omnire.yaml
+++ b/configs/omnire.yaml
@@ -275,4 +275,5 @@ logging:
   vis_freq: 2000 # how often to visualize training stats
   print_freq: 500 # how often to print training stats
   saveckpt_freq: 15000 # how often to save checkpoints
-  save_seperate_video: True # whether to save seperate videos for each scene
\ No newline at end of file
+  save_seperate_video: True # whether to save seperate videos for each scene
+  export_freq: 10000 # how often to export ply files
\ No newline at end of file
diff --git a/models/gaussians/vanilla.py b/models/gaussians/vanilla.py
index e30b080..d94eeda 100644
--- a/models/gaussians/vanilla.py
+++ b/models/gaussians/vanilla.py
@@ -139,19 +139,19 @@ def get_scaling(self):
         else:
             return torch.exp(self._scales)
     @property
-    def pure_scaling(self):
+    def scales(self):
         return self._scales
     @property
     def get_opacity(self):
         return torch.sigmoid(self._opacities)
     @property
-    def pure_opacity(self):
+    def opacities(self):
         return self._opacities
     @property
     def get_quats(self):
         return self.quat_act(self._quats)
     @property
-    def pure_quats(self):
+    def quats(self):
         return self._quats
 
     def quat_act(self, x: torch.Tensor) -> torch.Tensor:
diff --git a/tools/train.py b/tools/train.py
index 53dcf08..bc59685 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -281,16 +281,6 @@ def main(args):
 
         # forward & backward
         outputs, gs_collection = trainer(image_infos, cam_infos)
-
-        # if step > 0 and step % args.save_ply == 0:
-        if step > 0 and step % 10000 == 0:
-            # for class_name, model in trainer.models.items():
-            export_gaussians_to_ply(trainer.models["Background"], cfg.log_dir, f"{args.run_name}_{step}_Background.ply")
-            # pcd = trainer.models["Background"].export_gaussians_to_ply(alpha_thresh=0)
-            # file_name = f"{args.run_name}_{step}_Background.ply"
-            # file_path = f"{cfg.log_dir}/{file_name}"
-            # o3d.io.write_point_cloud(file_path, pcd)
-            print(f"{args.run_name}_{step}_Background.ply stored in {cfg.log_dir}")
 
         trainer.update_visibility_filter()
 
         loss_dict = trainer.compute_losses(
@@ -335,7 +325,18 @@ def main(args):
                 save_only_model=True,
                 is_final=step == trainer.num_iters,
             )
-        
+
+        do_save_ply = step > 0 and (
+            (step % cfg.logging.export_freq == 0) or (step == trainer.num_iters)
+        ) and (args.resume_from is None)
+        if do_save_ply:
+            # for class_name, model in trainer.models.items():
+            export_gaussians_to_ply(trainer.models["Background"], cfg.log_dir, f"{args.run_name}_{step}_Background.ply")
+            # pcd = trainer.models["Background"].export_gaussians_to_ply(alpha_thresh=0)
+            # file_name = f"{args.run_name}_{step}_Background.ply"
+            # file_path = f"{cfg.log_dir}/{file_name}"
+            # o3d.io.write_point_cloud(file_path, pcd)
+
         #----------------------------------------------------------------------------
         #------------------------  Cache Image Error  ---------------------------
         if (
diff --git a/utils/misc.py b/utils/misc.py
index f995a66..a00f774 100644
--- a/utils/misc.py
+++ b/utils/misc.py
@@ -134,15 +134,14 @@ def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None):
         shs = shs.reshape((colors.shape[0], -1))
 
         # opacity = model.get_opacity[vis_mask].data.cpu().numpy()
-        opacity = model.pure_opacity[vis_mask].data.cpu().numpy()
+        opacity = model.opacities[vis_mask].data.cpu().numpy()
         # scales = model.get_scaling[vis_mask].data.cpu().numpy()
-        scales = model.pure_scaling[vis_mask].data.cpu().numpy()
+        scales = model.scales[vis_mask].data.cpu().numpy()
         # quats = model.get_quats[vis_mask].data.cpu().numpy()
-        quats = model.pure_quats[vis_mask].data.cpu().numpy()
+        quats = model.quats[vis_mask].data.cpu().numpy()
         num_points = positions.shape[0]
 
     with open(filename, 'wb') as f:
-        # write the PLY header
         f.write(b"ply\n")
         f.write(b"format binary_little_endian 1.0\n")
         f.write(b"comment Generated by opensplat\n")
@@ -170,7 +169,6 @@ def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None):
         f.write(b"property float rot_3\n")
         f.write(b"end_header\n")
 
-        # prepare the data
         data_list = [
             positions[:, 0], positions[:, 1], positions[:, 2],
             normals[:, 0], normals[:, 1], normals[:, 2]
         ]
@@ -193,7 +191,7 @@ def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None):
         data_list.append(quats[:, 2])
         data_list.append(quats[:, 3])
 
-        # stack the data and write it to the file
+        # stack and write to file
         data = np.column_stack(data_list).astype(np.float32)
         data.tofile(f)
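[editor's note] With the renamed properties, the exported PLY now stores the raw optimization parameters (the underscored tensors, see the vanilla.py hunk above), so a consumer has to map them back to render-space values itself. A sketch of the inverse activations, mirroring get_scaling, get_quats and get_opacity:

import numpy as np

# raw_scales / raw_quats / raw_opacity are columns sliced from the exported PLY (assumption)
scales = np.exp(raw_scales)                                            # _scales are log-scales
quats = raw_quats / np.linalg.norm(raw_quats, axis=-1, keepdims=True)  # quat_act normalization
opacity = 1.0 / (1.0 + np.exp(-raw_opacity))                           # _opacities are logits
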
From a52f674a130be5d7e6cdc9fb3745b11c5f64604b Mon Sep 17 00:00:00 2001
From: Eisoc
Date: Fri, 20 Sep 2024 19:17:53 +0800
Subject: [PATCH 5/5] adapting changes

---
 configs/omnire.yaml |  3 ++-
 tools/train.py      | 12 +++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/configs/omnire.yaml b/configs/omnire.yaml
index 33dd7bc..a62b497 100644
--- a/configs/omnire.yaml
+++ b/configs/omnire.yaml
@@ -275,4 +275,5 @@ logging:
   vis_freq: 2000 # how often to visualize training stats
   print_freq: 500 # how often to print training stats
   saveckpt_freq: 15000 # how often to save checkpoints
-  save_seperate_video: True # whether to save seperate videos for each scene
\ No newline at end of file
+  save_seperate_video: True # whether to save seperate videos for each scene
+  export_freq: 10000 # how often to export ply files
\ No newline at end of file
diff --git a/tools/train.py b/tools/train.py
index f86411a..b7aa3f6 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -10,7 +10,7 @@
 import torch
 
 from tools.eval import do_evaluation
-from utils.misc import import_str
+from utils.misc import import_str, export_gaussians_to_ply
 from utils.backup import backup_project
 from utils.logging import MetricLogger, setup_logging
 from models.video_utils import render_images, save_videos
 from datasets.driving_dataset import DrivingDataset
@@ -300,6 +300,16 @@ def main(args):
                 is_final=step == trainer.num_iters,
             )
 
+        do_save_ply = step > 0 and (
+            (step % cfg.logging.export_freq == 0) or (step == trainer.num_iters)
+        ) and (args.resume_from is None)
+        if do_save_ply:
+            # for class_name, model in trainer.models.items():
+            export_gaussians_to_ply(trainer.models["Background"], cfg.log_dir, f"{args.run_name}_{step}_Background.ply")
+            # pcd = trainer.models["Background"].export_gaussians_to_ply(alpha_thresh=0)
+            # file_name = f"{args.run_name}_{step}_Background.ply"
+            # file_path = f"{cfg.log_dir}/{file_name}"
+            # o3d.io.write_point_cloud(file_path, pcd)
         #----------------------------------------------------------------------------
         #------------------------  Cache Image Error  ---------------------------
         if (