ziyc · Eisoc · Sep 11, 2024 · Sep 18, 2024 · Sep 20, 2024 · Sep 20, 2024
diff --git a/configs/3DGS.yaml b/configs/3DGS.yaml
@@ -0,0 +1,157 @@
+seed: 0
+dataset: waymo/5cams
+
+# ------------- Trainer ------------ #
+trainer:
+  type: models.trainers.SingleTrainer
+  optim:
+    num_iters: 30000
+    use_grad_scaler: false
+    cache_buffer_freq: -1 # if > 0, use error based image sampler for training
+  render:
+    near_plane: 0.1 # near plane for rendering
+    far_plane: 10000000000.0 # far plane for rendering
+    antialiased: false # whether to use antialiasing for gaussian rendering, supported by gsplat kernel
+    packed: false # whether to use packed rendering, supported by gsplat kernel
+    absgrad: false # whether to use absolute gradient for rendering, supported by gsplat kernel
+    sparse_grad: false # whether to use sparse gradient for rendering, supported by gsplat kernel
+    batch_size: 1 # batch size for rendering, currently only support 1
+  losses:
+    rgb:
+      w: 0.8
+    ssim:
+      w: 0.2
+    mask:
+      w: 1.0
+      opacity_loss_type: bce # choose from [bce, safe_bce]
+      # bce: 二进制交叉熵（Binary Cross-Entropy)
+      # safe bce: 通过使用 clamp_min_ 和条件裁剪，此实现旨在提高数值稳定性，特别是当 x 接近 0 或 1 时，直接的 log(x) 或 log(1 - x) 可能导致数值问题。继承自pytorch的自动微分函数，计算成本大幅提高
+    depth:
+      w: 0.1 # weight of depth loss
+      inverse_depth: False # whether to use inverse depth, NOTE that when set to True, must normalize=True
+      # 针对深度图的平滑性损失计算，使用了kornia.losses.inverse_depth_smoothness_loss
+      normalize: False # whether to normalize depth loss
+      loss_type: l1 # choose from ["l1", "l2"]
+      # reduction: mean_on_hw # choose from ["mean_on_hit", "mean_on_hw", "sum", "none"]
+      # refer to pvg codebase
+      # 根据不同的规约选项对损失（loss）进行处理。它支持多种数据类型和规约方法，可以使用可选的掩码（mask）来加权或选择性地应用规约操作。
+      # mean: 如果没有掩码，直接返回所有元素的平均值；如果有掩码，返回加权的平均值（损失与掩码的元素乘积的平均值）。
+      # mean_in_mask: 只在掩码指定的位置计算平均值，实际上是计算掩码内的加权和除以掩码值的和（确保分母不为零，避免除零错误）。
+      # sum: 类似于平均值，但返回的是总和。
+      # max/min: 返回损失中的最大值或最小值。如果有掩码，只在掩码非零的位置考虑损失值。
+      # none: 返回原始损失，如果有掩码，返回损失与掩码的乘积。
+      # weight_decay: 1.0
+      # 调整L2对损失的贡献
+    # opacity_entropy:
+    #   w: 0.05
+    # inverse_depth_smoothness:
+    #   w: 0.001
+  res_schedule:
+    double_steps: 3000 # training starts at 1/d resolution, every n steps this is doubled
+    downscale_times: 3 # at the beginning, resolution is 1/2^d, where d is this number
+  gaussian_optim_general_cfg:
+    xyz:
+      lr: 1.6e-04
+      lr_final: 1.6e-06
+      scale_factor: scene_radius # str or float, if "scene_scale", scale the learning rate by the scene scale
+    sh_dc:
+      lr: 0.0025
+    sh_rest:
+      lr: 0.000125
+    opacity:
+      lr: 0.005
+    scaling:
+      lr: 0.005
+    rotation:
+      lr: 0.001
+  gaussian_ctrl_general_cfg:
+    warmup_steps: 500             # warmup steps for alpha
+    reset_alpha_interval: 3000    # reset alpha every n steps
+    refine_interval: 100          # refine gaussians every n steps
+    sh_degree_interval: 2000      # every n intervals turn on another sh degree
+    n_split_samples: 2            # number of samples to split gaussians into
+    # may differ in different models
+    reset_alpha_value: 0.01       # reset alpha to this value
+    densify_grad_thresh: 0.0005   # above this grad, gaussians are densified
+    densify_size_thresh: 0.003   # below this size, gaussians are *duplicated*, otherwise split
+    cull_alpha_thresh: 0.005       # threshold of opacity for culling gaussians
+    cull_scale_thresh: 0.5        # threshold of scale for culling gaussians
+    cull_screen_size: 0.15        # if a gaussian is more than this percent of screen space, cull it
+    split_screen_size: 0.05       # if a gaussian is more than this percent of screen space, split it
+    stop_screen_size_at: 4000     # stop culling/splitting at this step WRT screen size of gaussians
+    stop_split_at: 20000          # stop splitting at this step
+    sh_degree: 3                  # sh degree for gaussians
+
+# ------------- Model ------------ #
+model:
+  Background:
+    type: models.gaussians.VanillaGaussians
+    init:
+      from_lidar:
+        num_samples: 800_000
+        return_color: True
+      near_randoms: 100_000
+      far_randoms: 100_000
+    reg:
+      sharp_shape_reg:
+        w: 1.
+        step_interval: 10
+        max_gauss_ratio: 10.       # threshold of ratio of gaussian max to min scale before applying regularization loss from the PhysGaussian paper
+    # ctrl:
+    #   # pvg specific
+    #   cycle_length: 0.2
+    #   time_interval: 0.02
+    #   enable_temporal_smoothing: True
+    #   smooth_probability: 0.5
+    #   distribution_span: 1.5 # unit: frame interval
+    #   betas_init: 0.1
+    #   densify_until_num_points: 3_000_000
+    #   densify_t_grad_thresh: 0.002
+    #   densify_t_size_thresh: 0.01
+    #   no_time_split: true
+  Sky:
+    type: models.modules.EnvLight
+    params:
+      resolution: 1024
+    optim:
+      all:
+        lr: 0.01
+  Affine:
+    type: models.modules.AffineTransform
+    params:
+      embedding_dim: 4
+      base_mlp_layer_width: 64
+      pixel_affine: False
+    optim:
+      all:
+        lr: 1.0e-5
+        weight_decay: 1.0e-6
+  CamPose:
+    type: models.modules.CameraOptModule
+    optim:
+      all:
+        lr: 1.0e-5
+        weight_decay: 1.0e-6
+
+# ------------- render ------------ #
+render:
+  fps: 10 # frames per second for the main rendered output
+  render_full: True # whether to render full resolution videos
+  render_test: True # whether to render test set
+  render_novel: 
+    traj_types:
+      #- front_center_interp # type of trajectory for novel view synthesis, /data/bing.han/drivestudio/utils/camera.py
+      - s_curve
+      #- three_key_poses
+    fps: 15 # frames per second for novel view rendering
+  vis_lidar: False # whether to visualize lidar points on ground truth images
+  vis_sky: False # whether to include "rgb_sky" and "rgb_sky_blend" in rendered keys
+  vis_error: False # whether to include "rgb_error_map" in rendered keys
+
+# ------------- logging ------------ #
+logging:
+  vis_freq: 2000 # how often to visualize training stats
+  print_freq: 500 # how often to print training stats
+  saveckpt_freq: 15000 # how often to save checkpoints
+  save_seperate_video: True # whether to save seperate videos for each scene
+  export_freq: 10000 # how often to export ply files
diff --git a/configs/omnire.yaml b/configs/omnire.yaml
@@ -275,4 +275,5 @@ logging:
   vis_freq: 2000 # how often to visualize training stats
   print_freq: 500 # how often to print training stats
   saveckpt_freq: 15000 # how often to save checkpoints
-  save_seperate_video: True # whether to save seperate videos for each scene
+  save_seperate_video: True # whether to save seperate videos for each scene
+  export_freq: 10000 # how often to export ply files
diff --git a/models/gaussians/vanilla.py b/models/gaussians/vanilla.py
@@ -111,6 +111,9 @@ def colors(self):
         else:
             return torch.sigmoid(self._features_dc)
     @property
+    def means(self):
+        return self._means
+    @property
     def shs_0(self):
         return self._features_dc
     @property
@@ -136,11 +139,20 @@ def get_scaling(self):
             else:
                 return torch.exp(self._scales)
     @property
+    def scales(self):
+        return self._scales
+    @property
     def get_opacity(self):
         return torch.sigmoid(self._opacities)
     @property
+    def opacities(self):
+        return self._opacities
+    @property
     def get_quats(self):
         return self.quat_act(self._quats)
+    @property
+    def quats(self):
+        return self._quats
 
     def quat_act(self, x: torch.Tensor) -> torch.Tensor:
         return x / x.norm(dim=-1, keepdim=True)
@@ -462,7 +474,10 @@ def load_state_dict(self, state_dict: Dict, **kwargs) -> str:
         msg = super().load_state_dict(state_dict, **kwargs)
         return msg
 
+
+
     def export_gaussians_to_ply(self, alpha_thresh: float) -> Dict:
+        import open3d as o3d
         means = self._means
         direct_color = self.colors
 
@@ -471,4 +486,9 @@ def export_gaussians_to_ply(self, alpha_thresh: float) -> Dict:
         return {
             "positions": means[mask],
             "colors": direct_color[mask],
-        }
+        }
+        # pcd = o3d.geometry.PointCloud()
+        # pcd.points = o3d.utility.Vector3dVector(means[mask])
+        # pcd.colors = o3d.utility.Vector3dVector(direct_color[mask])
+        # return pcd
+
diff --git a/tools/train.py b/tools/train.py
@@ -10,7 +10,7 @@
 
 import torch
 from tools.eval import do_evaluation
-from utils.misc import import_str
+from utils.misc import import_str, export_gaussians_to_ply
 from utils.backup import backup_project
 from utils.logging import MetricLogger, setup_logging
 from models.video_utils import render_images, save_videos
@@ -300,6 +300,16 @@ def main(args):
                 is_final=step == trainer.num_iters,
             )
 
+        do_save_ply = step > 0 and (
+            (step % cfg.logging.export_freq == 0) or (step == trainer.num_iters)
+        ) and (args.resume_from is None)
+        if do_save_ply: 
+            # for class_name, model in trainer.models.items():
+            export_gaussians_to_ply(trainer.models["Background"], cfg.log_dir, f"{args.run_name}_{step}_Background.ply")
+            # pcd = trainer.models["Background"].export_gaussians_to_ply(alpha_thresh=0)
+            # file_name = f"{args.run_name}_{step}_Background.ply"
+            # file_path = f"{cfg.log_dir}/{file_name}"
+            # o3d.io.write_point_cloud(file_path, pcd)
         #----------------------------------------------------------------------------
         #------------------------    Cache Image Error    ---------------------------
         if (