From 5fcba3858baeaee8bf1d4186f19ac76e8d4f6f5a Mon Sep 17 00:00:00 2001 From: Eisoc Date: Wed, 11 Sep 2024 14:37:11 +0800 Subject: [PATCH 1/5] init --- .gitmodules | 6 +- configs/3DGS.yaml | 278 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 281 insertions(+), 3 deletions(-) create mode 100644 configs/3DGS.yaml diff --git a/.gitmodules b/.gitmodules index 66d0dc0..b91d861 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "third_party/Humans4D"] - path = third_party/Humans4D - url = https://github.com/shubham-goel/4D-Humans.git +# [submodule "third_party/Humans4D"] +# path = third_party/Humans4D +# url = https://github.com/shubham-goel/4D-Humans.git diff --git a/configs/3DGS.yaml b/configs/3DGS.yaml new file mode 100644 index 0000000..33dd7bc --- /dev/null +++ b/configs/3DGS.yaml @@ -0,0 +1,278 @@ +seed: 0 +dataset: waymo/3cams + +# ------------- Trainer ------------ # +trainer: + type: models.trainers.MultiTrainer + optim: + num_iters: 30000 + use_grad_scaler: false + cache_buffer_freq: -1 # if > 0, use error based image sampler for training + render: + near_plane: 0.1 # near plane for rendering + far_plane: 10000000000.0 # far plane for rendering + antialiased: false # whether to use antialiasing for gaussian rendering, supported by gsplat kernel + packed: false # whether to use packed rendering, supported by gsplat kernel + absgrad: true # whether to use absolute gradient for rendering, supported by gsplat kernel + sparse_grad: false # whether to use sparse gradient for rendering, supported by gsplat kernel + batch_size: 1 # batch size for rendering, currently only support 1 + losses: + rgb: + w: 0.8 + ssim: + w: 0.2 + mask: + w: 0.05 + opacity_loss_type: bce # choose from [bce, safe_bce] + depth: + w: 0.01 # weight of depth loss + inverse_depth: False # whether to use inverse depth, NOTE that when set to True, must normalize=True + normalize: False # whether to normalize depth loss + loss_type: l1 # choose from ["l1", "l2"] + affine: + w: 0.00001 # weight of affine regularization + res_schedule: + double_steps: 250 # training starts at 1/d resolution, every n steps this is doubled + downscale_times: 2 # at the beginning, resolution is 1/2^d, where d is this number + gaussian_optim_general_cfg: + xyz: + lr: 1.6e-04 + lr_final: 1.6e-06 + scale_factor: scene_radius # str or float, if "scene_scale", scale the learning rate by the scene scale + sh_dc: + lr: 0.0025 + sh_rest: + lr: 0.000125 + opacity: + lr: 0.05 + scaling: + lr: 0.005 + rotation: + lr: 0.001 + gaussian_ctrl_general_cfg: + warmup_steps: 500 # warmup steps for alpha + reset_alpha_interval: 3000 # reset alpha every n steps + refine_interval: 100 # refine gaussians every n steps + sh_degree_interval: 1000 # every n intervals turn on another sh degree + n_split_samples: 2 # number of samples to split gaussians into + # may differ in different models + reset_alpha_value: 0.01 # reset alpha to this value + densify_grad_thresh: 0.0005 # above this grad, gaussians are densified + densify_size_thresh: 0.003 # below this size, gaussians are *duplicated*, otherwise split + cull_alpha_thresh: 0.005 # threshold of opacity for culling gaussians + cull_scale_thresh: 0.5 # threshold of scale for culling gaussians + cull_screen_size: 0.15 # if a gaussian is more than this percent of screen space, cull it + split_screen_size: 0.05 # if a gaussian is more than this percent of screen space, split it + stop_screen_size_at: 4000 # stop culling/splitting at this step WRT screen size of gaussians + stop_split_at: 
15000 # stop splitting at this step + sh_degree: 3 # sh degree for gaussians + +# ------------- Model ------------ # +model: + Background: + type: models.gaussians.VanillaGaussians + init: + from_lidar: + num_samples: 800_000 + return_color: True + near_randoms: 100_000 + far_randoms: 100_000 + reg: + sharp_shape_reg: + w: 1. + step_interval: 10 + max_gauss_ratio: 10. # threshold of ratio of gaussian max to min scale before applying regularization loss from the PhysGaussian paper + RigidNodes: + type: models.nodes.RigidNodes + init: + instance_max_pts: 5000 # max initial points for each instance + only_moving: true # only optimize moving instances + traj_length_thres: 1.0 # threshold of trajectory length for moving instances + ctrl: + cull_scale_thresh: 0.1 + stop_screen_size_at: 30000 + stop_split_at: 20000 + cull_out_of_bound: true + reg: + sharp_shape_reg: + w: 1. + step_interval: 10 + max_gauss_ratio: 10. + temporal_smooth_reg: + trans: + w: 0.01 # optimal value may vary + smooth_range: 5 # no ablation + optim: + ins_rotation: + lr: 0.00001 + lr_final: 0.000005 + ins_translation: + lr: 0.0005 + lr_final: 0.0001 + DeformableNodes: + type: models.nodes.DeformableNodes + init: + instance_max_pts: 5000 + only_moving: true + traj_length_thres: 0.5 + networks: + D: 8 + W: 256 + embed_dim: 16 + x_multires: 10 # default 10 + t_multires: 10 # default 10 + deform_quat: True + deform_scale: False + ctrl: + reset_alpha_value: 0.1 + cull_scale_thresh: 0.1 + stop_screen_size_at: 30000 + stop_split_at: 20000 + sh_degree: 3 + cull_out_of_bound: false + use_deformgs_for_nonrigid: True # default True + use_deformgs_after: 3000 # default 3000 + stop_optimizing_canonical_xyz: True + reg: + sharp_shape_reg: + w: 1. + step_interval: 10 + max_gauss_ratio: 10. + out_of_bound_loss: + w: 1. + stop_after: 30000 + temporal_smooth_reg: + trans: + w: 0.01 # optimal value may vary + smooth_range: 5 # optimal value may vary + optim: + xyz: + lr: 1.6e-04 + lr_final: 1.6e-06 + scale_factor: 2. + embedding: + lr: 0.001 + lr_final: 0.0001 + deform_network: + lr: 1.6e-03 + lr_final: 1.6e-04 + scale_factor: 5. + ins_rotation: + lr: 0.00001 + lr_final: 0.000005 + ins_translation: + lr: 0.0005 + lr_final: 0.0001 + SMPLNodes: + type: models.nodes.SMPLNodes + init: + only_moving: true + traj_length_thres: 0.5 + reg: + sharp_shape_reg: + w: 1. + step_interval: 10 + max_gauss_ratio: 10. 
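+        # a rough sketch (assumption, not a config key) of the anisotropy term this
+        # block configures, after the PhysGaussian-style regularizer, applied every
+        # step_interval steps:
+        #   ratio = scales.max(dim=-1).values / scales.min(dim=-1).values
+        #   loss += w * torch.relu(ratio - max_gauss_ratio).mean()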
+ temporal_smooth_reg: + trans: + w: 0.01 # optimal value may vary + smooth_range: 5 # range of frames to consider for smoothing + # joint_smooth: + # w: 0.01 # optimal value may vary + knn_reg: + # lambda_std_x: 0.0001 + lambda_std_q: 0.001 + lambda_std_s: 0.001 + lambda_std_o: 0.001 + lambda_std_shs_dc: 0.001 + lambda_std_shs_rest: 0.001 + max_s_square_reg: + w: 0.05 + x_offset: + w: 0.2 + voxel_deformer_reg: + lambda_std_w: 0.6 + lambda_std_w_rest: 0.5 + lambda_w_norm: 0.6 + lambda_w_rest_norm: 0.3 + optim: + xyz: + lr: 1.6e-04 + lr_final: 1.6e-06 + scale_factor: 3.0 + rotation: + lr: 0.005 + sh_dc: + lr: 0.005 + sh_rest: + lr: 0.00025 + ins_rotation: + lr: 0.00001 + lr_final: 0.000005 + ins_translation: + lr: 0.0005 + lr_final: 0.0001 + smpl_rotation: + lr: 0.00005 + lr_final: 0.00001 + w_dc_vox: + lr: 0.0001 + lr_final: 0.00001 + opt_after: 10000 + ctrl: + sh_degree: 1 + opacity_init_value: 0.99 + ball_gaussians: false + constrain_xyz_offset: false + knn_update_interval: 100 + knn_neighbors: 3 + use_voxel_deformer: true + freeze_x: false + freeze_o: false + freeze_q: false + freeze_s: false + freeze_shs_dc: false + freeze_shs_rest: false + Sky: + type: models.modules.EnvLight + params: + resolution: 1024 + optim: + all: + lr: 0.01 + Affine: + type: models.modules.AffineTransform + params: + embedding_dim: 4 + base_mlp_layer_width: 64 + pixel_affine: False + optim: + all: + lr: 1.0e-5 + weight_decay: 1.0e-6 + CamPose: + type: models.modules.CameraOptModule + optim: + all: + lr: 1.0e-5 + weight_decay: 1.0e-6 + +# ------------- render ------------ # +render: + fps: 10 # frames per second for the main rendered output + render_full: True # whether to render full resolution videos + render_test: True # whether to render test set + render_novel: + traj_types: + - front_center_interp # type of trajectory for novel view synthesis + fps: 24 # frames per second for novel view rendering + vis_lidar: False # whether to visualize lidar points on ground truth images + vis_sky: False # whether to include "rgb_sky" and "rgb_sky_blend" in rendered keys + vis_error: False # whether to include "rgb_error_map" in rendered keys + +# ------------- logging ------------ # +logging: + vis_freq: 2000 # how often to visualize training stats + print_freq: 500 # how often to print training stats + saveckpt_freq: 15000 # how often to save checkpoints + save_seperate_video: True # whether to save seperate videos for each scene \ No newline at end of file From ca1ca71ab6ca1f57f892d9933929cbc8589f4468 Mon Sep 17 00:00:00 2001 From: Eisoc Date: Wed, 18 Sep 2024 11:18:00 +0800 Subject: [PATCH 2/5] export background vanilla gaussian to ply --- **Note**: | 0 configs/3DGS.yaml | 214 +++++++----------------------- configs/datasets/waymo/5cams.yaml | 11 +- configs/omnire.yaml | 2 +- datasets/driving_dataset.py | 3 +- models/gaussians/vanilla.py | 22 ++- models/trainers/base.py | 3 +- models/trainers/single.py | 6 +- models/video_utils.py | 10 +- plyreader.py | 15 +++ tools/eval.py | 7 +- tools/train.py | 51 ++++++- utils/camera.py | 10 ++ utils/misc.py | 182 +++++++++++++++++++------ 14 files changed, 304 insertions(+), 232 deletions(-) create mode 100644 **Note**: create mode 100644 plyreader.py diff --git a/**Note**: b/**Note**: new file mode 100644 index 0000000..e69de29 diff --git a/configs/3DGS.yaml b/configs/3DGS.yaml index 33dd7bc..8f152f9 100644 --- a/configs/3DGS.yaml +++ b/configs/3DGS.yaml @@ -1,9 +1,9 @@ seed: 0 -dataset: waymo/3cams +dataset: waymo/5cams # ------------- Trainer ------------ # trainer: - type: 
models.trainers.MultiTrainer
+  type: models.trainers.SingleTrainer
   optim:
     num_iters: 30000
     use_grad_scaler: false
     cache_buffer_freq: -1 # if > 0, use error based image sampler for training
@@ -13,7 +13,7 @@ trainer:
     far_plane: 10000000000.0 # far plane for rendering
     antialiased: false # whether to use antialiasing for gaussian rendering, supported by gsplat kernel
     packed: false # whether to use packed rendering, supported by gsplat kernel
-    absgrad: true # whether to use absolute gradient for rendering, supported by gsplat kernel
+    absgrad: false # whether to use absolute gradient for rendering, supported by gsplat kernel
     sparse_grad: false # whether to use sparse gradient for rendering, supported by gsplat kernel
     batch_size: 1 # batch size for rendering, currently only support 1
   losses:
@@ -22,18 +22,33 @@ trainer:
     ssim:
       w: 0.2
     mask:
-      w: 0.05
+      w: 1.0
       opacity_loss_type: bce # choose from [bce, safe_bce]
+      # bce: plain binary cross-entropy
+      # safe bce: uses clamp_min_ and conditional clipping for numerical stability, since a direct log(x) or log(1 - x) can break down as x approaches 0 or 1; implemented as a custom PyTorch autograd Function, which raises the compute cost considerably
     depth:
-      w: 0.01 # weight of depth loss
+      w: 0.1 # weight of depth loss
       inverse_depth: False # whether to use inverse depth, NOTE that when set to True, must normalize=True
+      # smoothness loss on the depth map, computed with kornia.losses.inverse_depth_smoothness_loss
       normalize: False # whether to normalize depth loss
       loss_type: l1 # choose from ["l1", "l2"]
-    affine:
-      w: 0.00001 # weight of affine regularization
+      # reduction: mean_on_hw # choose from ["mean_on_hit", "mean_on_hw", "sum", "none"]
+      # refer to pvg codebase
+      # applies the chosen reduction to the loss; supports several dtypes and reduction modes, with an optional mask to weight or restrict the reduction:
+      # mean: without a mask, the plain mean over all elements; with a mask, the weighted mean (mean of loss * mask).
+      # mean_in_mask: the mean over masked positions only, i.e. sum(loss * mask) / sum(mask), with the denominator clamped so division by zero is avoided.
+      # sum: like mean, but returns the total instead.
+      # max/min: the maximum or minimum loss value; with a mask, only positions where the mask is non-zero are considered.
+      # none: the raw loss, or loss * mask if a mask is given.
+      # weight_decay: 1.0
+      # scales the L2 contribution to the loss
+    # opacity_entropy:
+    #   w: 0.05
+    # inverse_depth_smoothness:
+    #   w: 0.001
   res_schedule:
-    double_steps: 250 # training starts at 1/d resolution, every n steps this is doubled
-    downscale_times: 2 # at the beginning, resolution is 1/2^d, where d is this number
+    double_steps: 3000 # training starts at 1/d resolution, every n steps this is doubled
+    downscale_times: 3 # at the beginning, resolution is 1/2^d, where d is this number
   gaussian_optim_general_cfg:
     xyz:
       lr: 1.6e-04
@@ -44,7 +59,7 @@ trainer:
     sh_rest:
       lr: 0.000125
     opacity:
-      lr: 0.05
+      lr: 0.005
     scaling:
       lr: 0.005
     rotation:
@@ -53,18 +68,18 @@ trainer:
     warmup_steps: 500 # warmup steps for alpha
     reset_alpha_interval: 3000 # reset alpha every n steps
     refine_interval: 100 # refine gaussians every n steps
-    sh_degree_interval: 1000 # every n intervals turn on another sh degree
+    sh_degree_interval: 2000 # every n intervals turn on another sh degree
     n_split_samples: 2 # number of samples to split gaussians into
     # may differ in different models
     reset_alpha_value: 0.01 # reset alpha to this value
-    densify_grad_thresh: 0.0005 # above this grad, gaussians are densified
-    densify_size_thresh: 0.003 # below this size, gaussians are *duplicated*, otherwise split
-    cull_alpha_thresh: 0.005 # threshold of opacity for culling gaussians
+    densify_grad_thresh: 0.0002 # above this grad, gaussians are densified
+    densify_size_thresh: 0.01 # below this size, gaussians are *duplicated*, otherwise split
+    cull_alpha_thresh: 0.005 # threshold of opacity for culling gaussians
     cull_scale_thresh: 0.5 # threshold of scale for culling gaussians
     cull_screen_size: 0.15 # if a gaussian is more than this percent of screen space, cull it
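+    # illustration only (assumption, not a config key) of how the thresholds above
+    # interact on each refine step:
+    #   if grad > densify_grad_thresh:
+    #       duplicate the gaussian if its scale < densify_size_thresh, else split it into n_split_samples
+    #   cull it when opacity < cull_alpha_thresh or scale > cull_scale_thresh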
split_screen_size: 0.05 # if a gaussian is more than this percent of screen space, split it stop_screen_size_at: 4000 # stop culling/splitting at this step WRT screen size of gaussians - stop_split_at: 15000 # stop splitting at this step + stop_split_at: 20000 # stop splitting at this step sh_degree: 3 # sh degree for gaussians # ------------- Model ------------ # @@ -82,157 +97,18 @@ model: w: 1. step_interval: 10 max_gauss_ratio: 10. # threshold of ratio of gaussian max to min scale before applying regularization loss from the PhysGaussian paper - RigidNodes: - type: models.nodes.RigidNodes - init: - instance_max_pts: 5000 # max initial points for each instance - only_moving: true # only optimize moving instances - traj_length_thres: 1.0 # threshold of trajectory length for moving instances - ctrl: - cull_scale_thresh: 0.1 - stop_screen_size_at: 30000 - stop_split_at: 20000 - cull_out_of_bound: true - reg: - sharp_shape_reg: - w: 1. - step_interval: 10 - max_gauss_ratio: 10. - temporal_smooth_reg: - trans: - w: 0.01 # optimal value may vary - smooth_range: 5 # no ablation - optim: - ins_rotation: - lr: 0.00001 - lr_final: 0.000005 - ins_translation: - lr: 0.0005 - lr_final: 0.0001 - DeformableNodes: - type: models.nodes.DeformableNodes - init: - instance_max_pts: 5000 - only_moving: true - traj_length_thres: 0.5 - networks: - D: 8 - W: 256 - embed_dim: 16 - x_multires: 10 # default 10 - t_multires: 10 # default 10 - deform_quat: True - deform_scale: False - ctrl: - reset_alpha_value: 0.1 - cull_scale_thresh: 0.1 - stop_screen_size_at: 30000 - stop_split_at: 20000 - sh_degree: 3 - cull_out_of_bound: false - use_deformgs_for_nonrigid: True # default True - use_deformgs_after: 3000 # default 3000 - stop_optimizing_canonical_xyz: True - reg: - sharp_shape_reg: - w: 1. - step_interval: 10 - max_gauss_ratio: 10. - out_of_bound_loss: - w: 1. - stop_after: 30000 - temporal_smooth_reg: - trans: - w: 0.01 # optimal value may vary - smooth_range: 5 # optimal value may vary - optim: - xyz: - lr: 1.6e-04 - lr_final: 1.6e-06 - scale_factor: 2. - embedding: - lr: 0.001 - lr_final: 0.0001 - deform_network: - lr: 1.6e-03 - lr_final: 1.6e-04 - scale_factor: 5. - ins_rotation: - lr: 0.00001 - lr_final: 0.000005 - ins_translation: - lr: 0.0005 - lr_final: 0.0001 - SMPLNodes: - type: models.nodes.SMPLNodes - init: - only_moving: true - traj_length_thres: 0.5 - reg: - sharp_shape_reg: - w: 1. - step_interval: 10 - max_gauss_ratio: 10. 
- temporal_smooth_reg: - trans: - w: 0.01 # optimal value may vary - smooth_range: 5 # range of frames to consider for smoothing - # joint_smooth: - # w: 0.01 # optimal value may vary - knn_reg: - # lambda_std_x: 0.0001 - lambda_std_q: 0.001 - lambda_std_s: 0.001 - lambda_std_o: 0.001 - lambda_std_shs_dc: 0.001 - lambda_std_shs_rest: 0.001 - max_s_square_reg: - w: 0.05 - x_offset: - w: 0.2 - voxel_deformer_reg: - lambda_std_w: 0.6 - lambda_std_w_rest: 0.5 - lambda_w_norm: 0.6 - lambda_w_rest_norm: 0.3 - optim: - xyz: - lr: 1.6e-04 - lr_final: 1.6e-06 - scale_factor: 3.0 - rotation: - lr: 0.005 - sh_dc: - lr: 0.005 - sh_rest: - lr: 0.00025 - ins_rotation: - lr: 0.00001 - lr_final: 0.000005 - ins_translation: - lr: 0.0005 - lr_final: 0.0001 - smpl_rotation: - lr: 0.00005 - lr_final: 0.00001 - w_dc_vox: - lr: 0.0001 - lr_final: 0.00001 - opt_after: 10000 - ctrl: - sh_degree: 1 - opacity_init_value: 0.99 - ball_gaussians: false - constrain_xyz_offset: false - knn_update_interval: 100 - knn_neighbors: 3 - use_voxel_deformer: true - freeze_x: false - freeze_o: false - freeze_q: false - freeze_s: false - freeze_shs_dc: false - freeze_shs_rest: false + # ctrl: + # # pvg specific + # cycle_length: 0.2 + # time_interval: 0.02 + # enable_temporal_smoothing: True + # smooth_probability: 0.5 + # distribution_span: 1.5 # unit: frame interval + # betas_init: 0.1 + # densify_until_num_points: 3_000_000 + # densify_t_grad_thresh: 0.002 + # densify_t_size_thresh: 0.01 + # no_time_split: true Sky: type: models.modules.EnvLight params: @@ -264,8 +140,10 @@ render: render_test: True # whether to render test set render_novel: traj_types: - - front_center_interp # type of trajectory for novel view synthesis - fps: 24 # frames per second for novel view rendering + #- front_center_interp # type of trajectory for novel view synthesis, /data/bing.han/drivestudio/utils/camera.py + - s_curve + #- three_key_poses + fps: 15 # frames per second for novel view rendering vis_lidar: False # whether to visualize lidar points on ground truth images vis_sky: False # whether to include "rgb_sky" and "rgb_sky_blend" in rendered keys vis_error: False # whether to include "rgb_error_map" in rendered keys diff --git a/configs/datasets/waymo/5cams.yaml b/configs/datasets/waymo/5cams.yaml index c791f63..8a71ece 100644 --- a/configs/datasets/waymo/5cams.yaml +++ b/configs/datasets/waymo/5cams.yaml @@ -10,21 +10,22 @@ data: data_root: data/waymo/processed/training # data root for the dataset dataset: waymo - scene_idx: 0 # which scene to use, [0, 798] for waymo's training set and [0, 849] for nuscenes's train/val sets, inclusive + scene_idx: 3 # which scene to use, [0, 798] for waymo's training set and [0, 849] for nuscenes's train/val sets, inclusive start_timestep: 0 # which timestep to start from - end_timestep: -1 # which timestep to end at, -1 means the last timestep + end_timestep: 50 # which timestep to end at, -1 means the last timestep preload_device: cpu # choose from ["cpu", "cuda"], cache the data on this device. pixel_source: # image source and object annotations type: datasets.waymo.waymo_sourceloader.WaymoPixelSource cameras: [0, 1, 2, 3, 4] # which cameras to use downscale_when_loading: [2, 2, 2, 2, 2] # the size of the images to load downscale: 1 # downscale factor wrt to the downscale_when_loading - undistort: True # whether to undistort the images + undistort: False # whether to undistort the images test_image_stride: 0 # use every Nth timestep for the test set. 
if 0, use all images for training and none for testing
     load_sky_mask: True # whether to load sky mask
     load_dynamic_mask: True # whether to load dynamic mask
-    load_objects: True # whether to load object bounding boxes
-    load_smpl: True # whether to load SMPL template for pedestrians
+    load_objects: False # whether to load object bounding boxes
+    load_smpl: False # whether to load SMPL template for pedestrians
+    # NOTE: SMPL human poses are ignored here
   sampler: # error based image sampler
     buffer_downscale: 8 # downscale factor for the buffer wrt load_size
     buffer_ratio: 0.5 # the percentage of images sampled according to the error buffer
diff --git a/configs/omnire.yaml b/configs/omnire.yaml
index 33dd7bc..2c0647e 100644
--- a/configs/omnire.yaml
+++ b/configs/omnire.yaml
@@ -1,5 +1,5 @@
 seed: 0
-dataset: waymo/3cams
+dataset: waymo/5cams
 
 # ------------- Trainer ------------ #
 trainer:
diff --git a/datasets/driving_dataset.py b/datasets/driving_dataset.py
index 38b3e07..7a83398 100644
--- a/datasets/driving_dataset.py
+++ b/datasets/driving_dataset.py
@@ -732,7 +732,8 @@ def get_novel_render_traj(
 
     novel_trajs = {}
     for traj_type in traj_types:
-        novel_trajs[traj_type] = get_interp_novel_trajectories(
+        # very important: this generates the novel-view camera paths
+        novel_trajs[traj_type] = get_interp_novel_trajectories(
             self.type,
             self.scene_idx,
             per_cam_poses,
diff --git a/models/gaussians/vanilla.py b/models/gaussians/vanilla.py
index d59ff14..e30b080 100644
--- a/models/gaussians/vanilla.py
+++ b/models/gaussians/vanilla.py
@@ -111,6 +111,9 @@ def colors(self):
         else:
             return torch.sigmoid(self._features_dc)
     @property
+    def means(self):
+        return self._means
+    @property
     def shs_0(self):
         return self._features_dc
     @property
@@ -136,11 +139,20 @@ def get_scaling(self):
         else:
             return torch.exp(self._scales)
     @property
+    def pure_scaling(self):
+        return self._scales
+    @property
     def get_opacity(self):
         return torch.sigmoid(self._opacities)
     @property
+    def pure_opacity(self):
+        return self._opacities
+    @property
     def get_quats(self):
         return self.quat_act(self._quats)
+    @property
+    def pure_quats(self):
+        return self._quats
 
     def quat_act(self, x: torch.Tensor) -> torch.Tensor:
         return x / x.norm(dim=-1, keepdim=True)
@@ -462,7 +474,10 @@ def load_state_dict(self, state_dict: Dict, **kwargs) -> str:
         msg = super().load_state_dict(state_dict, **kwargs)
         return msg
 
+
+    def export_gaussians_to_ply(self, alpha_thresh: float) -> Dict:
+        import open3d as o3d
         means = self._means
         direct_color = self.colors
@@ -471,4 +486,9 @@
         return {
             "positions": means[mask],
             "colors": direct_color[mask],
-        }
\ No newline at end of file
+        }
+        # pcd = o3d.geometry.PointCloud()
+        # pcd.points = o3d.utility.Vector3dVector(means[mask])
+        # pcd.colors = o3d.utility.Vector3dVector(direct_color[mask])
+        # return pcd
+
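[editor's note] The commented-out Open3D path above hands torch tensors straight to open3d, which expects (N, 3) float64 NumPy arrays. A minimal usage sketch for the dict this method returns (threshold value and output filename here are illustrative, not from the repo):

import numpy as np
import open3d as o3d

out = model.export_gaussians_to_ply(alpha_thresh=0.05)  # {"positions": ..., "colors": ...}
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(out["positions"].detach().cpu().numpy().astype(np.float64))
pcd.colors = o3d.utility.Vector3dVector(out["colors"].detach().cpu().numpy().clip(0.0, 1.0).astype(np.float64))
o3d.io.write_point_cloud("background_preview.ply", pcd)
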
diff --git a/models/trainers/base.py b/models/trainers/base.py
index 2f959bf..9145920 100644
--- a/models/trainers/base.py
+++ b/models/trainers/base.py
@@ -490,7 +490,8 @@ def forward(
             # render sky
             sky_model = self.models['Sky']
             outputs["rgb_sky"] = sky_model(image_infos)
-            outputs["rgb_sky_blend"] = outputs["rgb_sky"] * (1.0 - outputs["opacity"])
+            outputs["rgb_sky_blend"] = outputs["rgb_sky"] * (1.0 - outputs["opacity"])
+            # (1 - alpha) is the transparency: the more transparent the gaussians are, the more weight the sky gets and the more background is blended in
 
         # affine transformation
         outputs["rgb"] = self.affine_transformation(
diff --git a/models/trainers/single.py b/models/trainers/single.py
index 3e07274..a59a3b2 100644
--- a/models/trainers/single.py
+++ b/models/trainers/single.py
@@ -52,7 +52,8 @@ def _init_models(self):
                 device=self.device
             )
 
-        if class_name in self.misc_classes_keys:
+        if class_name in self.misc_classes_keys:
+            # ['Sky', 'Affine', 'CamPose', 'CamPosePerturb']
             model = import_str(model_cfg.type)(
                 class_name=class_name,
                 **model_cfg.get('params', {}),
@@ -169,6 +170,7 @@ def forward(
             cam=processed_cam,
             image_ids=image_infos["img_idx"].flatten()[0]
         )
+        # important: this yields the gaussians, merged across all model classes
 
         # render gaussians
         outputs, _ = self.render_gaussians(
@@ -190,7 +192,7 @@ def forward(
             outputs["rgb_gaussians"] + outputs["rgb_sky"] * (1.0 - outputs["opacity"]),
             image_infos
         )
-        return outputs
+        return outputs, gs
 
     def compute_losses(
         self,
diff --git a/models/video_utils.py b/models/video_utils.py
index b3eae4b..45701e6 100644
--- a/models/video_utils.py
+++ b/models/video_utils.py
@@ -59,7 +59,7 @@ def render_images(
         vis_indices (Optional[List[int]], optional): Indices to visualize. Defaults to None.
     """
     trainer.set_eval()
-    render_results = render(
+    render_results, gs_collection = render(
         dataset,
         trainer=trainer,
         compute_metrics=compute_metrics,
@@ -81,7 +81,7 @@ def render_images(
         logger.info(f"\tVehicle-Only PSNR: {render_results['vehicle_psnr']:.4f}")
         logger.info(f"\tVehicle-Only SSIM: {render_results['vehicle_ssim']:.4f}")
 
-    return render_results
+    return render_results, gs_collection
 
 
 def render(
@@ -137,7 +137,7 @@ def render(
             if isinstance(v, Tensor):
                 cam_infos[k] = v.cuda(non_blocking=True)
         # render the image
-        results = trainer(image_infos, cam_infos)
+        results, gs_collection = trainer(image_infos, cam_infos)  # invoke the trainer defined earlier
 
         # ------------- clip rgb ------------- #
         for k, v in results.items():
@@ -378,7 +378,7 @@ def render(
         results_dict["SMPLNodes_opacities"] = SMPLNodes_opacities
     if len(Dynamic_opacities) > 0:
         results_dict["Dynamic_opacities"] = Dynamic_opacities
-    return results_dict
+    return results_dict, gs_collection
 
 
 def save_videos(
@@ -443,7 +443,7 @@ def render_novel_views(trainer, render_data: list, save_path: str, fps: int = 30
             frame_data["image_infos"][key] = value.cuda(non_blocking=True)
 
         # Perform rendering
-        outputs = trainer(
+        outputs, _ = trainer(
             image_infos=frame_data["image_infos"],
             camera_infos=frame_data["cam_infos"],
             novel_view=True
diff --git a/plyreader.py b/plyreader.py
new file mode 100644
index 0000000..2e2bd8d
--- /dev/null
+++ b/plyreader.py
@@ -0,0 +1,15 @@
+import numpy as np
+
+def verify_ply(filename):
+    with open(filename, 'rb') as f:
+        # skip the header
+        while True:
+            line = f.readline()
+            if line.strip() == b"end_header":
+                break
+
+        # read and parse the payload
+        data = np.fromfile(f, dtype=np.float32)  # pick the dtype to match what was actually written
+        print(data[:])
+
+verify_ply('/data/bing.han/Omni_Outputs/drivestudio/test/test_10000_Background.ply')
\ No newline at end of file
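[editor's note] Since every property written by export_gaussians_to_ply is a float32, the flat stream that verify_ply prints can be regrouped into one row per gaussian by counting the properties declared in the header. A sketch under that assumption (the helper name is not from the repo):

import numpy as np

def read_ply_vertices(filename):
    num_props, num_verts = 0, 0
    with open(filename, 'rb') as f:
        # count vertices and float properties while walking the header
        while True:
            line = f.readline().strip()
            if line.startswith(b"element vertex"):
                num_verts = int(line.split()[-1])
            elif line.startswith(b"property float"):
                num_props += 1
            elif line == b"end_header":
                break
        data = np.fromfile(f, dtype=np.float32)
    return data.reshape(num_verts, num_props)  # one row per gaussian
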
diff --git a/tools/eval.py b/tools/eval.py
index df1e9f1..3c903b5 100644
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -36,7 +36,7 @@ def do_evaluation(
     logger.info("Evaluating Pixels...")
     if dataset.test_image_set is not None and cfg.render.render_test:
         logger.info("Evaluating Test Set Pixels...")
-        render_results = render_images(
+        render_results, _ = render_images(
             trainer=trainer,
             dataset=dataset.test_image_set,
             compute_metrics=True,
@@ -93,7 +93,7 @@ def do_evaluation(
 
     if cfg.render.render_full:
         logger.info("Evaluating Full Set...")
-        render_results = render_images(
+        render_results, _ = render_images(
             trainer=trainer,
             dataset=dataset.full_image_set,
             compute_metrics=True,
@@ -147,9 +147,11 @@ def do_evaluation(
         del render_results, vis_frame_dict
         torch.cuda.empty_cache()
 
+    # render novel-view images
     render_novel_cfg = cfg.render.get("render_novel", None)
     if render_novel_cfg is not None:
         logger.info("Rendering novel views...")
+        # traj defines the motion path, including the camera-to-world (c2w) poses
         render_traj = dataset.get_novel_render_traj(
             traj_types=render_novel_cfg.traj_types,
             target_frames=render_novel_cfg.get("frames", dataset.frame_num),
@@ -160,6 +162,7 @@ def do_evaluation(
 
         for traj_type, traj in render_traj.items():
             # Prepare rendering data
+            # pass in traj; the dataset's own camera intrinsics are used
             render_data = dataset.prepare_novel_view_render_data(traj)
 
             # Render and save video
diff --git a/tools/train.py b/tools/train.py
index f86411a..53dcf08 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -10,12 +10,14 @@
 import torch
 
 from tools.eval import do_evaluation
-from utils.misc import import_str
+from utils.misc import import_str, export_gaussians_to_ply
 from utils.backup import backup_project
 from utils.logging import MetricLogger, setup_logging
 from models.video_utils import render_images, save_videos
 from datasets.driving_dataset import DrivingDataset
 
+import open3d as o3d
+
 logger = logging.getLogger()
 current_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
@@ -107,11 +109,16 @@ def main(args):
     cfg = setup(args)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+    import time
+    start_time_train = time.time()
+    time_last = time.time()
+
     # build dataset
     dataset = DrivingDataset(data_cfg=cfg.data)
 
     # setup trainer
-    trainer = import_str(cfg.trainer.type)(
+    trainer = import_str(cfg.trainer.type)(
+        # the trainer type is chosen via cfg and defined in models/trainers: a base parent class plus two child flavors, MultiTrainer and SingleTrainer (vanilla, pvg, deformable)
         **cfg.trainer,
         num_timesteps=dataset.num_img_timesteps,
         model_config=cfg.model,
@@ -185,6 +192,22 @@ def main(args):
     # )
 
     for step in metric_logger.log_every(all_iters, cfg.logging.print_freq):
+        # timing module
+        if step > 0 and step % 5000 == 0:
+            time_now = time.time()
+            total_train_time = time_now - start_time_train
+            last5k_time = time_now - time_last
+            time_last = time_now
+
+            minutes = int(last5k_time // 60)
+            seconds = int(last5k_time % 60)
+            print(f"Time of last 5k steps: {minutes} min {seconds} s")
+
+            minutes = int(total_train_time // 60)
+            seconds = int(total_train_time % 60)
+            print(f"Total train time until now: {minutes} min {seconds} s")
+
+
         #----------------------------------------------------------------------------
         #----------------------------     Validate     ------------------------------
         if step % cfg.logging.vis_freq == 0 and cfg.logging.vis_freq > 0:
@@ -197,7 +220,8 @@ def main(args):
                 dtype=int,
             )[step // cfg.logging.vis_freq]
             with torch.no_grad():
-                render_results = render_images(
+                render_results, gs_collection = render_images(
+                    # entry point: rendering starts here
                     trainer=trainer,
                     dataset=dataset.full_image_set,
                     compute_metrics=True,
                     vis_indices=[
                         vis_timestep * dataset.pixel_source.num_cams + i
                         for i in range(dataset.pixel_source.num_cams)
                     ],
                 )
+
             if args.enable_wandb:
                 wandb.log(
                     {
@@ -254,7 +279,18 @@ def main(args):
                 cam_infos[k] = v.cuda(non_blocking=True)
 
         # forward & backward
-        outputs = trainer(image_infos, cam_infos)
+        outputs, gs_collection = trainer(image_infos, cam_infos)
+
+        # if step > 0 and step % args.save_ply == 0:
+        if step > 0 and step % 10000 == 0:
+            # for class_name, model in trainer.models.items():
+            export_gaussians_to_ply(trainer.models["Background"], cfg.log_dir, f"{args.run_name}_{step}_Background.ply")
+            # pcd = trainer.models["Background"].export_gaussians_to_ply(alpha_thresh=0)
+            # file_name = f"{args.run_name}_{step}_Background.ply"
+            # file_path = f"{cfg.log_dir}/{file_name}"
+            # o3d.io.write_point_cloud(file_path, pcd)
+            print(f"{args.run_name}_{step}_Background.ply stored in {cfg.log_dir}")
{cfg.log_dir}") + trainer.update_visibility_filter() loss_dict = trainer.compute_losses( @@ -312,7 +348,7 @@ def main(args): dataset.pixel_source.update_downscale_factor( 1 / dataset.pixel_source.buffer_downscale ) - render_results = render_images( + render_results, _ = render_images( trainer=trainer, dataset=dataset.full_image_set, ) @@ -363,7 +399,7 @@ def main(args): parser.add_argument("--enable_wandb", action="store_true", help="enable wandb logging") parser.add_argument("--entity", default="ziyc", type=str, help="wandb entity name") parser.add_argument("--project", default="drivestudio", type=str, help="wandb project name, also used to enhance log_dir") - parser.add_argument("--run_name", default="omnire", type=str, help="wandb run name, also used to enhance log_dir") + parser.add_argument("--run_name", default="test", type=str, help="wandb run name, also used to enhance log_dir") # viewer parser.add_argument("--enable_viewer", action="store_true", help="enable viewer") @@ -371,6 +407,7 @@ def main(args): # misc parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) - + parser.add_argument("--save_ply", default=2000, type=int, help="save ply every n steps", ) + args = parser.parse_args() final_step = main(args) diff --git a/utils/camera.py b/utils/camera.py index 0a31160..7ca4efb 100644 --- a/utils/camera.py +++ b/utils/camera.py @@ -64,6 +64,7 @@ def get_interp_novel_trajectories( ) -> torch.Tensor: original_frames = per_cam_poses[list(per_cam_poses.keys())[0]].shape[0] + # 在这里定义新轨迹! trajectory_generators = { "front_center_interp": front_center_interp, "s_curve": s_curve, @@ -87,6 +88,11 @@ def s_curve( dataset_type: str, per_cam_poses: Dict[int, torch.Tensor], original_frames: int, target_frames: int ) -> torch.Tensor: """Create an S-shaped trajectory using the front three cameras.""" + # ID 0选取第一帧。 + # 从第二个摄像机(ID 1)选取第一四分之一位置的帧。 + # 再次从前置中心摄像机(ID 0)选取中间帧(一半位置)。 + # 从第三个摄像机(ID 2)选取第三四分之一位置的帧。 + # 最后,再从前置中心摄像机(ID 0)选取最后一帧。 assert all(cam in per_cam_poses.keys() for cam in [0, 1, 2]), "Front three cameras (IDs 0, 1, 2) are required for s_curve" key_poses = torch.cat([ per_cam_poses[0][0:1], @@ -103,6 +109,10 @@ def three_key_poses_trajectory( original_frames: int, target_frames: int ) -> torch.Tensor: + # 第一个关键姿态:前置中心摄像机的第一帧 + # 中间关键姿态:随机选择摄像机1或2,获取该摄像机中间帧的姿态。 + # 使用插值方法(球面线性插值,SLERP)在起始姿态和中间姿态之间插值,生成中间关键姿态。插值仅应用于旋转部分,平移部分直接使用中间帧的数据。 + # 第三个关键姿态:前置中心摄像机的最后一帧。 """ Create a trajectory using three key poses: 1. 
First frame of front center camera diff --git a/utils/misc.py b/utils/misc.py index 7d54513..f995a66 100644 --- a/utils/misc.py +++ b/utils/misc.py @@ -47,55 +47,159 @@ def export_points_to_ply( pcd.colors = o3d.utility.Vector3dVector(colors) o3d.io.write_point_cloud(save_path, pcd) -def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None): - model.eval() - filename = os.path.join(path, name) - map_to_tensors = {} - - with torch.no_grad(): - positions = model.means - if aabb is not None: - aabb = aabb.to(positions.device) - aabb_min, aabb_max = aabb[:3], aabb[3:] - aabb_center = (aabb_min + aabb_max) / 2 - aabb_sacle_max = (aabb_max - aabb_min).max() / 2 * 1.1 - vis_mask = torch.logical_and(positions >= aabb_min, positions < aabb_max).all(-1) - else: - aabb_center = positions.mean(0) - aabb_sacle_max = (positions - aabb_center).abs().max() * 1.1 - vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool) +# def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None): +# model.eval() +# filename = os.path.join(path, name) +# from collections import OrderedDict +# map_to_tensors = OrderedDict() + +# with torch.no_grad(): +# positions = model.means +# if aabb is not None: +# aabb = aabb.to(positions.device) +# aabb_min, aabb_max = aabb[:3], aabb[3:] +# aabb_center = (aabb_min + aabb_max) / 2 +# aabb_sacle_max = (aabb_max - aabb_min).max() / 2 * 1.1 +# vis_mask = torch.logical_and(positions >= aabb_min, positions < aabb_max).all(-1) +# else: +# aabb_center = positions.mean(0) +# aabb_sacle_max = (positions - aabb_center).abs().max() * 1.1 +# vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool) - positions = ((positions[vis_mask] - aabb_center) / aabb_sacle_max).cpu().numpy() - map_to_tensors["positions"] = o3d.core.Tensor(positions, o3d.core.float32) - map_to_tensors["normals"] = o3d.core.Tensor(np.zeros_like(positions), o3d.core.float32) +# positions = ((positions[vis_mask] - aabb_center) / aabb_sacle_max).cpu().numpy() +# map_to_tensors["positions"] = o3d.core.Tensor(positions, o3d.core.float32) +# map_to_tensors["normals"] = o3d.core.Tensor(np.zeros_like(positions), o3d.core.float32) - colors = model.colors[vis_mask].data.cpu().numpy() - map_to_tensors["colors"] = (colors * 255).astype(np.uint8) - for i in range(colors.shape[1]): - map_to_tensors[f"f_dc_{i}"] = colors[:, i : i + 1] +# colors = model.colors[vis_mask].data.cpu().numpy() +# # map_to_tensors["colors"] = (colors * 255).astype(np.uint8) +# for i in range(colors.shape[1]): +# map_to_tensors[f"f_dc_{i}"] = colors[:, i : i + 1] - shs = model.shs_rest[vis_mask].data.cpu().numpy() - if model.config.sh_degree > 0: - shs = shs.reshape((colors.shape[0], -1, 1)) - for i in range(shs.shape[-1]): - map_to_tensors[f"f_rest_{i}"] = shs[:, i] +# shs = model.shs_rest[vis_mask].data.cpu().numpy() +# # if model.sh_degree > 0: +# if 3 > 0: +# shs = shs.reshape((colors.shape[0], -1, 1)) +# for i in range(shs.shape[-2]): +# map_to_tensors[f"f_rest_{i}"] = shs[:, i] - map_to_tensors["opacity"] = model.opacities[vis_mask].data.cpu().numpy() +# opacity = model.get_opacity +# map_to_tensors["opacity"] = opacity[vis_mask].data.cpu().numpy() - scales = model.scales[vis_mask].data.cpu().unsqueeze(-1).numpy() - for i in range(3): - map_to_tensors[f"scale_{i}"] = scales[:, i] +# scales = model.get_scaling +# scales = scales[vis_mask].data.cpu().unsqueeze(-1).numpy() +# for i in range(3): +# map_to_tensors[f"scale_{i}"] = scales[:, i] - quats = model.quats[vis_mask].data.cpu().unsqueeze(-1).numpy() +# quats = 
model.get_quats
+#     quats = quats[vis_mask].data.cpu().unsqueeze(-1).numpy()
 
-    for i in range(4):
-        map_to_tensors[f"rot_{i}"] = quats[:, i]
+
+#     for i in range(4):
+#         map_to_tensors[f"rot_{i}"] = quats[:, i]
+
+#     # pcd = o3d.t.geometry.PointCloud(map_to_tensors)
+#     # o3d.t.io.write_point_cloud(str(filename), pcd)
+
+#     logger.info(f"Exported point cloud to {filename}, containing {vis_mask.sum().item()} points.")
+
+def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None):
+    model.eval()
+    filename = os.path.join(path, name)
+
+    with torch.no_grad():
+        positions = model.means
+        # if aabb is not None:
+        #     aabb = aabb.to(positions.device)
+        #     aabb_min, aabb_max = aabb[:3], aabb[3:]
+        #     aabb_center = (aabb_min + aabb_max) / 2
+        #     aabb_scale_max = (aabb_max - aabb_min).max() / 2 * 1.1
+        #     vis_mask = torch.logical_and(positions >= aabb_min, positions < aabb_max).all(-1)
+        # else:
+        #     aabb_center = positions.mean(0)
+        #     aabb_scale_max = (positions - aabb_center).abs().max() * 1.1
+        #     vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool)
+
+        # positions = ((positions[vis_mask] - aabb_center) / aabb_scale_max).cpu().numpy()
+        vis_mask = torch.ones_like(positions[:, 0], dtype=torch.bool)
+        positions = positions[vis_mask].cpu().numpy()
+        positions = positions
+
+        normals = np.zeros_like(positions)  # normals: 0
+
+        # colors = model.colors[vis_mask].data.cpu().numpy()
+        colors = model.shs_0[vis_mask].data.cpu().numpy()
+        for i in range(colors.shape[1]):
+            pass
+
+        shs = model.shs_rest[vis_mask].data.cpu().numpy()
+        shs = shs.reshape((colors.shape[0], -1))
+
+        # opacity = model.get_opacity[vis_mask].data.cpu().numpy()
+        opacity = model.pure_opacity[vis_mask].data.cpu().numpy()
+        # scales = model.get_scaling[vis_mask].data.cpu().numpy()
+        scales = model.pure_scaling[vis_mask].data.cpu().numpy()
+        # quats = model.get_quats[vis_mask].data.cpu().numpy()
+        quats = model.pure_quats[vis_mask].data.cpu().numpy()
+        num_points = positions.shape[0]
+
+    with open(filename, 'wb') as f:
+        # write the PLY header
+        f.write(b"ply\n")
+        f.write(b"format binary_little_endian 1.0\n")
+        f.write(b"comment Generated by opensplat\n")
+        f.write(f"element vertex {num_points}\n".encode())
+        f.write(b"property float x\n")
+        f.write(b"property float y\n")
+        f.write(b"property float z\n")
+        f.write(b"property float nx\n")
+        f.write(b"property float ny\n")
+        f.write(b"property float nz\n")
+
+        for i in range(colors.shape[1]):
+            f.write(f"property float f_dc_{i}\n".encode())
+
+        for i in range(shs.shape[1]):
+            f.write(f"property float f_rest_{i}\n".encode())
+
+        f.write(b"property float opacity\n")
+        f.write(b"property float scale_0\n")
+        f.write(b"property float scale_1\n")
+        f.write(b"property float scale_2\n")
+        f.write(b"property float rot_0\n")
+        f.write(b"property float rot_1\n")
+        f.write(b"property float rot_2\n")
+        f.write(b"property float rot_3\n")
+        f.write(b"end_header\n")
+
+        # prepare the data
+        data_list = [
+            positions[:, 0], positions[:, 1], positions[:, 2],
+            normals[:, 0], normals[:, 1], normals[:, 2]
+        ]
+
+        for i in range(colors.shape[1]):
+            data_list.append(colors[:, i])
+
+        for i in range(shs.shape[1]):
+            data_list.append(shs[:, i])
+
+        data_list.append(opacity)
+
+        data_list.append(scales[:, 0])
+        data_list.append(scales[:, 1])
+        data_list.append(scales[:, 2])
+
+        data_list.append(quats[:, 0])
+        data_list.append(quats[:, 1])
+        data_list.append(quats[:, 2])
+        data_list.append(quats[:, 3])
+
+        # stack the data and write it to the file
+        data = np.column_stack(data_list).astype(np.float32)
+        data.tofile(f)
 
-    pcd = o3d.t.geometry.PointCloud(map_to_tensors)
-    o3d.t.io.write_point_cloud(str(filename), pcd)
 
     logger.info(f"Exported point cloud to {filename}, containing {vis_mask.sum().item()} points.")
 
+
+
 def is_enabled() -> bool:
     """
     Returns:
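[editor's note] The f_dc_* fields above hold the raw SH DC coefficients (model.shs_0), not display colors; 3DGS tools typically convert them with the band-0 spherical-harmonics constant. A sketch, assuming the column layout the header above declares (x, y, z, then three zero normals, then f_dc_0..2):

import numpy as np

verts = read_ply_vertices("test_10000_Background.ply")  # helper sketched earlier
xyz = verts[:, 0:3]
f_dc = verts[:, 6:9]  # right after the three zero normals
rgb = np.clip(0.5 + 0.28209479177 * f_dc, 0.0, 1.0)  # usual SH-DC to RGB convention (assumption)
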
From 1a8e290b74ef675a57ad372b33d16d804bc3a685 Mon Sep 17 00:00:00 2001
From: Eisoc
Date: Fri, 20 Sep 2024 10:25:01 +0800
Subject: [PATCH 3/5] cfg changes

---
 configs/3DGS.yaml                 |   4 +-
 configs/3DGS_test.yaml            | 156 ++++++++++++++++++++++++++++++
 configs/datasets/waymo/5cams.yaml |   4 +-
 tools/eval.py                     |  24 ++---
 4 files changed, 172 insertions(+), 16 deletions(-)
 create mode 100644 configs/3DGS_test.yaml

diff --git a/configs/3DGS.yaml b/configs/3DGS.yaml
index 8f152f9..1acaa01 100644
--- a/configs/3DGS.yaml
+++ b/configs/3DGS.yaml
@@ -72,8 +72,8 @@ trainer:
     n_split_samples: 2 # number of samples to split gaussians into
     # may differ in different models
     reset_alpha_value: 0.01 # reset alpha to this value
-    densify_grad_thresh: 0.0002 # above this grad, gaussians are densified
-    densify_size_thresh: 0.01 # below this size, gaussians are *duplicated*, otherwise split
+    densify_grad_thresh: 0.0005 # above this grad, gaussians are densified
+    densify_size_thresh: 0.003 # below this size, gaussians are *duplicated*, otherwise split
     cull_alpha_thresh: 0.005 # threshold of opacity for culling gaussians
     cull_scale_thresh: 0.5 # threshold of scale for culling gaussians
     cull_screen_size: 0.15 # if a gaussian is more than this percent of screen space, cull it
diff --git a/configs/3DGS_test.yaml b/configs/3DGS_test.yaml
new file mode 100644
index 0000000..1acaa01
--- /dev/null
+++ b/configs/3DGS_test.yaml
@@ -0,0 +1,156 @@
+seed: 0
+dataset: waymo/5cams
+
+# ------------- Trainer ------------ #
+trainer:
+  type: models.trainers.SingleTrainer
+  optim:
+    num_iters: 30000
+    use_grad_scaler: false
+    cache_buffer_freq: -1 # if > 0, use error based image sampler for training
+  render:
+    near_plane: 0.1 # near plane for rendering
+    far_plane: 10000000000.0 # far plane for rendering
+    antialiased: false # whether to use antialiasing for gaussian rendering, supported by gsplat kernel
+    packed: false # whether to use packed rendering, supported by gsplat kernel
+    absgrad: false # whether to use absolute gradient for rendering, supported by gsplat kernel
+    sparse_grad: false # whether to use sparse gradient for rendering, supported by gsplat kernel
+    batch_size: 1 # batch size for rendering, currently only support 1
+  losses:
+    rgb:
+      w: 0.8
+    ssim:
+      w: 0.2
+    mask:
+      w: 1.0
+      opacity_loss_type: bce # choose from [bce, safe_bce]
+      # bce: plain binary cross-entropy
+      # safe bce: uses clamp_min_ and conditional clipping for numerical stability, since a direct log(x) or log(1 - x) can break down as x approaches 0 or 1; implemented as a custom PyTorch autograd Function, which raises the compute cost considerably
+    depth:
+      w: 0.1 # weight of depth loss
+      inverse_depth: False # whether to use inverse depth, NOTE that when set to True, must normalize=True
+      # smoothness loss on the depth map, computed with kornia.losses.inverse_depth_smoothness_loss
+      normalize: False # whether to normalize depth loss
+      loss_type: l1 # choose from ["l1", "l2"]
+      # reduction: mean_on_hw # choose from ["mean_on_hit", "mean_on_hw", "sum", "none"]
+      # refer to pvg codebase
+      # applies the chosen reduction to the loss; supports several dtypes and reduction modes, with an optional mask to weight or restrict the reduction:
+      # mean: without a mask, the plain mean over all elements; with a mask, the weighted mean (mean of loss * mask).
+      # mean_in_mask: the mean over masked positions only, i.e. sum(loss * mask) / sum(mask), with the denominator clamped so division by zero is avoided.
+      # sum: like mean, but returns the total instead.
+      # max/min: the maximum or minimum loss value; with a mask, only positions where the mask is non-zero are considered.
+      # none: the raw loss, or loss * mask if a mask is given.
+      # weight_decay: 1.0
+      # scales the L2 contribution to the loss
+    # opacity_entropy:
+    #   w: 0.05
+    # 
inverse_depth_smoothness: + # w: 0.001 + res_schedule: + double_steps: 3000 # training starts at 1/d resolution, every n steps this is doubled + downscale_times: 3 # at the beginning, resolution is 1/2^d, where d is this number + gaussian_optim_general_cfg: + xyz: + lr: 1.6e-04 + lr_final: 1.6e-06 + scale_factor: scene_radius # str or float, if "scene_scale", scale the learning rate by the scene scale + sh_dc: + lr: 0.0025 + sh_rest: + lr: 0.000125 + opacity: + lr: 0.005 + scaling: + lr: 0.005 + rotation: + lr: 0.001 + gaussian_ctrl_general_cfg: + warmup_steps: 500 # warmup steps for alpha + reset_alpha_interval: 3000 # reset alpha every n steps + refine_interval: 100 # refine gaussians every n steps + sh_degree_interval: 2000 # every n intervals turn on another sh degree + n_split_samples: 2 # number of samples to split gaussians into + # may differ in different models + reset_alpha_value: 0.01 # reset alpha to this value + densify_grad_thresh: 0.0005 # above this grad, gaussians are densified + densify_size_thresh: 0.003 # below this size, gaussians are *duplicated*, otherwise split + cull_alpha_thresh: 0.005 # threshold of opacity for culling gaussians + cull_scale_thresh: 0.5 # threshold of scale for culling gaussians + cull_screen_size: 0.15 # if a gaussian is more than this percent of screen space, cull it + split_screen_size: 0.05 # if a gaussian is more than this percent of screen space, split it + stop_screen_size_at: 4000 # stop culling/splitting at this step WRT screen size of gaussians + stop_split_at: 20000 # stop splitting at this step + sh_degree: 3 # sh degree for gaussians + +# ------------- Model ------------ # +model: + Background: + type: models.gaussians.VanillaGaussians + init: + from_lidar: + num_samples: 800_000 + return_color: True + near_randoms: 100_000 + far_randoms: 100_000 + reg: + sharp_shape_reg: + w: 1. + step_interval: 10 + max_gauss_ratio: 10. 
# threshold of ratio of gaussian max to min scale before applying regularization loss from the PhysGaussian paper + # ctrl: + # # pvg specific + # cycle_length: 0.2 + # time_interval: 0.02 + # enable_temporal_smoothing: True + # smooth_probability: 0.5 + # distribution_span: 1.5 # unit: frame interval + # betas_init: 0.1 + # densify_until_num_points: 3_000_000 + # densify_t_grad_thresh: 0.002 + # densify_t_size_thresh: 0.01 + # no_time_split: true + Sky: + type: models.modules.EnvLight + params: + resolution: 1024 + optim: + all: + lr: 0.01 + Affine: + type: models.modules.AffineTransform + params: + embedding_dim: 4 + base_mlp_layer_width: 64 + pixel_affine: False + optim: + all: + lr: 1.0e-5 + weight_decay: 1.0e-6 + CamPose: + type: models.modules.CameraOptModule + optim: + all: + lr: 1.0e-5 + weight_decay: 1.0e-6 + +# ------------- render ------------ # +render: + fps: 10 # frames per second for the main rendered output + render_full: True # whether to render full resolution videos + render_test: True # whether to render test set + render_novel: + traj_types: + #- front_center_interp # type of trajectory for novel view synthesis, /data/bing.han/drivestudio/utils/camera.py + - s_curve + #- three_key_poses + fps: 15 # frames per second for novel view rendering + vis_lidar: False # whether to visualize lidar points on ground truth images + vis_sky: False # whether to include "rgb_sky" and "rgb_sky_blend" in rendered keys + vis_error: False # whether to include "rgb_error_map" in rendered keys + +# ------------- logging ------------ # +logging: + vis_freq: 2000 # how often to visualize training stats + print_freq: 500 # how often to print training stats + saveckpt_freq: 15000 # how often to save checkpoints + save_seperate_video: True # whether to save seperate videos for each scene \ No newline at end of file diff --git a/configs/datasets/waymo/5cams.yaml b/configs/datasets/waymo/5cams.yaml index 8a71ece..abffca5 100644 --- a/configs/datasets/waymo/5cams.yaml +++ b/configs/datasets/waymo/5cams.yaml @@ -10,9 +10,9 @@ data: data_root: data/waymo/processed/training # data root for the dataset dataset: waymo - scene_idx: 3 # which scene to use, [0, 798] for waymo's training set and [0, 849] for nuscenes's train/val sets, inclusive + scene_idx: 16 # which scene to use, [0, 798] for waymo's training set and [0, 849] for nuscenes's train/val sets, inclusive start_timestep: 0 # which timestep to start from - end_timestep: 50 # which timestep to end at, -1 means the last timestep + end_timestep: -1 # which timestep to end at, -1 means the last timestep preload_device: cpu # choose from ["cpu", "cuda"], cache the data on this device. 
pixel_source: # image source and object annotations type: datasets.waymo.waymo_sourceloader.WaymoPixelSource diff --git a/tools/eval.py b/tools/eval.py index 3c903b5..a114481 100644 --- a/tools/eval.py +++ b/tools/eval.py @@ -125,10 +125,10 @@ def do_evaluation( logger.info(f"Image evaluation metrics saved to {full_metrics_file}") if args.render_video_postfix is None: - video_output_pth = f"{cfg.log_dir}/videos{post_fix}/full_set_{step}.mp4" + video_output_pth = f"{cfg.log_dir}/videos{post_fix}/full_set_{args.run_name}_{step}.mp4" else: video_output_pth = ( - f"{cfg.log_dir}/videos{post_fix}/full_set_{step}_{args.render_video_postfix}.mp4" + f"{cfg.log_dir}/videos{post_fix}/full_set_{args.run_name}_{step}_{args.render_video_postfix}.mp4" ) vis_frame_dict = save_videos( render_results, @@ -166,7 +166,7 @@ def do_evaluation( render_data = dataset.prepare_novel_view_render_data(traj) # Render and save video - save_path = os.path.join(video_output_dir, f"{traj_type}.mp4") + save_path = os.path.join(video_output_dir, f"{args.run_name}_{traj_type}.mp4") render_novel_views( trainer, render_data, save_path, fps=render_novel_cfg.get("fps", cfg.render.fps) @@ -212,18 +212,18 @@ def main(args): # define render keys render_keys = [ - "gt_rgbs", - "rgbs", - "Background_rgbs", - "RigidNodes_rgbs", - "DeformableNodes_rgbs", - "SMPLNodes_rgbs", - # "depths", + # "gt_rgbs", + # "rgbs", + # "Background_rgbs", + # "RigidNodes_rgbs", + # "DeformableNodes_rgbs", + # "SMPLNodes_rgbs", + "depths", # "Background_depths", # "RigidNodes_depths", # "DeformableNodes_depths", # "SMPLNodes_depths", - # "mask" + "mask" ] if cfg.render.vis_lidar: render_keys.insert(0, "lidar_on_images") @@ -255,7 +255,7 @@ def main(args): parser.add_argument("--resume_from", default=None, help="path to checkpoint to resume from", type=str, required=True) parser.add_argument("--render_video_postfix", type=str, default=None, help="an optional postfix for video") parser.add_argument("--save_catted_videos", type=bool, default=False, help="visualize lidar on image") - + parser.add_argument("--run_name", default="test", type=str, help="wandb run name, also used to enhance log_dir") # viewer parser.add_argument("--enable_viewer", action="store_true", help="enable viewer") parser.add_argument("--viewer_port", type=int, default=8080, help="viewer port") From eb46f45bc35bec4f6bbd68e71ff8da09a17b581d Mon Sep 17 00:00:00 2001 From: Eisoc Date: Fri, 20 Sep 2024 14:51:29 +0800 Subject: [PATCH 4/5] code-adapting changes --- configs/3DGS.yaml | 3 ++- configs/omnire.yaml | 3 ++- models/gaussians/vanilla.py | 6 +++--- tools/train.py | 23 ++++++++++++----------- utils/misc.py | 10 ++++------ 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/configs/3DGS.yaml b/configs/3DGS.yaml index 1acaa01..aca57dc 100644 --- a/configs/3DGS.yaml +++ b/configs/3DGS.yaml @@ -153,4 +153,5 @@ logging: vis_freq: 2000 # how often to visualize training stats print_freq: 500 # how often to print training stats saveckpt_freq: 15000 # how often to save checkpoints - save_seperate_video: True # whether to save seperate videos for each scene \ No newline at end of file + save_seperate_video: True # whether to save seperate videos for each scene + export_freq: 10000 # how often to export ply files \ No newline at end of file diff --git a/configs/omnire.yaml b/configs/omnire.yaml index 2c0647e..fa3256e 100644 --- a/configs/omnire.yaml +++ b/configs/omnire.yaml @@ -275,4 +275,5 @@ logging: vis_freq: 2000 # how often to visualize training stats print_freq: 500 # how often 
to print training stats
   saveckpt_freq: 15000 # how often to save checkpoints
-  save_seperate_video: True # whether to save seperate videos for each scene
\ No newline at end of file
+  save_seperate_video: True # whether to save seperate videos for each scene
+  export_freq: 10000 # how often to export ply files
\ No newline at end of file
diff --git a/configs/omnire.yaml b/configs/omnire.yaml
index 2c0647e..fa3256e 100644
--- a/configs/omnire.yaml
+++ b/configs/omnire.yaml
@@ -275,4 +275,5 @@ logging:
   vis_freq: 2000 # how often to visualize training stats
   print_freq: 500 # how often to print training stats
   saveckpt_freq: 15000 # how often to save checkpoints
-  save_seperate_video: True # whether to save seperate videos for each scene
\ No newline at end of file
+  save_seperate_video: True # whether to save seperate videos for each scene
+  export_freq: 10000 # how often to export ply files
\ No newline at end of file
diff --git a/models/gaussians/vanilla.py b/models/gaussians/vanilla.py
index e30b080..d94eeda 100644
--- a/models/gaussians/vanilla.py
+++ b/models/gaussians/vanilla.py
@@ -139,19 +139,19 @@ def get_scaling(self):
         else:
             return torch.exp(self._scales)
     @property
-    def pure_scaling(self):
+    def scales(self):
         return self._scales
     @property
     def get_opacity(self):
         return torch.sigmoid(self._opacities)
     @property
-    def pure_opacity(self):
+    def opacities(self):
         return self._opacities
     @property
     def get_quats(self):
         return self.quat_act(self._quats)
     @property
-    def pure_quats(self):
+    def quats(self):
         return self._quats
 
     def quat_act(self, x: torch.Tensor) -> torch.Tensor:
diff --git a/tools/train.py b/tools/train.py
index 53dcf08..bc59685 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -281,16 +281,6 @@ def main(args):
 
         # forward & backward
         outputs, gs_collection = trainer(image_infos, cam_infos)
-
-        # if step > 0 and step % args.save_ply == 0:
-        if step > 0 and step % 10000 == 0:
-            # for class_name, model in trainer.models.items():
-            export_gaussians_to_ply(trainer.models["Background"], cfg.log_dir, f"{args.run_name}_{step}_Background.ply")
-            # pcd = trainer.models["Background"].export_gaussians_to_ply(alpha_thresh=0)
-            # file_name = f"{args.run_name}_{step}_Background.ply"
-            # file_path = f"{cfg.log_dir}/{file_name}"
-            # o3d.io.write_point_cloud(file_path, pcd)
-            print(f"{args.run_name}_{step}_Background.ply stored in {cfg.log_dir}")
 
         trainer.update_visibility_filter()
 
         loss_dict = trainer.compute_losses(
@@ -335,7 +325,18 @@ def main(args):
                 save_only_model=True,
                 is_final=step == trainer.num_iters,
             )
-        
+
+        do_save_ply = step > 0 and (
+            (step % cfg.logging.export_freq == 0) or (step == trainer.num_iters)
+        ) and (args.resume_from is None)
+        if do_save_ply:
+            # for class_name, model in trainer.models.items():
+            export_gaussians_to_ply(trainer.models["Background"], cfg.log_dir, f"{args.run_name}_{step}_Background.ply")
+            # pcd = trainer.models["Background"].export_gaussians_to_ply(alpha_thresh=0)
+            # file_name = f"{args.run_name}_{step}_Background.ply"
+            # file_path = f"{cfg.log_dir}/{file_name}"
+            # o3d.io.write_point_cloud(file_path, pcd)
+
         #----------------------------------------------------------------------------
         #------------------------  Cache Image Error  ---------------------------
         if (
diff --git a/utils/misc.py b/utils/misc.py
index f995a66..a00f774 100644
--- a/utils/misc.py
+++ b/utils/misc.py
@@ -134,15 +134,14 @@ def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None):
         shs = shs.reshape((colors.shape[0], -1))
 
         # opacity = model.get_opacity[vis_mask].data.cpu().numpy()
-        opacity = model.pure_opacity[vis_mask].data.cpu().numpy()
+        opacity = model.opacities[vis_mask].data.cpu().numpy()
         # scales = model.get_scaling[vis_mask].data.cpu().numpy()
-        scales = model.pure_scaling[vis_mask].data.cpu().numpy()
+        scales = model.scales[vis_mask].data.cpu().numpy()
         # quats = model.get_quats[vis_mask].data.cpu().numpy()
-        quats = model.pure_quats[vis_mask].data.cpu().numpy()
+        quats = model.quats[vis_mask].data.cpu().numpy()
         num_points = positions.shape[0]
 
     with open(filename, 'wb') as f:
-        # write the PLY header
         f.write(b"ply\n")
         f.write(b"format binary_little_endian 1.0\n")
         f.write(b"comment Generated by opensplat\n")
@@ -170,7 +169,6 @@ def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None):
         f.write(b"property float rot_3\n")
         f.write(b"end_header\n")
 
-        # prepare the data
         data_list = [
             positions[:, 0], positions[:, 1], positions[:, 2],
             normals[:, 0], normals[:, 1], normals[:, 2]
         ]
@@ -193,7 +191,7 @@ def export_gaussians_to_ply(model, path, name='point_cloud.ply', aabb=None):
         data_list.append(quats[:, 2])
         data_list.append(quats[:, 3])
 
-        # stack the data and write it to the file
+        # stack and write to file
         data = np.column_stack(data_list).astype(np.float32)
         data.tofile(f)
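[editor's note] With the renamed properties, the exported PLY now stores the raw optimization parameters (the underscored tensors, see the vanilla.py hunk above), so a consumer has to map them back to render-space values itself. A sketch of the inverse activations, mirroring get_scaling, get_quats and get_opacity:

import numpy as np

# raw_scales / raw_quats / raw_opacity are columns sliced from the exported PLY (assumption)
scales = np.exp(raw_scales)                                            # _scales are log-scales
quats = raw_quats / np.linalg.norm(raw_quats, axis=-1, keepdims=True)  # quat_act normalization
opacity = 1.0 / (1.0 + np.exp(-raw_opacity))                           # _opacities are logits
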
From a52f674a130be5d7e6cdc9fb3745b11c5f64604b Mon Sep 17 00:00:00 2001
From: Eisoc
Date: Fri, 20 Sep 2024 19:17:53 +0800
Subject: [PATCH 5/5] adapting changes

---
 configs/omnire.yaml |  3 ++-
 tools/train.py      | 12 +++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/configs/omnire.yaml b/configs/omnire.yaml
index 33dd7bc..a62b497 100644
--- a/configs/omnire.yaml
+++ b/configs/omnire.yaml
@@ -275,4 +275,5 @@ logging:
   vis_freq: 2000 # how often to visualize training stats
   print_freq: 500 # how often to print training stats
   saveckpt_freq: 15000 # how often to save checkpoints
-  save_seperate_video: True # whether to save seperate videos for each scene
\ No newline at end of file
+  save_seperate_video: True # whether to save seperate videos for each scene
+  export_freq: 10000 # how often to export ply files
\ No newline at end of file
diff --git a/tools/train.py b/tools/train.py
index f86411a..b7aa3f6 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -10,7 +10,7 @@
 import torch
 
 from tools.eval import do_evaluation
-from utils.misc import import_str
+from utils.misc import import_str, export_gaussians_to_ply
 from utils.backup import backup_project
 from utils.logging import MetricLogger, setup_logging
 from models.video_utils import render_images, save_videos
 from datasets.driving_dataset import DrivingDataset
@@ -300,6 +300,16 @@ def main(args):
                 is_final=step == trainer.num_iters,
             )
 
+        do_save_ply = step > 0 and (
+            (step % cfg.logging.export_freq == 0) or (step == trainer.num_iters)
+        ) and (args.resume_from is None)
+        if do_save_ply:
+            # for class_name, model in trainer.models.items():
+            export_gaussians_to_ply(trainer.models["Background"], cfg.log_dir, f"{args.run_name}_{step}_Background.ply")
+            # pcd = trainer.models["Background"].export_gaussians_to_ply(alpha_thresh=0)
+            # file_name = f"{args.run_name}_{step}_Background.ply"
+            # file_path = f"{cfg.log_dir}/{file_name}"
+            # o3d.io.write_point_cloud(file_path, pcd)
         #----------------------------------------------------------------------------
         #------------------------  Cache Image Error  ---------------------------
         if (