From b09b72a60ec46a546d94070229a0bd629aa99bc4 Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 15:46:12 +0800 Subject: [PATCH 1/6] add random sample target points --- examples/drone/hover_env.py | 44 +++++++++++++++++++---------------- examples/drone/hover_eval.py | 14 ++++++++--- examples/drone/hover_train.py | 10 ++++---- 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/examples/drone/hover_env.py b/examples/drone/hover_env.py index 7c6bc980..0ca0f6c8 100644 --- a/examples/drone/hover_env.py +++ b/examples/drone/hover_env.py @@ -3,11 +3,9 @@ import genesis as gs from genesis.utils.geom import quat_to_xyz, transform_by_quat, inv_quat, transform_quat_by_quat - def gs_rand_float(lower, upper, shape, device): return (upper - lower) * torch.rand(size=shape, device=device) + lower - class HoverEnv: def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_viewer=False, device="cuda"): self.device = torch.device(device) @@ -18,7 +16,7 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie self.num_actions = env_cfg["num_actions"] self.num_commands = command_cfg["num_commands"] - # self.simulate_action_latency = env_cfg["simulate_action_latency"] + self.simulate_action_latency = env_cfg["simulate_action_latency"] self.dt = 0.01 # run in 100hz self.max_episode_length = math.ceil(env_cfg["episode_length_s"] / self.dt) @@ -56,9 +54,16 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie self.base_init_pos = torch.tensor(self.env_cfg["base_init_pos"], device=self.device) self.base_init_quat = torch.tensor(self.env_cfg["base_init_quat"], device=self.device) self.inv_base_init_quat = inv_quat(self.base_init_quat) - # self.base_init_pos = torch.tensor(self.env_cfg["base_init_pos"], device=self.device) self.drone = self.scene.add_entity(gs.morphs.Drone(file="urdf/drones/cf2x.urdf")) + self.cam = self.scene.add_camera( + res=(640, 480), + pos=(3.5, 0.0, 2.5), + lookat=(0, 0, 0.5), + fov=30, + GUI=True, + ) + # build scene self.scene.build(n_envs=num_envs) @@ -111,7 +116,6 @@ def step(self, actions): self.rel_pos = self.commands - self.base_pos self.last_rel_pos = self.commands - self.last_base_pos self.base_quat[:] = self.drone.get_quat() - # self.base_euler = quat_to_xyz(self.base_quat) self.base_euler = quat_to_xyz( transform_quat_by_quat(torch.ones_like(self.base_quat) * self.inv_base_init_quat, self.base_quat) ) @@ -120,12 +124,12 @@ def step(self, actions): self.base_ang_vel[:] = transform_by_quat(self.drone.get_ang(), inv_base_quat) # resample commands - # envs_idx = ( - # (self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0) - # .nonzero(as_tuple=False) - # .flatten() - # ) - # self._resample_commands(envs_idx) + envs_idx = ( + (self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0) + .nonzero(as_tuple=False) + .flatten() + ) + self._resample_commands(envs_idx) # check termination and reset self.reset_buf = self.episode_length_buf > self.max_episode_length @@ -215,17 +219,17 @@ def _reward_target(self): def _reward_smooth(self): smooth_rew = torch.sum(torch.square(self.actions - self.last_actions), dim=1) return smooth_rew - + def _reward_crash(self): crash_rew = torch.zeros((self.num_envs,), device=self.device, dtype=gs.tc_float) - + crash_condition = ( - (torch.abs(self.base_euler[:, 1]) > self.env_cfg["termination_if_pitch_greater_than"]) - | (torch.abs(self.base_euler[:, 0]) > 
self.env_cfg["termination_if_roll_greater_than"]) - | (torch.abs(self.rel_pos[:, 0]) > self.env_cfg["termination_if_x_greater_than"]) - | (torch.abs(self.rel_pos[:, 1]) > self.env_cfg["termination_if_y_greater_than"]) - | (torch.abs(self.rel_pos[:, 2]) > self.env_cfg["termination_if_z_greater_than"]) - | (self.base_pos[:, 2] < self.env_cfg["termination_if_close_to_ground"]) + (torch.abs(self.base_euler[:, 1]) > self.env_cfg["termination_if_pitch_greater_than"]) | + (torch.abs(self.base_euler[:, 0]) > self.env_cfg["termination_if_roll_greater_than"]) | + (torch.abs(self.rel_pos[:, 0]) > self.env_cfg["termination_if_x_greater_than"]) | + (torch.abs(self.rel_pos[:, 1]) > self.env_cfg["termination_if_y_greater_than"]) | + (torch.abs(self.rel_pos[:, 2]) > self.env_cfg["termination_if_z_greater_than"]) | + (self.base_pos[:, 2] < self.env_cfg["termination_if_close_to_ground"]) ) crash_rew[crash_condition] = -1 - return crash_rew + return crash_rew \ No newline at end of file diff --git a/examples/drone/hover_eval.py b/examples/drone/hover_eval.py index 79352396..79cd7a2e 100644 --- a/examples/drone/hover_eval.py +++ b/examples/drone/hover_eval.py @@ -12,7 +12,8 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("-e", "--exp_name", type=str, default="drone-hovering") - parser.add_argument("--ckpt", type=int, default=100) + parser.add_argument("--ckpt", type=int, default=500) + parser.add_argument("--record", action="store_true", default=False) args = parser.parse_args() gs.init() @@ -36,11 +37,18 @@ def main(): policy = runner.get_inference_policy(device="cuda:0") obs, _ = env.reset() + if args.record: + env.cam.start_recording() + + max_sim_step = int(env_cfg["episode_length_s"]/0.01) # 0.01 is the simulation time step with torch.no_grad(): - while True: + for i in range(max_sim_step): actions = policy(obs) obs, _, rews, dones, infos = env.step(actions) - + if args.record: + env.cam.render() + if args.record: + env.cam.stop_recording(save_to_filename="video.mp4", fps=60) if __name__ == "__main__": main() diff --git a/examples/drone/hover_train.py b/examples/drone/hover_train.py index 82b4d829..dcd43823 100644 --- a/examples/drone/hover_train.py +++ b/examples/drone/hover_train.py @@ -69,18 +69,16 @@ def get_cfgs(): # base pose "base_init_pos": [0.0, 0.0, 1.0], "base_init_quat": [1.0, 0.0, 0.0, 0.0], - "episode_length_s": 5.0, - "resampling_time_s": 5.0, + "episode_length_s": 15.0, + "resampling_time_s": 3.0, # "action_scale": 0.25, - # "simulate_action_latency": True, + "simulate_action_latency": True, "clip_actions": 1.0, } obs_cfg = { "num_obs": 17, "obs_scales": { "rel_pos": 1 / 3.0, - "euler_xy": 1 / 180, - "euler_z": 1 / 360, "lin_vel": 1 / 3.0, "ang_vel": 1 / 3.14159, }, @@ -106,7 +104,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("-e", "--exp_name", type=str, default="drone-hovering") parser.add_argument("-B", "--num_envs", type=int, default=4096) - parser.add_argument("--max_iterations", type=int, default=1000) + parser.add_argument("--max_iterations", type=int, default=500) args = parser.parse_args() gs.init(logging_level="warning") From ebf2871ed8f7d49b47139a8950354facae983db9 Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 15:51:51 +0800 Subject: [PATCH 2/6] update crash reward --- examples/drone/hover_env.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/examples/drone/hover_env.py b/examples/drone/hover_env.py index 0ca0f6c8..487b0905 100644 --- 
a/examples/drone/hover_env.py +++ b/examples/drone/hover_env.py @@ -132,13 +132,15 @@ def step(self, actions): self._resample_commands(envs_idx) # check termination and reset - self.reset_buf = self.episode_length_buf > self.max_episode_length - self.reset_buf |= torch.abs(self.base_euler[:, 1]) > self.env_cfg["termination_if_pitch_greater_than"] - self.reset_buf |= torch.abs(self.base_euler[:, 0]) > self.env_cfg["termination_if_roll_greater_than"] - self.reset_buf |= torch.abs(self.rel_pos[:, 0]) > self.env_cfg["termination_if_x_greater_than"] - self.reset_buf |= torch.abs(self.rel_pos[:, 1]) > self.env_cfg["termination_if_y_greater_than"] - self.reset_buf |= torch.abs(self.rel_pos[:, 2]) > self.env_cfg["termination_if_z_greater_than"] - self.reset_buf |= self.base_pos[:, 2] < self.env_cfg["termination_if_close_to_ground"] + self.crash_condition = ( + (torch.abs(self.base_euler[:, 1]) > self.env_cfg["termination_if_pitch_greater_than"]) | + (torch.abs(self.base_euler[:, 0]) > self.env_cfg["termination_if_roll_greater_than"]) | + (torch.abs(self.rel_pos[:, 0]) > self.env_cfg["termination_if_x_greater_than"]) | + (torch.abs(self.rel_pos[:, 1]) > self.env_cfg["termination_if_y_greater_than"]) | + (torch.abs(self.rel_pos[:, 2]) > self.env_cfg["termination_if_z_greater_than"]) | + (self.base_pos[:, 2] < self.env_cfg["termination_if_close_to_ground"]) + ) + self.reset_buf = (self.episode_length_buf > self.max_episode_length) | self.crash_condition time_out_idx = (self.episode_length_buf > self.max_episode_length).nonzero(as_tuple=False).flatten() self.extras["time_outs"] = torch.zeros_like(self.reset_buf, device=self.device, dtype=gs.tc_float) @@ -222,14 +224,5 @@ def _reward_smooth(self): def _reward_crash(self): crash_rew = torch.zeros((self.num_envs,), device=self.device, dtype=gs.tc_float) - - crash_condition = ( - (torch.abs(self.base_euler[:, 1]) > self.env_cfg["termination_if_pitch_greater_than"]) | - (torch.abs(self.base_euler[:, 0]) > self.env_cfg["termination_if_roll_greater_than"]) | - (torch.abs(self.rel_pos[:, 0]) > self.env_cfg["termination_if_x_greater_than"]) | - (torch.abs(self.rel_pos[:, 1]) > self.env_cfg["termination_if_y_greater_than"]) | - (torch.abs(self.rel_pos[:, 2]) > self.env_cfg["termination_if_z_greater_than"]) | - (self.base_pos[:, 2] < self.env_cfg["termination_if_close_to_ground"]) - ) - crash_rew[crash_condition] = -1 + crash_rew[self.crash_condition] = -1 return crash_rew \ No newline at end of file From 8cd2af1c05ac93de2cebe36d78aab385bec9de42 Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 19:07:52 +0800 Subject: [PATCH 3/6] Update Random Point Task --- examples/drone/hover_env.py | 47 +++++++++++++++++++++++++++-------- examples/drone/hover_train.py | 16 +++++++----- 2 files changed, 47 insertions(+), 16 deletions(-) diff --git a/examples/drone/hover_env.py b/examples/drone/hover_env.py index 487b0905..49aaa678 100644 --- a/examples/drone/hover_env.py +++ b/examples/drone/hover_env.py @@ -33,7 +33,7 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie sim_options=gs.options.SimOptions(dt=self.dt, substeps=2), viewer_options=gs.options.ViewerOptions( max_FPS=60, - camera_pos=(2.0, 0.0, 2.5), + camera_pos=(3.0, 0.0, 3.0), camera_lookat=(0.0, 0.0, 1.0), camera_fov=40, ), @@ -97,16 +97,31 @@ def _resample_commands(self, envs_idx): self.commands[envs_idx, 1] = gs_rand_float(*self.command_cfg["pos_y_range"], (len(envs_idx),), self.device) self.commands[envs_idx, 2] = 
gs_rand_float(*self.command_cfg["pos_z_range"], (len(envs_idx),), self.device) + # def _at_target(self, envs_idx): + # at_target = ( + # (torch.norm(self.rel_pos[envs_idx], dim=1) < self.env_cfg["at_target_threshold"]) + # .nonzero(as_tuple=False) + # .flatten() + # ) + # return envs_idx[at_target] + + def _at_target(self): + at_target = ( + (torch.norm(self.rel_pos, dim=1) < self.env_cfg["at_target_threshold"]) + .nonzero(as_tuple=False) + .flatten() + ) + return at_target + def step(self, actions): self.actions = torch.clip(actions, -self.env_cfg["clip_actions"], self.env_cfg["clip_actions"]) exec_actions = self.actions.cpu() - - # exec_actions = self.last_actions if self.simulate_action_latency else self.actions + # exec_actions = self.last_actions.cpu() if self.simulate_action_latency else self.actions.cpu() # target_dof_pos = exec_actions * self.env_cfg["action_scale"] + self.default_dof_pos # self.drone.control_dofs_position(target_dof_pos) # 14468 is hover rpm - self.drone.set_propellels_rpm((1 + exec_actions) * 14468.429183500699) + self.drone.set_propellels_rpm((1 + exec_actions*0.8) * 14468.429183500699) self.scene.step() # update buffers @@ -124,11 +139,13 @@ def step(self, actions): self.base_ang_vel[:] = transform_by_quat(self.drone.get_ang(), inv_base_quat) # resample commands - envs_idx = ( - (self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0) - .nonzero(as_tuple=False) - .flatten() - ) + # envs_idx = ( + # (self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0) + # .nonzero(as_tuple=False) + # .flatten() + # ) + # envs_idx = self._at_target(envs_idx) + envs_idx = self._at_target() self._resample_commands(envs_idx) # check termination and reset @@ -221,8 +238,18 @@ def _reward_target(self): def _reward_smooth(self): smooth_rew = torch.sum(torch.square(self.actions - self.last_actions), dim=1) return smooth_rew + + def _reward_yaw(self): + yaw = self.base_euler[:, 2] + yaw = torch.where(yaw > 180, yaw - 360, yaw)/180*3.14159 # use rad for yaw_reward + yaw_rew = torch.exp(self.reward_cfg["yaw_lambda"] * torch.abs(yaw)) + return yaw_rew + def _reward_angular(self): + angular_rew = torch.norm(self.base_ang_vel/3.14159, dim=1) + return angular_rew + def _reward_crash(self): crash_rew = torch.zeros((self.num_envs,), device=self.device, dtype=gs.tc_float) - crash_rew[self.crash_condition] = -1 + crash_rew[self.crash_condition] = 1 return crash_rew \ No newline at end of file diff --git a/examples/drone/hover_train.py b/examples/drone/hover_train.py index dcd43823..dfb1f803 100644 --- a/examples/drone/hover_train.py +++ b/examples/drone/hover_train.py @@ -15,10 +15,10 @@ def get_train_cfg(exp_name, max_iterations): "algorithm": { "clip_param": 0.2, "desired_kl": 0.01, - "entropy_coef": 0.01, + "entropy_coef": 0.002, "gamma": 0.99, "lam": 0.95, - "learning_rate": 0.001, + "learning_rate": 0.0003, "max_grad_norm": 1.0, "num_learning_epochs": 5, "num_mini_batches": 4, @@ -70,6 +70,7 @@ def get_cfgs(): "base_init_pos": [0.0, 0.0, 1.0], "base_init_quat": [1.0, 0.0, 0.0, 0.0], "episode_length_s": 15.0, + "at_target_threshold": 0.1, "resampling_time_s": 3.0, # "action_scale": 0.25, "simulate_action_latency": True, @@ -84,10 +85,13 @@ def get_cfgs(): }, } reward_cfg = { + "yaw_lambda": -10.0, "reward_scales": { - "target": 5.0, - "smooth": -0.001, - "crash": 1.0, + "target": 10.0, + "smooth": -1e-4, + "yaw": 0.01, + "angular": -2e-4, + "crash": -10.0, } } command_cfg = { @@ -103,7 +107,7 @@ def get_cfgs(): def main(): parser = 
argparse.ArgumentParser() parser.add_argument("-e", "--exp_name", type=str, default="drone-hovering") - parser.add_argument("-B", "--num_envs", type=int, default=4096) + parser.add_argument("-B", "--num_envs", type=int, default=8192) parser.add_argument("--max_iterations", type=int, default=500) args = parser.parse_args() From 20a566bcad6bcea971a87b7ed19c5ffa451294c4 Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 19:56:37 +0800 Subject: [PATCH 4/6] add target visualization and video record --- examples/drone/hover_env.py | 51 +++++++++++++++++++---------------- examples/drone/hover_eval.py | 27 ++++++++++++------- examples/drone/hover_train.py | 5 +++- 3 files changed, 50 insertions(+), 33 deletions(-) diff --git a/examples/drone/hover_env.py b/examples/drone/hover_env.py index 49aaa678..3d48d6b3 100644 --- a/examples/drone/hover_env.py +++ b/examples/drone/hover_env.py @@ -32,7 +32,7 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie self.scene = gs.Scene( sim_options=gs.options.SimOptions(dt=self.dt, substeps=2), viewer_options=gs.options.ViewerOptions( - max_FPS=60, + max_FPS=env_cfg["max_visualize_FPS"], camera_pos=(3.0, 0.0, 3.0), camera_lookat=(0.0, 0.0, 1.0), camera_fov=40, @@ -50,20 +50,37 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie # add plane self.scene.add_entity(gs.morphs.Plane()) + # add target + if self.env_cfg["visualize_target"]: + self.target = self.scene.add_entity(morph=gs.morphs.Mesh( + file="meshes/sphere.obj", + scale=0.05, + fixed=True, + collision=False, + ), + surface=gs.surfaces.Rough( + diffuse_texture=gs.textures.ColorTexture( + color=(1.0, 0.5, 0.5), + ), + ), + ) + + # add camera + if self.env_cfg["visualize_camera"]: + self.cam = self.scene.add_camera( + res=(640, 480), + pos=(3.5, 0.0, 2.5), + lookat=(0, 0, 0.5), + fov=30, + GUI=True, + ) + # add drone self.base_init_pos = torch.tensor(self.env_cfg["base_init_pos"], device=self.device) self.base_init_quat = torch.tensor(self.env_cfg["base_init_quat"], device=self.device) self.inv_base_init_quat = inv_quat(self.base_init_quat) self.drone = self.scene.add_entity(gs.morphs.Drone(file="urdf/drones/cf2x.urdf")) - self.cam = self.scene.add_camera( - res=(640, 480), - pos=(3.5, 0.0, 2.5), - lookat=(0, 0, 0.5), - fov=30, - GUI=True, - ) - # build scene self.scene.build(n_envs=num_envs) @@ -96,15 +113,9 @@ def _resample_commands(self, envs_idx): self.commands[envs_idx, 0] = gs_rand_float(*self.command_cfg["pos_x_range"], (len(envs_idx),), self.device) self.commands[envs_idx, 1] = gs_rand_float(*self.command_cfg["pos_y_range"], (len(envs_idx),), self.device) self.commands[envs_idx, 2] = gs_rand_float(*self.command_cfg["pos_z_range"], (len(envs_idx),), self.device) + if self.target is not None: + self.target.set_pos(self.commands[envs_idx], zero_velocity=True, envs_idx=envs_idx) - # def _at_target(self, envs_idx): - # at_target = ( - # (torch.norm(self.rel_pos[envs_idx], dim=1) < self.env_cfg["at_target_threshold"]) - # .nonzero(as_tuple=False) - # .flatten() - # ) - # return envs_idx[at_target] - def _at_target(self): at_target = ( (torch.norm(self.rel_pos, dim=1) < self.env_cfg["at_target_threshold"]) @@ -139,12 +150,6 @@ def step(self, actions): self.base_ang_vel[:] = transform_by_quat(self.drone.get_ang(), inv_base_quat) # resample commands - # envs_idx = ( - # (self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0) - # .nonzero(as_tuple=False) - # .flatten() - # ) - # 
envs_idx = self._at_target(envs_idx) envs_idx = self._at_target() self._resample_commands(envs_idx) diff --git a/examples/drone/hover_eval.py b/examples/drone/hover_eval.py index 79cd7a2e..9ef11e98 100644 --- a/examples/drone/hover_eval.py +++ b/examples/drone/hover_eval.py @@ -22,6 +22,13 @@ def main(): env_cfg, obs_cfg, reward_cfg, command_cfg, train_cfg = pickle.load(open(f"logs/{args.exp_name}/cfgs.pkl", "rb")) reward_cfg["reward_scales"] = {} + # visualize the target + env_cfg["visualize_target"] = True + # for video recording + env_cfg["visualize_camera"] = args.record + # set the max FPS for visualization + env_cfg["max_visualize_FPS"] = 60 + env = HoverEnv( num_envs=1, env_cfg=env_cfg, @@ -37,18 +44,20 @@ def main(): policy = runner.get_inference_policy(device="cuda:0") obs, _ = env.reset() - if args.record: - env.cam.start_recording() - max_sim_step = int(env_cfg["episode_length_s"]/0.01) # 0.01 is the simulation time step + max_sim_step = int(env_cfg["episode_length_s"]*env_cfg["max_visualize_FPS"]) with torch.no_grad(): - for i in range(max_sim_step): - actions = policy(obs) - obs, _, rews, dones, infos = env.step(actions) - if args.record: + if args.record: + env.cam.start_recording() + for _ in range(max_sim_step): + actions = policy(obs) + obs, _, rews, dones, infos = env.step(actions) env.cam.render() - if args.record: - env.cam.stop_recording(save_to_filename="video.mp4", fps=60) + env.cam.stop_recording(save_to_filename="video.mp4", fps=env_cfg["max_visualize_FPS"]) + else: + for _ in range(max_sim_step): + actions = policy(obs) + obs, _, rews, dones, infos = env.step(actions) if __name__ == "__main__": main() diff --git a/examples/drone/hover_train.py b/examples/drone/hover_train.py index dfb1f803..200d42d0 100644 --- a/examples/drone/hover_train.py +++ b/examples/drone/hover_train.py @@ -72,9 +72,12 @@ def get_cfgs(): "episode_length_s": 15.0, "at_target_threshold": 0.1, "resampling_time_s": 3.0, - # "action_scale": 0.25, "simulate_action_latency": True, "clip_actions": 1.0, + # visualization + "visualize_target": False, + "visualize_camera": False, + "max_visualize_FPS": 60, } obs_cfg = { "num_obs": 17, From 0f8d57f730cebc34d7f7c6803809d887c3e2af3a Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 20:25:25 +0800 Subject: [PATCH 5/6] update readme for drone-rl --- examples/drone/README.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/examples/drone/README.md b/examples/drone/README.md index 349a8d18..d48bdd4c 100644 --- a/examples/drone/README.md +++ b/examples/drone/README.md @@ -27,6 +27,37 @@ Run with: python fly.py -v -m ``` +### 3. Hover Environment (`hover_env.py`, `hover_train.py`, `hover_eval.py`) + +The hover environment (`hover_env.py`) is designed to train a drone to maintain a stable hover position by reaching randomly generated target points. The environment includes: + + - Initialization of the scene and entities (plane, drone and target). + - Reward functions to provide feedback to the agent based on its performance in reaching the target points. + - **Command resampling to generate new random target points** and environment reset functionalities to ensure continuous training. + +**Acknowledgement**: The reward design is inspired by [Champion-level drone racing using deep +reinforcement learning (Nature 2023)](https://www.nature.com/articles/s41586-023-06419-4.pdf) + +#### 3.1 Training + +Train the drone hovering policy using the `HoverEnv` environment. 
+ +Run with: + +```bash +python hover_train.py -e drone-hovering -B 8192 --max_iterations 500 +``` + +#### 3.2 Evaluation + +Evaluate the trained drone hovering policy. + +Run with: + +```bash +python hover_eval.py -e drone-hovering --ckpt 500 --record +``` + ## Technical Details - The drone model used is the Crazyflie 2.X (`urdf/drones/cf2x.urdf`) From 613bfdc1225d2e63d0ed350b900a33938361e297 Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 20:34:01 +0800 Subject: [PATCH 6/6] add installation --- examples/drone/README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/examples/drone/README.md b/examples/drone/README.md index d48bdd4c..39eb37c1 100644 --- a/examples/drone/README.md +++ b/examples/drone/README.md @@ -38,6 +38,19 @@ The hover environment (`hover_env.py`) is designed to train a drone to maintain **Acknowledgement**: The reward design is inspired by [Champion-level drone racing using deep reinforcement learning (Nature 2023)](https://www.nature.com/articles/s41586-023-06419-4.pdf) +#### 3.0 Installation + +At this stage, we have defined the environments. Now, we use the PPO implementation from `rsl-rl` to train the policy. Follow these installation steps: + +```bash +# Install rsl_rl. +git clone https://github.com/leggedrobotics/rsl_rl +cd rsl_rl && git checkout v1.0.2 && pip install -e . + +# Install tensorboard. +pip install tensorboard +``` + #### 3.1 Training Train the drone hovering policy using the `HoverEnv` environment.
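For reference, the `HoverEnv` API introduced by these patches can be exercised on its own, without `rsl_rl`, via a short zero-action rollout. The following is a minimal sketch, not part of the patch series: it assumes it is run from `examples/drone/` with Genesis and a CUDA device available, that `hover_train.py` imports cleanly, and that `get_cfgs()` returns the four config dicts in the order used by `main()`. The zero action corresponds to the hover-RPM baseline applied in `step()`.

```python
# Minimal sketch (assumed setup): random/zero-action rollout of HoverEnv,
# run from examples/drone/ with Genesis installed and hover_train importable.
import torch
import genesis as gs

from hover_env import HoverEnv
from hover_train import get_cfgs  # assumption: importing hover_train has no side effects

gs.init()

env_cfg, obs_cfg, reward_cfg, command_cfg = get_cfgs()
env_cfg["visualize_target"] = True  # show the target sphere added in PATCH 4/6

env = HoverEnv(
    num_envs=1,
    env_cfg=env_cfg,
    obs_cfg=obs_cfg,
    reward_cfg=reward_cfg,
    command_cfg=command_cfg,
    show_viewer=True,
)

obs, _ = env.reset()
with torch.no_grad():
    # dt is hard-coded to 0.01 s in HoverEnv, so this runs one full episode
    for _ in range(int(env_cfg["episode_length_s"] / 0.01)):
        # zero actions map to the hover RPM (14468) via (1 + a * 0.8) * 14468
        actions = torch.zeros((1, env_cfg["num_actions"]), device="cuda")
        obs, _, rews, dones, infos = env.step(actions)
```

As in `hover_eval.py`, `step()` returns a five-element tuple that is unpacked the same way by the `rsl-rl` runner once a trained policy replaces the zero actions.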