From b09b72a60ec46a546d94070229a0bd629aa99bc4 Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 15:46:12 +0800 Subject: [PATCH 1/6] add random sample target points --- examples/drone/hover_env.py | 44 +++++++++++++++++++---------------- examples/drone/hover_eval.py | 14 ++++++++--- examples/drone/hover_train.py | 10 ++++---- 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/examples/drone/hover_env.py b/examples/drone/hover_env.py index 7c6bc980..0ca0f6c8 100644 --- a/examples/drone/hover_env.py +++ b/examples/drone/hover_env.py @@ -3,11 +3,9 @@ import genesis as gs from genesis.utils.geom import quat_to_xyz, transform_by_quat, inv_quat, transform_quat_by_quat - def gs_rand_float(lower, upper, shape, device): return (upper - lower) * torch.rand(size=shape, device=device) + lower - class HoverEnv: def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_viewer=False, device="cuda"): self.device = torch.device(device) @@ -18,7 +16,7 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie self.num_actions = env_cfg["num_actions"] self.num_commands = command_cfg["num_commands"] - # self.simulate_action_latency = env_cfg["simulate_action_latency"] + self.simulate_action_latency = env_cfg["simulate_action_latency"] self.dt = 0.01 # run in 100hz self.max_episode_length = math.ceil(env_cfg["episode_length_s"] / self.dt) @@ -56,9 +54,16 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie self.base_init_pos = torch.tensor(self.env_cfg["base_init_pos"], device=self.device) self.base_init_quat = torch.tensor(self.env_cfg["base_init_quat"], device=self.device) self.inv_base_init_quat = inv_quat(self.base_init_quat) - # self.base_init_pos = torch.tensor(self.env_cfg["base_init_pos"], device=self.device) self.drone = self.scene.add_entity(gs.morphs.Drone(file="urdf/drones/cf2x.urdf")) + self.cam = self.scene.add_camera( + res=(640, 480), + pos=(3.5, 0.0, 2.5), + lookat=(0, 0, 0.5), + fov=30, + GUI=True, + ) + # build scene self.scene.build(n_envs=num_envs) @@ -111,7 +116,6 @@ def step(self, actions): self.rel_pos = self.commands - self.base_pos self.last_rel_pos = self.commands - self.last_base_pos self.base_quat[:] = self.drone.get_quat() - # self.base_euler = quat_to_xyz(self.base_quat) self.base_euler = quat_to_xyz( transform_quat_by_quat(torch.ones_like(self.base_quat) * self.inv_base_init_quat, self.base_quat) ) @@ -120,12 +124,12 @@ def step(self, actions): self.base_ang_vel[:] = transform_by_quat(self.drone.get_ang(), inv_base_quat) # resample commands - # envs_idx = ( - # (self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0) - # .nonzero(as_tuple=False) - # .flatten() - # ) - # self._resample_commands(envs_idx) + envs_idx = ( + (self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0) + .nonzero(as_tuple=False) + .flatten() + ) + self._resample_commands(envs_idx) # check termination and reset self.reset_buf = self.episode_length_buf > self.max_episode_length @@ -215,17 +219,17 @@ def _reward_target(self): def _reward_smooth(self): smooth_rew = torch.sum(torch.square(self.actions - self.last_actions), dim=1) return smooth_rew - + def _reward_crash(self): crash_rew = torch.zeros((self.num_envs,), device=self.device, dtype=gs.tc_float) - + crash_condition = ( - (torch.abs(self.base_euler[:, 1]) > self.env_cfg["termination_if_pitch_greater_than"]) - | (torch.abs(self.base_euler[:, 0]) > 
self.env_cfg["termination_if_roll_greater_than"]) - | (torch.abs(self.rel_pos[:, 0]) > self.env_cfg["termination_if_x_greater_than"]) - | (torch.abs(self.rel_pos[:, 1]) > self.env_cfg["termination_if_y_greater_than"]) - | (torch.abs(self.rel_pos[:, 2]) > self.env_cfg["termination_if_z_greater_than"]) - | (self.base_pos[:, 2] < self.env_cfg["termination_if_close_to_ground"]) + (torch.abs(self.base_euler[:, 1]) > self.env_cfg["termination_if_pitch_greater_than"]) | + (torch.abs(self.base_euler[:, 0]) > self.env_cfg["termination_if_roll_greater_than"]) | + (torch.abs(self.rel_pos[:, 0]) > self.env_cfg["termination_if_x_greater_than"]) | + (torch.abs(self.rel_pos[:, 1]) > self.env_cfg["termination_if_y_greater_than"]) | + (torch.abs(self.rel_pos[:, 2]) > self.env_cfg["termination_if_z_greater_than"]) | + (self.base_pos[:, 2] < self.env_cfg["termination_if_close_to_ground"]) ) crash_rew[crash_condition] = -1 - return crash_rew + return crash_rew \ No newline at end of file diff --git a/examples/drone/hover_eval.py b/examples/drone/hover_eval.py index 79352396..79cd7a2e 100644 --- a/examples/drone/hover_eval.py +++ b/examples/drone/hover_eval.py @@ -12,7 +12,8 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("-e", "--exp_name", type=str, default="drone-hovering") - parser.add_argument("--ckpt", type=int, default=100) + parser.add_argument("--ckpt", type=int, default=500) + parser.add_argument("--record", action="store_true", default=False) args = parser.parse_args() gs.init() @@ -36,11 +37,18 @@ def main(): policy = runner.get_inference_policy(device="cuda:0") obs, _ = env.reset() + if args.record: + env.cam.start_recording() + + max_sim_step = int(env_cfg["episode_length_s"]/0.01) # 0.01 is the simulation time step with torch.no_grad(): - while True: + for i in range(max_sim_step): actions = policy(obs) obs, _, rews, dones, infos = env.step(actions) - + if args.record: + env.cam.render() + if args.record: + env.cam.stop_recording(save_to_filename="video.mp4", fps=60) if __name__ == "__main__": main() diff --git a/examples/drone/hover_train.py b/examples/drone/hover_train.py index 82b4d829..dcd43823 100644 --- a/examples/drone/hover_train.py +++ b/examples/drone/hover_train.py @@ -69,18 +69,16 @@ def get_cfgs(): # base pose "base_init_pos": [0.0, 0.0, 1.0], "base_init_quat": [1.0, 0.0, 0.0, 0.0], - "episode_length_s": 5.0, - "resampling_time_s": 5.0, + "episode_length_s": 15.0, + "resampling_time_s": 3.0, # "action_scale": 0.25, - # "simulate_action_latency": True, + "simulate_action_latency": True, "clip_actions": 1.0, } obs_cfg = { "num_obs": 17, "obs_scales": { "rel_pos": 1 / 3.0, - "euler_xy": 1 / 180, - "euler_z": 1 / 360, "lin_vel": 1 / 3.0, "ang_vel": 1 / 3.14159, }, @@ -106,7 +104,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("-e", "--exp_name", type=str, default="drone-hovering") parser.add_argument("-B", "--num_envs", type=int, default=4096) - parser.add_argument("--max_iterations", type=int, default=1000) + parser.add_argument("--max_iterations", type=int, default=500) args = parser.parse_args() gs.init(logging_level="warning") From ebf2871ed8f7d49b47139a8950354facae983db9 Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 15:51:51 +0800 Subject: [PATCH 2/6] update crash reward --- examples/drone/hover_env.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/examples/drone/hover_env.py b/examples/drone/hover_env.py index 0ca0f6c8..487b0905 100644 --- 
a/examples/drone/hover_env.py +++ b/examples/drone/hover_env.py @@ -132,13 +132,15 @@ def step(self, actions): self._resample_commands(envs_idx) # check termination and reset - self.reset_buf = self.episode_length_buf > self.max_episode_length - self.reset_buf |= torch.abs(self.base_euler[:, 1]) > self.env_cfg["termination_if_pitch_greater_than"] - self.reset_buf |= torch.abs(self.base_euler[:, 0]) > self.env_cfg["termination_if_roll_greater_than"] - self.reset_buf |= torch.abs(self.rel_pos[:, 0]) > self.env_cfg["termination_if_x_greater_than"] - self.reset_buf |= torch.abs(self.rel_pos[:, 1]) > self.env_cfg["termination_if_y_greater_than"] - self.reset_buf |= torch.abs(self.rel_pos[:, 2]) > self.env_cfg["termination_if_z_greater_than"] - self.reset_buf |= self.base_pos[:, 2] < self.env_cfg["termination_if_close_to_ground"] + self.crash_condition = ( + (torch.abs(self.base_euler[:, 1]) > self.env_cfg["termination_if_pitch_greater_than"]) | + (torch.abs(self.base_euler[:, 0]) > self.env_cfg["termination_if_roll_greater_than"]) | + (torch.abs(self.rel_pos[:, 0]) > self.env_cfg["termination_if_x_greater_than"]) | + (torch.abs(self.rel_pos[:, 1]) > self.env_cfg["termination_if_y_greater_than"]) | + (torch.abs(self.rel_pos[:, 2]) > self.env_cfg["termination_if_z_greater_than"]) | + (self.base_pos[:, 2] < self.env_cfg["termination_if_close_to_ground"]) + ) + self.reset_buf = (self.episode_length_buf > self.max_episode_length) | self.crash_condition time_out_idx = (self.episode_length_buf > self.max_episode_length).nonzero(as_tuple=False).flatten() self.extras["time_outs"] = torch.zeros_like(self.reset_buf, device=self.device, dtype=gs.tc_float) @@ -222,14 +224,5 @@ def _reward_smooth(self): def _reward_crash(self): crash_rew = torch.zeros((self.num_envs,), device=self.device, dtype=gs.tc_float) - - crash_condition = ( - (torch.abs(self.base_euler[:, 1]) > self.env_cfg["termination_if_pitch_greater_than"]) | - (torch.abs(self.base_euler[:, 0]) > self.env_cfg["termination_if_roll_greater_than"]) | - (torch.abs(self.rel_pos[:, 0]) > self.env_cfg["termination_if_x_greater_than"]) | - (torch.abs(self.rel_pos[:, 1]) > self.env_cfg["termination_if_y_greater_than"]) | - (torch.abs(self.rel_pos[:, 2]) > self.env_cfg["termination_if_z_greater_than"]) | - (self.base_pos[:, 2] < self.env_cfg["termination_if_close_to_ground"]) - ) - crash_rew[crash_condition] = -1 + crash_rew[self.crash_condition] = -1 return crash_rew \ No newline at end of file From 8cd2af1c05ac93de2cebe36d78aab385bec9de42 Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 19:07:52 +0800 Subject: [PATCH 3/6] Update Random Point Task --- examples/drone/hover_env.py | 47 +++++++++++++++++++++++++++-------- examples/drone/hover_train.py | 16 +++++++----- 2 files changed, 47 insertions(+), 16 deletions(-) diff --git a/examples/drone/hover_env.py b/examples/drone/hover_env.py index 487b0905..49aaa678 100644 --- a/examples/drone/hover_env.py +++ b/examples/drone/hover_env.py @@ -33,7 +33,7 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie sim_options=gs.options.SimOptions(dt=self.dt, substeps=2), viewer_options=gs.options.ViewerOptions( max_FPS=60, - camera_pos=(2.0, 0.0, 2.5), + camera_pos=(3.0, 0.0, 3.0), camera_lookat=(0.0, 0.0, 1.0), camera_fov=40, ), @@ -97,16 +97,31 @@ def _resample_commands(self, envs_idx): self.commands[envs_idx, 1] = gs_rand_float(*self.command_cfg["pos_y_range"], (len(envs_idx),), self.device) self.commands[envs_idx, 2] = 
gs_rand_float(*self.command_cfg["pos_z_range"], (len(envs_idx),), self.device) + # def _at_target(self, envs_idx): + # at_target = ( + # (torch.norm(self.rel_pos[envs_idx], dim=1) < self.env_cfg["at_target_threshold"]) + # .nonzero(as_tuple=False) + # .flatten() + # ) + # return envs_idx[at_target] + + def _at_target(self): + at_target = ( + (torch.norm(self.rel_pos, dim=1) < self.env_cfg["at_target_threshold"]) + .nonzero(as_tuple=False) + .flatten() + ) + return at_target + def step(self, actions): self.actions = torch.clip(actions, -self.env_cfg["clip_actions"], self.env_cfg["clip_actions"]) exec_actions = self.actions.cpu() - - # exec_actions = self.last_actions if self.simulate_action_latency else self.actions + # exec_actions = self.last_actions.cpu() if self.simulate_action_latency else self.actions.cpu() # target_dof_pos = exec_actions * self.env_cfg["action_scale"] + self.default_dof_pos # self.drone.control_dofs_position(target_dof_pos) # 14468 is hover rpm - self.drone.set_propellels_rpm((1 + exec_actions) * 14468.429183500699) + self.drone.set_propellels_rpm((1 + exec_actions*0.8) * 14468.429183500699) self.scene.step() # update buffers @@ -124,11 +139,13 @@ def step(self, actions): self.base_ang_vel[:] = transform_by_quat(self.drone.get_ang(), inv_base_quat) # resample commands - envs_idx = ( - (self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0) - .nonzero(as_tuple=False) - .flatten() - ) + # envs_idx = ( + # (self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0) + # .nonzero(as_tuple=False) + # .flatten() + # ) + # envs_idx = self._at_target(envs_idx) + envs_idx = self._at_target() self._resample_commands(envs_idx) # check termination and reset @@ -221,8 +238,18 @@ def _reward_target(self): def _reward_smooth(self): smooth_rew = torch.sum(torch.square(self.actions - self.last_actions), dim=1) return smooth_rew + + def _reward_yaw(self): + yaw = self.base_euler[:, 2] + yaw = torch.where(yaw > 180, yaw - 360, yaw)/180*3.14159 # use rad for yaw_reward + yaw_rew = torch.exp(self.reward_cfg["yaw_lambda"] * torch.abs(yaw)) + return yaw_rew + def _reward_angular(self): + angular_rew = torch.norm(self.base_ang_vel/3.14159, dim=1) + return angular_rew + def _reward_crash(self): crash_rew = torch.zeros((self.num_envs,), device=self.device, dtype=gs.tc_float) - crash_rew[self.crash_condition] = -1 + crash_rew[self.crash_condition] = 1 return crash_rew \ No newline at end of file diff --git a/examples/drone/hover_train.py b/examples/drone/hover_train.py index dcd43823..dfb1f803 100644 --- a/examples/drone/hover_train.py +++ b/examples/drone/hover_train.py @@ -15,10 +15,10 @@ def get_train_cfg(exp_name, max_iterations): "algorithm": { "clip_param": 0.2, "desired_kl": 0.01, - "entropy_coef": 0.01, + "entropy_coef": 0.002, "gamma": 0.99, "lam": 0.95, - "learning_rate": 0.001, + "learning_rate": 0.0003, "max_grad_norm": 1.0, "num_learning_epochs": 5, "num_mini_batches": 4, @@ -70,6 +70,7 @@ def get_cfgs(): "base_init_pos": [0.0, 0.0, 1.0], "base_init_quat": [1.0, 0.0, 0.0, 0.0], "episode_length_s": 15.0, + "at_target_threshold": 0.1, "resampling_time_s": 3.0, # "action_scale": 0.25, "simulate_action_latency": True, @@ -84,10 +85,13 @@ def get_cfgs(): }, } reward_cfg = { + "yaw_lambda": -10.0, "reward_scales": { - "target": 5.0, - "smooth": -0.001, - "crash": 1.0, + "target": 10.0, + "smooth": -1e-4, + "yaw": 0.01, + "angular": -2e-4, + "crash": -10.0, } } command_cfg = { @@ -103,7 +107,7 @@ def get_cfgs(): def main(): parser = 
argparse.ArgumentParser() parser.add_argument("-e", "--exp_name", type=str, default="drone-hovering") - parser.add_argument("-B", "--num_envs", type=int, default=4096) + parser.add_argument("-B", "--num_envs", type=int, default=8192) parser.add_argument("--max_iterations", type=int, default=500) args = parser.parse_args() From 20a566bcad6bcea971a87b7ed19c5ffa451294c4 Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 19:56:37 +0800 Subject: [PATCH 4/6] add target visualization and video record --- examples/drone/hover_env.py | 51 +++++++++++++++++++---------------- examples/drone/hover_eval.py | 27 ++++++++++++------- examples/drone/hover_train.py | 5 +++- 3 files changed, 50 insertions(+), 33 deletions(-) diff --git a/examples/drone/hover_env.py b/examples/drone/hover_env.py index 49aaa678..3d48d6b3 100644 --- a/examples/drone/hover_env.py +++ b/examples/drone/hover_env.py @@ -32,7 +32,7 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie self.scene = gs.Scene( sim_options=gs.options.SimOptions(dt=self.dt, substeps=2), viewer_options=gs.options.ViewerOptions( - max_FPS=60, + max_FPS=env_cfg["max_visualize_FPS"], camera_pos=(3.0, 0.0, 3.0), camera_lookat=(0.0, 0.0, 1.0), camera_fov=40, @@ -50,20 +50,37 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie # add plane self.scene.add_entity(gs.morphs.Plane()) + # add target + if self.env_cfg["visualize_target"]: + self.target = self.scene.add_entity(morph=gs.morphs.Mesh( + file="meshes/sphere.obj", + scale=0.05, + fixed=True, + collision=False, + ), + surface=gs.surfaces.Rough( + diffuse_texture=gs.textures.ColorTexture( + color=(1.0, 0.5, 0.5), + ), + ), + ) + + # add camera + if self.env_cfg["visualize_camera"]: + self.cam = self.scene.add_camera( + res=(640, 480), + pos=(3.5, 0.0, 2.5), + lookat=(0, 0, 0.5), + fov=30, + GUI=True, + ) + # add drone self.base_init_pos = torch.tensor(self.env_cfg["base_init_pos"], device=self.device) self.base_init_quat = torch.tensor(self.env_cfg["base_init_quat"], device=self.device) self.inv_base_init_quat = inv_quat(self.base_init_quat) self.drone = self.scene.add_entity(gs.morphs.Drone(file="urdf/drones/cf2x.urdf")) - self.cam = self.scene.add_camera( - res=(640, 480), - pos=(3.5, 0.0, 2.5), - lookat=(0, 0, 0.5), - fov=30, - GUI=True, - ) - # build scene self.scene.build(n_envs=num_envs) @@ -96,15 +113,9 @@ def _resample_commands(self, envs_idx): self.commands[envs_idx, 0] = gs_rand_float(*self.command_cfg["pos_x_range"], (len(envs_idx),), self.device) self.commands[envs_idx, 1] = gs_rand_float(*self.command_cfg["pos_y_range"], (len(envs_idx),), self.device) self.commands[envs_idx, 2] = gs_rand_float(*self.command_cfg["pos_z_range"], (len(envs_idx),), self.device) + if self.target is not None: + self.target.set_pos(self.commands[envs_idx], zero_velocity=True, envs_idx=envs_idx) - # def _at_target(self, envs_idx): - # at_target = ( - # (torch.norm(self.rel_pos[envs_idx], dim=1) < self.env_cfg["at_target_threshold"]) - # .nonzero(as_tuple=False) - # .flatten() - # ) - # return envs_idx[at_target] - def _at_target(self): at_target = ( (torch.norm(self.rel_pos, dim=1) < self.env_cfg["at_target_threshold"]) @@ -139,12 +150,6 @@ def step(self, actions): self.base_ang_vel[:] = transform_by_quat(self.drone.get_ang(), inv_base_quat) # resample commands - # envs_idx = ( - # (self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0) - # .nonzero(as_tuple=False) - # .flatten() - # ) - # 
envs_idx = self._at_target(envs_idx) envs_idx = self._at_target() self._resample_commands(envs_idx) diff --git a/examples/drone/hover_eval.py b/examples/drone/hover_eval.py index 79cd7a2e..9ef11e98 100644 --- a/examples/drone/hover_eval.py +++ b/examples/drone/hover_eval.py @@ -22,6 +22,13 @@ def main(): env_cfg, obs_cfg, reward_cfg, command_cfg, train_cfg = pickle.load(open(f"logs/{args.exp_name}/cfgs.pkl", "rb")) reward_cfg["reward_scales"] = {} + # visualize the target + env_cfg["visualize_target"] = True + # for video recording + env_cfg["visualize_camera"] = args.record + # set the max FPS for visualization + env_cfg["max_visualize_FPS"] = 60 + env = HoverEnv( num_envs=1, env_cfg=env_cfg, @@ -37,18 +44,20 @@ def main(): policy = runner.get_inference_policy(device="cuda:0") obs, _ = env.reset() - if args.record: - env.cam.start_recording() - max_sim_step = int(env_cfg["episode_length_s"]/0.01) # 0.01 is the simulation time step + max_sim_step = int(env_cfg["episode_length_s"]*env_cfg["max_visualize_FPS"]) with torch.no_grad(): - for i in range(max_sim_step): - actions = policy(obs) - obs, _, rews, dones, infos = env.step(actions) - if args.record: + if args.record: + env.cam.start_recording() + for _ in range(max_sim_step): + actions = policy(obs) + obs, _, rews, dones, infos = env.step(actions) env.cam.render() - if args.record: - env.cam.stop_recording(save_to_filename="video.mp4", fps=60) + env.cam.stop_recording(save_to_filename="video.mp4", fps=env_cfg["max_visualize_FPS"]) + else: + for _ in range(max_sim_step): + actions = policy(obs) + obs, _, rews, dones, infos = env.step(actions) if __name__ == "__main__": main() diff --git a/examples/drone/hover_train.py b/examples/drone/hover_train.py index dfb1f803..200d42d0 100644 --- a/examples/drone/hover_train.py +++ b/examples/drone/hover_train.py @@ -72,9 +72,12 @@ def get_cfgs(): "episode_length_s": 15.0, "at_target_threshold": 0.1, "resampling_time_s": 3.0, - # "action_scale": 0.25, "simulate_action_latency": True, "clip_actions": 1.0, + # visualization + "visualize_target": False, + "visualize_camera": False, + "max_visualize_FPS": 60, } obs_cfg = { "num_obs": 17, From 0f8d57f730cebc34d7f7c6803809d887c3e2af3a Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 20:25:25 +0800 Subject: [PATCH 5/6] update readme for drone-rl --- examples/drone/README.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/examples/drone/README.md b/examples/drone/README.md index 349a8d18..d48bdd4c 100644 --- a/examples/drone/README.md +++ b/examples/drone/README.md @@ -27,6 +27,37 @@ Run with: python fly.py -v -m ``` +### 3. Hover Environment (`hover_env.py`, `hover_train.py`, `hover_eval.py`) + +The hover environment (`hover_env.py`) is designed to train a drone to maintain a stable hover position by reaching randomly generated target points. The environment includes: + + - Initialization of the scene and entities (plane, drone and target). + - Reward functions to provide feedback to the agent based on its performance in reaching the target points. + - **Command resampling to generate new random target points** and environment reset functionalities to ensure continuous training. + +**Acknowledgement**: The reward design is inspired by [Champion-level drone racing using deep +reinforcement learning (Nature 2023)](https://www.nature.com/articles/s41586-023-06419-4.pdf) + +#### 3.1 Training + +Train the drone hovering policy using the `HoverEnv` environment. 
+ +Run with: + +```bash +python hover_train.py -e drone-hovering -B 8192 --max_iterations 500 +``` + +#### 3.2 Evaluation + +Evaluate the trained drone hovering policy. + +Run with: + +```bash +python hover_eval.py -e drone-hovering --ckpt 500 --record +``` + ## Technical Details - The drone model used is the Crazyflie 2.X (`urdf/drones/cf2x.urdf`) From 613bfdc1225d2e63d0ed350b900a33938361e297 Mon Sep 17 00:00:00 2001 From: KafuuChikai <810658920qq@gmail.com> Date: Fri, 27 Dec 2024 20:34:01 +0800 Subject: [PATCH 6/6] add installation --- examples/drone/README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/examples/drone/README.md b/examples/drone/README.md index d48bdd4c..39eb37c1 100644 --- a/examples/drone/README.md +++ b/examples/drone/README.md @@ -38,6 +38,19 @@ The hover environment (`hover_env.py`) is designed to train a drone to maintain **Acknowledgement**: The reward design is inspired by [Champion-level drone racing using deep reinforcement learning (Nature 2023)](https://www.nature.com/articles/s41586-023-06419-4.pdf) +#### 3.0 Installation + +At this stage, we have defined the environments. Now, we use the PPO implementation from `rsl-rl` to train the policy. Follow these installation steps: + +```bash +# Install rsl_rl. +git clone https://github.com/leggedrobotics/rsl_rl +cd rsl_rl && git checkout v1.0.2 && pip install -e . + +# Install tensorboard. +pip install tensorboard +``` + #### 3.1 Training Train the drone hovering policy using the `HoverEnv` environment.
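For reference, the `HoverEnv` API introduced by these patches can be exercised on its own, without `rsl_rl`, via a short zero-action rollout. The following is a minimal sketch, not part of the patch series: it assumes it is run from `examples/drone/` with Genesis and a CUDA device available, that `hover_train.py` imports cleanly, and that `get_cfgs()` returns the four config dicts in the order used by `main()`. The zero action corresponds to the hover-RPM baseline applied in `step()`.

```python
# Minimal sketch (assumed setup): random/zero-action rollout of HoverEnv,
# run from examples/drone/ with Genesis installed and hover_train importable.
import torch
import genesis as gs

from hover_env import HoverEnv
from hover_train import get_cfgs  # assumption: importing hover_train has no side effects

gs.init()

env_cfg, obs_cfg, reward_cfg, command_cfg = get_cfgs()
env_cfg["visualize_target"] = True  # show the target sphere added in PATCH 4/6

env = HoverEnv(
    num_envs=1,
    env_cfg=env_cfg,
    obs_cfg=obs_cfg,
    reward_cfg=reward_cfg,
    command_cfg=command_cfg,
    show_viewer=True,
)

obs, _ = env.reset()
with torch.no_grad():
    # dt is hard-coded to 0.01 s in HoverEnv, so this runs one full episode
    for _ in range(int(env_cfg["episode_length_s"] / 0.01)):
        # zero actions map to the hover RPM (14468) via (1 + a * 0.8) * 14468
        actions = torch.zeros((1, env_cfg["num_actions"]), device="cuda")
        obs, _, rews, dones, infos = env.step(actions)
```

As in `hover_eval.py`, `step()` returns a five-element tuple that is unpacked the same way by the `rsl-rl` runner once a trained policy replaces the zero actions.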