diff --git a/README.md b/README.md
index 2dcd0e9d..a2983b8b 100644
--- a/README.md
+++ b/README.md
@@ -57,9 +57,9 @@ Denoising World Model Learning(DWL) presents an advanced sim-to-real framework t
 #### Examples
 
 ```bash
-# Launching PPO Policy Training for 'v1' Across 8192 Environments
+# Launching PPO Policy Training for 'v1' Across 4096 Environments
 # This command initiates the PPO algorithm-based training for the humanoid task.
-python scripts/train.py --task=humanoid_ppo --run_name v1 --headless --num_envs 8192
+python scripts/train.py --task=humanoid_ppo --run_name v1 --headless --num_envs 4096
 
 # Evaluating the Trained PPO Policy 'v1'
 # This command loads the 'v1' policy for performance assessment in its environment.
diff --git a/humanoid/envs/base/legged_robot.py b/humanoid/envs/base/legged_robot.py
index 046825d2..287fb3c2 100644
--- a/humanoid/envs/base/legged_robot.py
+++ b/humanoid/envs/base/legged_robot.py
@@ -258,21 +258,15 @@ def _process_rigid_shape_props(self, props, env_id):
             if env_id==0:
                 # prepare friction randomization
                 friction_range = self.cfg.domain_rand.friction_range
-                restitution_range = self.cfg.domain_rand.restitution_range
                 num_buckets = 256
                 bucket_ids = torch.randint(0, num_buckets, (self.num_envs, 1))
                 friction_buckets = torch_rand_float(friction_range[0], friction_range[1], (num_buckets,1), device='cpu')
-                restitution_buckets = torch_rand_float(restitution_range[0], restitution_range[1], (num_buckets,1), device='cpu')
                 self.friction_coeffs = friction_buckets[bucket_ids]
-                self.restitution_coeffs = restitution_buckets[bucket_ids]
 
             for s in range(len(props)):
                 props[s].friction = self.friction_coeffs[env_id]
-                props[s].restitution = self.restitution_coeffs[env_id]
-
-            self.env_frictions[env_id] = self.friction_coeffs[env_id]
-        return props
+        return props
 
     def _process_dof_props(self, props, env_id):
         """ Callback allowing to store/change/randomize the DOF properties of each environment.
@@ -354,12 +348,9 @@ def _compute_torques(self, actions):
         """
         # pd controller
         actions_scaled = actions * self.cfg.control.action_scale
-        # control_type = self.cfg.control.control_type
         p_gains = self.p_gains
         d_gains = self.d_gains
-        torques = p_gains * \
-            (actions_scaled + self.default_dof_pos -
-             self.dof_pos) - d_gains * self.dof_vel
+        torques = p_gains * (actions_scaled + self.default_dof_pos - self.dof_pos) - d_gains * self.dof_vel
         return torch.clip(torques, -self.torque_limits, self.torque_limits)
 
@@ -389,12 +380,12 @@ def _reset_root_states(self, env_ids):
         if self.custom_origins:
             self.root_states[env_ids] = self.base_init_state
             self.root_states[env_ids, :3] += self.env_origins[env_ids]
-            self.root_states[env_ids, :2] += torch_rand_float(-4., 4., (len(env_ids), 2), device=self.device) # xy position within 1m of the center
+            self.root_states[env_ids, :2] += torch_rand_float(-1., 1., (len(env_ids), 2), device=self.device) # xy position within 1m of the center
         else:
             self.root_states[env_ids] = self.base_init_state
             self.root_states[env_ids, :3] += self.env_origins[env_ids]
         # base velocities
-        # self.root_states[env_ids, 7:13] = torch_rand_float(-0.01, 0.01, (len(env_ids), 6), device=self.device) # [7:10]: lin vel, [10:13]: ang vel
+        # self.root_states[env_ids, 7:13] = torch_rand_float(-0.05, 0.05, (len(env_ids), 6), device=self.device) # [7:10]: lin vel, [10:13]: ang vel
         if self.cfg.asset.fix_base_link:
             self.root_states[env_ids, 7:13] = 0
             self.root_states[env_ids, 2] += 1.8
@@ -403,15 +394,6 @@ def _reset_root_states(self, env_ids):
                                                      gymtorch.unwrap_tensor(self.root_states),
                                                      gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
 
-    # def _push_robots(self):
-    #     """ Random pushes the robots. Emulates an impulse by setting a randomized base velocity.
-    #     """
-    #     max_vel = self.cfg.domain_rand.max_push_vel_xy
-    #     max_push_angular = self.cfg.domain_rand.max_push_ang_vel
-    #     self.root_states[:, 7:9] = torch_rand_float(-max_vel, max_vel, (self.num_envs, 2), device=self.device) # lin vel x/y
-    #     self.root_states[:, 10:13] = torch_rand_float(-max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device) # ang vel
-    #     self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))
-
     def _update_terrain_curriculum(self, env_ids):
         """ Implements the game-inspired curriculum.
@@ -514,8 +496,7 @@ def _init_buffers(self):
             if not found:
                 self.p_gains[:, i] = 0.
                 self.d_gains[:, i] = 0.
-                if self.cfg.control.control_type in ["P", "V"]:
-                    print(f"PD gain of joint {name} were not defined, setting them to zero")
+                print(f"PD gain of joint {name} were not defined, setting them to zero")
 
         self.rand_push_force = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device)
diff --git a/humanoid/envs/base/legged_robot_config.py b/humanoid/envs/base/legged_robot_config.py
index 5455bafd..a9862165 100644
--- a/humanoid/envs/base/legged_robot_config.py
+++ b/humanoid/envs/base/legged_robot_config.py
@@ -88,7 +88,6 @@ class init_state:
             "joint_b": 0.}
 
     class control:
-        control_type = 'P' # P: position, V: velocity, T: torques
        # PD Drive parameters:
        stiffness = {'joint_a': 10.0, 'joint_b': 15.}  # [N*m/rad]
        damping = {'joint_a': 1.0, 'joint_b': 1.5}     # [N*m*s/rad]
@@ -119,28 +118,15 @@ class asset:
         armature = 0.
         thickness = 0.01
 
+
     class domain_rand:
         randomize_friction = True
         friction_range = [0.5, 1.25]
-        randomize_base_mass = True
+        randomize_base_mass = False
         added_mass_range = [-1., 1.]
         push_robots = True
-        push_interval_s = 4
+        push_interval_s = 15
         max_push_vel_xy = 1.
-        max_push_ang_vel = 3.
-        continuous_push = True
-        max_push_force = 1
-        max_push_torque = 1
-        push_force_noise = 1
-        push_torque_noise = 1
-        randomize_gains = True
-        p_gain_range = [0.7, 1.3]
-        d_gain_range = [0.7, 1.3]
-        randomize_torques = True
-        torque_range = [0.8, 1.2]
-
-        randomize_link_mass = True
-        link_mass_range = [-1.0, 1.0]
 
     class rewards:
diff --git a/humanoid/envs/custom/humanoid_config.py b/humanoid/envs/custom/humanoid_config.py
index dc3ab70f..79794e0f 100644
--- a/humanoid/envs/custom/humanoid_config.py
+++ b/humanoid/envs/custom/humanoid_config.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: BSD-3-Clause
-# 
+#
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 #
@@ -30,6 +30,7 @@
 from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO
 
+
 class XBotLCfg(LeggedRobotCfg):
     """
     Configuration class for the XBotL humanoid robot.
@@ -44,7 +45,7 @@ class env(LeggedRobotCfg.env):
         num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
         num_actions = 12
         num_envs = 4096
-        episode_length_s = 24 # episode length in seconds
+        episode_length_s = 24  # episode length in seconds
         use_ref_actions = False
 
     class safety:
@@ -53,7 +54,6 @@ class safety:
         vel_limit = 1.0
         torque_limit = 0.85
 
-
     class asset(LeggedRobotCfg.asset):
         file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/urdf/XBot-L.urdf'
@@ -90,14 +90,13 @@ class noise:
         noise_level = 0.6  # scales other values
 
         class noise_scales:
-            dof_pos = 0.05 
-            dof_vel = 0.5 
-            ang_vel = 0.1 
-            lin_vel = 0.05 
+            dof_pos = 0.05
+            dof_vel = 0.5
+            ang_vel = 0.1
+            lin_vel = 0.05
             quat = 0.03
             height_measurements = 0.1
 
-
     class init_state(LeggedRobotCfg.init_state):
         pos = [0.0, 0.0, 0.95]
@@ -118,11 +117,9 @@ class init_state(LeggedRobotCfg.init_state):
 
     class control(LeggedRobotCfg.control):
         # PD Drive parameters:
-        control_type = 'P'
-
-        stiffness = {'leg_roll': 200.0, 'leg_pitch': 350.0,'leg_yaw': 200.0,
+        stiffness = {'leg_roll': 200.0, 'leg_pitch': 350.0, 'leg_yaw': 200.0,
                      'knee': 350.0, 'ankle': 15}
-        damping = {'leg_roll': 10, 'leg_pitch': 10,'leg_yaw':
+        damping = {'leg_roll': 10, 'leg_pitch': 10, 'leg_yaw':
                    10, 'knee': 10, 'ankle': 10}
 
         # action scale: target angle = actionScale * action + defaultAngle
@@ -142,22 +139,23 @@ class physx(LeggedRobotCfg.sim.physx):
             num_velocity_iterations = 0
             contact_offset = 0.01  # [m]
             rest_offset = 0.0  # [m]
-            bounce_threshold_velocity = 0.1  # 0.5 #0.5 [m/s]
+            bounce_threshold_velocity = 0.1  # [m/s]
             max_depenetration_velocity = 1.0
             max_gpu_contact_pairs = 2**23  # 2**24 -> needed for 8000 envs and more
             default_buffer_size_multiplier = 5
             # 0: never, 1: last sub-step, 2: all sub-steps (default=2)
             contact_collection = 2
 
-    class domain_rand(LeggedRobotCfg.domain_rand):
+    class domain_rand:
+        randomize_friction = True
         friction_range = [0.1, 2.0]
         randomize_base_mass = True
         added_mass_range = [-5., 5.]
-        restitution_range = [0.0, 0.1]
         push_robots = True
         push_interval_s = 4
         max_push_vel_xy = 0.2
         max_push_ang_vel = 0.4
+        dynamic_randomization = 0.02
 
     class commands(LeggedRobotCfg.commands):
         # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -166,25 +164,25 @@ class commands(LeggedRobotCfg.commands):
         heading_command = True  # if true: compute ang vel command from heading error
 
         class ranges:
-            lin_vel_x = [-0.3, 0.6] # min max [m/s]
+            lin_vel_x = [-0.3, 0.6]  # min max [m/s]
             lin_vel_y = [-0.3, 0.3]   # min max [m/s]
             ang_vel_yaw = [-0.3, 0.3]  # min max [rad/s]
             heading = [-3.14, 3.14]
 
     class rewards:
-        base_height_target = 0.89 
+        base_height_target = 0.89
         min_dist = 0.2
-        max_dist = 0.5 
+        max_dist = 0.5
         # put some settings here for LLM parameter tuning
         target_joint_pos_scale = 0.17  # rad
-        target_feet_height = 0.05  # m
+        target_feet_height = 0.06  # m
         cycle_time = 0.64  # sec
         # if true negative total rewards are clipped at zero (avoids early termination problems)
         only_positive_rewards = True
         # tracking reward = exp(error*sigma)
-        tracking_sigma = 5 
-        max_contact_force = 450  # forces above this value are penalized
-
+        tracking_sigma = 5
+        max_contact_force = 700  # forces above this value are penalized
+
     class scales:
         # reference motion tracking
         joint_pos = 1.6
@@ -195,7 +193,7 @@ class scales:
         foot_slip = -0.05
         feet_distance = 0.2
         knee_distance = 0.2
-        # contact 
+        # contact
         feet_contact_forces = -0.01
         # vel tracking
         tracking_lin_vel = 1.2
@@ -215,7 +213,6 @@ class scales:
         dof_acc = -1e-7
         collision = -1.
 
-
     class normalization:
         class obs_scales:
             lin_vel = 2.
diff --git a/humanoid/envs/custom/humanoid_env.py b/humanoid/envs/custom/humanoid_env.py
index 5982531c..42ec1566 100644
--- a/humanoid/envs/custom/humanoid_env.py
+++ b/humanoid/envs/custom/humanoid_env.py
@@ -81,14 +81,6 @@ def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device,
         self.reset_idx(torch.tensor(range(self.num_envs), device=self.device))
         self.compute_observations()
 
-
-    def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless):
-        super().__init__(cfg, sim_params, physics_engine, sim_device, headless)
-        self.last_feet_z = 0.05
-        self.feet_height = torch.zeros((self.num_envs, 2), device=self.device)
-        self.reset_idx(torch.tensor(range(self.num_envs), device=self.device))
-        self.compute_observations()
-
     def _push_robots(self):
         """ Random pushes the robots. Emulates an impulse by setting a randomized base velocity.
         """
@@ -121,6 +113,7 @@ def _get_gait_phase(self):
         stance_mask[:, 0] = sin_pos >= 0
         # right foot stance
         stance_mask[:, 1] = sin_pos < 0
+        # Double support phase
         stance_mask[torch.abs(sin_pos) < 0.1] = 1
 
         return stance_mask
@@ -134,18 +127,18 @@ def compute_ref_state(self):
         self.ref_dof_pos = torch.zeros_like(self.dof_pos)
         scale_1 = self.cfg.rewards.target_joint_pos_scale
         scale_2 = 2 * scale_1
-        # left swing
+        # left foot stance phase set to default joint pos
         sin_pos_l[sin_pos_l > 0] = 0
         self.ref_dof_pos[:, 2] = sin_pos_l * scale_1
         self.ref_dof_pos[:, 3] = sin_pos_l * scale_2
         self.ref_dof_pos[:, 4] = sin_pos_l * scale_1
-        # right
+        # right foot stance phase set to default joint pos
         sin_pos_r[sin_pos_r < 0] = 0
         self.ref_dof_pos[:, 8] = sin_pos_r * scale_1
         self.ref_dof_pos[:, 9] = sin_pos_r * scale_2
         self.ref_dof_pos[:, 10] = sin_pos_r * scale_1
-
-        self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0.
+        # Double support phase
+        self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0
 
         self.ref_action = 2 * self.ref_dof_pos
 
@@ -197,8 +190,10 @@ def _get_noise_scale_vec(self, cfg):
     def step(self, actions):
         if self.cfg.env.use_ref_actions:
             actions += self.ref_action
+        # dynamic randomization
         delay = torch.rand((self.num_envs, 1), device=self.device)
         actions = (1 - delay) * actions + delay * self.actions
+        actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
         return super().step(actions)
 
@@ -465,8 +460,8 @@ def _reward_feet_clearance(self):
         # Compute swing mask
         swing_mask = 1 - self._get_gait_phase()
 
-        # feet height should larger than target feet height at the peak
-        rew_pos = (self.feet_height > self.cfg.rewards.target_feet_height)
+        # feet height should be close to target feet height at the peak
+        rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.01
         rew_pos = torch.sum(rew_pos * swing_mask, dim=1)
         self.feet_height *= ~contact
         return rew_pos
@@ -542,4 +537,4 @@ def _reward_action_smoothness(self):
         term_2 = torch.sum(torch.square(
             self.actions + self.last_last_actions - 2 * self.last_actions), dim=1)
         term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1)
-        return term_1 + term_2 + term_3
\ No newline at end of file
+        return term_1 + term_2 + term_3
diff --git a/humanoid/scripts/sim2sim.py b/humanoid/scripts/sim2sim.py
index ade31186..2454d480 100644
--- a/humanoid/scripts/sim2sim.py
+++ b/humanoid/scripts/sim2sim.py
@@ -118,6 +118,7 @@ def run_mujoco(policy, cfg):
         q = q[-cfg.env.num_actions:]
         dq = dq[-cfg.env.num_actions:]
 
+        # 1000hz -> 100hz
         if count_lowlevel % cfg.sim_config.decimation == 0:
 
             obs = np.zeros([1, cfg.env.num_single_obs], dtype=np.float32)
@@ -148,18 +149,18 @@ def run_mujoco(policy, cfg):
 
             target_q = action * cfg.control.action_scale
 
-        count_lowlevel += 1
 
         target_dq = np.zeros((cfg.env.num_actions), dtype=np.double)
         # Generate PD control
         tau = pd_control(target_q, q, cfg.robot_config.kps,
                         target_dq, dq, cfg.robot_config.kds)  # Calc torques
         tau = np.clip(tau, -cfg.robot_config.tau_limit, cfg.robot_config.tau_limit)  # Clamp torques
-
         data.ctrl = tau
 
         mujoco.mj_step(model, data)
         viewer.render()
+        count_lowlevel += 1
+
     viewer.close()
@@ -180,8 +181,8 @@ class sim_config:
         else:
             mujoco_model_path = f'{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/mjcf/XBot-L.xml'
         sim_duration = 60.0
-        dt = 0.002
-        decimation = 5
+        dt = 0.001
+        decimation = 10
 
     class robot_config:
         kps = np.array([200, 200, 350, 350, 15, 15, 200, 200, 350, 350, 15, 15], dtype=np.double)
diff --git a/resources/robots/XBot/mjcf/XBot-L-terrain.xml b/resources/robots/XBot/mjcf/XBot-L-terrain.xml
index b80bf2c1..107c09b4 100644
--- a/resources/robots/XBot/mjcf/XBot-L-terrain.xml
+++ b/resources/robots/XBot/mjcf/XBot-L-terrain.xml
@@ -1,6 +1,6 @@
-
diff --git a/resources/robots/XBot/mjcf/XBot-L.xml b/resources/robots/XBot/mjcf/XBot-L.xml
index b69b88f0..070c9d1b 100644
--- a/resources/robots/XBot/mjcf/XBot-L.xml
+++ b/resources/robots/XBot/mjcf/XBot-L.xml
@@ -1,6 +1,6 @@
-
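Editor's note (not part of the patch): the `dynamic_randomization` value added to `XBotLCfg.domain_rand` is consumed by the `step()` change above, which first blends each new action with the previous action using a random per-environment delay and then perturbs it with multiplicative Gaussian noise. Below is a minimal standalone sketch of that behaviour; the helper name `randomize_actions` and the tensor shapes are illustrative, not code from the repository.

```python
import torch

def randomize_actions(actions, last_actions, dynamic_randomization=0.02):
    """Sketch of the action delay + noise randomization added in humanoid_env.step()."""
    num_envs = actions.shape[0]
    # random per-environment delay: blend the fresh action with the previous one
    delay = torch.rand((num_envs, 1), device=actions.device)
    actions = (1 - delay) * actions + delay * last_actions
    # multiplicative Gaussian noise, roughly 2% of the action magnitude by default
    actions += dynamic_randomization * torch.randn_like(actions) * actions
    return actions

if __name__ == "__main__":
    a = torch.ones(4, 12)       # 4 envs x 12 actuated joints, purely illustrative
    prev = torch.zeros(4, 12)
    print(randomize_actions(a, prev).shape)  # torch.Size([4, 12])
```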
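Editor's note (not part of the patch): the `sim_config` change keeps the policy rate unchanged while doubling the physics rate, since `dt * decimation` is 0.01 s both before (0.002 x 5) and after (0.001 x 10); only the MuJoCo stepping moves from 500 Hz to 1000 Hz, matching the added "1000hz -> 100hz" comment. The sketch below illustrates such a decimated PD loop; it assumes the conventional PD law `tau = kp*(target_q - q) + kd*(target_dq - dq)` for `pd_control` and uses flat illustrative gains and limits rather than the per-joint values in `robot_config`.

```python
import numpy as np

def pd_control(target_q, q, kp, target_dq, dq, kd):
    # assumed PD torque law; mirrors the call signature used in sim2sim.py
    return kp * (target_q - q) + kd * (target_dq - dq)

dt, decimation = 0.001, 10             # 1000 Hz physics, policy queried every 10 steps (100 Hz)
kps = np.full(12, 200.0)               # illustrative gains, not the per-joint config values
kds = np.full(12, 10.0)
q = np.zeros(12)
dq = np.zeros(12)
target_q = np.zeros(12)

for step in range(int(1.0 / dt)):      # one simulated second
    if step % decimation == 0:
        target_q = 0.25 * np.ones(12)  # stand-in for `action * cfg.control.action_scale`
    tau = pd_control(target_q, q, kps, np.zeros(12), dq, kds)
    tau = np.clip(tau, -80.0, 80.0)    # stand-in for cfg.robot_config.tau_limit
    # a real loop would now apply `data.ctrl = tau` and call mujoco.mj_step(model, data)
```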