Merge pull request #3 from roboterax/dev
Enhance Env Randomization, Optimize Performance
zlw21gxy authored Mar 13, 2024
2 parents f565d73 + 807b3e1 commit bf7128a
Showing 8 changed files with 48 additions and 88 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -57,9 +57,9 @@ Denoising World Model Learning(DWL) presents an advanced sim-to-real framework t
#### Examples

```bash
# Launching PPO Policy Training for 'v1' Across 8192 Environments
# Launching PPO Policy Training for 'v1' Across 4096 Environments
# This command initiates the PPO algorithm-based training for the humanoid task.
python scripts/train.py --task=humanoid_ppo --run_name v1 --headless --num_envs 8192
python scripts/train.py --task=humanoid_ppo --run_name v1 --headless --num_envs 4096

# Evaluating the Trained PPO Policy 'v1'
# This command loads the 'v1' policy for performance assessment in its environment.
29 changes: 5 additions & 24 deletions humanoid/envs/base/legged_robot.py
@@ -258,21 +258,15 @@ def _process_rigid_shape_props(self, props, env_id):
if env_id==0:
# prepare friction randomization
friction_range = self.cfg.domain_rand.friction_range
restitution_range = self.cfg.domain_rand.restitution_range
num_buckets = 256
bucket_ids = torch.randint(0, num_buckets, (self.num_envs, 1))
friction_buckets = torch_rand_float(friction_range[0], friction_range[1], (num_buckets,1), device='cpu')
restitution_buckets = torch_rand_float(restitution_range[0], restitution_range[1], (num_buckets,1), device='cpu')
self.friction_coeffs = friction_buckets[bucket_ids]
self.restitution_coeffs = restitution_buckets[bucket_ids]

for s in range(len(props)):
props[s].friction = self.friction_coeffs[env_id]
props[s].restitution = self.restitution_coeffs[env_id]

self.env_frictions[env_id] = self.friction_coeffs[env_id]

return props


def _process_dof_props(self, props, env_id):
""" Callback allowing to store/change/randomize the DOF properties of each environment.
@@ -354,12 +348,9 @@ def _compute_torques(self, actions):
"""
# pd controller
actions_scaled = actions * self.cfg.control.action_scale
# control_type = self.cfg.control.control_type
p_gains = self.p_gains
d_gains = self.d_gains
torques = p_gains * \
(actions_scaled + self.default_dof_pos -
self.dof_pos) - d_gains * self.dof_vel
torques = p_gains * (actions_scaled + self.default_dof_pos - self.dof_pos) - d_gains * self.dof_vel
return torch.clip(torques, -self.torque_limits, self.torque_limits)


@@ -389,12 +380,12 @@ def _reset_root_states(self, env_ids):
if self.custom_origins:
self.root_states[env_ids] = self.base_init_state
self.root_states[env_ids, :3] += self.env_origins[env_ids]
self.root_states[env_ids, :2] += torch_rand_float(-4., 4., (len(env_ids), 2), device=self.device) # xy position within 1m of the center
self.root_states[env_ids, :2] += torch_rand_float(-1., 1., (len(env_ids), 2), device=self.device) # xy position within 1m of the center
else:
self.root_states[env_ids] = self.base_init_state
self.root_states[env_ids, :3] += self.env_origins[env_ids]
# base velocities
# self.root_states[env_ids, 7:13] = torch_rand_float(-0.01, 0.01, (len(env_ids), 6), device=self.device) # [7:10]: lin vel, [10:13]: ang vel
# self.root_states[env_ids, 7:13] = torch_rand_float(-0.05, 0.05, (len(env_ids), 6), device=self.device) # [7:10]: lin vel, [10:13]: ang vel
if self.cfg.asset.fix_base_link:
self.root_states[env_ids, 7:13] = 0
self.root_states[env_ids, 2] += 1.8
@@ -403,15 +394,6 @@ def _reset_root_states(self, env_ids):
gymtorch.unwrap_tensor(self.root_states),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))

# def _push_robots(self):
# """ Random pushes the robots. Emulates an impulse by setting a randomized base velocity.
# """
# max_vel = self.cfg.domain_rand.max_push_vel_xy
# max_push_angular = self.cfg.domain_rand.max_push_ang_vel
# self.root_states[:, 7:9] = torch_rand_float(-max_vel, max_vel, (self.num_envs, 2), device=self.device) # lin vel x/y
# self.root_states[:, 10:13] = torch_rand_float(-max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device) # ang vel
# self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))


def _update_terrain_curriculum(self, env_ids):
""" Implements the game-inspired curriculum.
@@ -514,8 +496,7 @@ def _init_buffers(self):
if not found:
self.p_gains[:, i] = 0.
self.d_gains[:, i] = 0.
if self.cfg.control.control_type in ["P", "V"]:
print(f"PD gain of joint {name} were not defined, setting them to zero")
print(f"PD gain of joint {name} were not defined, setting them to zero")


self.rand_push_force = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device)
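
The friction randomization in `_process_rigid_shape_props` above draws per-environment friction coefficients from a small fixed set of "buckets" rather than sampling each environment independently, presumably to keep the number of distinct physics material values low. Below is a minimal, self-contained sketch of that sampling, using plain `torch.rand` in place of `torch_rand_float`; the function name, defaults, and shapes are illustrative assumptions, not code from the repository:

```python
import torch

def sample_bucketed_frictions(num_envs: int,
                              friction_range=(0.1, 2.0),
                              num_buckets: int = 256) -> torch.Tensor:
    # Draw one friction value per bucket, then assign each env a random bucket id.
    lo, hi = friction_range
    friction_buckets = lo + (hi - lo) * torch.rand(num_buckets, 1)   # (num_buckets, 1)
    bucket_ids = torch.randint(0, num_buckets, (num_envs, 1))        # (num_envs, 1)
    # Gather one coefficient per environment -> shape (num_envs, 1).
    return friction_buckets[bucket_ids].squeeze(-1)

# Example: one coefficient per environment; in the code above it is then written
# into every rigid-shape prop of that env (props[s].friction = friction_coeffs[env_id]).
friction_coeffs = sample_bucketed_frictions(num_envs=4096)
```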
20 changes: 3 additions & 17 deletions humanoid/envs/base/legged_robot_config.py
@@ -88,7 +88,6 @@ class init_state:
"joint_b": 0.}

class control:
control_type = 'P' # P: position, V: velocity, T: torques
# PD Drive parameters:
stiffness = {'joint_a': 10.0, 'joint_b': 15.} # [N*m/rad]
damping = {'joint_a': 1.0, 'joint_b': 1.5} # [N*m*s/rad]
@@ -119,28 +118,15 @@ class asset:
armature = 0.
thickness = 0.01


class domain_rand:
randomize_friction = True
friction_range = [0.5, 1.25]
randomize_base_mass = True
randomize_base_mass = False
added_mass_range = [-1., 1.]
push_robots = True
push_interval_s = 4
push_interval_s = 15
max_push_vel_xy = 1.
max_push_ang_vel = 3.
continuous_push = True
max_push_force = 1
max_push_torque = 1
push_force_noise = 1
push_torque_noise = 1
randomize_gains = True
p_gain_range = [0.7, 1.3]
d_gain_range = [0.7, 1.3]
randomize_torques = True
torque_range = [0.8, 1.2]

randomize_link_mass = True
link_mass_range = [-1.0, 1.0]


class rewards:
45 changes: 21 additions & 24 deletions humanoid/envs/custom/humanoid_config.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: BSD-3-Clause
#
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
@@ -30,6 +30,7 @@

from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO


class XBotLCfg(LeggedRobotCfg):
"""
Configuration class for the XBotL humanoid robot.
@@ -44,7 +45,7 @@ class env(LeggedRobotCfg.env):
num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
num_actions = 12
num_envs = 4096
episode_length_s = 24 # episode length in seconds
episode_length_s = 24 # episode length in seconds
use_ref_actions = False

class safety:
@@ -53,7 +54,6 @@ class safety:
vel_limit = 1.0
torque_limit = 0.85


class asset(LeggedRobotCfg.asset):
file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/urdf/XBot-L.urdf'

@@ -90,14 +90,13 @@ class noise:
noise_level = 0.6 # scales other values

class noise_scales:
dof_pos = 0.05
dof_vel = 0.5
ang_vel = 0.1
lin_vel = 0.05
dof_pos = 0.05
dof_vel = 0.5
ang_vel = 0.1
lin_vel = 0.05
quat = 0.03
height_measurements = 0.1


class init_state(LeggedRobotCfg.init_state):
pos = [0.0, 0.0, 0.95]

@@ -118,11 +117,9 @@ class init_state(LeggedRobotCfg.init_state):

class control(LeggedRobotCfg.control):
# PD Drive parameters:
control_type = 'P'

stiffness = {'leg_roll': 200.0, 'leg_pitch': 350.0,'leg_yaw': 200.0,
stiffness = {'leg_roll': 200.0, 'leg_pitch': 350.0, 'leg_yaw': 200.0,
'knee': 350.0, 'ankle': 15}
damping = {'leg_roll': 10, 'leg_pitch': 10,'leg_yaw':
damping = {'leg_roll': 10, 'leg_pitch': 10, 'leg_yaw':
10, 'knee': 10, 'ankle': 10}

# action scale: target angle = actionScale * action + defaultAngle
@@ -142,22 +139,23 @@ class physx(LeggedRobotCfg.sim.physx):
num_velocity_iterations = 0
contact_offset = 0.01 # [m]
rest_offset = 0.0 # [m]
bounce_threshold_velocity = 0.1 # 0.5 #0.5 [m/s]
bounce_threshold_velocity = 0.1 # [m/s]
max_depenetration_velocity = 1.0
max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more
default_buffer_size_multiplier = 5
# 0: never, 1: last sub-step, 2: all sub-steps (default=2)
contact_collection = 2

class domain_rand(LeggedRobotCfg.domain_rand):
class domain_rand:
randomize_friction = True
friction_range = [0.1, 2.0]
randomize_base_mass = True
added_mass_range = [-5., 5.]
restitution_range = [0.0, 0.1]
push_robots = True
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
dynamic_randomization = 0.02

class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -166,25 +164,25 @@ class commands(LeggedRobotCfg.commands):
heading_command = True # if true: compute ang vel command from heading error

class ranges:
lin_vel_x = [-0.3, 0.6] # min max [m/s]
lin_vel_x = [-0.3, 0.6] # min max [m/s]
lin_vel_y = [-0.3, 0.3] # min max [m/s]
ang_vel_yaw = [-0.3, 0.3] # min max [rad/s]
heading = [-3.14, 3.14]

class rewards:
base_height_target = 0.89
base_height_target = 0.89
min_dist = 0.2
max_dist = 0.5
max_dist = 0.5
# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
target_feet_height = 0.05 # m
target_feet_height = 0.06 # m
cycle_time = 0.64 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
tracking_sigma = 5
max_contact_force = 450 # forces above this value are penalized
tracking_sigma = 5
max_contact_force = 700 # forces above this value are penalized

class scales:
# reference motion tracking
joint_pos = 1.6
@@ -195,7 +193,7 @@ class scales:
foot_slip = -0.05
feet_distance = 0.2
knee_distance = 0.2
# contact
# contact
feet_contact_forces = -0.01
# vel tracking
tracking_lin_vel = 1.2
@@ -215,7 +213,6 @@ class scales:
dof_acc = -1e-7
collision = -1.


class normalization:
class obs_scales:
lin_vel = 2.
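
The `commands` section above notes that in heading mode the yaw-rate command is recomputed from the heading error rather than sampled directly. Below is a hedged sketch of how such a heading controller typically looks in legged_gym-style environments; the 0.5 gain, the [-1, 1] clip, and the `wrap_to_pi` helper are assumptions for illustration, not taken from this diff:

```python
import math
import torch

def wrap_to_pi(angle: torch.Tensor) -> torch.Tensor:
    # Wrap angles into [-pi, pi).
    return (angle + math.pi) % (2 * math.pi) - math.pi

def yaw_command_from_heading(heading_target: torch.Tensor,
                             current_heading: torch.Tensor) -> torch.Tensor:
    # Proportional yaw-rate command from the heading error, clipped to a safe range.
    heading_error = wrap_to_pi(heading_target - current_heading)
    return torch.clip(0.5 * heading_error, -1.0, 1.0)  # [rad/s]

# Example: target headings drawn from ranges.heading = [-3.14, 3.14] above.
targets = torch.empty(4).uniform_(-3.14, 3.14)
yaw_cmd = yaw_command_from_heading(targets, torch.zeros(4))
```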
25 changes: 10 additions & 15 deletions humanoid/envs/custom/humanoid_env.py
@@ -81,14 +81,6 @@ def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device,
self.reset_idx(torch.tensor(range(self.num_envs), device=self.device))
self.compute_observations()


def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless):
super().__init__(cfg, sim_params, physics_engine, sim_device, headless)
self.last_feet_z = 0.05
self.feet_height = torch.zeros((self.num_envs, 2), device=self.device)
self.reset_idx(torch.tensor(range(self.num_envs), device=self.device))
self.compute_observations()

def _push_robots(self):
""" Random pushes the robots. Emulates an impulse by setting a randomized base velocity.
"""
@@ -121,6 +113,7 @@ def _get_gait_phase(self):
stance_mask[:, 0] = sin_pos >= 0
# right foot stance
stance_mask[:, 1] = sin_pos < 0
# Double support phase
stance_mask[torch.abs(sin_pos) < 0.1] = 1

return stance_mask
@@ -134,18 +127,18 @@ def compute_ref_state(self):
self.ref_dof_pos = torch.zeros_like(self.dof_pos)
scale_1 = self.cfg.rewards.target_joint_pos_scale
scale_2 = 2 * scale_1
# left swing
# left foot stance phase set to default joint pos
sin_pos_l[sin_pos_l > 0] = 0
self.ref_dof_pos[:, 2] = sin_pos_l * scale_1
self.ref_dof_pos[:, 3] = sin_pos_l * scale_2
self.ref_dof_pos[:, 4] = sin_pos_l * scale_1
# right
# right foot stance phase set to default joint pos
sin_pos_r[sin_pos_r < 0] = 0
self.ref_dof_pos[:, 8] = sin_pos_r * scale_1
self.ref_dof_pos[:, 9] = sin_pos_r * scale_2
self.ref_dof_pos[:, 10] = sin_pos_r * scale_1

self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0.
# Double support phase
self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0

self.ref_action = 2 * self.ref_dof_pos

@@ -197,8 +190,10 @@ def _get_noise_scale_vec(self, cfg):
def step(self, actions):
if self.cfg.env.use_ref_actions:
actions += self.ref_action
# dynamic randomization
delay = torch.rand((self.num_envs, 1), device=self.device)
actions = (1 - delay) * actions + delay * self.actions
actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
return super().step(actions)


@@ -465,8 +460,8 @@ def _reward_feet_clearance(self):
# Compute swing mask
swing_mask = 1 - self._get_gait_phase()

# feet height should larger than target feet height at the peak
rew_pos = (self.feet_height > self.cfg.rewards.target_feet_height)
# feet height should be closed to target feet height at the peak
rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.01
rew_pos = torch.sum(rew_pos * swing_mask, dim=1)
self.feet_height *= ~contact
return rew_pos
@@ -542,4 +537,4 @@ def _reward_action_smoothness(self):
term_2 = torch.sum(torch.square(
self.actions + self.last_last_actions - 2 * self.last_actions), dim=1)
term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1)
return term_1 + term_2 + term_3
return term_1 + term_2 + term_3
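
The "dynamic randomization" added to `step()` above blends each new action with the previous one by a random per-environment delay factor and then perturbs it with multiplicative Gaussian noise scaled by `domain_rand.dynamic_randomization` (0.02 in the config). A minimal sketch of that logic in isolation; the function name and example shapes are illustrative:

```python
import torch

def randomize_actions(actions: torch.Tensor,
                      last_actions: torch.Tensor,
                      dynamic_randomization: float = 0.02) -> torch.Tensor:
    num_envs = actions.shape[0]
    # Random interpolation toward the previous action emulates actuation latency.
    delay = torch.rand((num_envs, 1), device=actions.device)
    delayed = (1 - delay) * actions + delay * last_actions
    # Multiplicative noise: perturbation proportional to the action magnitude.
    return delayed + dynamic_randomization * torch.randn_like(delayed) * delayed

# Example with 4096 envs and 12 actuated joints (the XBotL configuration above).
a_t, a_prev = torch.randn(4096, 12), torch.randn(4096, 12)
a_applied = randomize_actions(a_t, a_prev)
```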
9 changes: 5 additions & 4 deletions humanoid/scripts/sim2sim.py
@@ -118,6 +118,7 @@ def run_mujoco(policy, cfg):
q = q[-cfg.env.num_actions:]
dq = dq[-cfg.env.num_actions:]

# 1000hz -> 100hz
if count_lowlevel % cfg.sim_config.decimation == 0:

obs = np.zeros([1, cfg.env.num_single_obs], dtype=np.float32)
@@ -148,18 +149,18 @@ def run_mujoco(policy, cfg):

target_q = action * cfg.control.action_scale

count_lowlevel += 1

target_dq = np.zeros((cfg.env.num_actions), dtype=np.double)
# Generate PD control
tau = pd_control(target_q, q, cfg.robot_config.kps,
target_dq, dq, cfg.robot_config.kds) # Calc torques
tau = np.clip(tau, -cfg.robot_config.tau_limit, cfg.robot_config.tau_limit) # Clamp torques

data.ctrl = tau

mujoco.mj_step(model, data)
viewer.render()
count_lowlevel += 1

viewer.close()


@@ -180,8 +181,8 @@ class sim_config:
else:
mujoco_model_path = f'{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/mjcf/XBot-L.xml'
sim_duration = 60.0
dt = 0.002
decimation = 5
dt = 0.001
decimation = 10

class robot_config:
kps = np.array([200, 200, 350, 350, 15, 15, 200, 200, 350, 350, 15, 15], dtype=np.double)
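
With `dt = 0.001` and `decimation = 10` in `sim_config` above, the MuJoCo model is stepped at 1000 Hz while the observation/policy update runs every 10th step, i.e. at 100 Hz (matching the "1000hz -> 100hz" comment), so the effective policy period stays at 0.01 s despite the timestep change. A hedged sketch of that loop timing, with a stand-in for the policy and the MuJoCo step call left as a comment; the `pd_control` helper mirrors the one used in sim2sim.py and the shapes are illustrative:

```python
import numpy as np

def pd_control(target_q, q, kp, target_dq, dq, kd):
    # Joint-space PD torque: stiffness on position error, damping on velocity error.
    return kp * (target_q - q) + kd * (target_dq - dq)

dt, decimation, sim_duration = 0.001, 10, 1.0           # 1 s of simulated time
target_q = np.zeros(12)                                  # held between policy updates
count_lowlevel = 0
for _ in range(int(sim_duration / dt)):                  # 1000 Hz low-level loop
    if count_lowlevel % decimation == 0:                 # 100 Hz policy update
        target_q = np.random.uniform(-0.1, 0.1, 12)      # stand-in for policy(obs) * action_scale
    tau = pd_control(target_q, np.zeros(12), 200.0,
                     np.zeros(12), np.zeros(12), 10.0)   # torques recomputed every step
    # data.ctrl = tau; mujoco.mj_step(model, data) would go here in the real script.
    count_lowlevel += 1
```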
2 changes: 1 addition & 1 deletion resources/robots/XBot/mjcf/XBot-L-terrain.xml
@@ -1,6 +1,6 @@
<mujoco model="XBot-L">
<compiler angle="radian" meshdir="../meshes/" eulerseq="zyx"/>
<option timestep='0.002' iterations='50' solver='PGS' gravity='0 0 -9.81'>
<option timestep='0.001' iterations='50' solver='PGS' gravity='0 0 -9.81'>
<flag sensornoise="enable" frictionloss="enable"/>
</option>
<size njmax="500" nconmax="100" />