Merge pull request #3 from roboterax/dev
Enhance Env Randomization, Optimize Performance
zlw21gxy authored Mar 13, 2024
2 parents f565d73 + 807b3e1 commit bf7128a
Showing 8 changed files with 48 additions and 88 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -57,9 +57,9 @@ Denoising World Model Learning(DWL) presents an advanced sim-to-real framework t
#### Examples

```bash
# Launching PPO Policy Training for 'v1' Across 8192 Environments
# Launching PPO Policy Training for 'v1' Across 4096 Environments
# This command initiates the PPO algorithm-based training for the humanoid task.
python scripts/train.py --task=humanoid_ppo --run_name v1 --headless --num_envs 8192
python scripts/train.py --task=humanoid_ppo --run_name v1 --headless --num_envs 4096

# Evaluating the Trained PPO Policy 'v1'
# This command loads the 'v1' policy for performance assessment in its environment.
29 changes: 5 additions & 24 deletions humanoid/envs/base/legged_robot.py
@@ -258,21 +258,15 @@ def _process_rigid_shape_props(self, props, env_id):
if env_id==0:
# prepare friction randomization
friction_range = self.cfg.domain_rand.friction_range
restitution_range = self.cfg.domain_rand.restitution_range
num_buckets = 256
bucket_ids = torch.randint(0, num_buckets, (self.num_envs, 1))
friction_buckets = torch_rand_float(friction_range[0], friction_range[1], (num_buckets,1), device='cpu')
restitution_buckets = torch_rand_float(restitution_range[0], restitution_range[1], (num_buckets,1), device='cpu')
self.friction_coeffs = friction_buckets[bucket_ids]
self.restitution_coeffs = restitution_buckets[bucket_ids]

for s in range(len(props)):
props[s].friction = self.friction_coeffs[env_id]
props[s].restitution = self.restitution_coeffs[env_id]

self.env_frictions[env_id] = self.friction_coeffs[env_id]

return props


def _process_dof_props(self, props, env_id):
""" Callback allowing to store/change/randomize the DOF properties of each environment.
@@ -354,12 +348,9 @@ def _compute_torques(self, actions):
"""
# pd controller
actions_scaled = actions * self.cfg.control.action_scale
# control_type = self.cfg.control.control_type
p_gains = self.p_gains
d_gains = self.d_gains
torques = p_gains * \
(actions_scaled + self.default_dof_pos -
self.dof_pos) - d_gains * self.dof_vel
torques = p_gains * (actions_scaled + self.default_dof_pos - self.dof_pos) - d_gains * self.dof_vel
return torch.clip(torques, -self.torque_limits, self.torque_limits)


@@ -389,12 +380,12 @@ def _reset_root_states(self, env_ids):
if self.custom_origins:
self.root_states[env_ids] = self.base_init_state
self.root_states[env_ids, :3] += self.env_origins[env_ids]
self.root_states[env_ids, :2] += torch_rand_float(-4., 4., (len(env_ids), 2), device=self.device) # xy position within 1m of the center
self.root_states[env_ids, :2] += torch_rand_float(-1., 1., (len(env_ids), 2), device=self.device) # xy position within 1m of the center
else:
self.root_states[env_ids] = self.base_init_state
self.root_states[env_ids, :3] += self.env_origins[env_ids]
# base velocities
# self.root_states[env_ids, 7:13] = torch_rand_float(-0.01, 0.01, (len(env_ids), 6), device=self.device) # [7:10]: lin vel, [10:13]: ang vel
# self.root_states[env_ids, 7:13] = torch_rand_float(-0.05, 0.05, (len(env_ids), 6), device=self.device) # [7:10]: lin vel, [10:13]: ang vel
if self.cfg.asset.fix_base_link:
self.root_states[env_ids, 7:13] = 0
self.root_states[env_ids, 2] += 1.8
@@ -403,15 +394,6 @@ def _reset_root_states(self, env_ids):
gymtorch.unwrap_tensor(self.root_states),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))

# def _push_robots(self):
# """ Random pushes the robots. Emulates an impulse by setting a randomized base velocity.
# """
# max_vel = self.cfg.domain_rand.max_push_vel_xy
# max_push_angular = self.cfg.domain_rand.max_push_ang_vel
# self.root_states[:, 7:9] = torch_rand_float(-max_vel, max_vel, (self.num_envs, 2), device=self.device) # lin vel x/y
# self.root_states[:, 10:13] = torch_rand_float(-max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device) # ang vel
# self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))


def _update_terrain_curriculum(self, env_ids):
""" Implements the game-inspired curriculum.
@@ -514,8 +496,7 @@ def _init_buffers(self):
if not found:
self.p_gains[:, i] = 0.
self.d_gains[:, i] = 0.
if self.cfg.control.control_type in ["P", "V"]:
print(f"PD gain of joint {name} were not defined, setting them to zero")
print(f"PD gain of joint {name} were not defined, setting them to zero")


self.rand_push_force = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device)
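
The friction randomization in `_process_rigid_shape_props` above draws per-environment friction coefficients from a small fixed set of "buckets" rather than sampling each environment independently, presumably to keep the number of distinct physics material values low. Below is a minimal, self-contained sketch of that sampling, using plain `torch.rand` in place of `torch_rand_float`; the function name, defaults, and shapes are illustrative assumptions, not code from the repository:

```python
import torch

def sample_bucketed_frictions(num_envs: int,
                              friction_range=(0.1, 2.0),
                              num_buckets: int = 256) -> torch.Tensor:
    # Draw one friction value per bucket, then assign each env a random bucket id.
    lo, hi = friction_range
    friction_buckets = lo + (hi - lo) * torch.rand(num_buckets, 1)   # (num_buckets, 1)
    bucket_ids = torch.randint(0, num_buckets, (num_envs, 1))        # (num_envs, 1)
    # Gather one coefficient per environment -> shape (num_envs, 1).
    return friction_buckets[bucket_ids].squeeze(-1)

# Example: one coefficient per environment; in the code above it is then written
# into every rigid-shape prop of that env (props[s].friction = friction_coeffs[env_id]).
friction_coeffs = sample_bucketed_frictions(num_envs=4096)
```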
20 changes: 3 additions & 17 deletions humanoid/envs/base/legged_robot_config.py
@@ -88,7 +88,6 @@ class init_state:
"joint_b": 0.}

class control:
control_type = 'P' # P: position, V: velocity, T: torques
# PD Drive parameters:
stiffness = {'joint_a': 10.0, 'joint_b': 15.} # [N*m/rad]
damping = {'joint_a': 1.0, 'joint_b': 1.5} # [N*m*s/rad]
@@ -119,28 +118,15 @@ class asset:
armature = 0.
thickness = 0.01


class domain_rand:
randomize_friction = True
friction_range = [0.5, 1.25]
randomize_base_mass = True
randomize_base_mass = False
added_mass_range = [-1., 1.]
push_robots = True
push_interval_s = 4
push_interval_s = 15
max_push_vel_xy = 1.
max_push_ang_vel = 3.
continuous_push = True
max_push_force = 1
max_push_torque = 1
push_force_noise = 1
push_torque_noise = 1
randomize_gains = True
p_gain_range = [0.7, 1.3]
d_gain_range = [0.7, 1.3]
randomize_torques = True
torque_range = [0.8, 1.2]

randomize_link_mass = True
link_mass_range = [-1.0, 1.0]


class rewards:
45 changes: 21 additions & 24 deletions humanoid/envs/custom/humanoid_config.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: BSD-3-Clause
#
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
@@ -30,6 +30,7 @@

from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO


class XBotLCfg(LeggedRobotCfg):
"""
Configuration class for the XBotL humanoid robot.
@@ -44,7 +45,7 @@ class env(LeggedRobotCfg.env):
num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
num_actions = 12
num_envs = 4096
episode_length_s = 24 # episode length in seconds
episode_length_s = 24 # episode length in seconds
use_ref_actions = False

class safety:
@@ -53,7 +54,6 @@ class safety:
vel_limit = 1.0
torque_limit = 0.85


class asset(LeggedRobotCfg.asset):
file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/urdf/XBot-L.urdf'

@@ -90,14 +90,13 @@ class noise:
noise_level = 0.6 # scales other values

class noise_scales:
dof_pos = 0.05
dof_vel = 0.5
ang_vel = 0.1
lin_vel = 0.05
dof_pos = 0.05
dof_vel = 0.5
ang_vel = 0.1
lin_vel = 0.05
quat = 0.03
height_measurements = 0.1


class init_state(LeggedRobotCfg.init_state):
pos = [0.0, 0.0, 0.95]

@@ -118,11 +117,9 @@ class init_state(LeggedRobotCfg.init_state):

class control(LeggedRobotCfg.control):
# PD Drive parameters:
control_type = 'P'

stiffness = {'leg_roll': 200.0, 'leg_pitch': 350.0,'leg_yaw': 200.0,
stiffness = {'leg_roll': 200.0, 'leg_pitch': 350.0, 'leg_yaw': 200.0,
'knee': 350.0, 'ankle': 15}
damping = {'leg_roll': 10, 'leg_pitch': 10,'leg_yaw':
damping = {'leg_roll': 10, 'leg_pitch': 10, 'leg_yaw':
10, 'knee': 10, 'ankle': 10}

# action scale: target angle = actionScale * action + defaultAngle
@@ -142,22 +139,23 @@ class physx(LeggedRobotCfg.sim.physx):
num_velocity_iterations = 0
contact_offset = 0.01 # [m]
rest_offset = 0.0 # [m]
bounce_threshold_velocity = 0.1 # 0.5 #0.5 [m/s]
bounce_threshold_velocity = 0.1 # [m/s]
max_depenetration_velocity = 1.0
max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more
default_buffer_size_multiplier = 5
# 0: never, 1: last sub-step, 2: all sub-steps (default=2)
contact_collection = 2

class domain_rand(LeggedRobotCfg.domain_rand):
class domain_rand:
randomize_friction = True
friction_range = [0.1, 2.0]
randomize_base_mass = True
added_mass_range = [-5., 5.]
restitution_range = [0.0, 0.1]
push_robots = True
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
dynamic_randomization = 0.02

class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -166,25 +164,25 @@ class commands(LeggedRobotCfg.commands):
heading_command = True # if true: compute ang vel command from heading error

class ranges:
lin_vel_x = [-0.3, 0.6] # min max [m/s]
lin_vel_x = [-0.3, 0.6] # min max [m/s]
lin_vel_y = [-0.3, 0.3] # min max [m/s]
ang_vel_yaw = [-0.3, 0.3] # min max [rad/s]
heading = [-3.14, 3.14]

class rewards:
base_height_target = 0.89
base_height_target = 0.89
min_dist = 0.2
max_dist = 0.5
max_dist = 0.5
# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
target_feet_height = 0.05 # m
target_feet_height = 0.06 # m
cycle_time = 0.64 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
tracking_sigma = 5
max_contact_force = 450 # forces above this value are penalized
tracking_sigma = 5
max_contact_force = 700 # forces above this value are penalized

class scales:
# reference motion tracking
joint_pos = 1.6
@@ -195,7 +193,7 @@ class scales:
foot_slip = -0.05
feet_distance = 0.2
knee_distance = 0.2
# contact
# contact
feet_contact_forces = -0.01
# vel tracking
tracking_lin_vel = 1.2
@@ -215,7 +213,6 @@ class scales:
dof_acc = -1e-7
collision = -1.


class normalization:
class obs_scales:
lin_vel = 2.
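
The `commands` section above notes that in heading mode the yaw-rate command is recomputed from the heading error rather than sampled directly. Below is a hedged sketch of how such a heading controller typically looks in legged_gym-style environments; the 0.5 gain, the [-1, 1] clip, and the `wrap_to_pi` helper are assumptions for illustration, not taken from this diff:

```python
import math
import torch

def wrap_to_pi(angle: torch.Tensor) -> torch.Tensor:
    # Wrap angles into [-pi, pi).
    return (angle + math.pi) % (2 * math.pi) - math.pi

def yaw_command_from_heading(heading_target: torch.Tensor,
                             current_heading: torch.Tensor) -> torch.Tensor:
    # Proportional yaw-rate command from the heading error, clipped to a safe range.
    heading_error = wrap_to_pi(heading_target - current_heading)
    return torch.clip(0.5 * heading_error, -1.0, 1.0)  # [rad/s]

# Example: target headings drawn from ranges.heading = [-3.14, 3.14] above.
targets = torch.empty(4).uniform_(-3.14, 3.14)
yaw_cmd = yaw_command_from_heading(targets, torch.zeros(4))
```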
25 changes: 10 additions & 15 deletions humanoid/envs/custom/humanoid_env.py
@@ -81,14 +81,6 @@ def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device,
self.reset_idx(torch.tensor(range(self.num_envs), device=self.device))
self.compute_observations()


def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless):
super().__init__(cfg, sim_params, physics_engine, sim_device, headless)
self.last_feet_z = 0.05
self.feet_height = torch.zeros((self.num_envs, 2), device=self.device)
self.reset_idx(torch.tensor(range(self.num_envs), device=self.device))
self.compute_observations()

def _push_robots(self):
""" Random pushes the robots. Emulates an impulse by setting a randomized base velocity.
"""
@@ -121,6 +113,7 @@ def _get_gait_phase(self):
stance_mask[:, 0] = sin_pos >= 0
# right foot stance
stance_mask[:, 1] = sin_pos < 0
# Double support phase
stance_mask[torch.abs(sin_pos) < 0.1] = 1

return stance_mask
@@ -134,18 +127,18 @@ def compute_ref_state(self):
self.ref_dof_pos = torch.zeros_like(self.dof_pos)
scale_1 = self.cfg.rewards.target_joint_pos_scale
scale_2 = 2 * scale_1
# left swing
# left foot stance phase set to default joint pos
sin_pos_l[sin_pos_l > 0] = 0
self.ref_dof_pos[:, 2] = sin_pos_l * scale_1
self.ref_dof_pos[:, 3] = sin_pos_l * scale_2
self.ref_dof_pos[:, 4] = sin_pos_l * scale_1
# right
# right foot stance phase set to default joint pos
sin_pos_r[sin_pos_r < 0] = 0
self.ref_dof_pos[:, 8] = sin_pos_r * scale_1
self.ref_dof_pos[:, 9] = sin_pos_r * scale_2
self.ref_dof_pos[:, 10] = sin_pos_r * scale_1

self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0.
# Double support phase
self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0

self.ref_action = 2 * self.ref_dof_pos

@@ -197,8 +190,10 @@ def _get_noise_scale_vec(self, cfg):
def step(self, actions):
if self.cfg.env.use_ref_actions:
actions += self.ref_action
# dynamic randomization
delay = torch.rand((self.num_envs, 1), device=self.device)
actions = (1 - delay) * actions + delay * self.actions
actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
return super().step(actions)


@@ -465,8 +460,8 @@ def _reward_feet_clearance(self):
# Compute swing mask
swing_mask = 1 - self._get_gait_phase()

# feet height should larger than target feet height at the peak
rew_pos = (self.feet_height > self.cfg.rewards.target_feet_height)
# feet height should be closed to target feet height at the peak
rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.01
rew_pos = torch.sum(rew_pos * swing_mask, dim=1)
self.feet_height *= ~contact
return rew_pos
@@ -542,4 +537,4 @@ def _reward_action_smoothness(self):
term_2 = torch.sum(torch.square(
self.actions + self.last_last_actions - 2 * self.last_actions), dim=1)
term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1)
return term_1 + term_2 + term_3
return term_1 + term_2 + term_3
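
The "dynamic randomization" added to `step()` above blends each new action with the previous one by a random per-environment delay factor and then perturbs it with multiplicative Gaussian noise scaled by `domain_rand.dynamic_randomization` (0.02 in the config). A minimal sketch of that logic in isolation; the function name and example shapes are illustrative:

```python
import torch

def randomize_actions(actions: torch.Tensor,
                      last_actions: torch.Tensor,
                      dynamic_randomization: float = 0.02) -> torch.Tensor:
    num_envs = actions.shape[0]
    # Random interpolation toward the previous action emulates actuation latency.
    delay = torch.rand((num_envs, 1), device=actions.device)
    delayed = (1 - delay) * actions + delay * last_actions
    # Multiplicative noise: perturbation proportional to the action magnitude.
    return delayed + dynamic_randomization * torch.randn_like(delayed) * delayed

# Example with 4096 envs and 12 actuated joints (the XBotL configuration above).
a_t, a_prev = torch.randn(4096, 12), torch.randn(4096, 12)
a_applied = randomize_actions(a_t, a_prev)
```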
9 changes: 5 additions & 4 deletions humanoid/scripts/sim2sim.py
@@ -118,6 +118,7 @@ def run_mujoco(policy, cfg):
q = q[-cfg.env.num_actions:]
dq = dq[-cfg.env.num_actions:]

# 1000hz -> 100hz
if count_lowlevel % cfg.sim_config.decimation == 0:

obs = np.zeros([1, cfg.env.num_single_obs], dtype=np.float32)
@@ -148,18 +149,18 @@ def run_mujoco(policy, cfg):

target_q = action * cfg.control.action_scale

count_lowlevel += 1

target_dq = np.zeros((cfg.env.num_actions), dtype=np.double)
# Generate PD control
tau = pd_control(target_q, q, cfg.robot_config.kps,
target_dq, dq, cfg.robot_config.kds) # Calc torques
tau = np.clip(tau, -cfg.robot_config.tau_limit, cfg.robot_config.tau_limit) # Clamp torques

data.ctrl = tau

mujoco.mj_step(model, data)
viewer.render()
count_lowlevel += 1

viewer.close()


@@ -180,8 +181,8 @@ class sim_config:
else:
mujoco_model_path = f'{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/mjcf/XBot-L.xml'
sim_duration = 60.0
dt = 0.002
decimation = 5
dt = 0.001
decimation = 10

class robot_config:
kps = np.array([200, 200, 350, 350, 15, 15, 200, 200, 350, 350, 15, 15], dtype=np.double)
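
With `dt = 0.001` and `decimation = 10` in `sim_config` above, the MuJoCo model is stepped at 1000 Hz while the observation/policy update runs every 10th step, i.e. at 100 Hz (matching the "1000hz -> 100hz" comment), so the effective policy period stays at 0.01 s despite the timestep change. A hedged sketch of that loop timing, with a stand-in for the policy and the MuJoCo step call left as a comment; the `pd_control` helper mirrors the one used in sim2sim.py and the shapes are illustrative:

```python
import numpy as np

def pd_control(target_q, q, kp, target_dq, dq, kd):
    # Joint-space PD torque: stiffness on position error, damping on velocity error.
    return kp * (target_q - q) + kd * (target_dq - dq)

dt, decimation, sim_duration = 0.001, 10, 1.0           # 1 s of simulated time
target_q = np.zeros(12)                                  # held between policy updates
count_lowlevel = 0
for _ in range(int(sim_duration / dt)):                  # 1000 Hz low-level loop
    if count_lowlevel % decimation == 0:                 # 100 Hz policy update
        target_q = np.random.uniform(-0.1, 0.1, 12)      # stand-in for policy(obs) * action_scale
    tau = pd_control(target_q, np.zeros(12), 200.0,
                     np.zeros(12), np.zeros(12), 10.0)   # torques recomputed every step
    # data.ctrl = tau; mujoco.mj_step(model, data) would go here in the real script.
    count_lowlevel += 1
```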
2 changes: 1 addition & 1 deletion resources/robots/XBot/mjcf/XBot-L-terrain.xml
@@ -1,6 +1,6 @@
<mujoco model="XBot-L">
<compiler angle="radian" meshdir="../meshes/" eulerseq="zyx"/>
<option timestep='0.002' iterations='50' solver='PGS' gravity='0 0 -9.81'>
<option timestep='0.001' iterations='50' solver='PGS' gravity='0 0 -9.81'>
<flag sensornoise="enable" frictionloss="enable"/>
</option>
<size njmax="500" nconmax="100" />