Skip to content

Commit

Permalink
sim stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
codekansas committed Apr 1, 2024
1 parent 1e9f2fb commit d3cb69a
Show file tree
Hide file tree
Showing 6 changed files with 369 additions and 359 deletions.
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@ all:
@echo "$$HELP_MESSAGE"
.PHONY: all

# ------------------------ #
# Train #
# ------------------------ #

train-one:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1

train:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1024 --headless

# ------------------------ #
# Build #
# ------------------------ #
Expand Down
6 changes: 3 additions & 3 deletions sim/humanoid_gym/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
quickly.
"""

from .humanoid_config import XBotLCfg, XBotLCfgPPO
from .humanoid_env import XBotLFreeEnv
from .humanoid_config import StompyCfg, StompyPPO
from .humanoid_env import StompyFreeEnv


def register_tasks() -> None:
Expand All @@ -19,7 +19,7 @@ def register_tasks() -> None:
"""
from humanoid.utils.task_registry import task_registry

task_registry.register("humanoid_ppo", XBotLFreeEnv, XBotLCfg(), XBotLCfgPPO())
task_registry.register("humanoid_ppo", StompyFreeEnv, StompyCfg(), StompyPPO())


register_tasks()
231 changes: 93 additions & 138 deletions sim/humanoid_gym/envs/humanoid_config.py
Original file line number Diff line number Diff line change
@@ -1,149 +1,103 @@
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.

"""Defines the environment configuration for the Stompy humanoid robot."""

from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO

from sim.env import stompy_urdf_path
from sim.stompy.joints import Stompy

NUM_JOINTS = len(Stompy.all_joints())

class XBotLCfg(LeggedRobotCfg):
"""Configuration class for the XBotL humanoid robot."""

class env(LeggedRobotCfg.env):
# change the observation dim
frame_stack = 15
c_frame_stack = 3
num_single_obs = 47
class StompyCfg(LeggedRobotCfg):
"""Configuration class for the Stompy humanoid robot."""

class env(LeggedRobotCfg.env): # noqa: N801
# Change the observation dim
frame_stack = 15 # Number of frames in an observation
c_frame_stack = 3 # Number of frames in a critic observation
num_single_obs = 47 # Size of a single observation (for the actor policy)
num_observations = int(frame_stack * num_single_obs)
single_num_privileged_obs = 73
single_num_privileged_obs = 73 # Size of a single privileged observation (for the critic)
num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
num_actions = 12
num_actions = NUM_JOINTS # Torque command for each joint.
num_envs = 4096
episode_length_s = 24 # episode length in seconds
episode_length_s = 24 # Maximum episode length
use_ref_actions = False

class safety:
# safety factors
class safety: # noqa: N801
pos_limit = 1.0
vel_limit = 1.0
torque_limit = 0.85
torque_limit = 1.0

class asset(LeggedRobotCfg.asset): # noqa: N801
file = str(stompy_urdf_path())

class asset(LeggedRobotCfg.asset):
file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/urdf/XBot-L.urdf"
name = "stompy"
foot_name = "rubber" # Something like "..._rubber_1"
knee_name = "belt_knee" # Something like ..._belt_knee_left...""

name = "XBot-L"
foot_name = "ankle_roll"
knee_name = "knee"
# Terminate the episode upon contacting a delicate part.
terminate_after_contacts_on = ["head", "gripper"]

terminate_after_contacts_on = ["base_link"]
penalize_contacts_on = ["base_link"]
self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter
# Penalize any non-foot contacts.
penalize_contacts_on = ["shoulder", "battery"]

self_collisions = 0 # 1 to disable, 0 to enable
collapse_fixed_joints = True
flip_visual_attachments = False
replace_cylinder_with_capsule = False
fix_base_link = False

class terrain(LeggedRobotCfg.terrain):
mesh_type = "plane"
# mesh_type = 'trimesh'
class terrain(LeggedRobotCfg.terrain): # noqa: N801
mesh_type = "plane" # plane; trimesh
curriculum = False
# rough terrain only:
# For rough terrain only:
measure_heights = False
static_friction = 0.6
dynamic_friction = 0.6
terrain_length = 8.0
terrain_width = 8.0
num_rows = 20 # number of terrain rows (levels)
num_cols = 20 # number of terrain cols (types)
max_init_terrain_level = 10 # starting curriculum state
num_rows = 20 # Number of terrain rows (levels)
num_cols = 20 # Number of terrain cols (types)
max_init_terrain_level = 10 # Starting curriculum state
# plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down
terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0]
restitution = 0.0

class noise:
class noise: # noqa: N801
add_noise = True
noise_level = 0.6 # scales other values
noise_level = 0.6 # Scales other values

class noise_scales:
class noise_scales: # noqa: N801
dof_pos = 0.05
dof_vel = 0.5
ang_vel = 0.1
lin_vel = 0.05
quat = 0.03
height_measurements = 0.1

class init_state(LeggedRobotCfg.init_state):
class init_state(LeggedRobotCfg.init_state): # noqa: N801
pos = [0.0, 0.0, 0.95]
default_joint_angles = {k: 0.0 for k in Stompy.all_joints()}

class sim(LeggedRobotCfg.sim): # noqa: N801
dt = 0.005
substeps = 1

default_joint_angles = { # = target angles [rad] when action = 0.0
"left_leg_yaw_joint": -0.0,
"left_leg_roll_joint": -0.0,
"left_leg_pitch_joint": 0.0,
"left_knee_joint": 0.0,
"left_ankle_pitch_joint": 0.0,
"left_ankle_roll_joint": 0.0,
"right_leg_yaw_joint": 0.0,
"right_leg_roll_joint": 0.0,
"right_leg_pitch_joint": -0.0,
"right_knee_joint": -0.0,
"right_ankle_pitch_joint": -0.0,
"right_ankle_roll_joint": 0.0,
}

class control(LeggedRobotCfg.control):
# PD Drive parameters:
stiffness = {"leg_roll": 200.0, "leg_pitch": 350.0, "leg_yaw": 200.0, "knee": 350.0, "ankle": 15}
damping = {"leg_roll": 10, "leg_pitch": 10, "leg_yaw": 10, "knee": 10, "ankle": 10}

# action scale: target angle = actionScale * action + defaultAngle
action_scale = 0.25
# decimation: Number of control action updates @ sim DT per policy DT
decimation = 10 # 100hz

class sim(LeggedRobotCfg.sim):
dt = 0.001 # 1000 Hz
substeps = 1 # 2
up_axis = 1 # 0 is y, 1 is z

class physx(LeggedRobotCfg.sim.physx):
class physx(LeggedRobotCfg.sim.physx): # noqa: N801
num_threads = 10
solver_type = 1 # 0: pgs, 1: tgs
solver_type = 1
num_position_iterations = 4
num_velocity_iterations = 0
contact_offset = 0.01 # [m]
rest_offset = 0.0 # [m]
bounce_threshold_velocity = 0.1 # [m/s]
max_depenetration_velocity = 1.0
max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more
contact_offset = 0.02
rest_offset = 0.01
bounce_threshold_velocity = 1.0
max_depenetration_velocity = 0.5
max_gpu_contact_pairs = 2**24
default_buffer_size_multiplier = 5
# 0: never, 1: last sub-step, 2: all sub-steps (default=2)
contact_collection = 2
contact_collection = 2 # gymapi.CC_ALL_SUBSTEPS

class domain_rand:
class domain_rand: # noqa: N801
randomize_friction = True
friction_range = [0.1, 2.0]
randomize_base_mass = True
Expand All @@ -154,64 +108,64 @@ class domain_rand:
max_push_ang_vel = 0.4
dynamic_randomization = 0.02

class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
num_commands = 4
resampling_time = 8.0 # time before command are changed[s]
heading_command = True # if true: compute ang vel command from heading error
class commands(LeggedRobotCfg.commands): # noqa: N801
num_commands = 8 # Command space is the joint angles for the shoulder and elbow.
resampling_time = 8.0

class ranges:
lin_vel_x = [-0.3, 0.6] # min max [m/s]
lin_vel_y = [-0.3, 0.3] # min max [m/s]
ang_vel_yaw = [-0.3, 0.3] # min max [rad/s]
heading = [-3.14, 3.14]

class rewards:
class rewards: # noqa: N801
base_height_target = 0.89
min_dist = 0.2
max_dist = 0.5
# put some settings here for LLM parameter tuning

# Put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
target_feet_height = 0.06 # m
cycle_time = 0.64 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)

# If truem negative total rewards are clipped at zero.
only_positive_rewards = True
# tracking reward = exp(error*sigma)
tracking_sigma = 5
max_contact_force = 700 # forces above this value are penalized

class scales:
# reference motion tracking
# Max contact force should be a bit above the weight of the robot. In
# our case the robot weighs 62 Kg, so we set it to 700.
max_contact_force = 700

class scales: # noqa: N801
# Reference motion tracking
joint_pos = 1.6
feet_clearance = 1.0
feet_contact_number = 1.2
# gait

# Gait
feet_air_time = 1.0
foot_slip = -0.05
feet_distance = 0.2
knee_distance = 0.2
# contact

# Contact
feet_contact_forces = -0.01
# vel tracking

# Velocity tracking
tracking_lin_vel = 1.2
tracking_ang_vel = 1.1
vel_mismatch_exp = 0.5 # lin_z; ang x,y
low_speed = 0.2
track_vel_hard = 0.5
# base pos

# Base position
default_joint_pos = 0.5
orientation = 1.0
base_height = 0.2
base_acc = 0.2
# energy

# Energy
action_smoothness = -0.002
torques = -1e-5
dof_vel = -5e-4
dof_acc = -1e-7
collision = -1.0

class normalization:
class obs_scales:
class normalization: # noqa: N801
class obs_scales: # noqa: N801
lin_vel = 2.0
ang_vel = 1.0
dof_pos = 1.0
Expand All @@ -223,35 +177,36 @@ class obs_scales:
clip_actions = 18.0


class XBotLCfgPPO(LeggedRobotCfgPPO):
class StompyPPO(LeggedRobotCfgPPO):
seed = 5
runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner
runner_class_name = "OnPolicyRunner"

class policy:
class policy: # noqa: N801
init_noise_std = 1.0
actor_hidden_dims = [512, 256, 128]
critic_hidden_dims = [768, 256, 128]

class algorithm(LeggedRobotCfgPPO.algorithm):
class algorithm(LeggedRobotCfgPPO.algorithm): # noqa: N801
entropy_coef = 0.001
learning_rate = 1e-5
num_learning_epochs = 2
gamma = 0.994
lam = 0.9
num_mini_batches = 4

class runner:
class runner: # noqa: N801
policy_class_name = "ActorCritic"
algorithm_class_name = "PPO"
num_steps_per_env = 60 # per iteration
max_iterations = 3001 # number of policy updates
num_steps_per_env = 60
max_iterations = 3001

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "XBot_ppo"
save_interval = 100
experiment_name = "stompy_ppo"
run_name = ""
# load and resume

# Load and resume
resume = False
load_run = -1 # -1 = last run
checkpoint = -1 # -1 = last saved model
resume_path = None # updated from load_run and chkpt
resume_path = None # Updated from load_run and checkpoint.
Loading

0 comments on commit d3cb69a

Please sign in to comment.