sim stuff

kscalelabs · Apr 1, 2024 · d3cb69a · d3cb69a
1 parent 1e9f2fb
commit d3cb69a
Show file tree

Hide file tree

Showing 6 changed files with 369 additions and 359 deletions.
diff --git a/Makefile b/Makefile
@@ -22,6 +22,16 @@ all:
 	@echo "$$HELP_MESSAGE"
 .PHONY: all
 
+# ------------------------ #
+#          Train           #
+# ------------------------ #
+
+train-one:
+	@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1
+
+train:
+	@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1024 --headless
+
 # ------------------------ #
 #          Build           #
 # ------------------------ #

diff --git a/sim/humanoid_gym/envs/__init__.py b/sim/humanoid_gym/envs/__init__.py
@@ -7,8 +7,8 @@
 quickly.
 """
 
-from .humanoid_config import XBotLCfg, XBotLCfgPPO
-from .humanoid_env import XBotLFreeEnv
+from .humanoid_config import StompyCfg, StompyPPO
+from .humanoid_env import StompyFreeEnv
 
 
 def register_tasks() -> None:
@@ -19,7 +19,7 @@ def register_tasks() -> None:
     """
     from humanoid.utils.task_registry import task_registry
 
-    task_registry.register("humanoid_ppo", XBotLFreeEnv, XBotLCfg(), XBotLCfgPPO())
+    task_registry.register("humanoid_ppo", StompyFreeEnv, StompyCfg(), StompyPPO())
 
 
 register_tasks()
diff --git a/sim/humanoid_gym/envs/humanoid_config.py b/sim/humanoid_gym/envs/humanoid_config.py
@@ -1,149 +1,103 @@
-# SPDX-License-Identifier: BSD-3-Clause
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice, this
-# list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
-
+"""Defines the environment configuration for the Stompy humanoid robot."""
 
 from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO
 
+from sim.env import stompy_urdf_path
+from sim.stompy.joints import Stompy
+
+NUM_JOINTS = len(Stompy.all_joints())
 
-class XBotLCfg(LeggedRobotCfg):
-    """Configuration class for the XBotL humanoid robot."""
 
-    class env(LeggedRobotCfg.env):
-        # change the observation dim
-        frame_stack = 15
-        c_frame_stack = 3
-        num_single_obs = 47
+class StompyCfg(LeggedRobotCfg):
+    """Configuration class for the Stompy humanoid robot."""
+
+    class env(LeggedRobotCfg.env):  # noqa: N801
+        # Change the observation dim
+        frame_stack = 15  # Number of frames in an observation
+        c_frame_stack = 3  # Number of frames in a critic observation
+        num_single_obs = 47  # Size of a single observation (for the actor policy)
         num_observations = int(frame_stack * num_single_obs)
-        single_num_privileged_obs = 73
+        single_num_privileged_obs = 73  # Size of a single privileged observation (for the critic)
         num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
-        num_actions = 12
+        num_actions = NUM_JOINTS  # Torque command for each joint.
         num_envs = 4096
-        episode_length_s = 24  # episode length in seconds
+        episode_length_s = 24  # Maximum episode length
         use_ref_actions = False
 
-    class safety:
-        # safety factors
+    class safety:  # noqa: N801
         pos_limit = 1.0
         vel_limit = 1.0
-        torque_limit = 0.85
+        torque_limit = 1.0
+
+    class asset(LeggedRobotCfg.asset):  # noqa: N801
+        file = str(stompy_urdf_path())
 
-    class asset(LeggedRobotCfg.asset):
-        file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/urdf/XBot-L.urdf"
+        name = "stompy"
+        foot_name = "rubber"  # Something like "..._rubber_1"
+        knee_name = "belt_knee"  # Something like ..._belt_knee_left...""
 
-        name = "XBot-L"
-        foot_name = "ankle_roll"
-        knee_name = "knee"
+        # Terminate the episode upon contacting a delicate part.
+        terminate_after_contacts_on = ["head", "gripper"]
 
-        terminate_after_contacts_on = ["base_link"]
-        penalize_contacts_on = ["base_link"]
-        self_collisions = 0  # 1 to disable, 0 to enable...bitwise filter
+        # Penalize any non-foot contacts.
+        penalize_contacts_on = ["shoulder", "battery"]
+
+        self_collisions = 0  # 1 to disable, 0 to enable
+        collapse_fixed_joints = True
         flip_visual_attachments = False
         replace_cylinder_with_capsule = False
         fix_base_link = False
 
-    class terrain(LeggedRobotCfg.terrain):
-        mesh_type = "plane"
-        # mesh_type = 'trimesh'
+    class terrain(LeggedRobotCfg.terrain):  # noqa: N801
+        mesh_type = "plane"  # plane; trimesh
         curriculum = False
-        # rough terrain only:
+        # For rough terrain only:
         measure_heights = False
         static_friction = 0.6
         dynamic_friction = 0.6
         terrain_length = 8.0
         terrain_width = 8.0
-        num_rows = 20  # number of terrain rows (levels)
-        num_cols = 20  # number of terrain cols (types)
-        max_init_terrain_level = 10  # starting curriculum state
+        num_rows = 20  # Number of terrain rows (levels)
+        num_cols = 20  # Number of terrain cols (types)
+        max_init_terrain_level = 10  # Starting curriculum state
         # plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down
         terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0]
         restitution = 0.0
 
-    class noise:
+    class noise:  # noqa: N801
         add_noise = True
-        noise_level = 0.6  # scales other values
+        noise_level = 0.6  # Scales other values
 
-        class noise_scales:
+        class noise_scales:  # noqa: N801
             dof_pos = 0.05
             dof_vel = 0.5
             ang_vel = 0.1
             lin_vel = 0.05
             quat = 0.03
             height_measurements = 0.1
 
-    class init_state(LeggedRobotCfg.init_state):
+    class init_state(LeggedRobotCfg.init_state):  # noqa: N801
         pos = [0.0, 0.0, 0.95]
+        default_joint_angles = {k: 0.0 for k in Stompy.all_joints()}
+
+    class sim(LeggedRobotCfg.sim):  # noqa: N801
+        dt = 0.005
+        substeps = 1
 
-        default_joint_angles = {  # = target angles [rad] when action = 0.0
-            "left_leg_yaw_joint": -0.0,
-            "left_leg_roll_joint": -0.0,
-            "left_leg_pitch_joint": 0.0,
-            "left_knee_joint": 0.0,
-            "left_ankle_pitch_joint": 0.0,
-            "left_ankle_roll_joint": 0.0,
-            "right_leg_yaw_joint": 0.0,
-            "right_leg_roll_joint": 0.0,
-            "right_leg_pitch_joint": -0.0,
-            "right_knee_joint": -0.0,
-            "right_ankle_pitch_joint": -0.0,
-            "right_ankle_roll_joint": 0.0,
-        }
-
-    class control(LeggedRobotCfg.control):
-        # PD Drive parameters:
-        stiffness = {"leg_roll": 200.0, "leg_pitch": 350.0, "leg_yaw": 200.0, "knee": 350.0, "ankle": 15}
-        damping = {"leg_roll": 10, "leg_pitch": 10, "leg_yaw": 10, "knee": 10, "ankle": 10}
-
-        # action scale: target angle = actionScale * action + defaultAngle
-        action_scale = 0.25
-        # decimation: Number of control action updates @ sim DT per policy DT
-        decimation = 10  # 100hz
-
-    class sim(LeggedRobotCfg.sim):
-        dt = 0.001  # 1000 Hz
-        substeps = 1  # 2
-        up_axis = 1  # 0 is y, 1 is z
-
-        class physx(LeggedRobotCfg.sim.physx):
+        class physx(LeggedRobotCfg.sim.physx):  # noqa: N801
             num_threads = 10
-            solver_type = 1  # 0: pgs, 1: tgs
+            solver_type = 1
             num_position_iterations = 4
             num_velocity_iterations = 0
-            contact_offset = 0.01  # [m]
-            rest_offset = 0.0  # [m]
-            bounce_threshold_velocity = 0.1  # [m/s]
-            max_depenetration_velocity = 1.0
-            max_gpu_contact_pairs = 2**23  # 2**24 -> needed for 8000 envs and more
+            contact_offset = 0.02
+            rest_offset = 0.01
+            bounce_threshold_velocity = 1.0
+            max_depenetration_velocity = 0.5
+            max_gpu_contact_pairs = 2**24
             default_buffer_size_multiplier = 5
-            # 0: never, 1: last sub-step, 2: all sub-steps (default=2)
-            contact_collection = 2
+            contact_collection = 2  # gymapi.CC_ALL_SUBSTEPS
 
-    class domain_rand:
+    class domain_rand:  # noqa: N801
         randomize_friction = True
         friction_range = [0.1, 2.0]
         randomize_base_mass = True
@@ -154,64 +108,64 @@ class domain_rand:
         max_push_ang_vel = 0.4
         dynamic_randomization = 0.02
 
-    class commands(LeggedRobotCfg.commands):
-        # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
-        num_commands = 4
-        resampling_time = 8.0  # time before command are changed[s]
-        heading_command = True  # if true: compute ang vel command from heading error
+    class commands(LeggedRobotCfg.commands):  # noqa: N801
+        num_commands = 8  # Command space is the joint angles for the shoulder and elbow.
+        resampling_time = 8.0
 
-        class ranges:
-            lin_vel_x = [-0.3, 0.6]  # min max [m/s]
-            lin_vel_y = [-0.3, 0.3]  # min max [m/s]
-            ang_vel_yaw = [-0.3, 0.3]  # min max [rad/s]
-            heading = [-3.14, 3.14]
-
-    class rewards:
+    class rewards:  # noqa: N801
         base_height_target = 0.89
         min_dist = 0.2
         max_dist = 0.5
-        # put some settings here for LLM parameter tuning
+
+        # Put some settings here for LLM parameter tuning
         target_joint_pos_scale = 0.17  # rad
         target_feet_height = 0.06  # m
         cycle_time = 0.64  # sec
-        # if true negative total rewards are clipped at zero (avoids early termination problems)
+
+        # If truem negative total rewards are clipped at zero.
         only_positive_rewards = True
-        # tracking reward = exp(error*sigma)
-        tracking_sigma = 5
-        max_contact_force = 700  # forces above this value are penalized
 
-        class scales:
-            # reference motion tracking
+        # Max contact force should be a bit above the weight of the robot. In
+        # our case the robot weighs 62 Kg, so we set it to 700.
+        max_contact_force = 700
+
+        class scales:  # noqa: N801
+            # Reference motion tracking
             joint_pos = 1.6
             feet_clearance = 1.0
             feet_contact_number = 1.2
-            # gait
+
+            # Gait
             feet_air_time = 1.0
             foot_slip = -0.05
             feet_distance = 0.2
             knee_distance = 0.2
-            # contact
+
+            # Contact
             feet_contact_forces = -0.01
-            # vel tracking
+
+            # Velocity tracking
             tracking_lin_vel = 1.2
             tracking_ang_vel = 1.1
             vel_mismatch_exp = 0.5  # lin_z; ang x,y
             low_speed = 0.2
             track_vel_hard = 0.5
-            # base pos
+
+            # Base position
             default_joint_pos = 0.5
             orientation = 1.0
             base_height = 0.2
             base_acc = 0.2
-            # energy
+
+            # Energy
             action_smoothness = -0.002
             torques = -1e-5
             dof_vel = -5e-4
             dof_acc = -1e-7
             collision = -1.0
 
-    class normalization:
-        class obs_scales:
+    class normalization:  # noqa: N801
+        class obs_scales:  # noqa: N801
             lin_vel = 2.0
             ang_vel = 1.0
             dof_pos = 1.0
@@ -223,35 +177,36 @@ class obs_scales:
         clip_actions = 18.0
 
 
-class XBotLCfgPPO(LeggedRobotCfgPPO):
+class StompyPPO(LeggedRobotCfgPPO):
     seed = 5
-    runner_class_name = "OnPolicyRunner"  # DWLOnPolicyRunner
+    runner_class_name = "OnPolicyRunner"
 
-    class policy:
+    class policy:  # noqa: N801
         init_noise_std = 1.0
         actor_hidden_dims = [512, 256, 128]
         critic_hidden_dims = [768, 256, 128]
 
-    class algorithm(LeggedRobotCfgPPO.algorithm):
+    class algorithm(LeggedRobotCfgPPO.algorithm):  # noqa: N801
         entropy_coef = 0.001
         learning_rate = 1e-5
         num_learning_epochs = 2
         gamma = 0.994
         lam = 0.9
         num_mini_batches = 4
 
-    class runner:
+    class runner:  # noqa: N801
         policy_class_name = "ActorCritic"
         algorithm_class_name = "PPO"
-        num_steps_per_env = 60  # per iteration
-        max_iterations = 3001  # number of policy updates
+        num_steps_per_env = 60
+        max_iterations = 3001
 
         # logging
-        save_interval = 100  # check for potential saves every this many iterations
-        experiment_name = "XBot_ppo"
+        save_interval = 100
+        experiment_name = "stompy_ppo"
         run_name = ""
-        # load and resume
+
+        # Load and resume
         resume = False
         load_run = -1  # -1 = last run
         checkpoint = -1  # -1 = last saved model
-        resume_path = None  # updated from load_run and chkpt
+        resume_path = None  # Updated from load_run and checkpoint.