add gpr #122

Merged · 33 commits · Dec 5, 2024

Commits:
c77499c
updates, left leg works
budzianowski Dec 3, 2024
bef1927
update config
budzianowski Dec 3, 2024
c71b3e6
update position
budzianowski Dec 3, 2024
8b0315b
left leg behaves differently
budzianowski Dec 3, 2024
8ca0383
scales, cycle gait, joints and friction
budzianowski Dec 3, 2024
6eaec8e
correct config update
budzianowski Dec 3, 2024
344081f
update efforts
budzianowski Dec 3, 2024
01b9959
save imu vis
budzianowski Dec 4, 2024
6af4a94
add kinfer back
budzianowski Dec 4, 2024
ee55a46
update the urdf
budzianowski Dec 4, 2024
2f4519b
update joint position
budzianowski Dec 4, 2024
c74e3e3
update xml
budzianowski Dec 4, 2024
7fa2790
config with the right symmetrical training
budzianowski Dec 4, 2024
a9fceca
add default standing to metadata
WT-MM Dec 4, 2024
ac788b6
remove unused setup
budzianowski Dec 4, 2024
3566570
Merge branch 'debug' of github.com:kscalelabs/sim into debug
budzianowski Dec 4, 2024
2929eca
add gpr
budzianowski Dec 4, 2024
4f9ab76
update to config
budzianowski Dec 4, 2024
d86c394
update config
budzianowski Dec 4, 2024
3121393
add meshes
budzianowski Dec 4, 2024
35d1e4c
update names
budzianowski Dec 4, 2024
00da38a
update urdf
budzianowski Dec 4, 2024
aaec639
signs of the joints are not fully known
budzianowski Dec 4, 2024
516b035
make easier hyperparameters
budzianowski Dec 4, 2024
51efe10
update the initial position
budzianowski Dec 4, 2024
5448c98
policy works with realistic values
budzianowski Dec 5, 2024
8981f5f
save the latest policy
budzianowski Dec 5, 2024
67a7a6f
update mapping
budzianowski Dec 5, 2024
2c0ac78
fix play export
WT-MM Dec 5, 2024
42c44e0
update real joints and add function policy
budzianowski Dec 5, 2024
4199463
sim2sim works
budzianowski Dec 5, 2024
4c62db5
saves moving
budzianowski Dec 5, 2024
ee01607
merge
budzianowski Dec 5, 2024
Binary file added examples/kinfer_policy.onnx
Binary file removed examples/standing_micro.onnx
Binary file removed examples/standing_micro.pt
Binary file removed examples/standing_pro.onnx
Binary file removed examples/standing_pro.pt
Binary file removed examples/walking_micro.onnx
Binary file removed examples/walking_micro.pt
Binary file removed examples/walking_pro.onnx
Binary file removed examples/walking_pro.pt
Binary file renamed examples/original_walking.pt → policy.pt
Binary file removed policy_1.pt
1 change: 1 addition & 0 deletions sim/algo/ppo/on_policy_runner.py
@@ -266,6 +266,7 @@ def save(self, path: str, infos: Optional[dict] = None) -> None:

    def load(self, path: str, load_optimizer: bool = True) -> dict:
        loaded_dict = torch.load(path)
+       self.load_path = path
        self.alg.actor_critic.load_state_dict(loaded_dict["model_state_dict"])
        if load_optimizer:
            self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"])
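For context on the one-line addition above: load() now records where the checkpoint came from. A minimal sketch of how such an attribute can be reused, e.g. to write an export next to the loaded checkpoint (this helper is illustrative and not part of the PR; it assumes an rsl_rl-style ActorCritic whose .actor is a scriptable module):

    import os
    import torch

    def export_jit_policy(runner, out_name: str = "exported_policy.pt") -> str:
        # Save a TorchScript export of the actor next to the checkpoint
        # recorded by load() via runner.load_path (illustrative helper).
        out_dir = os.path.dirname(runner.load_path)
        out_path = os.path.join(out_dir, out_name)
        scripted = torch.jit.script(runner.alg.actor_critic.actor)
        scripted.save(out_path)
        return out_path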
17 changes: 7 additions & 10 deletions sim/envs/__init__.py
@@ -16,22 +16,19 @@
 from sim.envs.humanoids.h1_env import H1FreeEnv
 from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO
 from sim.envs.humanoids.stompymicro_env import StompyMicroEnv
-from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
-from sim.envs.humanoids.stompymini_env import MiniFreeEnv
-from sim.envs.humanoids.stompypro_config import (
-    StompyProCfg,
-    StompyProCfgPPO,
-    StompyProStandingCfg,
+from sim.envs.humanoids.grp_config import (
+    GprCfg,
+    GprCfgPPO,
+    GprStandingCfg,
 )
-from sim.envs.humanoids.stompypro_env import StompyProFreeEnv
+from sim.envs.humanoids.gpr_env import GprFreeEnv
 from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
 from sim.envs.humanoids.xbot_env import XBotLFreeEnv
 from sim.utils.task_registry import TaskRegistry # noqa: E402

 task_registry = TaskRegistry()
-task_registry.register("stompymini", MiniFreeEnv, MiniCfg(), MiniCfgPPO())
-task_registry.register("stompypro", StompyProFreeEnv, StompyProCfg(), StompyProCfgPPO())
-task_registry.register("stompypro_standing", StompyProFreeEnv, StompyProStandingCfg(), StompyProCfgPPO())
+task_registry.register("gpr", GprFreeEnv, GprCfg(), GprCfgPPO())
+task_registry.register("gpr_standing", GprFreeEnv, GprStandingCfg(), GprCfgPPO())
 task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO())
 task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO())
 task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO())
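For reference, tasks registered this way are normally consumed through the registry's factory methods. A minimal usage sketch, assuming the legged_gym-style TaskRegistry API this module mirrors (make_env/make_alg_runner and the args handling are assumptions, not shown in this diff):

    from sim.envs import task_registry

    # With args=None, a legged_gym-style registry falls back to its own
    # CLI parsing; pass a parsed args namespace to override that.
    env, env_cfg = task_registry.make_env(name="gpr", args=None)
    runner, train_cfg = task_registry.make_alg_runner(env=env, name="gpr", args=None)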
2 changes: 2 additions & 0 deletions sim/envs/base/legged_robot.py
@@ -567,6 +567,7 @@ def _init_buffers(self):
        self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
        for i in range(self.num_dofs):
            name = self.dof_names[i]
+           print(name)
            self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name]
            found = False

@@ -579,6 +580,7 @@
                self.p_gains[:, i] = 0.0
                self.d_gains[:, i] = 0.0
                print(f"PD gain of joint {name} were not defined, setting them to zero")
+
        self.rand_push_force = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device)
        self.rand_push_torque = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device)
        self.default_dof_pos = self.default_dof_pos.unsqueeze(0)
1 change: 0 additions & 1 deletion sim/envs/base/legged_robot_config.py
@@ -157,7 +157,6 @@ class domain_rand:
        push_robots = True
        push_interval_s = 15
        max_push_vel_xy = 1.0
-       start_pos_noise = 0.1

    class rewards:
        class scales:
sim/envs/humanoids/stompypro_env.py → sim/envs/humanoids/gpr_env.py

@@ -4,16 +4,16 @@
 from isaacgym.torch_utils import * # isort:skip

 from sim.envs.base.legged_robot import LeggedRobot
-from sim.resources.stompypro.joints import Robot
+from sim.resources.gpr.joints import Robot
 from sim.utils.terrain import HumanoidTerrain

 from isaacgym import gymtorch # isort:skip

 import torch # isort:skip


-class StompyProFreeEnv(LeggedRobot):
-    """StompyProFreeEnv is a class that represents a custom environment for a legged robot.
+class GprFreeEnv(LeggedRobot):
+    """GprFreeEnv is a class that represents a custom environment for a legged robot.

     Args:
         cfg (LeggedRobotCfg): Configuration object for the legged robot.
@@ -126,13 +126,13 @@ def compute_ref_state(self):
        sin_pos_l[sin_pos_l > 0] = 0
        self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1
        self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2
-       self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += 1 * sin_pos_l * scale_1
+       self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1

        # right foot stance phase set to default joint pos
        sin_pos_r[sin_pos_r < 0] = 0
        self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1
        self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2
-       self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += 1 * sin_pos_r * scale_1
+       self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1

        # Double support phase
        self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0
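The hunk above only drops redundant 1 * factors; the underlying scheme is unchanged: each leg tracks a clipped sine so it swings during one half of the gait cycle and holds the default pose during the other. A standalone sketch of that scheme (shapes and scale values are simplified stand-ins for the config's target_joint_pos_scale):

    import torch

    phase = torch.linspace(0.0, 1.0, 8).unsqueeze(1)  # one gait cycle, 8 samples
    sin_pos = torch.sin(2 * torch.pi * phase)
    sin_pos_l, sin_pos_r = sin_pos.clone(), sin_pos.clone()

    scale_1 = 0.17         # stand-in for target_joint_pos_scale
    scale_2 = 2 * scale_1  # knees travel roughly twice as far as hips/ankles

    sin_pos_l[sin_pos_l > 0] = 0  # left leg swings only on the negative half-cycle
    left_hip_ref, left_knee_ref = sin_pos_l * scale_1, sin_pos_l * scale_2

    sin_pos_r[sin_pos_r < 0] = 0  # right leg swings only on the positive half-cycle
    right_hip_ref, right_knee_ref = sin_pos_r * scale_1, sin_pos_r * scale_2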
@@ -325,7 +325,7 @@ def _reward_feet_contact_number(self):
        """
        contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
        stance_mask = self._get_gait_phase()
-       reward = torch.where(contact == stance_mask, 1, -0.3)
+       reward = torch.where(contact == stance_mask, 1.0, -0.3)
        return torch.mean(reward, dim=1)

    def _reward_orientation(self):
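The 1 to 1.0 change above is subtle but sensible: with both branches given as floats, torch.where cannot run into int/float mixing, which some older PyTorch releases rejected outright and which otherwise relies on implicit type promotion. A quick self-contained check (tensor values made up for illustration):

    import torch

    contact = torch.tensor([[True, False]])
    stance_mask = torch.tensor([[True, True]])

    # Both branches are floats, so the result is unambiguously a float tensor.
    reward = torch.where(contact == stance_mask, 1.0, -0.3)
    print(reward.mean(dim=1))  # tensor([0.3500])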
sim/envs/humanoids/stompymini_config.py → sim/envs/humanoids/grp_config.py

@@ -1,16 +1,16 @@
-"""Defines the environment configuration for the walking task"""
+"""Defines the environment configuration for the Getting up task"""

 from sim.env import robot_urdf_path
 from sim.envs.base.legged_robot_config import ( # type: ignore
     LeggedRobotCfg,
     LeggedRobotCfgPPO,
 )
-from sim.resources.stompymini.joints import Robot
+from sim.resources.gpr.joints import Robot

-NUM_JOINTS = len(Robot.all_joints()) # 20
+NUM_JOINTS = len(Robot.all_joints())


-class MiniCfg(LeggedRobotCfg):
+class GprCfg(LeggedRobotCfg):
     """Configuration class for the Legs humanoid robot."""

     class env(LeggedRobotCfg.env):
@@ -33,24 +33,26 @@ class safety(LeggedRobotCfg.safety):
        torque_limit = 0.85

    class asset(LeggedRobotCfg.asset):
-       name = "stompymini"
+       name = "gpr"

        file = str(robot_urdf_path(name))

-       foot_name = ["RS_01_Stator", "RS_01_Stator_2"]
-       knee_name = ["RS_04_Rotor_7", "RS_04_Rotor_8"]
+       foot_name = ["foot1", "foot3"]
+       knee_name = ["leg3_shell2", "leg3_shell22"]
        imu_name = "imu_link"

-       termination_height = 0.24
-       default_feet_height = 0.03
+       termination_height = 0.2
+       default_feet_height = 0.0

        penalize_contacts_on = []
-       self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
+       self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter
        flip_visual_attachments = False
        replace_cylinder_with_capsule = False
        fix_base_link = False

    class terrain(LeggedRobotCfg.terrain):
        mesh_type = "plane"
-       # mesh_type = 'trimesh'
+       # mesh_type = "trimesh"
        curriculum = False
        # rough terrain only:
        measure_heights = False
@@ -79,10 +81,7 @@ class noise_scales:

    class init_state(LeggedRobotCfg.init_state):
        pos = [0.0, 0.0, Robot.height]
-       # setting the right rotation
-       # quat_from_euler_xyz(torch.tensor(1.57), torch.tensor(0), torch.tensor(-1.57))
        rot = Robot.rotation
-
        default_joint_angles = {k: 0.0 for k in Robot.all_joints()}

        default_positions = Robot.default_standing()
@@ -123,14 +122,14 @@ class domain_rand(LeggedRobotCfg.domain_rand):
        friction_range = [0.1, 2.0]

        randomize_base_mass = True
-       added_mass_range = [-1.0, 1.0]
-       push_robots = True
+       added_mass_range = [-3.0, 3.0]
+       push_robots = False
        push_interval_s = 4
        max_push_vel_xy = 0.2
        max_push_ang_vel = 0.4
        # dynamic randomization
-       action_delay = 0.5
        action_noise = 0.02
+       action_delay = 0.5

    class commands(LeggedRobotCfg.commands):
        # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -145,39 +144,28 @@ class ranges:
        heading = [-3.14, 3.14]

    class rewards:
-       base_height_target = 0.78
-       min_dist = 0.25
+       # quite important to keep it right
+       base_height_target = 0.63
+       min_dist = 0.2
        max_dist = 0.5

-       # put some settings here for LLM parameter tuning
        target_joint_pos_scale = 0.17 # rad
-       target_feet_height = 0.05 # m
-       cycle_time = 0.4 # sec
+       target_feet_height = 0.06 # m
+
+       cycle_time = 0.25 # sec
        # if true negative total rewards are clipped at zero (avoids early termination problems)
        only_positive_rewards = True
        # tracking reward = exp(error*sigma)
        tracking_sigma = 5.0
-       max_contact_force = 400 # forces above this value are penalized
+       max_contact_force = 700 # forces above this value are penalized

        class scales:
-           # base pos
-           default_joint_pos = 0.5
-           orientation = 1
-           base_height = 0.2
-           base_acc = 0.2
-           # energy
-           action_smoothness = -0.002
-           torques = -1e-5
-           dof_vel = -5e-4
-           dof_acc = -1e-7
-           collision = -1.0
-
            # reference motion tracking
            joint_pos = 1.6
-           feet_clearance = 1.6
+           feet_clearance = 1.0
            feet_contact_number = 1.2
            # gait
-           feet_air_time = 1.6
+           feet_air_time = 1.0
            foot_slip = -0.05
            feet_distance = 0.2
            knee_distance = 0.2
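On the tracking_sigma comment kept above: in humanoid-gym-style code the velocity-tracking term is computed as exp(-error * sigma), so the reward starts at 1 for perfect tracking and decays toward 0 as the squared error grows. A worked example with the configured sigma = 5.0 (error values illustrative):

    import torch

    sigma = 5.0
    vel_error = torch.tensor([0.0, 0.1, 0.5])  # squared velocity-tracking errors
    print(torch.exp(-vel_error * sigma))       # tensor([1.0000, 0.6065, 0.0821])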
@@ -190,6 +178,18 @@ class scales:
            low_speed = 0.2
            track_vel_hard = 0.5

+           # base pos
+           default_joint_pos = 0.5
+           orientation = 1.
+           base_height = 0.2
+           base_acc = 0.2
+           # energy
+           action_smoothness = -0.002
+           torques = -1e-5
+           dof_vel = -5e-4
+           dof_acc = -1e-7
+           collision = -1.0
+
    class normalization:
        class obs_scales:
            lin_vel = 2.0
@@ -208,27 +208,27 @@ class viewer:
        lookat = [0, -2, 0]


-class MiniStandingCfg(MiniCfg):
-   """Configuration class for the Legs humanoid robot."""
+class GprStandingCfg(GprCfg):
+   """Configuration class for the GPR humanoid robot."""

    class rewards:
-       base_height_target = 0.78
-       min_dist = 0.25
+       # quite important to keep it right
+       base_height_target = 1.06
+       min_dist = 0.2
        max_dist = 0.5

        # put some settings here for LLM parameter tuning
        target_joint_pos_scale = 0.17 # rad
        target_feet_height = 0.05 # m
-       cycle_time = 0.4 # sec
+       cycle_time = 0.64 # sec
        # if true negative total rewards are clipped at zero (avoids early termination problems)
        only_positive_rewards = True
        # tracking reward = exp(error*sigma)
-       tracking_sigma = 5.0
-       max_contact_force = 400 # forces above this value are penalized
+       tracking_sigma = 5
+       max_contact_force = 500 # forces above this value are penalized

        class scales:
            # base pos
-           default_joint_pos = 0.5
+           default_joint_pos = 1.0
            orientation = 1
            base_height = 0.2
            base_acc = 0.2
@@ -240,7 +240,7 @@ class scales:
            collision = -1.0


-class MiniCfgPPO(LeggedRobotCfgPPO):
+class GprCfgPPO(LeggedRobotCfgPPO):
    seed = 5
    runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner
@@ -261,11 +261,11 @@ class runner:
        policy_class_name = "ActorCritic"
        algorithm_class_name = "PPO"
        num_steps_per_env = 60 # per iteration
-       max_iterations = 5001 # number of policy updates
+       max_iterations = 3001 # number of policy updates

        # logging
        save_interval = 100 # check for potential saves every this many iterations
-       experiment_name = "StompyMini"
+       experiment_name = "gpr"
        run_name = ""
        # load and resume
        resume = False