diff --git a/examples/kinfer_policy.onnx b/examples/kinfer_policy.onnx
new file mode 100644
index 00000000..9fd4c91c
Binary files /dev/null and b/examples/kinfer_policy.onnx differ
diff --git a/examples/standing_micro.onnx b/examples/standing_micro.onnx
deleted file mode 100644
index acef2d58..00000000
Binary files a/examples/standing_micro.onnx and /dev/null differ
diff --git a/examples/standing_micro.pt b/examples/standing_micro.pt
deleted file mode 100644
index 00f79cf4..00000000
Binary files a/examples/standing_micro.pt and /dev/null differ
diff --git a/examples/standing_pro.onnx b/examples/standing_pro.onnx
deleted file mode 100644
index 5434ff41..00000000
Binary files a/examples/standing_pro.onnx and /dev/null differ
diff --git a/examples/standing_pro.pt b/examples/standing_pro.pt
deleted file mode 100644
index daa9950d..00000000
Binary files a/examples/standing_pro.pt and /dev/null differ
diff --git a/examples/walking_micro.onnx b/examples/walking_micro.onnx
deleted file mode 100644
index 68a35b8e..00000000
Binary files a/examples/walking_micro.onnx and /dev/null differ
diff --git a/examples/walking_micro.pt b/examples/walking_micro.pt
deleted file mode 100644
index 58c94e6c..00000000
Binary files a/examples/walking_micro.pt and /dev/null differ
diff --git a/examples/test_policy.pt b/examples/walking_policy.pt
similarity index 56%
rename from examples/test_policy.pt
rename to examples/walking_policy.pt
index 69b339af..f483c38c 100644
Binary files a/examples/test_policy.pt and b/examples/walking_policy.pt differ
diff --git a/examples/walking_pro.onnx b/examples/walking_pro.onnx
deleted file mode 100644
index 432b0e95..00000000
Binary files a/examples/walking_pro.onnx and /dev/null differ
diff --git a/examples/walking_pro.pt b/examples/walking_pro.pt
deleted file mode 100644
index 2878ac43..00000000
Binary files a/examples/walking_pro.pt and /dev/null differ
diff --git a/examples/original_walking.pt b/policy.pt
similarity index 56%
rename from examples/original_walking.pt
rename to policy.pt
index 2ba5b97f..f483c38c 100644
Binary files a/examples/original_walking.pt and b/policy.pt differ
diff --git a/policy_1.pt b/policy_1.pt
deleted file mode 100644
index 390d998c..00000000
Binary files a/policy_1.pt and /dev/null differ
diff --git a/sim/algo/ppo/on_policy_runner.py b/sim/algo/ppo/on_policy_runner.py
index e3fb6e1c..b13e6677 100755
--- a/sim/algo/ppo/on_policy_runner.py
+++ b/sim/algo/ppo/on_policy_runner.py
@@ -266,6 +266,7 @@ def save(self, path: str, infos: Optional[dict] = None) -> None:
def load(self, path: str, load_optimizer: bool = True) -> dict:
loaded_dict = torch.load(path)
+ self.load_path = path
self.alg.actor_critic.load_state_dict(loaded_dict["model_state_dict"])
if load_optimizer:
self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"])
diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py
index d307168f..ecd72198 100755
--- a/sim/envs/__init__.py
+++ b/sim/envs/__init__.py
@@ -16,22 +16,19 @@
from sim.envs.humanoids.h1_env import H1FreeEnv
from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO
from sim.envs.humanoids.stompymicro_env import StompyMicroEnv
-from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
-from sim.envs.humanoids.stompymini_env import MiniFreeEnv
-from sim.envs.humanoids.stompypro_config import (
- StompyProCfg,
- StompyProCfgPPO,
- StompyProStandingCfg,
+from sim.envs.humanoids.grp_config import (
+ GprCfg,
+ GprCfgPPO,
+ GprStandingCfg,
)
-from sim.envs.humanoids.stompypro_env import StompyProFreeEnv
+from sim.envs.humanoids.gpr_env import GprFreeEnv
from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
from sim.envs.humanoids.xbot_env import XBotLFreeEnv
from sim.utils.task_registry import TaskRegistry # noqa: E402
task_registry = TaskRegistry()
-task_registry.register("stompymini", MiniFreeEnv, MiniCfg(), MiniCfgPPO())
-task_registry.register("stompypro", StompyProFreeEnv, StompyProCfg(), StompyProCfgPPO())
-task_registry.register("stompypro_standing", StompyProFreeEnv, StompyProStandingCfg(), StompyProCfgPPO())
+task_registry.register("gpr", GprFreeEnv, GprCfg(), GprCfgPPO())
+task_registry.register("gpr_standing", GprFreeEnv, GprStandingCfg(), GprCfgPPO())
task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO())
task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO())
task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO())
diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py
index 6bb371a8..ea8d31d4 100644
--- a/sim/envs/base/legged_robot.py
+++ b/sim/envs/base/legged_robot.py
@@ -567,6 +567,7 @@ def _init_buffers(self):
self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
for i in range(self.num_dofs):
name = self.dof_names[i]
+ print(name)
self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name]
found = False
@@ -579,6 +580,7 @@ def _init_buffers(self):
self.p_gains[:, i] = 0.0
self.d_gains[:, i] = 0.0
print(f"PD gain of joint {name} were not defined, setting them to zero")
+
self.rand_push_force = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device)
self.rand_push_torque = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device)
self.default_dof_pos = self.default_dof_pos.unsqueeze(0)
diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py
index 1921474b..e87d2252 100644
--- a/sim/envs/base/legged_robot_config.py
+++ b/sim/envs/base/legged_robot_config.py
@@ -157,7 +157,6 @@ class domain_rand:
push_robots = True
push_interval_s = 15
max_push_vel_xy = 1.0
- start_pos_noise = 0.1
class rewards:
class scales:
diff --git a/sim/envs/humanoids/stompypro_env.py b/sim/envs/humanoids/gpr_env.py
similarity index 98%
rename from sim/envs/humanoids/stompypro_env.py
rename to sim/envs/humanoids/gpr_env.py
index df9b2e75..e309e3bb 100644
--- a/sim/envs/humanoids/stompypro_env.py
+++ b/sim/envs/humanoids/gpr_env.py
@@ -4,7 +4,7 @@
from isaacgym.torch_utils import * # isort:skip
from sim.envs.base.legged_robot import LeggedRobot
-from sim.resources.stompypro.joints import Robot
+from sim.resources.gpr.joints import Robot
from sim.utils.terrain import HumanoidTerrain
from isaacgym import gymtorch # isort:skip
@@ -12,8 +12,8 @@
import torch # isort:skip
-class StompyProFreeEnv(LeggedRobot):
- """StompyProFreeEnv is a class that represents a custom environment for a legged robot.
+class GprFreeEnv(LeggedRobot):
+ """GprFreeEnv is a class that represents a custom environment for a legged robot.
Args:
cfg (LeggedRobotCfg): Configuration object for the legged robot.
@@ -126,13 +126,13 @@ def compute_ref_state(self):
sin_pos_l[sin_pos_l > 0] = 0
self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1
self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2
- self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += 1 * sin_pos_l * scale_1
+ self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1
# right foot stance phase set to default joint pos
sin_pos_r[sin_pos_r < 0] = 0
self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1
self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2
- self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += 1 * sin_pos_r * scale_1
+ self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1
# Double support phase
self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0
@@ -325,7 +325,7 @@ def _reward_feet_contact_number(self):
"""
contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
stance_mask = self._get_gait_phase()
- reward = torch.where(contact == stance_mask, 1, -0.3)
+ reward = torch.where(contact == stance_mask, 1.0, -0.3)
return torch.mean(reward, dim=1)
def _reward_orientation(self):
diff --git a/sim/envs/humanoids/stompymini_config.py b/sim/envs/humanoids/grp_config.py
similarity index 84%
rename from sim/envs/humanoids/stompymini_config.py
rename to sim/envs/humanoids/grp_config.py
index 91831c21..5ab53816 100644
--- a/sim/envs/humanoids/stompymini_config.py
+++ b/sim/envs/humanoids/grp_config.py
@@ -1,16 +1,16 @@
-"""Defines the environment configuration for the walking task"""
+"""Defines the environment configuration for the Getting up task"""
from sim.env import robot_urdf_path
from sim.envs.base.legged_robot_config import ( # type: ignore
LeggedRobotCfg,
LeggedRobotCfgPPO,
)
-from sim.resources.stompymini.joints import Robot
+from sim.resources.gpr.joints import Robot
-NUM_JOINTS = len(Robot.all_joints()) # 20
+NUM_JOINTS = len(Robot.all_joints())
-class MiniCfg(LeggedRobotCfg):
+class GprCfg(LeggedRobotCfg):
"""Configuration class for the Legs humanoid robot."""
class env(LeggedRobotCfg.env):
@@ -33,24 +33,26 @@ class safety(LeggedRobotCfg.safety):
torque_limit = 0.85
class asset(LeggedRobotCfg.asset):
- name = "stompymini"
+ name = "gpr"
+
file = str(robot_urdf_path(name))
- foot_name = ["RS_01_Stator", "RS_01_Stator_2"]
- knee_name = ["RS_04_Rotor_7", "RS_04_Rotor_8"]
+ foot_name = ["foot1", "foot3"]
+ knee_name = ["leg3_shell2", "leg3_shell22"]
+ imu_name = "imu_link"
- termination_height = 0.24
- default_feet_height = 0.03
+ termination_height = 0.2
+ default_feet_height = 0.0
penalize_contacts_on = []
- self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
+ self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter
flip_visual_attachments = False
replace_cylinder_with_capsule = False
fix_base_link = False
class terrain(LeggedRobotCfg.terrain):
mesh_type = "plane"
- # mesh_type = 'trimesh'
+ # mesh_type = "trimesh"
curriculum = False
# rough terrain only:
measure_heights = False
@@ -79,10 +81,7 @@ class noise_scales:
class init_state(LeggedRobotCfg.init_state):
pos = [0.0, 0.0, Robot.height]
- # setting the right rotation
- # quat_from_euler_xyz(torch.tensor(1.57), torch.tensor(0), torch.tensor(-1.57))
rot = Robot.rotation
-
default_joint_angles = {k: 0.0 for k in Robot.all_joints()}
default_positions = Robot.default_standing()
@@ -123,14 +122,14 @@ class domain_rand(LeggedRobotCfg.domain_rand):
friction_range = [0.1, 2.0]
randomize_base_mass = True
- added_mass_range = [-1.0, 1.0]
- push_robots = True
+ added_mass_range = [-3.0, 3.0]
+ push_robots = False
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
# dynamic randomization
- action_delay = 0.5
action_noise = 0.02
+ action_delay = 0.5
class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -145,39 +144,28 @@ class ranges:
heading = [-3.14, 3.14]
class rewards:
- base_height_target = 0.78
- min_dist = 0.25
+ # quite important to keep it right
+ base_height_target = 0.63
+ min_dist = 0.2
max_dist = 0.5
-
# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
- target_feet_height = 0.05 # m
- cycle_time = 0.4 # sec
+ target_feet_height = 0.06 # m
+
+ cycle_time = 0.25 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
tracking_sigma = 5.0
- max_contact_force = 400 # forces above this value are penalized
+ max_contact_force = 700 # forces above this value are penalized
class scales:
- # base pos
- default_joint_pos = 0.5
- orientation = 1
- base_height = 0.2
- base_acc = 0.2
- # energy
- action_smoothness = -0.002
- torques = -1e-5
- dof_vel = -5e-4
- dof_acc = -1e-7
- collision = -1.0
-
# reference motion tracking
joint_pos = 1.6
- feet_clearance = 1.6
+ feet_clearance = 1.0
feet_contact_number = 1.2
# gait
- feet_air_time = 1.6
+ feet_air_time = 1.0
foot_slip = -0.05
feet_distance = 0.2
knee_distance = 0.2
@@ -190,6 +178,18 @@ class scales:
low_speed = 0.2
track_vel_hard = 0.5
+ # base pos
+ default_joint_pos = 0.5
+ orientation = 1.
+ base_height = 0.2
+ base_acc = 0.2
+ # energy
+ action_smoothness = -0.002
+ torques = -1e-5
+ dof_vel = -5e-4
+ dof_acc = -1e-7
+ collision = -1.0
+
class normalization:
class obs_scales:
lin_vel = 2.0
@@ -208,27 +208,27 @@ class viewer:
lookat = [0, -2, 0]
-class MiniStandingCfg(MiniCfg):
- """Configuration class for the Legs humanoid robot."""
+class GprStandingCfg(GprCfg):
+ """Configuration class for the GPR humanoid robot."""
class rewards:
- base_height_target = 0.78
- min_dist = 0.25
+ # quite important to keep it right
+ base_height_target = 1.06
+ min_dist = 0.2
max_dist = 0.5
-
# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
target_feet_height = 0.05 # m
- cycle_time = 0.4 # sec
+ cycle_time = 0.64 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
- tracking_sigma = 5.0
- max_contact_force = 400 # forces above this value are penalized
+ tracking_sigma = 5
+ max_contact_force = 500 # forces above this value are penalized
class scales:
# base pos
- default_joint_pos = 0.5
+ default_joint_pos = 1.0
orientation = 1
base_height = 0.2
base_acc = 0.2
@@ -240,7 +240,7 @@ class scales:
collision = -1.0
-class MiniCfgPPO(LeggedRobotCfgPPO):
+class GprCfgPPO(LeggedRobotCfgPPO):
seed = 5
runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner
@@ -261,11 +261,11 @@ class runner:
policy_class_name = "ActorCritic"
algorithm_class_name = "PPO"
num_steps_per_env = 60 # per iteration
- max_iterations = 5001 # number of policy updates
+ max_iterations = 3001 # number of policy updates
# logging
save_interval = 100 # check for potential saves every this many iterations
- experiment_name = "StompyMini"
+ experiment_name = "gpr"
run_name = ""
# load and resume
resume = False
diff --git a/sim/envs/humanoids/stompymini_env.py b/sim/envs/humanoids/stompymini_env.py
deleted file mode 100644
index 35d66f3f..00000000
--- a/sim/envs/humanoids/stompymini_env.py
+++ /dev/null
@@ -1,517 +0,0 @@
-# mypy: disable-error-code="valid-newtype"
-"""Defines the environment for training the humanoid."""
-
-from sim.envs.base.legged_robot import LeggedRobot
-from sim.resources.stompymini.joints import Robot
-from sim.utils.terrain import HumanoidTerrain
-
-from isaacgym import gymtorch # isort:skip
-from isaacgym.torch_utils import * # isort: skip
-
-
-import torch # isort:skip
-
-
-class MiniFreeEnv(LeggedRobot):
- """StompyFreeEnv is a class that represents a custom environment for a legged robot.
-
- Args:
- cfg: Configuration object for the legged robot.
- sim_params: Parameters for the simulation.
- physics_engine: Physics engin e used in the simulation.
- sim_device: Device used for the simulation.
- headless: Flag indicating whether the simulation should be run in headless mode.
-
- Attributes:
- last_feet_z (float): The z-coordinate of the last feet position.
- feet_height (torch.Tensor): Tensor representing the height of the feet.
- sim (gymtorch.GymSim): The simulation object.
- terrain (HumanoidTerrain): The terrain object.
- up_axis_idx (int): The index representing the up axis.
- command_input (torch.Tensor): Tensor representing the command input.
- privileged_obs_buf (torch.Tensor): Tensor representing the privileged observations buffer.
- obs_buf (torch.Tensor): Tensor representing the observations buffer.
- obs_history (collections.deque): Deque containing the history of observations.
- critic_history (collections.deque): Deque containing the history of critic observations.
-
- Methods:
- _push_robots(): Randomly pushes the robots by setting a randomized base velocity.
- _get_phase(): Calculates the phase of the gait cycle.
- _get_gait_phase(): Calculates the gait phase.
- compute_ref_state(): Computes the reference state.
- create_sim(): Creates the simulation, terrain, and environments.
- _get_noise_scale_vec(cfg): Sets a vector used to scale the noise added to the observations.
- step(actions): Performs a simulation step with the given actions.
- compute_observations(): Computes the observations.
- reset_idx(env_ids): Resets the environment for the specified environment IDs.
- """
-
- def __init__(self, cfg, sim_params, physics_engine, sim_device, headless):
- super().__init__(cfg, sim_params, physics_engine, sim_device, headless)
- self.last_feet_z = self.cfg.asset.default_feet_height
- self.feet_height = torch.zeros((self.num_envs, 2), device=self.device)
- self.reset_idx(torch.tensor(range(self.num_envs), device=self.device))
-
- env_handle = self.envs[0]
- actor_handle = self.actor_handles[0]
-
- self.legs_joints = {}
- for name, joint in Robot.legs.left.joints_motors():
- print(name)
- joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint)
- self.legs_joints["left_" + name] = joint_handle
-
- for name, joint in Robot.legs.right.joints_motors():
- joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint)
- self.legs_joints["right_" + name] = joint_handle
-
- self.compute_observations()
-
- def _push_robots(self):
- """Random pushes the robots. Emulates an impulse by setting a randomized base velocity."""
- max_vel = self.cfg.domain_rand.max_push_vel_xy
- max_push_angular = self.cfg.domain_rand.max_push_ang_vel
- self.rand_push_force[:, :2] = torch_rand_float(
- -max_vel, max_vel, (self.num_envs, 2), device=self.device
- ) # lin vel x/y
- self.root_states[:, 7:9] = self.rand_push_force[:, :2]
-
- self.rand_push_torque = torch_rand_float(
- -max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device
- )
- self.root_states[:, 10:13] = self.rand_push_torque
-
- self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))
-
- def _get_phase(self):
- cycle_time = self.cfg.rewards.cycle_time
- phase = self.episode_length_buf * self.dt / cycle_time
- return phase
-
- def _get_gait_phase(self):
- # return float mask 1 is stance, 0 is swing
- phase = self._get_phase()
- sin_pos = torch.sin(2 * torch.pi * phase)
- # Add double support phase
- stance_mask = torch.zeros((self.num_envs, 2), device=self.device)
- # left foot stance
- stance_mask[:, 0] = sin_pos >= 0
- # right foot stance
- stance_mask[:, 1] = sin_pos < 0
- # Double support phase
- stance_mask[torch.abs(sin_pos) < 0.1] = 1
-
- return stance_mask
-
- def check_termination(self):
- """Check if environments need to be reset"""
- self.reset_buf = torch.any(
- torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1.0,
- dim=1,
- )
- self.reset_buf |= self.root_states[:, 2] < self.cfg.asset.termination_height
- self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs
- self.reset_buf |= self.time_out_buf
-
- def compute_ref_state(self):
- phase = self._get_phase()
- sin_pos = torch.sin(2 * torch.pi * phase)
- sin_pos_l = sin_pos.clone()
- sin_pos_r = sin_pos.clone()
- default_clone = self.default_dof_pos.clone()
- self.ref_dof_pos = self.default_dof_pos.repeat(self.num_envs, 1)
-
- scale_1 = self.cfg.rewards.target_joint_pos_scale
- scale_2 = 2 * scale_1
- # left foot stance phase set to default joint pos
- sin_pos_l[sin_pos_l > 0] = 0
- self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1
- self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2
- self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1
-
- # right foot stance phase set to default joint pos
- sin_pos_r[sin_pos_r < 0] = 0
- self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1
- self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2
- self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1
-
- # Double support phase
- self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0
-
- self.ref_action = 2 * self.ref_dof_pos
-
- def create_sim(self):
- """Creates simulation, terrain and evironments"""
- self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly
- self.sim = self.gym.create_sim(
- self.sim_device_id,
- self.graphics_device_id,
- self.physics_engine,
- self.sim_params,
- )
- mesh_type = self.cfg.terrain.mesh_type
- if mesh_type in ["heightfield", "trimesh"]:
- self.terrain = HumanoidTerrain(self.cfg.terrain, self.num_envs)
- if mesh_type == "plane":
- self._create_ground_plane()
- elif mesh_type == "heightfield":
- self._create_heightfield()
- elif mesh_type == "trimesh":
- self._create_trimesh()
- elif mesh_type is not None:
- raise ValueError("Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]")
- self._create_envs()
-
- def _get_noise_scale_vec(self, cfg):
- """Sets a vector used to scale the noise added to the observations.
- [NOTE]: Must be adapted when changing the observations structure
-
- Args:
- cfg (Dict): Environment config file
-
- Returns:
- [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1]
- """
- num_actions = self.num_actions
- noise_vec = torch.zeros(self.cfg.env.num_single_obs, device=self.device)
- self.add_noise = self.cfg.noise.add_noise
- noise_scales = self.cfg.noise.noise_scales
- noise_vec[0:5] = 0.0 # commands
- noise_vec[5 : (num_actions + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos
- noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel
- noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0 # previous actions
- noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = (
- noise_scales.ang_vel * self.obs_scales.ang_vel
- ) # ang vel
- noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5)] = (
- noise_scales.quat * self.obs_scales.quat
- ) # euler x,y
- return noise_vec
-
- def compute_observations(self):
- phase = self._get_phase()
- self.compute_ref_state()
-
- sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1)
- cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1)
-
- stance_mask = self._get_gait_phase()
- contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5.0
-
- self.command_input = torch.cat((sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1)
- q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos
- dq = self.dof_vel * self.obs_scales.dof_vel
-
- diff = self.dof_pos - self.ref_dof_pos
- self.privileged_obs_buf = torch.cat(
- (
- self.command_input, # 2 + 3
- (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 12
- self.dof_vel * self.obs_scales.dof_vel, # 12
- self.actions, # 12
- diff, # 12
- self.base_lin_vel * self.obs_scales.lin_vel, # 3
- self.base_ang_vel * self.obs_scales.ang_vel, # 3
- self.base_euler_xyz * self.obs_scales.quat, # 3
- self.rand_push_force[:, :2], # 3
- self.rand_push_torque, # 3
- self.env_frictions, # 1
- self.body_mass / 30.0, # 1
- stance_mask, # 2
- contact_mask, # 2
- ),
- dim=-1,
- )
-
- obs_buf = torch.cat(
- (
- self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, aug_vel_yaw)
- q, # 20D
- dq, # 20D
- self.actions, # 20D
- self.base_ang_vel * self.obs_scales.ang_vel, # 3
- self.base_euler_xyz * self.obs_scales.quat, # 3
- ),
- dim=-1,
- )
-
- if self.cfg.terrain.measure_heights:
- heights = (
- torch.clip(
- self.root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights,
- -1,
- 1.0,
- )
- * self.obs_scales.height_measurements
- )
- self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1)
-
- if self.add_noise:
- obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level
- else:
- obs_now = obs_buf.clone()
- self.obs_history.append(obs_now)
- self.critic_history.append(self.privileged_obs_buf)
-
- obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1) # N,T,K
-
- self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K
- self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1)
-
- def reset_idx(self, env_ids):
- super().reset_idx(env_ids)
- for i in range(self.obs_history.maxlen):
- self.obs_history[i][env_ids] *= 0
- for i in range(self.critic_history.maxlen):
- self.critic_history[i][env_ids] *= 0
-
- # ================================================ Rewards ================================================== #
- def _reward_joint_pos(self):
- """Calculates the reward based on the difference between the current joint positions and the target joint positions."""
- joint_pos = self.dof_pos.clone()
- pos_target = self.ref_dof_pos.clone()
- diff = joint_pos - pos_target
- r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5)
-
- return r
-
- def _reward_feet_distance(self):
- """Calculates the reward based on the distance between the feet. Penilize feet get close to each other or too far away."""
- foot_pos = self.rigid_state[:, self.feet_indices, :2]
- foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1)
- fd = self.cfg.rewards.min_dist
- max_df = self.cfg.rewards.max_dist
- d_min = torch.clamp(foot_dist - fd, -0.5, 0.0)
- d_max = torch.clamp(foot_dist - max_df, 0, 0.5)
- return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2
-
- def _reward_knee_distance(self):
- """Calculates the reward based on the distance between the knee of the humanoid."""
- foot_pos = self.rigid_state[:, self.knee_indices, :2]
- foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1)
- fd = self.cfg.rewards.min_dist
- max_df = self.cfg.rewards.max_dist / 2
- d_min = torch.clamp(foot_dist - fd, -0.5, 0.0)
- d_max = torch.clamp(foot_dist - max_df, 0, 0.5)
- return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2
-
- def _reward_foot_slip(self):
- """Calculates the reward for minimizing foot slip. The reward is based on the contact forces
- and the speed of the feet. A contact threshold is used to determine if the foot is in contact
- with the ground. The speed of the foot is calculated and scaled by the contact condition.
- """
- contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
- foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
- rew = torch.sqrt(foot_speed_norm)
- rew *= contact
- return torch.sum(rew, dim=1)
-
- def _reward_feet_air_time(self):
- """Calculates the reward for feet air time, promoting longer steps. This is achieved by
- checking the first contact with the ground after being in the air. The air time is
- limited to a maximum value for reward calculation.
- """
- contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
- stance_mask = self._get_gait_phase()
- self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts)
- self.last_contacts = contact
- first_contact = (self.feet_air_time > 0.0) * self.contact_filt
- self.feet_air_time += self.dt
- air_time = self.feet_air_time.clamp(0, 0.5) * first_contact
- self.feet_air_time *= ~self.contact_filt
- return air_time.sum(dim=1)
-
- def _reward_feet_contact_number(self):
- """Calculates a reward based on the number of feet contacts aligning with the gait phase.
- Rewards or penalizes depending on whether the foot contact matches the expected gait phase.
- """
- contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
- stance_mask = self._get_gait_phase()
- reward = torch.where(contact == stance_mask, 1, -0.3)
- return torch.mean(reward, dim=1)
-
- def _reward_orientation(self):
- """Calculates the reward for maintaining a flat base orientation. It penalizes deviation
- from the desired base orientation using the base euler angles and the projected gravity vector.
- """
- quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10)
- orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20)
- return (quat_mismatch + orientation) / 2
-
- def _reward_feet_contact_forces(self):
- """Calculates the reward for keeping contact forces within a specified range. Penalizes
- high contact forces on the feet.
- """
- return torch.sum(
- (
- torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force
- ).clip(0, 400),
- dim=1,
- )
-
- def _reward_default_joint_pos(self):
- """Calculates the reward for keeping joint positions close to default positions, with a focus
- on penalizing deviation in yaw and roll directions. Excludes yaw and roll from the main penalty.
- """
- joint_diff = self.dof_pos - self.default_joint_pd_target
- left_yaw_roll = joint_diff[:, [self.legs_joints["left_hip_roll"], self.legs_joints["left_hip_yaw"]]]
- right_yaw_roll = joint_diff[:, [self.legs_joints["right_hip_roll"], self.legs_joints["right_hip_yaw"]]]
- yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1)
- yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50)
-
- return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1)
-
- def _reward_base_height(self):
- """Calculates the reward based on the robot's base height. Penalizes deviation from a target base height.
- The reward is computed based on the height difference between the robot's base and the average height
- of its feet when they are in contact with the ground.
- """
- stance_mask = self._get_gait_phase()
- measured_heights = torch.sum(self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum(
- stance_mask, dim=1
- )
- base_height = self.root_states[:, 2] - (measured_heights - self.cfg.asset.default_feet_height)
- reward = torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100)
- return reward
-
- def _reward_base_acc(self):
- """Computes the reward based on the base's acceleration. Penalizes high accelerations of the robot's base,
- encouraging smoother motion.
- """
- root_acc = self.last_root_vel - self.root_states[:, 7:13]
- rew = torch.exp(-torch.norm(root_acc, dim=1) * 3)
- return rew
-
- def _reward_vel_mismatch_exp(self):
- """Computes a reward based on the mismatch in the robot's linear and angular velocities.
- Encourages the robot to maintain a stable velocity by penalizing large deviations.
- """
- lin_mismatch = torch.exp(-torch.square(self.base_lin_vel[:, 2]) * 10)
- ang_mismatch = torch.exp(-torch.norm(self.base_ang_vel[:, :2], dim=1) * 5.0)
-
- c_update = (lin_mismatch + ang_mismatch) / 2.0
-
- return c_update
-
- def _reward_track_vel_hard(self):
- """Calculates a reward for accurately tracking both linear and angular velocity commands.
- Penalizes deviations from specified linear and angular velocity targets.
- """
- # Tracking of linear velocity commands (xy axes)
- lin_vel_error = torch.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1)
- lin_vel_error_exp = torch.exp(-lin_vel_error * 10)
-
- # Tracking of angular velocity commands (yaw)
- ang_vel_error = torch.abs(self.commands[:, 2] - self.base_ang_vel[:, 2])
- ang_vel_error_exp = torch.exp(-ang_vel_error * 10)
-
- linear_error = 0.2 * (lin_vel_error + ang_vel_error)
-
- return (lin_vel_error_exp + ang_vel_error_exp) / 2.0 - linear_error
-
- def _reward_tracking_lin_vel(self):
- """Tracks linear velocity commands along the xy axes.
- Calculates a reward based on how closely the robot's linear velocity matches the commanded values.
- """
- lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1)
- return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma)
-
- def _reward_tracking_ang_vel(self):
- """Tracks angular velocity commands for yaw rotation.
- Computes a reward based on how closely the robot's angular velocity matches the commanded yaw values.
- """
- ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2])
- return torch.exp(-ang_vel_error * self.cfg.rewards.tracking_sigma)
-
- def _reward_feet_clearance(self):
- """Calculates reward based on the clearance of the swing leg from the ground during movement.
- Encourages appropriate lift of the feet during the swing phase of the gait.
- """
- # Compute feet contact mask
- contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
-
- # Get the z-position of the feet and compute the change in z-position
- feet_z = self.rigid_state[:, self.feet_indices, 2] - self.cfg.asset.default_feet_height
- delta_z = feet_z - self.last_feet_z
- self.feet_height += delta_z
- self.last_feet_z = feet_z
-
- # Compute swing mask
- swing_mask = 1 - self._get_gait_phase()
-
- # feet height should be closed to target feet height at the peak
- rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.02
- rew_pos = torch.sum(rew_pos * swing_mask, dim=1)
- self.feet_height *= ~contact
-
- return rew_pos
-
- def _reward_low_speed(self):
- """Rewards or penalizes the robot based on its speed relative to the commanded speed.
- This function checks if the robot is moving too slow, too fast, or at the desired speed,
- and if the movement direction matches the command.
- """
- # Calculate the absolute value of speed and command for comparison
- absolute_speed = torch.abs(self.base_lin_vel[:, 0])
- absolute_command = torch.abs(self.commands[:, 0])
-
- # Define speed criteria for desired range
- speed_too_low = absolute_speed < 0.5 * absolute_command
- speed_too_high = absolute_speed > 1.2 * absolute_command
- speed_desired = ~(speed_too_low | speed_too_high)
-
- # Check if the speed and command directions are mismatched
- sign_mismatch = torch.sign(self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0])
-
- # Initialize reward tensor
- reward = torch.zeros_like(self.base_lin_vel[:, 0])
-
- # Assign rewards based on conditions
- # Speed too low
- reward[speed_too_low] = -1.0
- # Speed too high
- reward[speed_too_high] = 0.0
- # Speed within desired range
- reward[speed_desired] = 1.2
- # Sign mismatch has the highest priority
- reward[sign_mismatch] = -2.0
- return reward * (self.commands[:, 0].abs() > 0.1)
-
- def _reward_torques(self):
- """Penalizes the use of high torques in the robot's joints. Encourages efficient movement by minimizing
- the necessary force exerted by the motors.
- """
- return torch.sum(torch.square(self.torques), dim=1)
-
- def _reward_dof_vel(self):
- """Penalizes high velocities at the degrees of freedom (DOF) of the robot. This encourages smoother and
- more controlled movements.
- """
- return torch.sum(torch.square(self.dof_vel), dim=1)
-
- def _reward_dof_acc(self):
- """Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring
- smooth and stable motion, reducing wear on the robot's mechanical parts.
- """
- return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1)
-
- def _reward_collision(self):
- """Penalizes collisions of the robot with the environment, specifically focusing on selected body parts.
- This encourages the robot to avoid undesired contact with objects or surfaces.
- """
- return torch.sum(
- 1.0 * (torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1),
- dim=1,
- )
-
- def _reward_action_smoothness(self):
- """Encourages smoothness in the robot's actions by penalizing large differences between consecutive actions.
- This is important for achieving fluid motion and reducing mechanical stress.
- """
- term_1 = torch.sum(torch.square(self.last_actions - self.actions), dim=1)
- term_2 = torch.sum(
- torch.square(self.actions + self.last_last_actions - 2 * self.last_actions),
- dim=1,
- )
- term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1)
- return term_1 + term_2 + term_3
diff --git a/sim/envs/humanoids/stompypro_config.py b/sim/envs/humanoids/stompypro_config.py
deleted file mode 100644
index 714e15c2..00000000
--- a/sim/envs/humanoids/stompypro_config.py
+++ /dev/null
@@ -1,295 +0,0 @@
-"""Defines the environment configuration for the Getting up task"""
-
-from sim.env import robot_urdf_path
-from sim.envs.base.legged_robot_config import ( # type: ignore
- LeggedRobotCfg,
- LeggedRobotCfgPPO,
-)
-from sim.resources.stompypro.joints import Robot
-
-NUM_JOINTS = len(Robot.all_joints())
-
-
-class StompyProCfg(LeggedRobotCfg):
- """Configuration class for the Legs humanoid robot."""
-
- class env(LeggedRobotCfg.env):
- # change the observation dim
- frame_stack = 15
- c_frame_stack = 3
- num_single_obs = 11 + NUM_JOINTS * 3
- num_observations = int(frame_stack * num_single_obs)
- single_num_privileged_obs = 25 + NUM_JOINTS * 4
- num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
- num_actions = NUM_JOINTS
- num_envs = 4096
- episode_length_s = 24 # episode length in seconds
- use_ref_actions = False
-
- class safety(LeggedRobotCfg.safety):
- # safety factors
- pos_limit = 1.0
- vel_limit = 1.0
- torque_limit = 0.85
-
- class asset(LeggedRobotCfg.asset):
- name = "stompypro"
-
- file = str(robot_urdf_path(name))
-
- foot_name = ["L_foot", "R_foot"]
- knee_name = ["L_calf", "R_calf"]
- imu_name = "imu_link"
-
- termination_height = 0.2
- default_feet_height = 0.0
-
- penalize_contacts_on = []
- self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter
- flip_visual_attachments = False
- replace_cylinder_with_capsule = False
- fix_base_link = False
-
- class terrain(LeggedRobotCfg.terrain):
- # mesh_type = "plane"
- mesh_type = "trimesh"
- curriculum = False
- # rough terrain only:
- measure_heights = False
- static_friction = 0.6
- dynamic_friction = 0.6
- terrain_length = 8.0
- terrain_width = 8.0
- num_rows = 10 # number of terrain rows (levels)
- num_cols = 10 # number of terrain cols (types)
- max_init_terrain_level = 10 # starting curriculum state
- # plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down
- terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0]
- restitution = 0.0
-
- class noise:
- add_noise = True
- noise_level = 0.6 # scales other values
-
- class noise_scales:
- dof_pos = 0.05
- dof_vel = 0.5
- ang_vel = 0.1
- lin_vel = 0.05
- quat = 0.03
- height_measurements = 0.1
-
- class init_state(LeggedRobotCfg.init_state):
- pos = [0.0, 0.0, Robot.height]
- rot = Robot.rotation
- default_joint_angles = {k: 0.0 for k in Robot.all_joints()}
-
- default_positions = Robot.default_standing()
- for joint in default_positions:
- default_joint_angles[joint] = default_positions[joint]
-
- class control(LeggedRobotCfg.control):
- # PD Drive parameters:
- stiffness = Robot.stiffness()
- damping = Robot.damping()
- # action scale: target angle = actionScale * action + defaultAngle
- action_scale = 0.25
- # decimation: Number of control action updates @ sim DT per policy DT
- decimation = 20 # 100hz
-
- class sim(LeggedRobotCfg.sim):
- dt = 0.001 # 1000 Hz
- substeps = 1 # 2
- up_axis = 1 # 0 is y, 1 is z
-
- class physx(LeggedRobotCfg.sim.physx):
- num_threads = 10
- solver_type = 1 # 0: pgs, 1: tgs
- num_position_iterations = 4
- num_velocity_iterations = 1
- contact_offset = 0.01 # [m]
- rest_offset = 0.0 # [m]
- bounce_threshold_velocity = 0.1 # [m/s]
- max_depenetration_velocity = 1.0
- max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more
- default_buffer_size_multiplier = 5
- # 0: never, 1: last sub-step, 2: all sub-steps (default=2)
- contact_collection = 2
-
- class domain_rand(LeggedRobotCfg.domain_rand):
- randomize_friction = True
- friction_range = [0.1, 2.0]
-
- randomize_base_mass = True
- added_mass_range = [-5.0, 5.0]
- push_robots = True
- push_interval_s = 4
- max_push_vel_xy = 0.2
- max_push_ang_vel = 0.4
- # dynamic randomization
- action_noise = 0.02
- action_delay = 0.5
-
- class commands(LeggedRobotCfg.commands):
- # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
- num_commands = 4
- resampling_time = 8.0 # time before command are changed[s]
- heading_command = True # if true: compute ang vel command from heading error
-
- class ranges:
- lin_vel_x = [-0.3, 0.6] # min max [m/s]
- lin_vel_y = [-0.3, 0.3] # min max [m/s]
- ang_vel_yaw = [-0.3, 0.3] # min max [rad/s]
- heading = [-3.14, 3.14]
-
- # a - normal
- # b - negate target_join_pos_scale (-0.14)
- class rewards:
- # quite important to keep it right
- base_height_target = 0.63
- min_dist = 0.2
- max_dist = 0.5
- # put some settings here for LLM parameter tuning
- target_joint_pos_scale = 0.17 # rad
- target_feet_height = 0.06 # m
-
- cycle_time = 0.4 # sec
- # if true negative total rewards are clipped at zero (avoids early termination problems)
- only_positive_rewards = True
- # tracking reward = exp(error*sigma)
- tracking_sigma = 5.0
- max_contact_force = 400 # forces above this value are penalized
-
- class scales:
- # reference motion tracking
- joint_pos = 1.9
- feet_clearance = 1.7
- feet_contact_number = 1.7
- # gait
- feet_air_time = 1.6
- foot_slip = -0.05
- feet_distance = 0.2
- knee_distance = 0.2
- # contact
- feet_contact_forces = -0.01
- # vel tracking
- tracking_lin_vel = 1.0
- tracking_ang_vel = 1.0
- vel_mismatch_exp = 0.5 # lin_z; ang x,y
- low_speed = 0.2
- track_vel_hard = 0.5
-
- # base pos
- default_joint_pos = 0.5
- orientation = 1
- base_height = 0.2
- base_acc = 0.2
- # energy
- action_smoothness = -0.002
- torques = -1e-5
- dof_vel = -5e-4
- dof_acc = -1e-7
- collision = -1.0
-
- # reference motion tracking
- joint_pos = 1.6
- feet_clearance = 1.6
- feet_contact_number = 1.2
- # gait
- feet_air_time = 1.6
- foot_slip = -0.05
- feet_distance = 0.2
- knee_distance = 0.2
- # contact
- feet_contact_forces = -0.01
- # vel tracking
- tracking_lin_vel = 1.2
- tracking_ang_vel = 1.1
- vel_mismatch_exp = 0.5 # lin_z; ang x,y
- low_speed = 0.2
- track_vel_hard = 0.5
-
- class normalization:
- class obs_scales:
- lin_vel = 2.0
- ang_vel = 1.0
- dof_pos = 1.0
- dof_vel = 0.05
- quat = 1.0
- height_measurements = 5.0
-
- clip_observations = 18.0
- clip_actions = 18.0
-
- class viewer:
- ref_env = 0
- pos = [4, -4, 2]
- lookat = [0, -2, 0]
-
-
-class StompyProStandingCfg(StompyProCfg):
- """Configuration class for the Stompy Pro humanoid robot."""
-
- # a - normal
- # b - negate target_join_pos_scale (-0.14)
- class rewards:
- # quite important to keep it right
- base_height_target = 0.63
- min_dist = 0.2
- max_dist = 0.4
- # put some settings here for LLM parameter tuning
- target_joint_pos_scale = 0.14 # rad
- target_feet_height = 0.05 # m
- cycle_time = 0.5 # sec
- # if true negative total rewards are clipped at zero (avoids early termination problems)
- only_positive_rewards = True
- # tracking reward = exp(error*sigma)
- tracking_sigma = 5
- max_contact_force = 500 # forces above this value are penalized
-
- class scales:
- # base pos
- default_joint_pos = 1.0
- orientation = 1
- base_height = 0.2
- base_acc = 0.2
- # energy
- action_smoothness = -0.002
- torques = -1e-5
- dof_vel = -5e-4
- dof_acc = -1e-7
- collision = -1.0
-
-
-class StompyProCfgPPO(LeggedRobotCfgPPO):
- seed = 5
- runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner
-
- class policy:
- init_noise_std = 1.0
- actor_hidden_dims = [512, 256, 128]
- critic_hidden_dims = [768, 256, 128]
-
- class algorithm(LeggedRobotCfgPPO.algorithm):
- entropy_coef = 0.001
- learning_rate = 1e-5
- num_learning_epochs = 2
- gamma = 0.994
- lam = 0.9
- num_mini_batches = 4
-
- class runner:
- policy_class_name = "ActorCritic"
- algorithm_class_name = "PPO"
- num_steps_per_env = 60 # per iteration
- max_iterations = 3001 # number of policy updates
-
- # logging
- save_interval = 100 # check for potential saves every this many iterations
- experiment_name = "StompyPro"
- run_name = ""
- # load and resume
- resume = False
- load_run = -1 # -1 = last run
- checkpoint = -1 # -1 = last saved model
- resume_path = None # updated from load_run and chkpt
diff --git a/sim/model_export.py b/sim/model_export.py
index 51a812c5..c5184817 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -15,7 +15,7 @@
@dataclass
class ActorCfg:
- embodiment: str = "stompypro"
+ embodiment: str = "gpr"
cycle_time: float = 0.4 # Cycle time for sinusoidal command input
action_scale: float = 0.25 # Scale for actions
lin_vel_scale: float = 2.0 # Scale for linear velocity
@@ -252,6 +252,55 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
}, input_tensors
+def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]:
+ all_weights = torch.load(model_path, map_location="cpu", weights_only=True)
+ weights = all_weights["model_state_dict"]
+ num_actor_obs = weights["actor.0.weight"].shape[1]
+ num_critic_obs = weights["critic.0.weight"].shape[1]
+ num_actions = weights["std"].shape[0]
+ actor_hidden_dims = [v.shape[0] for k, v in weights.items() if re.match(r"actor\.\d+\.weight", k)]
+ critic_hidden_dims = [v.shape[0] for k, v in weights.items() if re.match(r"critic\.\d+\.weight", k)]
+ actor_hidden_dims = actor_hidden_dims[:-1]
+ critic_hidden_dims = critic_hidden_dims[:-1]
+
+ ac_model = ActorCritic(num_actor_obs, num_critic_obs, num_actions, actor_hidden_dims, critic_hidden_dims)
+ ac_model.load_state_dict(weights)
+
+ a_model = Actor(ac_model.actor, cfg)
+
+ # Gets the model input tensors.
+ x_vel = torch.randn(1)
+ y_vel = torch.randn(1)
+ rot = torch.randn(1)
+ t = torch.randn(1)
+ dof_pos = torch.randn(a_model.num_actions)
+ dof_vel = torch.randn(a_model.num_actions)
+ prev_actions = torch.randn(a_model.num_actions)
+ imu_ang_vel = torch.randn(3)
+ imu_euler_xyz = torch.randn(3)
+ buffer = a_model.get_init_buffer()
+ input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, imu_ang_vel, imu_euler_xyz, buffer)
+
+ jit_model = torch.jit.script(a_model)
+
+ # Add sim2sim metadata
+ robot_effort = list(a_model.robot.effort().values())
+ robot_stiffness = list(a_model.robot.stiffness().values())
+ robot_damping = list(a_model.robot.damping().values())
+ default_standing = list(a_model.robot.default_standing().values())
+ num_actions = a_model.num_actions
+ num_observations = a_model.num_observations
+
+ return a_model, {
+ "robot_effort": robot_effort,
+ "robot_stiffness": robot_stiffness,
+ "robot_damping": robot_damping,
+ "default_standing": default_standing,
+ "num_actions": num_actions,
+ "num_observations": num_observations,
+ }, input_tensors
+
+
def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[str] = None) -> ort.InferenceSession:
"""Converts a PyTorch model to a ONNX format.
diff --git a/sim/play.py b/sim/play.py
index 28587bf2..4881d6ba 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -2,7 +2,7 @@
"""Play a trained policy in the environment.
Run:
- python sim/play.py --task stompypro --log_h5
+ python sim/play.py --task gpr --log_h5
"""
import argparse
import copy
@@ -91,14 +91,23 @@ def play(args: argparse.Namespace) -> None:
# export policy as a onnx module (used to run it on web)
if args.export_onnx:
- path = ppo_runner.alg.actor_critic
- policy_cfg = ActorCfg()
+ path = ppo_runner.load_path
+ embodiment = ppo_runner.cfg["experiment_name"].lower()
+ policy_cfg = ActorCfg(embodiment=embodiment)
+
+ if embodiment == "stompypro":
+ policy_cfg.cycle_time = 0.4
+ elif embodiment == "stompymicro":
+ policy_cfg.cycle_time = 0.2
+ else:
+ print(f"Specific policy cfg for {embodiment} not implemented")
+
actor_model, sim2sim_info, input_tensors = get_actor_policy(path, policy_cfg)
# Merge policy_cfg and sim2sim_info into a single config object
export_config = {**vars(policy_cfg), **sim2sim_info}
- policy = export_to_onnx(
+ export_to_onnx(
actor_model,
input_tensors=input_tensors,
config=export_config,
diff --git a/sim/play_old.py b/sim/play_old.py
new file mode 100755
index 00000000..d842fb83
--- /dev/null
+++ b/sim/play_old.py
@@ -0,0 +1,243 @@
+# mypy: ignore-errors
+"""Play a trained policy in the environment.
+
+Run:
+ python sim/play.py --task g1 --log_h5
+ python sim/play.py --task stompymini --log_h5
+"""
+import argparse
+import copy
+import logging
+import os
+from datetime import datetime
+from typing import Any, Union
+
+import cv2
+import h5py
+import numpy as np
+from isaacgym import gymapi
+from tqdm import tqdm
+
+logger = logging.getLogger(__name__)
+
+from sim.env import run_dir # noqa: E402
+from sim.envs import task_registry # noqa: E402
+from sim.model_export import ActorCfg, convert_model_to_onnx # noqa: E402
+from sim.utils.helpers import get_args # noqa: E402
+from sim.utils.logger import Logger # noqa: E402
+
+import torch # isort: skip
+
+
+def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None:
+ os.makedirs(path, exist_ok=True)
+ path = os.path.join(path, "policy_1.pt")
+ model = copy.deepcopy(actor_critic.actor).to("cpu")
+ traced_script_module = torch.jit.script(model)
+ traced_script_module.save(path)
+
+
+def play(args: argparse.Namespace) -> None:
+ logger.info("Configuring environment and training settings...")
+ env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
+
+ # override some parameters for testing
+ env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1)
+ env_cfg.sim.max_gpu_contact_pairs = 2**10
+ if args.trimesh:
+ env_cfg.terrain.mesh_type = "trimesh"
+ else:
+ env_cfg.terrain.mesh_type = "plane"
+ env_cfg.terrain.num_rows = 5
+ env_cfg.terrain.num_cols = 5
+ env_cfg.terrain.curriculum = False
+ env_cfg.terrain.max_init_terrain_level = 5
+ env_cfg.noise.add_noise = True
+ env_cfg.domain_rand.push_robots = False
+ env_cfg.domain_rand.joint_angle_noise = 0.01
+ env_cfg.noise.curriculum = False
+ env_cfg.noise.noise_level = 0.5
+
+ train_cfg.seed = 123145
+ logger.info("train_cfg.runner_class_name: %s", train_cfg.runner_class_name)
+
+ # prepare environment
+ env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
+ env.set_camera(env_cfg.viewer.pos, env_cfg.viewer.lookat)
+
+ obs = env.get_observations()
+
+ # load policy
+ train_cfg.runner.resume = True
+ ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg)
+ policy = ppo_runner.get_inference_policy(device=env.device)
+
+ # # Export policy if needed
+ # if args.export_policy:
+ # path = os.path.join(".")
+ # export_policy_as_jit(ppo_runner.alg.actor_critic, path)
+ # print("Exported policy as jit script to: ", path)
+
+ # export policy as a onnx module (used to run it on web)
+ # if args.export_onnx:
+ # path = ppo_runner.alg.actor_critic
+ # convert_model_to_onnx(path, ActorCfg(), save_path="policy.onnx")
+ # print("Exported policy as onnx to: ", path)
+
+ # Prepare for logging
+ env_logger = Logger(env.dt)
+ robot_index = 0
+ joint_index = 1
+ stop_state_log = 1000
+ now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+ if args.log_h5:
+ h5_file = h5py.File(f"data{now}.h5", "w")
+
+ # Create dataset for actions
+ max_timesteps = stop_state_log
+ num_dof = env.num_dof
+ dset_actions = h5_file.create_dataset("actions", (max_timesteps, num_dof), dtype=np.float32)
+
+ # Create dataset of observations
+ buf_len = len(env.obs_history) # length of observation buffer
+ dset_2D_command = h5_file.create_dataset(
+ "observations/2D_command", (max_timesteps, buf_len, 2), dtype=np.float32
+ ) # sin and cos commands
+ dset_3D_command = h5_file.create_dataset(
+ "observations/3D_command", (max_timesteps, buf_len, 3), dtype=np.float32
+ ) # x, y, yaw commands
+ dset_q = h5_file.create_dataset(
+ "observations/q", (max_timesteps, buf_len, num_dof), dtype=np.float32
+ ) # joint positions
+ dset_dq = h5_file.create_dataset(
+ "observations/dq", (max_timesteps, buf_len, num_dof), dtype=np.float32
+ ) # joint velocities
+ dset_obs_actions = h5_file.create_dataset(
+ "observations/actions", (max_timesteps, buf_len, num_dof), dtype=np.float32
+ ) # actions
+ dset_ang_vel = h5_file.create_dataset(
+ "observations/ang_vel", (max_timesteps, buf_len, 3), dtype=np.float32
+ ) # root angular velocity
+ dset_euler = h5_file.create_dataset(
+ "observations/euler", (max_timesteps, buf_len, 3), dtype=np.float32
+ ) # root orientation
+
+ if args.render:
+ camera_properties = gymapi.CameraProperties()
+ camera_properties.width = 1920
+ camera_properties.height = 1080
+ h1 = env.gym.create_camera_sensor(env.envs[0], camera_properties)
+ camera_offset = gymapi.Vec3(3, -3, 1)
+ camera_rotation = gymapi.Quat.from_axis_angle(gymapi.Vec3(-0.3, 0.2, 1), np.deg2rad(135))
+ actor_handle = env.gym.get_actor_handle(env.envs[0], 0)
+ body_handle = env.gym.get_actor_rigid_body_handle(env.envs[0], actor_handle, 0)
+ logger.info("body_handle: %s", body_handle)
+ logger.info("actor_handle: %s", actor_handle)
+ env.gym.attach_camera_to_body(
+ h1, env.envs[0], body_handle, gymapi.Transform(camera_offset, camera_rotation), gymapi.FOLLOW_POSITION
+ )
+
+ fourcc = cv2.VideoWriter_fourcc(*"MJPG") # type: ignore[attr-defined]
+
+ # Creates a directory to store videos.
+ video_dir = run_dir() / "videos"
+ experiment_dir = video_dir / train_cfg.runner.experiment_name
+ experiment_dir.mkdir(parents=True, exist_ok=True)
+
+ dir = os.path.join(experiment_dir, now + str(args.run_name) + ".mp4")
+ if not os.path.exists(video_dir):
+ os.mkdir(video_dir)
+ if not os.path.exists(experiment_dir):
+ os.mkdir(experiment_dir)
+ video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080))
+
+ for t in tqdm(range(stop_state_log)):
+ actions = policy(obs.detach())
+ if args.log_h5:
+ dset_actions[t] = actions.detach().numpy()
+ if args.fix_command:
+ env.commands[:, 0] = 0.5
+ env.commands[:, 1] = 0.0
+ env.commands[:, 2] = 0.0
+ env.commands[:, 3] = 0.0
+ obs, critic_obs, rews, dones, infos = env.step(actions.detach())
+ print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}")
+
+ if args.render:
+ env.gym.fetch_results(env.sim, True)
+ env.gym.step_graphics(env.sim)
+ env.gym.render_all_camera_sensors(env.sim)
+ img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR)
+ img = np.reshape(img, (1080, 1920, 4))
+ img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
+
+ video.write(img[..., :3])
+
+ # Log states
+ dof_pos_target = actions[robot_index, joint_index].item() * env.cfg.control.action_scale
+ dof_pos = env.dof_pos[robot_index, joint_index].item()
+ dof_vel = env.dof_vel[robot_index, joint_index].item()
+ dof_torque = env.torques[robot_index, joint_index].item()
+ command_x = env.commands[robot_index, 0].item()
+ command_y = env.commands[robot_index, 1].item()
+ command_yaw = env.commands[robot_index, 2].item()
+ base_vel_x = env.base_lin_vel[robot_index, 0].item()
+ base_vel_y = env.base_lin_vel[robot_index, 1].item()
+ base_vel_z = env.base_lin_vel[robot_index, 2].item()
+ base_vel_yaw = env.base_ang_vel[robot_index, 2].item()
+ contact_forces_z = env.contact_forces[robot_index, env.feet_indices, 2].cpu().numpy()
+
+ if args.log_h5:
+ for i in range(buf_len):
+ cur_obs = env.obs_history[i].tolist()[0]
+ dset_2D_command[t, i] = cur_obs[0:2] # sin and cos commands
+ dset_3D_command[t, i] = cur_obs[2:5] # x, y, yaw commands
+ dset_q[t, i] = cur_obs[5 : 5 + num_dof] # joint positions
+ dset_dq[t, i] = cur_obs[5 + num_dof : 5 + 2 * num_dof] # joint velocities
+ dset_obs_actions[t, i] = cur_obs[5 + 2 * num_dof : 5 + 3 * num_dof] # actions
+ dset_ang_vel[t, i] = cur_obs[5 + 3 * num_dof : 8 + 3 * num_dof] # root angular velocity
+ dset_euler[t, i] = cur_obs[8 + 3 * num_dof : 11 + 3 * num_dof] # root orientation
+
+ env_logger.log_states(
+ {
+ "dof_pos_target": dof_pos_target,
+ "dof_pos": dof_pos,
+ "dof_vel": dof_vel,
+ "dof_torque": dof_torque,
+ "command_x": command_x,
+ "command_y": command_y,
+ "command_yaw": command_yaw,
+ "base_vel_x": base_vel_x,
+ "base_vel_y": base_vel_y,
+ "base_vel_z": base_vel_z,
+ "base_vel_yaw": base_vel_yaw,
+ "contact_forces_z": contact_forces_z,
+ }
+ )
+ if infos["episode"]:
+ num_episodes = env.reset_buf.sum().item()
+ if num_episodes > 0:
+ env_logger.log_rewards(infos["episode"], num_episodes)
+
+ env_logger.print_rewards()
+ env_logger.plot_states()
+
+ if args.render:
+ video.release()
+
+ if args.log_h5:
+ print("Saving data to " + os.path.abspath(f"data{now}.h5"))
+ h5_file.close()
+
+
+if __name__ == "__main__":
+ base_args = get_args()
+ parser = argparse.ArgumentParser(description="Extend base arguments with log_h5")
+ parser.add_argument("--log_h5", action="store_true", help="Enable HDF5 logging")
+ parser.add_argument("--render", action="store_true", help="Enable rendering", default=True)
+ parser.add_argument("--fix_command", action="store_true", help="Fix command", default=True)
+ parser.add_argument("--export_onnx", action="store_true", help="Export policy as ONNX", default=True)
+ parser.add_argument("--export_policy", action="store_true", help="Export policy as JIT", default=True)
+ args, unknown = parser.parse_known_args(namespace=base_args)
+
+ play(args)
diff --git a/sim/requirements.txt b/sim/requirements.txt
index 8a025bea..ff8a57a2 100755
--- a/sim/requirements.txt
+++ b/sim/requirements.txt
@@ -10,6 +10,7 @@ numpy<2.0.0
wandb
tensorboard==2.14.0
onnxscript
+mujoco==2.3.6
# onnxruntime
kinfer
diff --git a/sim/resources/gpr/README.md b/sim/resources/gpr/README.md
new file mode 100644
index 00000000..58ff4a70
--- /dev/null
+++ b/sim/resources/gpr/README.md
@@ -0,0 +1,13 @@
+# URDF/XML development:
+1. kol run https://cad.onshape.com/documents/bc3a557e2f92bdcea4099f0d/w/09713ac603d461fc1dff6b5d/e/5a4acdbffe6f8ed7c4e34970 --config-path config_example.json # noqa: E501
+2. python sim/scripts/create_fixed_torso.py
+3. Rotate the first link
+4. Fix the left leg axes to align with the expected directions
+5. urdf2mjcf robot.urdf -o robot.xml
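+
+A minimal end-to-end pass might look like the following (a sketch: it assumes the `kol` and `urdf2mjcf` CLIs are installed and uses the Onshape document linked in step 1):
+
+```bash
+kol run <onshape-document-url> --config-path config_example.json
+python sim/scripts/create_fixed_torso.py --model_path sim/resources/gpr --embodiment gpr
+urdf2mjcf robot.urdf -o robot.xml
+```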
+
\ No newline at end of file
diff --git a/sim/resources/stompymini/__init__.py b/sim/resources/gpr/__init__.py
similarity index 100%
rename from sim/resources/stompymini/__init__.py
rename to sim/resources/gpr/__init__.py
diff --git a/sim/resources/stompypro/joints.py b/sim/resources/gpr/joints.py
similarity index 75%
rename from sim/resources/stompypro/joints.py
rename to sim/resources/gpr/joints.py
index 27c6c2c6..d1f966eb 100755
--- a/sim/resources/stompypro/joints.py
+++ b/sim/resources/gpr/joints.py
@@ -1,9 +1,4 @@
-"""Defines a more Pythonic interface for specifying the joint names.
-
-The best way to re-generate this snippet for a new robot is to use the
-`sim/scripts/print_joints.py` script. This script will print out a hierarchical
-tree of the various joint names in the robot.
-"""
+"""Defines a more Pythonic interface for specifying the joint names."""
import textwrap
from abc import ABC
@@ -96,8 +91,23 @@ class Robot(Node):
legs = Legs()
# arms = Arms()
- height = 0.63
- rotation = [0.0, 0.0, 0, 1]
+ height = 1.06
+ rotation = [0, 0, 0, 1]
+
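+    # Per-joint sign flips for mapping Isaac Gym joint directions onto the real robot's conventions.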
+ @classmethod
+ def isaac_to_real_signs(cls) -> Dict[str, int]:
+ return {
+ Robot.legs.left.hip_pitch: 1,
+ Robot.legs.left.hip_yaw: 1,
+ Robot.legs.left.hip_roll: 1,
+ Robot.legs.left.knee_pitch: 1,
+ Robot.legs.left.ankle_pitch: 1,
+ Robot.legs.right.hip_pitch: -1,
+ Robot.legs.right.hip_yaw: -1,
+ Robot.legs.right.hip_roll: 1,
+ Robot.legs.right.knee_pitch: -1,
+ Robot.legs.right.ankle_pitch: 1,
+ }
@classmethod
def isaac_to_mujoco_signs(cls) -> Dict[str, int]:
@@ -118,37 +128,39 @@ def isaac_to_mujoco_signs(cls) -> Dict[str, int]:
def default_positions(cls) -> Dict[str, float]:
return {}
- """This should be ordered according to how the policy is trained. E.g. the first
- entry should be the angle of the first joint in the policy."""
-
+ # CONTRACT - this should be ordered according to how the policy is trained.
+ # E.g. the first entry should be the angle of the first joint in the policy.
@classmethod
def default_standing(cls) -> Dict[str, float]:
return {
Robot.legs.left.hip_pitch: -0.23,
Robot.legs.left.hip_yaw: 0.0,
Robot.legs.left.hip_roll: 0.0,
- Robot.legs.left.knee_pitch: 0.441,
- Robot.legs.left.ankle_pitch: -0.258,
- Robot.legs.right.hip_pitch: -0.23,
+ Robot.legs.left.knee_pitch: -0.441,
+ Robot.legs.left.ankle_pitch: -0.195,
+ Robot.legs.right.hip_pitch: 0.23,
Robot.legs.right.hip_yaw: 0.0,
Robot.legs.right.hip_roll: 0.0,
Robot.legs.right.knee_pitch: 0.441,
- Robot.legs.right.ankle_pitch: -0.258,
+ Robot.legs.right.ankle_pitch: 0.195,
}
@classmethod
def default_limits(cls) -> Dict[str, Dict[str, float]]:
- return {}
+ return {
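+            # Knee limits are mirrored between legs, consistent with the sign flip in default_standing.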
+ Robot.legs.left.knee_pitch: {"lower": -1.57, "upper": 0},
+ Robot.legs.right.knee_pitch: {"lower": 0, "upper": 1.57}
+ }
# p_gains
@classmethod
def stiffness(cls) -> Dict[str, float]:
return {
- "hip_y": 300,
- "hip_x": 200,
- "hip_z": 200,
- "knee": 300,
- "ankle_y": 300,
+ "hip_y": 120,
+ "hip_x": 60,
+ "hip_z": 60,
+ "knee": 120,
+ "ankle_y": 17,
}
# d_gains
@@ -156,23 +168,22 @@ def stiffness(cls) -> Dict[str, float]:
def damping(cls) -> Dict[str, float]:
return {
"hip_y": 10,
- "hip_x": 10,
- "hip_z": 10,
+ "hip_x": 5,
+ "hip_z": 5,
"knee": 10,
- "ankle_y": 10,
+ "ankle_y": 5,
}
# # pos_limits
@classmethod
def effort(cls) -> Dict[str, float]:
return {
- "hip_y": 250,
+ "hip_y": 120,
"hip_x": 100,
"hip_z": 100,
- "knee": 250,
- "ankle_y": 17,
+ "knee": 120,
+ "ankle_y": 50,
}
-
# # vel_limits
@classmethod
def velocity(cls) -> Dict[str, float]:
@@ -196,4 +207,4 @@ def print_joints() -> None:
if __name__ == "__main__":
# python -m sim.Robot.joints
- print_joints()
+ print_joints()
\ No newline at end of file
diff --git a/sim/resources/gpr/meshes/arm1_top.stl b/sim/resources/gpr/meshes/arm1_top.stl
new file mode 100644
index 00000000..d3937233
Binary files /dev/null and b/sim/resources/gpr/meshes/arm1_top.stl differ
diff --git a/sim/resources/gpr/meshes/arm1_top_2.stl b/sim/resources/gpr/meshes/arm1_top_2.stl
new file mode 100644
index 00000000..20808d7c
Binary files /dev/null and b/sim/resources/gpr/meshes/arm1_top_2.stl differ
diff --git a/sim/resources/gpr/meshes/arm2_shell.stl b/sim/resources/gpr/meshes/arm2_shell.stl
new file mode 100644
index 00000000..d095688d
Binary files /dev/null and b/sim/resources/gpr/meshes/arm2_shell.stl differ
diff --git a/sim/resources/gpr/meshes/arm2_shell_2.stl b/sim/resources/gpr/meshes/arm2_shell_2.stl
new file mode 100644
index 00000000..98302751
Binary files /dev/null and b/sim/resources/gpr/meshes/arm2_shell_2.stl differ
diff --git a/sim/resources/gpr/meshes/arm3_shell.stl b/sim/resources/gpr/meshes/arm3_shell.stl
new file mode 100644
index 00000000..dd787c59
Binary files /dev/null and b/sim/resources/gpr/meshes/arm3_shell.stl differ
diff --git a/sim/resources/gpr/meshes/arm3_shell2.stl b/sim/resources/gpr/meshes/arm3_shell2.stl
new file mode 100644
index 00000000..9690a4e5
Binary files /dev/null and b/sim/resources/gpr/meshes/arm3_shell2.stl differ
diff --git a/sim/resources/gpr/meshes/body1-part.stl b/sim/resources/gpr/meshes/body1-part.stl
new file mode 100644
index 00000000..83dc0912
Binary files /dev/null and b/sim/resources/gpr/meshes/body1-part.stl differ
diff --git a/sim/resources/gpr/meshes/foot1.stl b/sim/resources/gpr/meshes/foot1.stl
new file mode 100644
index 00000000..93874859
Binary files /dev/null and b/sim/resources/gpr/meshes/foot1.stl differ
diff --git a/sim/resources/gpr/meshes/foot3.stl b/sim/resources/gpr/meshes/foot3.stl
new file mode 100644
index 00000000..841369df
Binary files /dev/null and b/sim/resources/gpr/meshes/foot3.stl differ
diff --git a/sim/resources/gpr/meshes/hand_shell.stl b/sim/resources/gpr/meshes/hand_shell.stl
new file mode 100644
index 00000000..4aff47dc
Binary files /dev/null and b/sim/resources/gpr/meshes/hand_shell.stl differ
diff --git a/sim/resources/gpr/meshes/hand_shell_2.stl b/sim/resources/gpr/meshes/hand_shell_2.stl
new file mode 100644
index 00000000..4aff47dc
Binary files /dev/null and b/sim/resources/gpr/meshes/hand_shell_2.stl differ
diff --git a/sim/resources/gpr/meshes/leg0_shell.stl b/sim/resources/gpr/meshes/leg0_shell.stl
new file mode 100644
index 00000000..c2a3806e
Binary files /dev/null and b/sim/resources/gpr/meshes/leg0_shell.stl differ
diff --git a/sim/resources/gpr/meshes/leg0_shell_2.stl b/sim/resources/gpr/meshes/leg0_shell_2.stl
new file mode 100644
index 00000000..c2a3806e
Binary files /dev/null and b/sim/resources/gpr/meshes/leg0_shell_2.stl differ
diff --git a/sim/resources/gpr/meshes/leg1_shell.stl b/sim/resources/gpr/meshes/leg1_shell.stl
new file mode 100644
index 00000000..b3b29475
Binary files /dev/null and b/sim/resources/gpr/meshes/leg1_shell.stl differ
diff --git a/sim/resources/gpr/meshes/leg1_shell3.stl b/sim/resources/gpr/meshes/leg1_shell3.stl
new file mode 100644
index 00000000..249d8151
Binary files /dev/null and b/sim/resources/gpr/meshes/leg1_shell3.stl differ
diff --git a/sim/resources/gpr/meshes/leg2_shell.stl b/sim/resources/gpr/meshes/leg2_shell.stl
new file mode 100644
index 00000000..a9e9d2c6
Binary files /dev/null and b/sim/resources/gpr/meshes/leg2_shell.stl differ
diff --git a/sim/resources/gpr/meshes/leg2_shell_2.stl b/sim/resources/gpr/meshes/leg2_shell_2.stl
new file mode 100644
index 00000000..a9e9d2c6
Binary files /dev/null and b/sim/resources/gpr/meshes/leg2_shell_2.stl differ
diff --git a/sim/resources/gpr/meshes/leg3_shell2.stl b/sim/resources/gpr/meshes/leg3_shell2.stl
new file mode 100644
index 00000000..6c8c68aa
Binary files /dev/null and b/sim/resources/gpr/meshes/leg3_shell2.stl differ
diff --git a/sim/resources/gpr/meshes/leg3_shell22.stl b/sim/resources/gpr/meshes/leg3_shell22.stl
new file mode 100644
index 00000000..952779e8
Binary files /dev/null and b/sim/resources/gpr/meshes/leg3_shell22.stl differ
diff --git a/sim/resources/gpr/meshes/shoulder.stl b/sim/resources/gpr/meshes/shoulder.stl
new file mode 100644
index 00000000..90bc3924
Binary files /dev/null and b/sim/resources/gpr/meshes/shoulder.stl differ
diff --git a/sim/resources/gpr/meshes/shoulder_2.stl b/sim/resources/gpr/meshes/shoulder_2.stl
new file mode 100644
index 00000000..59c1316a
Binary files /dev/null and b/sim/resources/gpr/meshes/shoulder_2.stl differ
diff --git a/sim/resources/gpr/robot.urdf b/sim/resources/gpr/robot.urdf
new file mode 100644
index 00000000..46370774
--- /dev/null
+++ b/sim/resources/gpr/robot.urdf
@@ -0,0 +1,616 @@
diff --git a/sim/resources/gpr/robot_fixed.urdf b/sim/resources/gpr/robot_fixed.urdf
new file mode 100644
index 00000000..30b11861
--- /dev/null
+++ b/sim/resources/gpr/robot_fixed.urdf
@@ -0,0 +1,616 @@
diff --git a/sim/resources/gpr/robot_fixed.xml b/sim/resources/gpr/robot_fixed.xml
new file mode 100644
index 00000000..57330943
--- /dev/null
+++ b/sim/resources/gpr/robot_fixed.xml
@@ -0,0 +1,193 @@
diff --git a/sim/resources/stompymini/joints.py b/sim/resources/stompymini/joints.py
deleted file mode 100644
index 9d3a22b6..00000000
--- a/sim/resources/stompymini/joints.py
+++ /dev/null
@@ -1,426 +0,0 @@
-"""Defines a more Pythonic interface for specifying the joint names.
-
-The best way to re-generate this snippet for a new robot is to use the
-`sim/scripts/print_joints.py` script. This script will print out a hierarchical
-tree of the various joint names in the robot.
-"""
-
-import textwrap
-from abc import ABC
-from typing import Dict, List, Tuple, Union
-
-
-class Node(ABC):
- @classmethod
- def children(cls) -> List["Union[Node, str]"]:
- return [
- attr
- for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__"))
- if isinstance(attr, (Node, str))
- ]
-
- @classmethod
- def joints(cls) -> List[str]:
- return [
- attr
- for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__"))
- if isinstance(attr, str)
- ]
-
- @classmethod
- def joints_motors(cls) -> List[Tuple[str, str]]:
- joint_names: List[Tuple[str, str]] = []
- for attr in dir(cls):
- if not attr.startswith("__"):
- attr2 = getattr(cls, attr)
- if isinstance(attr2, str):
- joint_names.append((attr, attr2))
-
- return joint_names
-
- @classmethod
- def all_joints(cls) -> List[str]:
- leaves = cls.joints()
- for child in cls.children():
- if isinstance(child, Node):
- leaves.extend(child.all_joints())
- return leaves
-
- def __str__(self) -> str:
- parts = [str(child) for child in self.children()]
- parts_str = textwrap.indent("\n" + "\n".join(parts), " ")
- return f"[{self.__class__.__name__}]{parts_str}"
-
-
-class LeftHand(Node):
- wrist_roll = "left hand roll"
- gripper = "left hand gripper"
-
-
-class LeftArm(Node):
- shoulder_yaw = "left shoulder yaw"
- shoulder_pitch = "left shoulder pitch"
- shoulder_roll = "left shoulder roll"
- elbow_pitch = "left elbow pitch"
- hand = LeftHand()
-
-
-class RightHand(Node):
- wrist_roll = "right hand roll"
- gripper = "right hand gripper"
-
-
-class RightArm(Node):
- shoulder_yaw = "right shoulder yaw"
- shoulder_pitch = "right shoulder pitch"
- shoulder_roll = "right shoulder roll"
- elbow_pitch = "right elbow pitch"
- hand = RightHand()
-
-
-class LeftLeg(Node):
- hip_roll = "left hip roll"
- hip_yaw = "left hip yaw"
- hip_pitch = "left hip pitch"
- knee_pitch = "left knee pitch"
- ankle_pitch = "left ankle pitch"
-
-
-class RightLeg(Node):
- hip_roll = "right hip roll"
- hip_yaw = "right hip yaw"
- hip_pitch = "right hip pitch"
- knee_pitch = "right knee pitch"
- ankle_pitch = "right ankle pitch"
-
-
-class Legs(Node):
- left = LeftLeg()
- right = RightLeg()
-
-
-class Robot(Node):
- height = 0.92
- rotation = [0.5000, -0.4996, -0.5000, 0.5004]
- collision_links = [
- "lower_half_assembly_1_left_leg_1_foot_pad_1_simple",
- "lower_half_assembly_1_right_leg_1_foot_pad_1_simple",
- ]
-
- left_arm = LeftArm()
- right_arm = RightArm()
- legs = Legs()
-
- @classmethod
- def default_standing(cls) -> Dict[str, float]:
- return {
- # arms
- Robot.left_arm.shoulder_pitch: 2.25,
- Robot.left_arm.shoulder_yaw: 1.57,
- Robot.left_arm.shoulder_roll: 3.14,
- Robot.left_arm.elbow_pitch: -1.61,
- Robot.left_arm.hand.wrist_roll: -1.56,
- Robot.left_arm.hand.gripper: -0.1,
- Robot.right_arm.shoulder_pitch: 3.45,
- Robot.right_arm.shoulder_yaw: -0.1,
- Robot.right_arm.shoulder_roll: -1.61,
- Robot.right_arm.elbow_pitch: -1.49,
- Robot.right_arm.hand.wrist_roll: 0,
- Robot.right_arm.hand.gripper: 0.1,
- # legs
- Robot.legs.left.hip_pitch: 0.33,
- Robot.legs.left.hip_roll: -1.52,
- Robot.legs.left.hip_yaw: 4.67,
- Robot.legs.left.knee_pitch: -0.61,
- Robot.legs.left.ankle_pitch: 1.88,
- Robot.legs.right.hip_pitch: 2.91,
- Robot.legs.right.hip_roll: 3.24,
- Robot.legs.right.hip_yaw: 3.22,
- Robot.legs.right.knee_pitch: 0.65,
- Robot.legs.right.ankle_pitch: -0.54,
- }
-
- @classmethod
- def default_limits2(cls) -> Dict[str, Dict[str, float]]:
- return {
- # left arm
- Robot.left_arm.shoulder_pitch: {
- "lower": 2.54,
- "upper": 2.56,
- },
- Robot.left_arm.shoulder_yaw: {
- "lower": 1.56,
- "upper": 1.58,
- },
- Robot.left_arm.shoulder_roll: {
- "lower": 3.13,
- "upper": 3.14,
- },
- Robot.left_arm.elbow_pitch: {
- "lower": -1.56,
- "upper": -1.58,
- },
- Robot.left_arm.hand.wrist_roll: {
- "lower": -1.56,
- "upper": -1.58,
- },
- Robot.left_arm.hand.gripper: {
- "lower": 0,
- "upper": 1.57,
- },
- # right arm
- Robot.right_arm.shoulder_pitch: {
- "lower": 3.119,
- "upper": 3.121,
- },
- Robot.right_arm.shoulder_yaw: {
- "lower": 1.981,
- "upper": 1.979,
- },
- Robot.right_arm.shoulder_roll: {
- "lower": -1.381,
- "upper": -1.979,
- },
- Robot.right_arm.elbow_pitch: {
- "lower": -3.319,
- "upper": 3.321,
- },
- Robot.right_arm.hand.wrist_roll: {
- "lower": -0.001,
- "upper": 0.001,
- },
- Robot.right_arm.hand.gripper: {
- "lower": 0,
- "upper": 1.57,
- },
- # left leg
- Robot.legs.left.hip_pitch: {
- "lower": -1.14,
- "upper": 1.14,
- },
- Robot.legs.left.hip_roll: {
- "lower": -3.5,
- "upper": 0.5,
- },
- Robot.legs.left.hip_yaw: {
- "lower": 3.14,
- "upper": 5.14,
- },
- Robot.legs.left.knee_pitch: {
- "lower": -2,
- "upper": 0,
- },
- Robot.legs.left.ankle_pitch: {
- "lower": 1.4,
- "upper": 2.2,
- },
- # right leg
- Robot.legs.right.hip_pitch: {
- "lower": 0.55,
- "upper": 3.55,
- },
- Robot.legs.right.hip_roll: {
- "lower": 2.75,
- "upper": 3.99,
- },
- Robot.legs.right.hip_yaw: {
- "lower": 2.24,
- "upper": 4.24,
- },
- Robot.legs.right.knee_pitch: {
- "lower": 0,
- "upper": 2,
- },
- Robot.legs.right.ankle_pitch: {
- "lower": -1.0,
- "upper": 0.2,
- },
- }
-
- @classmethod
- def default_limits(cls) -> Dict[str, Dict[str, float]]:
- return {
- # left arm
- Robot.left_arm.shoulder_pitch: {
- "lower": 2.04,
- "upper": 3.06,
- },
- Robot.left_arm.shoulder_yaw: {
- "lower": -1,
- "upper": 2,
- },
- Robot.left_arm.shoulder_roll: {
- "lower": 2.63,
- "upper": 3.64,
- },
- Robot.left_arm.elbow_pitch: {
- "lower": -2.06,
- "upper": -1.08,
- },
- Robot.left_arm.hand.wrist_roll: {
- "lower": -2.06,
- "upper": -1.08,
- },
- Robot.left_arm.hand.gripper: {
- "lower": -0.5,
- "upper": 2.07,
- },
- # right arm
- Robot.right_arm.shoulder_pitch: {
- "lower": 2.619,
- "upper": 3.621,
- },
- Robot.right_arm.shoulder_yaw: {
- "lower": -1.481,
- "upper": 1,
- },
- Robot.right_arm.shoulder_roll: {
- "lower": -1.881,
- "upper": -1.479,
- },
- Robot.right_arm.elbow_pitch: {
- "lower": -3.819,
- "upper": 3.821,
- },
- Robot.right_arm.hand.wrist_roll: {
- "lower": -0.501,
- "upper": 0.501,
- },
- Robot.right_arm.hand.gripper: {
- "lower": -0.5,
- "upper": 2.07,
- },
- # left leg
- Robot.legs.left.hip_pitch: {
- "lower": -1.64,
- "upper": 1.64,
- },
- Robot.legs.left.hip_roll: {
- "lower": -4.0,
- "upper": 1.0,
- },
- Robot.legs.left.hip_yaw: {
- "lower": 2.64,
- "upper": 5.64,
- },
- Robot.legs.left.knee_pitch: {
- "lower": -2.5,
- "upper": 0.5,
- },
- Robot.legs.left.ankle_pitch: {
- "lower": 0.9,
- "upper": 2.7,
- },
- # right leg
- Robot.legs.right.hip_pitch: {
- "lower": 0.05,
- "upper": 4.05,
- },
- Robot.legs.right.hip_roll: {
- "lower": 2.25,
- "upper": 4.49,
- },
- Robot.legs.right.hip_yaw: {
- "lower": 1.74,
- "upper": 4.74,
- },
- Robot.legs.right.knee_pitch: {
- "lower": -0.5,
- "upper": 2.5,
- },
- Robot.legs.right.ankle_pitch: {
- "lower": -1.5,
- "upper": 0.7,
- },
- }
-
- # p_gains
- @classmethod
- def stiffness(cls) -> Dict[str, float]:
- return {
- "hip pitch": 250,
- "hip yaw": 250,
- "hip roll": 150,
- "knee pitch": 250,
- "ankle pitch": 150,
- "shoulder pitch": 150,
- "shoulder yaw": 45,
- "shoulder roll": 45,
- "elbow pitch": 45,
- "hand roll": 45,
- "gripper": 45,
- }
-
- # d_gains
- @classmethod
- def damping(cls) -> Dict[str, float]:
- return {
- "hip pitch": 10,
- "hip yaw": 10,
- "hip roll": 10,
- "knee pitch": 10,
- "ankle pitch": 10,
- "shoulder pitch": 10,
- "shoulder yaw": 10,
- "shoulder roll": 5,
- "elbow pitch": 5,
- "hand roll": 5,
- "gripper": 5,
- }
-
- # pos_limits
- @classmethod
- def effort(cls) -> Dict[str, float]:
- return {
- "hip pitch": 120,
- "hip yaw": 120,
- "hip roll": 17,
- "knee pitch": 120,
- "ankle pitch": 17,
- "shoulder pitch": 120,
- "shoulder yaw": 17,
- "shoulder roll": 17,
- "elbow pitch": 17,
- "hand roll": 17,
- "gripper": 17,
- }
-
- # vel_limits
- @classmethod
- def velocity(cls) -> Dict[str, float]:
- return {
- "hip pitch": 40,
- "hip yaw": 40,
- "hip roll": 40,
- "knee pitch": 40,
- "ankle pitch": 40,
- "shoulder pitch": 40,
- "shoulder yaw": 40,
- "shoulder roll": 40,
- "elbow pitch": 40,
- "hand roll": 40,
- "gripper": 40,
- }
-
- @classmethod
- def friction(cls) -> Dict[str, float]:
- return {
- "hip pitch": 0.0,
- "hip yaw": 0.0,
- "hip roll": 0.0,
- "knee pitch": 0.0,
- "ankle pitch": 0.0,
- "hand roll": 0.0,
- "gripper": 0.0,
- }
-
-
-def print_joints() -> None:
- joints = Robot.all_joints()
- assert len(joints) == len(set(joints)), "Duplicate joint names found!"
- print(Robot())
-
-
-if __name__ == "__main__":
- # python -m sim.Robot.joints
- print_joints()
diff --git a/sim/resources/stompymini/robot.urdf b/sim/resources/stompymini/robot.urdf
deleted file mode 100644
index d5c4059c..00000000
--- a/sim/resources/stompymini/robot.urdf
+++ /dev/null
@@ -1,663 +0,0 @@
diff --git a/sim/resources/stompymini/robot_fixed.urdf b/sim/resources/stompymini/robot_fixed.urdf
deleted file mode 100644
index ebf74644..00000000
--- a/sim/resources/stompymini/robot_fixed.urdf
+++ /dev/null
@@ -1,662 +0,0 @@
diff --git a/sim/resources/stompypro/__init__.py b/sim/resources/stompypro/__init__.py
deleted file mode 100755
index e69de29b..00000000
diff --git a/sim/resources/stompypro/robot.urdf b/sim/resources/stompypro/robot.urdf
deleted file mode 100644
index 2e1c1a73..00000000
--- a/sim/resources/stompypro/robot.urdf
+++ /dev/null
@@ -1,499 +0,0 @@
diff --git a/sim/resources/stompypro/robot_calibration.xml b/sim/resources/stompypro/robot_calibration.xml
deleted file mode 100644
index 7cb3c9f6..00000000
--- a/sim/resources/stompypro/robot_calibration.xml
+++ /dev/null
@@ -1,191 +0,0 @@
diff --git a/sim/resources/stompypro/robot_fixed.urdf b/sim/resources/stompypro/robot_fixed.urdf
deleted file mode 100644
index a8b1f04c..00000000
--- a/sim/resources/stompypro/robot_fixed.urdf
+++ /dev/null
@@ -1,522 +0,0 @@
diff --git a/sim/resources/stompypro/robot_fixed.xml b/sim/resources/stompypro/robot_fixed.xml
deleted file mode 100644
index 63345107..00000000
--- a/sim/resources/stompypro/robot_fixed.xml
+++ /dev/null
@@ -1,191 +0,0 @@
diff --git a/sim/scripts/create_fixed_torso.py b/sim/scripts/create_fixed_torso.py
index 7002eb9b..0a1cf123 100755
--- a/sim/scripts/create_fixed_torso.py
+++ b/sim/scripts/create_fixed_torso.py
@@ -52,8 +52,8 @@ def update_urdf(model_path: str, embodiment: str) -> None:
def main() -> None:
parser = argparse.ArgumentParser(description="Update URDF file to fix robot joints.")
- parser.add_argument("--model_path", type=str, help="Path to the model directory", default="sim/resources/stompypro")
- parser.add_argument("--embodiment", type=str, help="Embodiment to use", default="stompypro")
+ parser.add_argument("--model_path", type=str, help="Path to the model directory", default="sim/resources/gpr")
+ parser.add_argument("--embodiment", type=str, help="Embodiment to use", default="gpr")
args = parser.parse_args()
update_urdf(args.model_path, args.embodiment)
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index 59955505..dfc19bd4 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -1,8 +1,7 @@
-"""
-Difference setup
-python sim/play.py --task mini_ppo --sim_device cpu
-python sim/sim2sim.py --load_model examples/standing_pro.pt --embodiment stompypro
-python sim/sim2sim.py --load_model examples/standing_micro.pt --embodiment stompymicro
+"""Sim2sim deployment test.
+
+Run:
+ python sim/sim2sim.py --load_model examples/walking_policy.onnx --embodiment gpr
"""
import argparse
import math
@@ -32,7 +31,7 @@ class Sim2simCfg:
dt: float = 0.001
decimation: int = 10
tau_factor: float = 3
- cycle_time: float = 0.4
+ cycle_time: float = 0.25
@@ -316,8 +315,8 @@ def run_mujoco(
x_vel_cmd, y_vel_cmd, yaw_vel_cmd = np.random.uniform(0.1, 0.5), 0.0, 0.0
policy_cfg = ActorCfg(embodiment=args.embodiment)
- if args.embodiment == "stompypro":
- policy_cfg.cycle_time = 0.4
+ if args.embodiment == "gpr":
+ policy_cfg.cycle_time = 0.25
cfg = Sim2simCfg(
sim_duration=10.0,
dt=0.001,
@@ -325,7 +324,7 @@ def run_mujoco(
tau_factor=4.0,
cycle_time=policy_cfg.cycle_time,
)
- elif args.embodiment == "stompymicro":
+ elif args.embodiment == "zeroth":
policy_cfg.cycle_time = 0.2
cfg = Sim2simCfg(
sim_duration=10.0,
@@ -361,7 +360,6 @@ def run_mujoco(
"robot_damping": metadata["robot_damping"],
}
-
run_mujoco(
args.embodiment,
policy,
@@ -371,4 +369,4 @@ def run_mujoco(
args.log_h5,
args.render,
args.h5_out_dir,
- )
+ )
\ No newline at end of file
diff --git a/sim/sim2sim_old.py b/sim/sim2sim_old.py
new file mode 100755
index 00000000..fd761aa8
--- /dev/null
+++ b/sim/sim2sim_old.py
@@ -0,0 +1,354 @@
+"""
+python sim/sim2sim_old.py --load_model examples/standing_pro.pt --embodiment gpr
+python sim/sim2sim_old.py --load_model examples/standing_micro.pt --embodiment zeroth
+"""
+import argparse
+import math
+import os
+from copy import deepcopy
+from dataclasses import dataclass
+from typing import Dict, List, Tuple, Union
+
+import mujoco
+import mujoco_viewer
+import numpy as np
+import onnxruntime as ort
+import pygame
+from scipy.spatial.transform import Rotation as R
+from tqdm import tqdm
+
+from sim.h5_logger import HDF5Logger  # required when --log_h5 is passed
+from sim.model_export import ActorCfg, convert_model_to_onnx
+
+
+@dataclass
+class Sim2simCfg:
+ sim_duration: float = 60.0
+ dt: float = 0.001
+ decimation: int = 10
+ tau_factor: float = 3
+ cycle_time: float = 0.4
+
+
+def handle_keyboard_input() -> None:
+ global x_vel_cmd, y_vel_cmd, yaw_vel_cmd
+
+ keys = pygame.key.get_pressed()
+
+ # Update movement commands based on arrow keys
+ if keys[pygame.K_UP]:
+ x_vel_cmd += 0.0005
+ if keys[pygame.K_DOWN]:
+ x_vel_cmd -= 0.0005
+ if keys[pygame.K_LEFT]:
+ y_vel_cmd += 0.0005
+ if keys[pygame.K_RIGHT]:
+ y_vel_cmd -= 0.0005
+
+ # Yaw control
+ if keys[pygame.K_a]:
+ yaw_vel_cmd += 0.001
+ if keys[pygame.K_z]:
+ yaw_vel_cmd -= 0.001
+
+
+def quaternion_to_euler_array(quat: np.ndarray) -> np.ndarray:
+ # Ensure quaternion is in the correct format [x, y, z, w]
+ x, y, z, w = quat
+
+ # Roll (x-axis rotation)
+ t0 = +2.0 * (w * x + y * z)
+ t1 = +1.0 - 2.0 * (x * x + y * y)
+ roll_x = np.arctan2(t0, t1)
+
+ # Pitch (y-axis rotation)
+ t2 = +2.0 * (w * y - z * x)
+ t2 = np.clip(t2, -1.0, 1.0)
+ pitch_y = np.arcsin(t2)
+
+ # Yaw (z-axis rotation)
+ t3 = +2.0 * (w * z + x * y)
+ t4 = +1.0 - 2.0 * (y * y + z * z)
+ yaw_z = np.arctan2(t3, t4)
+
+ # Returns roll, pitch, yaw in a NumPy array in radians
+ return np.array([roll_x, pitch_y, yaw_z])
+
+
+def get_obs(data: mujoco.MjData) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+ """Extracts an observation from the mujoco data structure"""
+ q = data.qpos.astype(np.double)
+ dq = data.qvel.astype(np.double)
+ quat = data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double)
+ r = R.from_quat(quat)
+ v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame
+ omega = data.sensor("angular-velocity").data.astype(np.double)
+ gvec = r.apply(np.array([0.0, 0.0, -1.0]), inverse=True).astype(np.double)
+ return (q, dq, quat, v, omega, gvec)
+
+
+def pd_control(
+ target_q: np.ndarray,
+ q: np.ndarray,
+ kp: np.ndarray,
+ dq: np.ndarray,
+ kd: np.ndarray,
+ default: np.ndarray,
+) -> np.ndarray:
+ """Calculates torques from position commands"""
+ return kp * (target_q + default - q) - kd * dq
+
+
+def run_mujoco(
+ embodiment: str,
+ policy: ort.InferenceSession,
+ cfg: Sim2simCfg,
+ model_info: Dict[str, Union[float, List[float], str]],
+ keyboard_use: bool = False,
+ log_h5: bool = False,
+ render: bool = True,
+ h5_out_dir: str = "sim/resources",
+) -> None:
+ """
+ Run the Mujoco simulation using the provided policy and configuration.
+
+ Args:
+ policy: The policy used for controlling the simulation.
+ cfg: The configuration object containing simulation settings.
+
+ Returns:
+ None
+ """
+ model_dir = os.environ.get("MODEL_DIR", "sim/resources")
+ mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed.xml"
+
+ model = mujoco.MjModel.from_xml_path(mujoco_model_path)
+ model.opt.timestep = cfg.dt
+ data = mujoco.MjData(model)
+
+ assert isinstance(model_info["num_actions"], int)
+ assert isinstance(model_info["num_observations"], int)
+ assert isinstance(model_info["robot_effort"], list)
+ assert isinstance(model_info["robot_stiffness"], list)
+ assert isinstance(model_info["robot_damping"], list)
+
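+    # The exported metadata lists are concatenated with themselves so the kp/kd/torque-limit arrays cover both sides of the robot.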
+ tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * cfg.tau_factor
+ kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"]))
+ kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"]))
+
+ try:
+ data.qpos = model.keyframe("default").qpos
+ default = deepcopy(model.keyframe("default").qpos)[-model_info["num_actions"] :]
+ print("Default position:", default)
+    except Exception:
+ print("No default position found, using zero initialization")
+ default = np.zeros(model_info["num_actions"]) # 3 for pos, 4 for quat, cfg.num_actions for joints
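+        # Perturb the zero pose slightly with uniform noise in [-0.03, 0.03] rad.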
+ default += np.random.uniform(-0.03, 0.03, size=default.shape)
+ print("Default position:", default)
+ mujoco.mj_step(model, data)
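+    # Debug print of joint ids and names; the +1 accounts for the root free joint, which has no actuator.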
+ for ii in range(len(data.ctrl) + 1):
+ print(data.joint(ii).id, data.joint(ii).name)
+
+ data.qvel = np.zeros_like(data.qvel)
+ data.qacc = np.zeros_like(data.qacc)
+
+ if render:
+ viewer = mujoco_viewer.MujocoViewer(model, data)
+
+ target_q = np.zeros((model_info["num_actions"]), dtype=np.double)
+ prev_actions = np.zeros((model_info["num_actions"]), dtype=np.double)
+ hist_obs = np.zeros((model_info["num_observations"]), dtype=np.double)
+
+ count_lowlevel = 0
+
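+    # Named input tensors expected by the exported ONNX actor (see sim.model_export).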
+ input_data = {
+ "x_vel.1": np.zeros(1).astype(np.float32),
+ "y_vel.1": np.zeros(1).astype(np.float32),
+ "rot.1": np.zeros(1).astype(np.float32),
+ "t.1": np.zeros(1).astype(np.float32),
+ "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32),
+ "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32),
+ "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32),
+ "imu_ang_vel.1": np.zeros(3).astype(np.float32),
+ "imu_euler_xyz.1": np.zeros(3).astype(np.float32),
+ "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32),
+ }
+
+ if log_h5:
+        stop_state_log = int(cfg.sim_duration / cfg.dt / cfg.decimation)
+ logger = HDF5Logger(
+ data_name=embodiment,
+ num_actions=model_info["num_actions"],
+ max_timesteps=stop_state_log,
+ num_observations=model_info["num_observations"],
+ h5_out_dir=h5_out_dir
+ )
+
+ # Initialize variables for tracking upright steps and average speed
+ upright_steps = 0
+ total_speed = 0.0
+ step_count = 0
+
+ for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."):
+ if keyboard_use:
+ handle_keyboard_input()
+
+ # Obtain an observation
+ q, dq, quat, v, omega, gvec = get_obs(data)
+ q = q[-model_info["num_actions"] :]
+ dq = dq[-model_info["num_actions"] :]
+
+ eu_ang = quaternion_to_euler_array(quat)
+ eu_ang[eu_ang > math.pi] -= 2 * math.pi
+
+ # Check if the robot is upright (roll and pitch within ±30 degrees)
+ if abs(eu_ang[0]) > math.radians(30) or abs(eu_ang[1]) > math.radians(30):
+ print("Robot tilted heavily, ending simulation.")
+ break
+ else:
+ upright_steps += 1 # Increment upright steps
+
+ # Calculate speed and accumulate for average speed calculation
+ speed = np.linalg.norm(v[:2]) # Speed in the x-y plane
+ total_speed += speed
+ step_count += 1
+
+ # 1000hz -> Nhz
+ if count_lowlevel % cfg.decimation == 0:
+ # Convert sim coordinates to policy coordinates
+ cur_pos_obs = q - default
+ cur_vel_obs = dq
+
+ input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32)
+ input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32)
+ input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32)
+
+ input_data["t.1"] = np.array([count_lowlevel * cfg.dt], dtype=np.float32)
+
+ input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32)
+ input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32)
+
+ input_data["prev_actions.1"] = prev_actions.astype(np.float32)
+
+ input_data["imu_ang_vel.1"] = omega.astype(np.float32)
+ input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32)
+
+ input_data["buffer.1"] = hist_obs.astype(np.float32)
+
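+            # The ONNX policy returns target joint positions, the raw actions, and the updated observation history buffer.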
+ positions, curr_actions, hist_obs = policy.run(None, input_data)
+ target_q = positions
+ prev_actions = curr_actions
+
+ # Generate PD control
+ tau = pd_control(target_q, q, kps, dq, kds, default) # Calc torques
+ tau = np.clip(tau, -tau_limit, tau_limit) # Clamp torques
+
+ data.ctrl = tau
+ mujoco.mj_step(model, data)
+
+ if render:
+ viewer.render()
+ count_lowlevel += 1
+
+ if render:
+ viewer.close()
+
+ # Calculate average speed
+ if step_count > 0:
+ average_speed = total_speed / step_count
+ else:
+ average_speed = 0.0
+
+ # Save or print the statistics at the end of the episode
+ print(f"Number of upright steps: {upright_steps}")
+ print(f"Average speed: {average_speed:.4f} m/s")
+
+ if log_h5:
+ logger.close()
+
+
+def parse_modelmeta(
+ modelmeta: List[Tuple[str, str]],
+ verbose: bool = False,
+) -> Dict[str, Union[float, List[float], str]]:
+ parsed_meta: Dict[str, Union[float, List[float], str]] = {}
+ for key, value in modelmeta:
+ if value.startswith("[") and value.endswith("]"):
+ parsed_meta[key] = list(map(float, value.strip("[]").split(",")))
+ else:
+ try:
+ parsed_meta[key] = float(value)
+ try:
+ if int(value) == parsed_meta[key]:
+ parsed_meta[key] = int(value)
+ except ValueError:
+ pass
+ except ValueError:
+ print(f"Failed to convert {value} to float")
+ parsed_meta[key] = value
+ if verbose:
+ for key, value in parsed_meta.items():
+ print(f"{key}: {value}")
+ return parsed_meta
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Deployment script.")
+ parser.add_argument("--embodiment", type=str, required=True, help="Embodiment name.")
+ parser.add_argument("--load_model", type=str, required=True, help="Path to run to load from.")
+ parser.add_argument("--keyboard_use", action="store_true", help="keyboard_use")
+ parser.add_argument("--log_h5", action="store_true", help="log_h5")
+ parser.add_argument("--h5_out_dir", type=str, default="sim/resources", help="Directory to save HDF5 files")
+ parser.add_argument("--no_render", action="store_false", dest="render", help="Disable rendering")
+ parser.set_defaults(render=True)
+ args = parser.parse_args()
+
+ if args.keyboard_use:
+ x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.0, 0.0, 0.0
+ pygame.init()
+ pygame.display.set_caption("Simulation Control")
+ else:
+ x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.4, 0.0, 0.0
+
+ policy_cfg = ActorCfg(embodiment=args.embodiment)
+ if args.embodiment == "gpr":
+ policy_cfg.cycle_time = 0.64
+ cfg = Sim2simCfg(
+ sim_duration=10.0,
+ dt=0.001,
+ decimation=10,
+ tau_factor=4.0,
+ cycle_time=policy_cfg.cycle_time,
+ )
+ elif args.embodiment == "zeroth":
+ policy_cfg.cycle_time = 0.2
+ cfg = Sim2simCfg(
+ sim_duration=10.0,
+ dt=0.001,
+ decimation=10,
+ tau_factor=1,
+ cycle_time=policy_cfg.cycle_time,
+ )
+
+ if args.load_model.endswith(".onnx"):
+ policy = ort.InferenceSession(args.load_model)
+ else:
+ policy = convert_model_to_onnx(
+ args.load_model, policy_cfg, save_path="policy.onnx"
+ )
+
+ model_info = parse_modelmeta(
+ policy.get_modelmeta().custom_metadata_map.items(),
+ verbose=True,
+ )
+
+ run_mujoco(
+ args.embodiment,
+ policy,
+ cfg,
+ model_info,
+ args.keyboard_use,
+ args.log_h5,
+ args.render,
+ args.h5_out_dir,
+ )