diff --git a/examples/kinfer_policy.onnx b/examples/kinfer_policy.onnx new file mode 100644 index 00000000..9fd4c91c Binary files /dev/null and b/examples/kinfer_policy.onnx differ diff --git a/examples/standing_micro.onnx b/examples/standing_micro.onnx deleted file mode 100644 index acef2d58..00000000 Binary files a/examples/standing_micro.onnx and /dev/null differ diff --git a/examples/standing_micro.pt b/examples/standing_micro.pt deleted file mode 100644 index 00f79cf4..00000000 Binary files a/examples/standing_micro.pt and /dev/null differ diff --git a/examples/standing_pro.onnx b/examples/standing_pro.onnx deleted file mode 100644 index 5434ff41..00000000 Binary files a/examples/standing_pro.onnx and /dev/null differ diff --git a/examples/standing_pro.pt b/examples/standing_pro.pt deleted file mode 100644 index daa9950d..00000000 Binary files a/examples/standing_pro.pt and /dev/null differ diff --git a/examples/walking_micro.onnx b/examples/walking_micro.onnx deleted file mode 100644 index 68a35b8e..00000000 Binary files a/examples/walking_micro.onnx and /dev/null differ diff --git a/examples/walking_micro.pt b/examples/walking_micro.pt deleted file mode 100644 index 58c94e6c..00000000 Binary files a/examples/walking_micro.pt and /dev/null differ diff --git a/examples/test_policy.pt b/examples/walking_policy.pt similarity index 56% rename from examples/test_policy.pt rename to examples/walking_policy.pt index 69b339af..f483c38c 100644 Binary files a/examples/test_policy.pt and b/examples/walking_policy.pt differ diff --git a/examples/walking_pro.onnx b/examples/walking_pro.onnx deleted file mode 100644 index 432b0e95..00000000 Binary files a/examples/walking_pro.onnx and /dev/null differ diff --git a/examples/walking_pro.pt b/examples/walking_pro.pt deleted file mode 100644 index 2878ac43..00000000 Binary files a/examples/walking_pro.pt and /dev/null differ diff --git a/examples/original_walking.pt b/policy.pt similarity index 56% rename from examples/original_walking.pt rename to policy.pt index 2ba5b97f..f483c38c 100644 Binary files a/examples/original_walking.pt and b/policy.pt differ diff --git a/policy_1.pt b/policy_1.pt deleted file mode 100644 index 390d998c..00000000 Binary files a/policy_1.pt and /dev/null differ diff --git a/sim/algo/ppo/on_policy_runner.py b/sim/algo/ppo/on_policy_runner.py index e3fb6e1c..b13e6677 100755 --- a/sim/algo/ppo/on_policy_runner.py +++ b/sim/algo/ppo/on_policy_runner.py @@ -266,6 +266,7 @@ def save(self, path: str, infos: Optional[dict] = None) -> None: def load(self, path: str, load_optimizer: bool = True) -> dict: loaded_dict = torch.load(path) + self.load_path = path self.alg.actor_critic.load_state_dict(loaded_dict["model_state_dict"]) if load_optimizer: self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"]) diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py index d307168f..ecd72198 100755 --- a/sim/envs/__init__.py +++ b/sim/envs/__init__.py @@ -16,22 +16,19 @@ from sim.envs.humanoids.h1_env import H1FreeEnv from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO from sim.envs.humanoids.stompymicro_env import StompyMicroEnv -from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO -from sim.envs.humanoids.stompymini_env import MiniFreeEnv -from sim.envs.humanoids.stompypro_config import ( - StompyProCfg, - StompyProCfgPPO, - StompyProStandingCfg, +from sim.envs.humanoids.grp_config import ( + GprCfg, + GprCfgPPO, + GprStandingCfg, ) -from sim.envs.humanoids.stompypro_env import 
StompyProFreeEnv +from sim.envs.humanoids.gpr_env import GprFreeEnv from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO from sim.envs.humanoids.xbot_env import XBotLFreeEnv from sim.utils.task_registry import TaskRegistry # noqa: E402 task_registry = TaskRegistry() -task_registry.register("stompymini", MiniFreeEnv, MiniCfg(), MiniCfgPPO()) -task_registry.register("stompypro", StompyProFreeEnv, StompyProCfg(), StompyProCfgPPO()) -task_registry.register("stompypro_standing", StompyProFreeEnv, StompyProStandingCfg(), StompyProCfgPPO()) +task_registry.register("gpr", GprFreeEnv, GprCfg(), GprCfgPPO()) +task_registry.register("gpr_standing", GprFreeEnv, GprStandingCfg(), GprCfgPPO()) task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO()) task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO()) task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO()) diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py index 6bb371a8..ea8d31d4 100644 --- a/sim/envs/base/legged_robot.py +++ b/sim/envs/base/legged_robot.py @@ -567,6 +567,7 @@ def _init_buffers(self): self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) for i in range(self.num_dofs): name = self.dof_names[i] + print(name) self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name] found = False @@ -579,6 +580,7 @@ def _init_buffers(self): self.p_gains[:, i] = 0.0 self.d_gains[:, i] = 0.0 print(f"PD gain of joint {name} were not defined, setting them to zero") + self.rand_push_force = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device) self.rand_push_torque = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device) self.default_dof_pos = self.default_dof_pos.unsqueeze(0) diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py index 1921474b..e87d2252 100644 --- a/sim/envs/base/legged_robot_config.py +++ b/sim/envs/base/legged_robot_config.py @@ -157,7 +157,6 @@ class domain_rand: push_robots = True push_interval_s = 15 max_push_vel_xy = 1.0 - start_pos_noise = 0.1 class rewards: class scales: diff --git a/sim/envs/humanoids/stompypro_env.py b/sim/envs/humanoids/gpr_env.py similarity index 98% rename from sim/envs/humanoids/stompypro_env.py rename to sim/envs/humanoids/gpr_env.py index df9b2e75..e309e3bb 100644 --- a/sim/envs/humanoids/stompypro_env.py +++ b/sim/envs/humanoids/gpr_env.py @@ -4,7 +4,7 @@ from isaacgym.torch_utils import * # isort:skip from sim.envs.base.legged_robot import LeggedRobot -from sim.resources.stompypro.joints import Robot +from sim.resources.gpr.joints import Robot from sim.utils.terrain import HumanoidTerrain from isaacgym import gymtorch # isort:skip @@ -12,8 +12,8 @@ import torch # isort:skip -class StompyProFreeEnv(LeggedRobot): - """StompyProFreeEnv is a class that represents a custom environment for a legged robot. +class GprFreeEnv(LeggedRobot): + """GprFreeEnv is a class that represents a custom environment for a legged robot. Args: cfg (LeggedRobotCfg): Configuration object for the legged robot. 
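Note on the registry change above: the Stompy tasks are replaced by gpr and gpr_standing. A minimal sketch of how a registered task is consumed downstream, mirroring the get_cfgs / make_env calls that appear in sim/play_old.py later in this patch (assumptions: Isaac Gym is installed and this is run the same way sim/play.py is; the num_envs override is only to keep a smoke test small):

    from sim.envs import task_registry
    from sim.utils.helpers import get_args

    args = get_args()                  # parses --task, --num_envs, etc.
    args.task = "gpr"                  # one of the names registered above

    env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
    env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1)   # shrink for a quick check

    env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
    obs = env.get_observations()
    print(obs.shape)                   # num_envs x (frame_stack * num_single_obs)
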
@@ -126,13 +126,13 @@ def compute_ref_state(self): sin_pos_l[sin_pos_l > 0] = 0 self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1 self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2 - self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += 1 * sin_pos_l * scale_1 + self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1 # right foot stance phase set to default joint pos sin_pos_r[sin_pos_r < 0] = 0 self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1 self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2 - self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += 1 * sin_pos_r * scale_1 + self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1 # Double support phase self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0 @@ -325,7 +325,7 @@ def _reward_feet_contact_number(self): """ contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 stance_mask = self._get_gait_phase() - reward = torch.where(contact == stance_mask, 1, -0.3) + reward = torch.where(contact == stance_mask, 1.0, -0.3) return torch.mean(reward, dim=1) def _reward_orientation(self): diff --git a/sim/envs/humanoids/stompymini_config.py b/sim/envs/humanoids/grp_config.py similarity index 84% rename from sim/envs/humanoids/stompymini_config.py rename to sim/envs/humanoids/grp_config.py index 91831c21..5ab53816 100644 --- a/sim/envs/humanoids/stompymini_config.py +++ b/sim/envs/humanoids/grp_config.py @@ -1,16 +1,16 @@ -"""Defines the environment configuration for the walking task""" +"""Defines the environment configuration for the Getting up task""" from sim.env import robot_urdf_path from sim.envs.base.legged_robot_config import ( # type: ignore LeggedRobotCfg, LeggedRobotCfgPPO, ) -from sim.resources.stompymini.joints import Robot +from sim.resources.gpr.joints import Robot -NUM_JOINTS = len(Robot.all_joints()) # 20 +NUM_JOINTS = len(Robot.all_joints()) -class MiniCfg(LeggedRobotCfg): +class GprCfg(LeggedRobotCfg): """Configuration class for the Legs humanoid robot.""" class env(LeggedRobotCfg.env): @@ -33,24 +33,26 @@ class safety(LeggedRobotCfg.safety): torque_limit = 0.85 class asset(LeggedRobotCfg.asset): - name = "stompymini" + name = "gpr" + file = str(robot_urdf_path(name)) - foot_name = ["RS_01_Stator", "RS_01_Stator_2"] - knee_name = ["RS_04_Rotor_7", "RS_04_Rotor_8"] + foot_name = ["foot1", "foot3"] + knee_name = ["leg3_shell2", "leg3_shell22"] + imu_name = "imu_link" - termination_height = 0.24 - default_feet_height = 0.03 + termination_height = 0.2 + default_feet_height = 0.0 penalize_contacts_on = [] - self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter + self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter flip_visual_attachments = False replace_cylinder_with_capsule = False fix_base_link = False class terrain(LeggedRobotCfg.terrain): mesh_type = "plane" - # mesh_type = 'trimesh' + # mesh_type = "trimesh" curriculum = False # rough terrain only: measure_heights = False @@ -79,10 +81,7 @@ class noise_scales: class init_state(LeggedRobotCfg.init_state): pos = [0.0, 0.0, Robot.height] - # setting the right rotation - # quat_from_euler_xyz(torch.tensor(1.57), torch.tensor(0), torch.tensor(-1.57)) rot = Robot.rotation - default_joint_angles = {k: 0.0 for k in Robot.all_joints()} default_positions = Robot.default_standing() @@ -123,14 +122,14 @@ class domain_rand(LeggedRobotCfg.domain_rand): friction_range = [0.1, 2.0] 
randomize_base_mass = True - added_mass_range = [-1.0, 1.0] - push_robots = True + added_mass_range = [-3.0, 3.0] + push_robots = False push_interval_s = 4 max_push_vel_xy = 0.2 max_push_ang_vel = 0.4 # dynamic randomization - action_delay = 0.5 action_noise = 0.02 + action_delay = 0.5 class commands(LeggedRobotCfg.commands): # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) @@ -145,39 +144,28 @@ class ranges: heading = [-3.14, 3.14] class rewards: - base_height_target = 0.78 - min_dist = 0.25 + # quite important to keep it right + base_height_target = 0.63 + min_dist = 0.2 max_dist = 0.5 - # put some settings here for LLM parameter tuning target_joint_pos_scale = 0.17 # rad - target_feet_height = 0.05 # m - cycle_time = 0.4 # sec + target_feet_height = 0.06 # m + + cycle_time = 0.25 # sec # if true negative total rewards are clipped at zero (avoids early termination problems) only_positive_rewards = True # tracking reward = exp(error*sigma) tracking_sigma = 5.0 - max_contact_force = 400 # forces above this value are penalized + max_contact_force = 700 # forces above this value are penalized class scales: - # base pos - default_joint_pos = 0.5 - orientation = 1 - base_height = 0.2 - base_acc = 0.2 - # energy - action_smoothness = -0.002 - torques = -1e-5 - dof_vel = -5e-4 - dof_acc = -1e-7 - collision = -1.0 - # reference motion tracking joint_pos = 1.6 - feet_clearance = 1.6 + feet_clearance = 1.0 feet_contact_number = 1.2 # gait - feet_air_time = 1.6 + feet_air_time = 1.0 foot_slip = -0.05 feet_distance = 0.2 knee_distance = 0.2 @@ -190,6 +178,18 @@ class scales: low_speed = 0.2 track_vel_hard = 0.5 + # base pos + default_joint_pos = 0.5 + orientation = 1. + base_height = 0.2 + base_acc = 0.2 + # energy + action_smoothness = -0.002 + torques = -1e-5 + dof_vel = -5e-4 + dof_acc = -1e-7 + collision = -1.0 + class normalization: class obs_scales: lin_vel = 2.0 @@ -208,27 +208,27 @@ class viewer: lookat = [0, -2, 0] -class MiniStandingCfg(MiniCfg): - """Configuration class for the Legs humanoid robot.""" +class GprStandingCfg(GprCfg): + """Configuration class for the GPR humanoid robot.""" class rewards: - base_height_target = 0.78 - min_dist = 0.25 + # quite important to keep it right + base_height_target = 1.06 + min_dist = 0.2 max_dist = 0.5 - # put some settings here for LLM parameter tuning target_joint_pos_scale = 0.17 # rad target_feet_height = 0.05 # m - cycle_time = 0.4 # sec + cycle_time = 0.64 # sec # if true negative total rewards are clipped at zero (avoids early termination problems) only_positive_rewards = True # tracking reward = exp(error*sigma) - tracking_sigma = 5.0 - max_contact_force = 400 # forces above this value are penalized + tracking_sigma = 5 + max_contact_force = 500 # forces above this value are penalized class scales: # base pos - default_joint_pos = 0.5 + default_joint_pos = 1.0 orientation = 1 base_height = 0.2 base_acc = 0.2 @@ -240,7 +240,7 @@ class scales: collision = -1.0 -class MiniCfgPPO(LeggedRobotCfgPPO): +class GprCfgPPO(LeggedRobotCfgPPO): seed = 5 runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner @@ -261,11 +261,11 @@ class runner: policy_class_name = "ActorCritic" algorithm_class_name = "PPO" num_steps_per_env = 60 # per iteration - max_iterations = 5001 # number of policy updates + max_iterations = 3001 # number of policy updates # logging save_interval = 100 # check for potential saves every this many iterations - experiment_name = "StompyMini" + experiment_name = "gpr" 
run_name = "" # load and resume resume = False diff --git a/sim/envs/humanoids/stompymini_env.py b/sim/envs/humanoids/stompymini_env.py deleted file mode 100644 index 35d66f3f..00000000 --- a/sim/envs/humanoids/stompymini_env.py +++ /dev/null @@ -1,517 +0,0 @@ -# mypy: disable-error-code="valid-newtype" -"""Defines the environment for training the humanoid.""" - -from sim.envs.base.legged_robot import LeggedRobot -from sim.resources.stompymini.joints import Robot -from sim.utils.terrain import HumanoidTerrain - -from isaacgym import gymtorch # isort:skip -from isaacgym.torch_utils import * # isort: skip - - -import torch # isort:skip - - -class MiniFreeEnv(LeggedRobot): - """StompyFreeEnv is a class that represents a custom environment for a legged robot. - - Args: - cfg: Configuration object for the legged robot. - sim_params: Parameters for the simulation. - physics_engine: Physics engin e used in the simulation. - sim_device: Device used for the simulation. - headless: Flag indicating whether the simulation should be run in headless mode. - - Attributes: - last_feet_z (float): The z-coordinate of the last feet position. - feet_height (torch.Tensor): Tensor representing the height of the feet. - sim (gymtorch.GymSim): The simulation object. - terrain (HumanoidTerrain): The terrain object. - up_axis_idx (int): The index representing the up axis. - command_input (torch.Tensor): Tensor representing the command input. - privileged_obs_buf (torch.Tensor): Tensor representing the privileged observations buffer. - obs_buf (torch.Tensor): Tensor representing the observations buffer. - obs_history (collections.deque): Deque containing the history of observations. - critic_history (collections.deque): Deque containing the history of critic observations. - - Methods: - _push_robots(): Randomly pushes the robots by setting a randomized base velocity. - _get_phase(): Calculates the phase of the gait cycle. - _get_gait_phase(): Calculates the gait phase. - compute_ref_state(): Computes the reference state. - create_sim(): Creates the simulation, terrain, and environments. - _get_noise_scale_vec(cfg): Sets a vector used to scale the noise added to the observations. - step(actions): Performs a simulation step with the given actions. - compute_observations(): Computes the observations. - reset_idx(env_ids): Resets the environment for the specified environment IDs. - """ - - def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): - super().__init__(cfg, sim_params, physics_engine, sim_device, headless) - self.last_feet_z = self.cfg.asset.default_feet_height - self.feet_height = torch.zeros((self.num_envs, 2), device=self.device) - self.reset_idx(torch.tensor(range(self.num_envs), device=self.device)) - - env_handle = self.envs[0] - actor_handle = self.actor_handles[0] - - self.legs_joints = {} - for name, joint in Robot.legs.left.joints_motors(): - print(name) - joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) - self.legs_joints["left_" + name] = joint_handle - - for name, joint in Robot.legs.right.joints_motors(): - joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) - self.legs_joints["right_" + name] = joint_handle - - self.compute_observations() - - def _push_robots(self): - """Random pushes the robots. 
Emulates an impulse by setting a randomized base velocity.""" - max_vel = self.cfg.domain_rand.max_push_vel_xy - max_push_angular = self.cfg.domain_rand.max_push_ang_vel - self.rand_push_force[:, :2] = torch_rand_float( - -max_vel, max_vel, (self.num_envs, 2), device=self.device - ) # lin vel x/y - self.root_states[:, 7:9] = self.rand_push_force[:, :2] - - self.rand_push_torque = torch_rand_float( - -max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device - ) - self.root_states[:, 10:13] = self.rand_push_torque - - self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) - - def _get_phase(self): - cycle_time = self.cfg.rewards.cycle_time - phase = self.episode_length_buf * self.dt / cycle_time - return phase - - def _get_gait_phase(self): - # return float mask 1 is stance, 0 is swing - phase = self._get_phase() - sin_pos = torch.sin(2 * torch.pi * phase) - # Add double support phase - stance_mask = torch.zeros((self.num_envs, 2), device=self.device) - # left foot stance - stance_mask[:, 0] = sin_pos >= 0 - # right foot stance - stance_mask[:, 1] = sin_pos < 0 - # Double support phase - stance_mask[torch.abs(sin_pos) < 0.1] = 1 - - return stance_mask - - def check_termination(self): - """Check if environments need to be reset""" - self.reset_buf = torch.any( - torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1.0, - dim=1, - ) - self.reset_buf |= self.root_states[:, 2] < self.cfg.asset.termination_height - self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs - self.reset_buf |= self.time_out_buf - - def compute_ref_state(self): - phase = self._get_phase() - sin_pos = torch.sin(2 * torch.pi * phase) - sin_pos_l = sin_pos.clone() - sin_pos_r = sin_pos.clone() - default_clone = self.default_dof_pos.clone() - self.ref_dof_pos = self.default_dof_pos.repeat(self.num_envs, 1) - - scale_1 = self.cfg.rewards.target_joint_pos_scale - scale_2 = 2 * scale_1 - # left foot stance phase set to default joint pos - sin_pos_l[sin_pos_l > 0] = 0 - self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1 - self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2 - self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1 - - # right foot stance phase set to default joint pos - sin_pos_r[sin_pos_r < 0] = 0 - self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1 - self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2 - self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1 - - # Double support phase - self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0 - - self.ref_action = 2 * self.ref_dof_pos - - def create_sim(self): - """Creates simulation, terrain and evironments""" - self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly - self.sim = self.gym.create_sim( - self.sim_device_id, - self.graphics_device_id, - self.physics_engine, - self.sim_params, - ) - mesh_type = self.cfg.terrain.mesh_type - if mesh_type in ["heightfield", "trimesh"]: - self.terrain = HumanoidTerrain(self.cfg.terrain, self.num_envs) - if mesh_type == "plane": - self._create_ground_plane() - elif mesh_type == "heightfield": - self._create_heightfield() - elif mesh_type == "trimesh": - self._create_trimesh() - elif mesh_type is not None: - raise ValueError("Terrain mesh type not recognised. 
Allowed types are [None, plane, heightfield, trimesh]") - self._create_envs() - - def _get_noise_scale_vec(self, cfg): - """Sets a vector used to scale the noise added to the observations. - [NOTE]: Must be adapted when changing the observations structure - - Args: - cfg (Dict): Environment config file - - Returns: - [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1] - """ - num_actions = self.num_actions - noise_vec = torch.zeros(self.cfg.env.num_single_obs, device=self.device) - self.add_noise = self.cfg.noise.add_noise - noise_scales = self.cfg.noise.noise_scales - noise_vec[0:5] = 0.0 # commands - noise_vec[5 : (num_actions + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos - noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel - noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0 # previous actions - noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = ( - noise_scales.ang_vel * self.obs_scales.ang_vel - ) # ang vel - noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5)] = ( - noise_scales.quat * self.obs_scales.quat - ) # euler x,y - return noise_vec - - def compute_observations(self): - phase = self._get_phase() - self.compute_ref_state() - - sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1) - cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1) - - stance_mask = self._get_gait_phase() - contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5.0 - - self.command_input = torch.cat((sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1) - q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos - dq = self.dof_vel * self.obs_scales.dof_vel - - diff = self.dof_pos - self.ref_dof_pos - self.privileged_obs_buf = torch.cat( - ( - self.command_input, # 2 + 3 - (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 12 - self.dof_vel * self.obs_scales.dof_vel, # 12 - self.actions, # 12 - diff, # 12 - self.base_lin_vel * self.obs_scales.lin_vel, # 3 - self.base_ang_vel * self.obs_scales.ang_vel, # 3 - self.base_euler_xyz * self.obs_scales.quat, # 3 - self.rand_push_force[:, :2], # 3 - self.rand_push_torque, # 3 - self.env_frictions, # 1 - self.body_mass / 30.0, # 1 - stance_mask, # 2 - contact_mask, # 2 - ), - dim=-1, - ) - - obs_buf = torch.cat( - ( - self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, aug_vel_yaw) - q, # 20D - dq, # 20D - self.actions, # 20D - self.base_ang_vel * self.obs_scales.ang_vel, # 3 - self.base_euler_xyz * self.obs_scales.quat, # 3 - ), - dim=-1, - ) - - if self.cfg.terrain.measure_heights: - heights = ( - torch.clip( - self.root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights, - -1, - 1.0, - ) - * self.obs_scales.height_measurements - ) - self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1) - - if self.add_noise: - obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level - else: - obs_now = obs_buf.clone() - self.obs_history.append(obs_now) - self.critic_history.append(self.privileged_obs_buf) - - obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1) # N,T,K - - self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K - self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1) - - def reset_idx(self, env_ids): - super().reset_idx(env_ids) - for i in range(self.obs_history.maxlen): - self.obs_history[i][env_ids] *= 0 
- for i in range(self.critic_history.maxlen): - self.critic_history[i][env_ids] *= 0 - - # ================================================ Rewards ================================================== # - def _reward_joint_pos(self): - """Calculates the reward based on the difference between the current joint positions and the target joint positions.""" - joint_pos = self.dof_pos.clone() - pos_target = self.ref_dof_pos.clone() - diff = joint_pos - pos_target - r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5) - - return r - - def _reward_feet_distance(self): - """Calculates the reward based on the distance between the feet. Penilize feet get close to each other or too far away.""" - foot_pos = self.rigid_state[:, self.feet_indices, :2] - foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1) - fd = self.cfg.rewards.min_dist - max_df = self.cfg.rewards.max_dist - d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) - d_max = torch.clamp(foot_dist - max_df, 0, 0.5) - return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 - - def _reward_knee_distance(self): - """Calculates the reward based on the distance between the knee of the humanoid.""" - foot_pos = self.rigid_state[:, self.knee_indices, :2] - foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1) - fd = self.cfg.rewards.min_dist - max_df = self.cfg.rewards.max_dist / 2 - d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) - d_max = torch.clamp(foot_dist - max_df, 0, 0.5) - return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 - - def _reward_foot_slip(self): - """Calculates the reward for minimizing foot slip. The reward is based on the contact forces - and the speed of the feet. A contact threshold is used to determine if the foot is in contact - with the ground. The speed of the foot is calculated and scaled by the contact condition. - """ - contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 - foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2) - rew = torch.sqrt(foot_speed_norm) - rew *= contact - return torch.sum(rew, dim=1) - - def _reward_feet_air_time(self): - """Calculates the reward for feet air time, promoting longer steps. This is achieved by - checking the first contact with the ground after being in the air. The air time is - limited to a maximum value for reward calculation. - """ - contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 - stance_mask = self._get_gait_phase() - self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts) - self.last_contacts = contact - first_contact = (self.feet_air_time > 0.0) * self.contact_filt - self.feet_air_time += self.dt - air_time = self.feet_air_time.clamp(0, 0.5) * first_contact - self.feet_air_time *= ~self.contact_filt - return air_time.sum(dim=1) - - def _reward_feet_contact_number(self): - """Calculates a reward based on the number of feet contacts aligning with the gait phase. - Rewards or penalizes depending on whether the foot contact matches the expected gait phase. - """ - contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 - stance_mask = self._get_gait_phase() - reward = torch.where(contact == stance_mask, 1, -0.3) - return torch.mean(reward, dim=1) - - def _reward_orientation(self): - """Calculates the reward for maintaining a flat base orientation. 
It penalizes deviation - from the desired base orientation using the base euler angles and the projected gravity vector. - """ - quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) - orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) - return (quat_mismatch + orientation) / 2 - - def _reward_feet_contact_forces(self): - """Calculates the reward for keeping contact forces within a specified range. Penalizes - high contact forces on the feet. - """ - return torch.sum( - ( - torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force - ).clip(0, 400), - dim=1, - ) - - def _reward_default_joint_pos(self): - """Calculates the reward for keeping joint positions close to default positions, with a focus - on penalizing deviation in yaw and roll directions. Excludes yaw and roll from the main penalty. - """ - joint_diff = self.dof_pos - self.default_joint_pd_target - left_yaw_roll = joint_diff[:, [self.legs_joints["left_hip_roll"], self.legs_joints["left_hip_yaw"]]] - right_yaw_roll = joint_diff[:, [self.legs_joints["right_hip_roll"], self.legs_joints["right_hip_yaw"]]] - yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1) - yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50) - - return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1) - - def _reward_base_height(self): - """Calculates the reward based on the robot's base height. Penalizes deviation from a target base height. - The reward is computed based on the height difference between the robot's base and the average height - of its feet when they are in contact with the ground. - """ - stance_mask = self._get_gait_phase() - measured_heights = torch.sum(self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum( - stance_mask, dim=1 - ) - base_height = self.root_states[:, 2] - (measured_heights - self.cfg.asset.default_feet_height) - reward = torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100) - return reward - - def _reward_base_acc(self): - """Computes the reward based on the base's acceleration. Penalizes high accelerations of the robot's base, - encouraging smoother motion. - """ - root_acc = self.last_root_vel - self.root_states[:, 7:13] - rew = torch.exp(-torch.norm(root_acc, dim=1) * 3) - return rew - - def _reward_vel_mismatch_exp(self): - """Computes a reward based on the mismatch in the robot's linear and angular velocities. - Encourages the robot to maintain a stable velocity by penalizing large deviations. - """ - lin_mismatch = torch.exp(-torch.square(self.base_lin_vel[:, 2]) * 10) - ang_mismatch = torch.exp(-torch.norm(self.base_ang_vel[:, :2], dim=1) * 5.0) - - c_update = (lin_mismatch + ang_mismatch) / 2.0 - - return c_update - - def _reward_track_vel_hard(self): - """Calculates a reward for accurately tracking both linear and angular velocity commands. - Penalizes deviations from specified linear and angular velocity targets. 
- """ - # Tracking of linear velocity commands (xy axes) - lin_vel_error = torch.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) - lin_vel_error_exp = torch.exp(-lin_vel_error * 10) - - # Tracking of angular velocity commands (yaw) - ang_vel_error = torch.abs(self.commands[:, 2] - self.base_ang_vel[:, 2]) - ang_vel_error_exp = torch.exp(-ang_vel_error * 10) - - linear_error = 0.2 * (lin_vel_error + ang_vel_error) - - return (lin_vel_error_exp + ang_vel_error_exp) / 2.0 - linear_error - - def _reward_tracking_lin_vel(self): - """Tracks linear velocity commands along the xy axes. - Calculates a reward based on how closely the robot's linear velocity matches the commanded values. - """ - lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1) - return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma) - - def _reward_tracking_ang_vel(self): - """Tracks angular velocity commands for yaw rotation. - Computes a reward based on how closely the robot's angular velocity matches the commanded yaw values. - """ - ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2]) - return torch.exp(-ang_vel_error * self.cfg.rewards.tracking_sigma) - - def _reward_feet_clearance(self): - """Calculates reward based on the clearance of the swing leg from the ground during movement. - Encourages appropriate lift of the feet during the swing phase of the gait. - """ - # Compute feet contact mask - contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 - - # Get the z-position of the feet and compute the change in z-position - feet_z = self.rigid_state[:, self.feet_indices, 2] - self.cfg.asset.default_feet_height - delta_z = feet_z - self.last_feet_z - self.feet_height += delta_z - self.last_feet_z = feet_z - - # Compute swing mask - swing_mask = 1 - self._get_gait_phase() - - # feet height should be closed to target feet height at the peak - rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.02 - rew_pos = torch.sum(rew_pos * swing_mask, dim=1) - self.feet_height *= ~contact - - return rew_pos - - def _reward_low_speed(self): - """Rewards or penalizes the robot based on its speed relative to the commanded speed. - This function checks if the robot is moving too slow, too fast, or at the desired speed, - and if the movement direction matches the command. - """ - # Calculate the absolute value of speed and command for comparison - absolute_speed = torch.abs(self.base_lin_vel[:, 0]) - absolute_command = torch.abs(self.commands[:, 0]) - - # Define speed criteria for desired range - speed_too_low = absolute_speed < 0.5 * absolute_command - speed_too_high = absolute_speed > 1.2 * absolute_command - speed_desired = ~(speed_too_low | speed_too_high) - - # Check if the speed and command directions are mismatched - sign_mismatch = torch.sign(self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0]) - - # Initialize reward tensor - reward = torch.zeros_like(self.base_lin_vel[:, 0]) - - # Assign rewards based on conditions - # Speed too low - reward[speed_too_low] = -1.0 - # Speed too high - reward[speed_too_high] = 0.0 - # Speed within desired range - reward[speed_desired] = 1.2 - # Sign mismatch has the highest priority - reward[sign_mismatch] = -2.0 - return reward * (self.commands[:, 0].abs() > 0.1) - - def _reward_torques(self): - """Penalizes the use of high torques in the robot's joints. Encourages efficient movement by minimizing - the necessary force exerted by the motors. 
- """ - return torch.sum(torch.square(self.torques), dim=1) - - def _reward_dof_vel(self): - """Penalizes high velocities at the degrees of freedom (DOF) of the robot. This encourages smoother and - more controlled movements. - """ - return torch.sum(torch.square(self.dof_vel), dim=1) - - def _reward_dof_acc(self): - """Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring - smooth and stable motion, reducing wear on the robot's mechanical parts. - """ - return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1) - - def _reward_collision(self): - """Penalizes collisions of the robot with the environment, specifically focusing on selected body parts. - This encourages the robot to avoid undesired contact with objects or surfaces. - """ - return torch.sum( - 1.0 * (torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), - dim=1, - ) - - def _reward_action_smoothness(self): - """Encourages smoothness in the robot's actions by penalizing large differences between consecutive actions. - This is important for achieving fluid motion and reducing mechanical stress. - """ - term_1 = torch.sum(torch.square(self.last_actions - self.actions), dim=1) - term_2 = torch.sum( - torch.square(self.actions + self.last_last_actions - 2 * self.last_actions), - dim=1, - ) - term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1) - return term_1 + term_2 + term_3 diff --git a/sim/envs/humanoids/stompypro_config.py b/sim/envs/humanoids/stompypro_config.py deleted file mode 100644 index 714e15c2..00000000 --- a/sim/envs/humanoids/stompypro_config.py +++ /dev/null @@ -1,295 +0,0 @@ -"""Defines the environment configuration for the Getting up task""" - -from sim.env import robot_urdf_path -from sim.envs.base.legged_robot_config import ( # type: ignore - LeggedRobotCfg, - LeggedRobotCfgPPO, -) -from sim.resources.stompypro.joints import Robot - -NUM_JOINTS = len(Robot.all_joints()) - - -class StompyProCfg(LeggedRobotCfg): - """Configuration class for the Legs humanoid robot.""" - - class env(LeggedRobotCfg.env): - # change the observation dim - frame_stack = 15 - c_frame_stack = 3 - num_single_obs = 11 + NUM_JOINTS * 3 - num_observations = int(frame_stack * num_single_obs) - single_num_privileged_obs = 25 + NUM_JOINTS * 4 - num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) - num_actions = NUM_JOINTS - num_envs = 4096 - episode_length_s = 24 # episode length in seconds - use_ref_actions = False - - class safety(LeggedRobotCfg.safety): - # safety factors - pos_limit = 1.0 - vel_limit = 1.0 - torque_limit = 0.85 - - class asset(LeggedRobotCfg.asset): - name = "stompypro" - - file = str(robot_urdf_path(name)) - - foot_name = ["L_foot", "R_foot"] - knee_name = ["L_calf", "R_calf"] - imu_name = "imu_link" - - termination_height = 0.2 - default_feet_height = 0.0 - - penalize_contacts_on = [] - self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter - flip_visual_attachments = False - replace_cylinder_with_capsule = False - fix_base_link = False - - class terrain(LeggedRobotCfg.terrain): - # mesh_type = "plane" - mesh_type = "trimesh" - curriculum = False - # rough terrain only: - measure_heights = False - static_friction = 0.6 - dynamic_friction = 0.6 - terrain_length = 8.0 - terrain_width = 8.0 - num_rows = 10 # number of terrain rows (levels) - num_cols = 10 # number of terrain cols (types) - max_init_terrain_level = 10 # starting curriculum state - # plane; obstacles; uniform; slope_up; 
slope_down, stair_up, stair_down - terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0] - restitution = 0.0 - - class noise: - add_noise = True - noise_level = 0.6 # scales other values - - class noise_scales: - dof_pos = 0.05 - dof_vel = 0.5 - ang_vel = 0.1 - lin_vel = 0.05 - quat = 0.03 - height_measurements = 0.1 - - class init_state(LeggedRobotCfg.init_state): - pos = [0.0, 0.0, Robot.height] - rot = Robot.rotation - default_joint_angles = {k: 0.0 for k in Robot.all_joints()} - - default_positions = Robot.default_standing() - for joint in default_positions: - default_joint_angles[joint] = default_positions[joint] - - class control(LeggedRobotCfg.control): - # PD Drive parameters: - stiffness = Robot.stiffness() - damping = Robot.damping() - # action scale: target angle = actionScale * action + defaultAngle - action_scale = 0.25 - # decimation: Number of control action updates @ sim DT per policy DT - decimation = 20 # 100hz - - class sim(LeggedRobotCfg.sim): - dt = 0.001 # 1000 Hz - substeps = 1 # 2 - up_axis = 1 # 0 is y, 1 is z - - class physx(LeggedRobotCfg.sim.physx): - num_threads = 10 - solver_type = 1 # 0: pgs, 1: tgs - num_position_iterations = 4 - num_velocity_iterations = 1 - contact_offset = 0.01 # [m] - rest_offset = 0.0 # [m] - bounce_threshold_velocity = 0.1 # [m/s] - max_depenetration_velocity = 1.0 - max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more - default_buffer_size_multiplier = 5 - # 0: never, 1: last sub-step, 2: all sub-steps (default=2) - contact_collection = 2 - - class domain_rand(LeggedRobotCfg.domain_rand): - randomize_friction = True - friction_range = [0.1, 2.0] - - randomize_base_mass = True - added_mass_range = [-5.0, 5.0] - push_robots = True - push_interval_s = 4 - max_push_vel_xy = 0.2 - max_push_ang_vel = 0.4 - # dynamic randomization - action_noise = 0.02 - action_delay = 0.5 - - class commands(LeggedRobotCfg.commands): - # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) - num_commands = 4 - resampling_time = 8.0 # time before command are changed[s] - heading_command = True # if true: compute ang vel command from heading error - - class ranges: - lin_vel_x = [-0.3, 0.6] # min max [m/s] - lin_vel_y = [-0.3, 0.3] # min max [m/s] - ang_vel_yaw = [-0.3, 0.3] # min max [rad/s] - heading = [-3.14, 3.14] - - # a - normal - # b - negate target_join_pos_scale (-0.14) - class rewards: - # quite important to keep it right - base_height_target = 0.63 - min_dist = 0.2 - max_dist = 0.5 - # put some settings here for LLM parameter tuning - target_joint_pos_scale = 0.17 # rad - target_feet_height = 0.06 # m - - cycle_time = 0.4 # sec - # if true negative total rewards are clipped at zero (avoids early termination problems) - only_positive_rewards = True - # tracking reward = exp(error*sigma) - tracking_sigma = 5.0 - max_contact_force = 400 # forces above this value are penalized - - class scales: - # reference motion tracking - joint_pos = 1.9 - feet_clearance = 1.7 - feet_contact_number = 1.7 - # gait - feet_air_time = 1.6 - foot_slip = -0.05 - feet_distance = 0.2 - knee_distance = 0.2 - # contact - feet_contact_forces = -0.01 - # vel tracking - tracking_lin_vel = 1.0 - tracking_ang_vel = 1.0 - vel_mismatch_exp = 0.5 # lin_z; ang x,y - low_speed = 0.2 - track_vel_hard = 0.5 - - # base pos - default_joint_pos = 0.5 - orientation = 1 - base_height = 0.2 - base_acc = 0.2 - # energy - action_smoothness = -0.002 - torques = -1e-5 - dof_vel = -5e-4 - dof_acc = -1e-7 - collision = -1.0 
- - # reference motion tracking - joint_pos = 1.6 - feet_clearance = 1.6 - feet_contact_number = 1.2 - # gait - feet_air_time = 1.6 - foot_slip = -0.05 - feet_distance = 0.2 - knee_distance = 0.2 - # contact - feet_contact_forces = -0.01 - # vel tracking - tracking_lin_vel = 1.2 - tracking_ang_vel = 1.1 - vel_mismatch_exp = 0.5 # lin_z; ang x,y - low_speed = 0.2 - track_vel_hard = 0.5 - - class normalization: - class obs_scales: - lin_vel = 2.0 - ang_vel = 1.0 - dof_pos = 1.0 - dof_vel = 0.05 - quat = 1.0 - height_measurements = 5.0 - - clip_observations = 18.0 - clip_actions = 18.0 - - class viewer: - ref_env = 0 - pos = [4, -4, 2] - lookat = [0, -2, 0] - - -class StompyProStandingCfg(StompyProCfg): - """Configuration class for the Stompy Pro humanoid robot.""" - - # a - normal - # b - negate target_join_pos_scale (-0.14) - class rewards: - # quite important to keep it right - base_height_target = 0.63 - min_dist = 0.2 - max_dist = 0.4 - # put some settings here for LLM parameter tuning - target_joint_pos_scale = 0.14 # rad - target_feet_height = 0.05 # m - cycle_time = 0.5 # sec - # if true negative total rewards are clipped at zero (avoids early termination problems) - only_positive_rewards = True - # tracking reward = exp(error*sigma) - tracking_sigma = 5 - max_contact_force = 500 # forces above this value are penalized - - class scales: - # base pos - default_joint_pos = 1.0 - orientation = 1 - base_height = 0.2 - base_acc = 0.2 - # energy - action_smoothness = -0.002 - torques = -1e-5 - dof_vel = -5e-4 - dof_acc = -1e-7 - collision = -1.0 - - -class StompyProCfgPPO(LeggedRobotCfgPPO): - seed = 5 - runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner - - class policy: - init_noise_std = 1.0 - actor_hidden_dims = [512, 256, 128] - critic_hidden_dims = [768, 256, 128] - - class algorithm(LeggedRobotCfgPPO.algorithm): - entropy_coef = 0.001 - learning_rate = 1e-5 - num_learning_epochs = 2 - gamma = 0.994 - lam = 0.9 - num_mini_batches = 4 - - class runner: - policy_class_name = "ActorCritic" - algorithm_class_name = "PPO" - num_steps_per_env = 60 # per iteration - max_iterations = 3001 # number of policy updates - - # logging - save_interval = 100 # check for potential saves every this many iterations - experiment_name = "StompyPro" - run_name = "" - # load and resume - resume = False - load_run = -1 # -1 = last run - checkpoint = -1 # -1 = last saved model - resume_path = None # updated from load_run and chkpt diff --git a/sim/model_export.py b/sim/model_export.py index 51a812c5..c5184817 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -15,7 +15,7 @@ @dataclass class ActorCfg: - embodiment: str = "stompypro" + embodiment: str = "gpr" cycle_time: float = 0.4 # Cycle time for sinusoidal command input action_scale: float = 0.25 # Scale for actions lin_vel_scale: float = 2.0 # Scale for linear velocity @@ -252,6 +252,55 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T }, input_tensors +def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]: + all_weights = torch.load(model_path, map_location="cpu", weights_only=True) + weights = all_weights["model_state_dict"] + num_actor_obs = weights["actor.0.weight"].shape[1] + num_critic_obs = weights["critic.0.weight"].shape[1] + num_actions = weights["std"].shape[0] + actor_hidden_dims = [v.shape[0] for k, v in weights.items() if re.match(r"actor\.\d+\.weight", k)] + critic_hidden_dims = [v.shape[0] for k, v in weights.items() if 
re.match(r"critic\.\d+\.weight", k)] + actor_hidden_dims = actor_hidden_dims[:-1] + critic_hidden_dims = critic_hidden_dims[:-1] + + ac_model = ActorCritic(num_actor_obs, num_critic_obs, num_actions, actor_hidden_dims, critic_hidden_dims) + ac_model.load_state_dict(weights) + + a_model = Actor(ac_model.actor, cfg) + + # Gets the model input tensors. + x_vel = torch.randn(1) + y_vel = torch.randn(1) + rot = torch.randn(1) + t = torch.randn(1) + dof_pos = torch.randn(a_model.num_actions) + dof_vel = torch.randn(a_model.num_actions) + prev_actions = torch.randn(a_model.num_actions) + imu_ang_vel = torch.randn(3) + imu_euler_xyz = torch.randn(3) + buffer = a_model.get_init_buffer() + input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, imu_ang_vel, imu_euler_xyz, buffer) + + jit_model = torch.jit.script(a_model) + + # Add sim2sim metadata + robot_effort = list(a_model.robot.effort().values()) + robot_stiffness = list(a_model.robot.stiffness().values()) + robot_damping = list(a_model.robot.damping().values()) + default_standing = list(a_model.robot.default_standing().values()) + num_actions = a_model.num_actions + num_observations = a_model.num_observations + + return a_model, { + "robot_effort": robot_effort, + "robot_stiffness": robot_stiffness, + "robot_damping": robot_damping, + "default_standing": default_standing, + "num_actions": num_actions, + "num_observations": num_observations, + }, input_tensors + + def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[str] = None) -> ort.InferenceSession: """Converts a PyTorch model to a ONNX format. diff --git a/sim/play.py b/sim/play.py index 28587bf2..4881d6ba 100755 --- a/sim/play.py +++ b/sim/play.py @@ -2,7 +2,7 @@ """Play a trained policy in the environment. Run: - python sim/play.py --task stompypro --log_h5 + python sim/play.py --task gpr --log_h5 """ import argparse import copy @@ -91,14 +91,23 @@ def play(args: argparse.Namespace) -> None: # export policy as a onnx module (used to run it on web) if args.export_onnx: - path = ppo_runner.alg.actor_critic - policy_cfg = ActorCfg() + path = ppo_runner.load_path + embodiment = ppo_runner.cfg['experiment_name'].lower() + policy_cfg = ActorCfg(embodiment=embodiment) + + if embodiment == "stompypro": + policy_cfg.cycle_time = 0.4 + elif embodiment == "stompymicro": + policy_cfg.cycle_time = 0.2 + else: + print(f"Specific policy cfg for {embodiment} not implemented") + actor_model, sim2sim_info, input_tensors = get_actor_policy(path, policy_cfg) # Merge policy_cfg and sim2sim_info into a single config object export_config = {**vars(policy_cfg), **sim2sim_info} - policy = export_to_onnx( + export_to_onnx( actor_model, input_tensors=input_tensors, config=export_config, diff --git a/sim/play_old.py b/sim/play_old.py new file mode 100755 index 00000000..d842fb83 --- /dev/null +++ b/sim/play_old.py @@ -0,0 +1,243 @@ +# mypy: ignore-errors +"""Play a trained policy in the environment. 
+ +Run: + python sim/play.py --task g1 --log_h5 + python sim/play.py --task stompymini --log_h5 +""" +import argparse +import copy +import logging +import os +from datetime import datetime +from typing import Any, Union + +import cv2 +import h5py +import numpy as np +from isaacgym import gymapi +from tqdm import tqdm + +logger = logging.getLogger(__name__) + +from sim.env import run_dir # noqa: E402 +from sim.envs import task_registry # noqa: E402 +from sim.model_export import ActorCfg, convert_model_to_onnx # noqa: E402 +from sim.utils.helpers import get_args # noqa: E402 +from sim.utils.logger import Logger # noqa: E402 + +import torch # isort: skip + + +def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None: + os.makedirs(path, exist_ok=True) + path = os.path.join(path, "policy_1.pt") + model = copy.deepcopy(actor_critic.actor).to("cpu") + traced_script_module = torch.jit.script(model) + traced_script_module.save(path) + + +def play(args: argparse.Namespace) -> None: + logger.info("Configuring environment and training settings...") + env_cfg, train_cfg = task_registry.get_cfgs(name=args.task) + + # override some parameters for testing + env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1) + env_cfg.sim.max_gpu_contact_pairs = 2**10 + if args.trimesh: + env_cfg.terrain.mesh_type = "trimesh" + else: + env_cfg.terrain.mesh_type = "plane" + env_cfg.terrain.num_rows = 5 + env_cfg.terrain.num_cols = 5 + env_cfg.terrain.curriculum = False + env_cfg.terrain.max_init_terrain_level = 5 + env_cfg.noise.add_noise = True + env_cfg.domain_rand.push_robots = False + env_cfg.domain_rand.joint_angle_noise = 0.01 + env_cfg.noise.curriculum = False + env_cfg.noise.noise_level = 0.5 + + train_cfg.seed = 123145 + logger.info("train_cfg.runner_class_name: %s", train_cfg.runner_class_name) + + # prepare environment + env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg) + env.set_camera(env_cfg.viewer.pos, env_cfg.viewer.lookat) + + obs = env.get_observations() + + # load policy + train_cfg.runner.resume = True + ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg) + policy = ppo_runner.get_inference_policy(device=env.device) + + # # Export policy if needed + # if args.export_policy: + # path = os.path.join(".") + # export_policy_as_jit(ppo_runner.alg.actor_critic, path) + # print("Exported policy as jit script to: ", path) + + # export policy as a onnx module (used to run it on web) + # if args.export_onnx: + # path = ppo_runner.alg.actor_critic + # convert_model_to_onnx(path, ActorCfg(), save_path="policy.onnx") + # print("Exported policy as onnx to: ", path) + + # Prepare for logging + env_logger = Logger(env.dt) + robot_index = 0 + joint_index = 1 + stop_state_log = 1000 + now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + if args.log_h5: + h5_file = h5py.File(f"data{now}.h5", "w") + + # Create dataset for actions + max_timesteps = stop_state_log + num_dof = env.num_dof + dset_actions = h5_file.create_dataset("actions", (max_timesteps, num_dof), dtype=np.float32) + + # Create dataset of observations + buf_len = len(env.obs_history) # length of observation buffer + dset_2D_command = h5_file.create_dataset( + "observations/2D_command", (max_timesteps, buf_len, 2), dtype=np.float32 + ) # sin and cos commands + dset_3D_command = h5_file.create_dataset( + "observations/3D_command", (max_timesteps, buf_len, 3), dtype=np.float32 + ) # x, y, yaw commands + dset_q = h5_file.create_dataset( + 
"observations/q", (max_timesteps, buf_len, num_dof), dtype=np.float32 + ) # joint positions + dset_dq = h5_file.create_dataset( + "observations/dq", (max_timesteps, buf_len, num_dof), dtype=np.float32 + ) # joint velocities + dset_obs_actions = h5_file.create_dataset( + "observations/actions", (max_timesteps, buf_len, num_dof), dtype=np.float32 + ) # actions + dset_ang_vel = h5_file.create_dataset( + "observations/ang_vel", (max_timesteps, buf_len, 3), dtype=np.float32 + ) # root angular velocity + dset_euler = h5_file.create_dataset( + "observations/euler", (max_timesteps, buf_len, 3), dtype=np.float32 + ) # root orientation + + if args.render: + camera_properties = gymapi.CameraProperties() + camera_properties.width = 1920 + camera_properties.height = 1080 + h1 = env.gym.create_camera_sensor(env.envs[0], camera_properties) + camera_offset = gymapi.Vec3(3, -3, 1) + camera_rotation = gymapi.Quat.from_axis_angle(gymapi.Vec3(-0.3, 0.2, 1), np.deg2rad(135)) + actor_handle = env.gym.get_actor_handle(env.envs[0], 0) + body_handle = env.gym.get_actor_rigid_body_handle(env.envs[0], actor_handle, 0) + logger.info("body_handle: %s", body_handle) + logger.info("actor_handle: %s", actor_handle) + env.gym.attach_camera_to_body( + h1, env.envs[0], body_handle, gymapi.Transform(camera_offset, camera_rotation), gymapi.FOLLOW_POSITION + ) + + fourcc = cv2.VideoWriter_fourcc(*"MJPG") # type: ignore[attr-defined] + + # Creates a directory to store videos. + video_dir = run_dir() / "videos" + experiment_dir = video_dir / train_cfg.runner.experiment_name + experiment_dir.mkdir(parents=True, exist_ok=True) + + dir = os.path.join(experiment_dir, now + str(args.run_name) + ".mp4") + if not os.path.exists(video_dir): + os.mkdir(video_dir) + if not os.path.exists(experiment_dir): + os.mkdir(experiment_dir) + video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080)) + + for t in tqdm(range(stop_state_log)): + actions = policy(obs.detach()) + if args.log_h5: + dset_actions[t] = actions.detach().numpy() + if args.fix_command: + env.commands[:, 0] = 0.5 + env.commands[:, 1] = 0.0 + env.commands[:, 2] = 0.0 + env.commands[:, 3] = 0.0 + obs, critic_obs, rews, dones, infos = env.step(actions.detach()) + print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}") + + if args.render: + env.gym.fetch_results(env.sim, True) + env.gym.step_graphics(env.sim) + env.gym.render_all_camera_sensors(env.sim) + img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR) + img = np.reshape(img, (1080, 1920, 4)) + img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR) + + video.write(img[..., :3]) + + # Log states + dof_pos_target = actions[robot_index, joint_index].item() * env.cfg.control.action_scale + dof_pos = env.dof_pos[robot_index, joint_index].item() + dof_vel = env.dof_vel[robot_index, joint_index].item() + dof_torque = env.torques[robot_index, joint_index].item() + command_x = env.commands[robot_index, 0].item() + command_y = env.commands[robot_index, 1].item() + command_yaw = env.commands[robot_index, 2].item() + base_vel_x = env.base_lin_vel[robot_index, 0].item() + base_vel_y = env.base_lin_vel[robot_index, 1].item() + base_vel_z = env.base_lin_vel[robot_index, 2].item() + base_vel_yaw = env.base_ang_vel[robot_index, 2].item() + contact_forces_z = env.contact_forces[robot_index, env.feet_indices, 2].cpu().numpy() + + if args.log_h5: + for i in range(buf_len): + cur_obs = env.obs_history[i].tolist()[0] + dset_2D_command[t, i] = cur_obs[0:2] # sin and cos commands + dset_3D_command[t, i] 
= cur_obs[2:5] # x, y, yaw commands + dset_q[t, i] = cur_obs[5 : 5 + num_dof] # joint positions + dset_dq[t, i] = cur_obs[5 + num_dof : 5 + 2 * num_dof] # joint velocities + dset_obs_actions[t, i] = cur_obs[5 + 2 * num_dof : 5 + 3 * num_dof] # actions + dset_ang_vel[t, i] = cur_obs[5 + 3 * num_dof : 8 + 3 * num_dof] # root angular velocity + dset_euler[t, i] = cur_obs[8 + 3 * num_dof : 11 + 3 * num_dof] # root orientation + + env_logger.log_states( + { + "dof_pos_target": dof_pos_target, + "dof_pos": dof_pos, + "dof_vel": dof_vel, + "dof_torque": dof_torque, + "command_x": command_x, + "command_y": command_y, + "command_yaw": command_yaw, + "base_vel_x": base_vel_x, + "base_vel_y": base_vel_y, + "base_vel_z": base_vel_z, + "base_vel_yaw": base_vel_yaw, + "contact_forces_z": contact_forces_z, + } + ) + if infos["episode"]: + num_episodes = env.reset_buf.sum().item() + if num_episodes > 0: + env_logger.log_rewards(infos["episode"], num_episodes) + + env_logger.print_rewards() + env_logger.plot_states() + + if args.render: + video.release() + + if args.log_h5: + print("Saving data to " + os.path.abspath(f"data{now}.h5")) + h5_file.close() + + +if __name__ == "__main__": + base_args = get_args() + parser = argparse.ArgumentParser(description="Extend base arguments with log_h5") + parser.add_argument("--log_h5", action="store_true", help="Enable HDF5 logging") + parser.add_argument("--render", action="store_true", help="Enable rendering", default=True) + parser.add_argument("--fix_command", action="store_true", help="Fix command", default=True) + parser.add_argument("--export_onnx", action="store_true", help="Export policy as ONNX", default=True) + parser.add_argument("--export_policy", action="store_true", help="Export policy as JIT", default=True) + args, unknown = parser.parse_known_args(namespace=base_args) + + play(args) diff --git a/sim/requirements.txt b/sim/requirements.txt index 8a025bea..ff8a57a2 100755 --- a/sim/requirements.txt +++ b/sim/requirements.txt @@ -10,6 +10,7 @@ numpy<2.0.0 wandb tensorboard==2.14.0 onnxscript +mujoco==2.3.6 # onnxruntime kinfer diff --git a/sim/resources/gpr/README.md b/sim/resources/gpr/README.md new file mode 100644 index 00000000..58ff4a70 --- /dev/null +++ b/sim/resources/gpr/README.md @@ -0,0 +1,13 @@ +# URDF/XML development: +1. kol run https://cad.onshape.com/documents/bc3a557e2f92bdcea4099f0d/w/09713ac603d461fc1dff6b5d/e/5a4acdbffe6f8ed7c4e34970 --config-path config_example.json # noqa: E501 +2. python sim/scripts/create_fixed_torso.py +3. Rotate first link + + + + + + +4. Fix left leg axes to align with expected directions +5. urf2mjcf robot.urdf -o robot.xml + \ No newline at end of file diff --git a/sim/resources/stompymini/__init__.py b/sim/resources/gpr/__init__.py similarity index 100% rename from sim/resources/stompymini/__init__.py rename to sim/resources/gpr/__init__.py diff --git a/sim/resources/stompypro/joints.py b/sim/resources/gpr/joints.py similarity index 75% rename from sim/resources/stompypro/joints.py rename to sim/resources/gpr/joints.py index 27c6c2c6..d1f966eb 100755 --- a/sim/resources/stompypro/joints.py +++ b/sim/resources/gpr/joints.py @@ -1,9 +1,4 @@ -"""Defines a more Pythonic interface for specifying the joint names. - -The best way to re-generate this snippet for a new robot is to use the -`sim/scripts/print_joints.py` script. This script will print out a hierarchical -tree of the various joint names in the robot. 
-""" +"""Defines a more Pythonic interface for specifying the joint names.""" import textwrap from abc import ABC @@ -96,8 +91,23 @@ class Robot(Node): legs = Legs() # arms = Arms() - height = 0.63 - rotation = [0.0, 0.0, 0, 1] + height = 1.06 + rotation = [0, 0, 0, 1] + + @classmethod + def isaac_to_real_signs(cls) -> Dict[str, int]: + return { + Robot.legs.left.hip_pitch: 1, + Robot.legs.left.hip_yaw: 1, + Robot.legs.left.hip_roll: 1, + Robot.legs.left.knee_pitch: 1, + Robot.legs.left.ankle_pitch: 1, + Robot.legs.right.hip_pitch: -1, + Robot.legs.right.hip_yaw: -1, + Robot.legs.right.hip_roll: 1, + Robot.legs.right.knee_pitch: -1, + Robot.legs.right.ankle_pitch: 1, + } @classmethod def isaac_to_mujoco_signs(cls) -> Dict[str, int]: @@ -118,37 +128,39 @@ def isaac_to_mujoco_signs(cls) -> Dict[str, int]: def default_positions(cls) -> Dict[str, float]: return {} - """This should be ordered according to how the policy is trained. E.g. the first - entry should be the angle of the first joint in the policy.""" - + # CONTRACT - this should be ordered according to how the policy is trained. + # E.g. the first entry should be the angle of the first joint in the policy. @classmethod def default_standing(cls) -> Dict[str, float]: return { Robot.legs.left.hip_pitch: -0.23, Robot.legs.left.hip_yaw: 0.0, Robot.legs.left.hip_roll: 0.0, - Robot.legs.left.knee_pitch: 0.441, - Robot.legs.left.ankle_pitch: -0.258, - Robot.legs.right.hip_pitch: -0.23, + Robot.legs.left.knee_pitch: -0.441, + Robot.legs.left.ankle_pitch: -0.195, + Robot.legs.right.hip_pitch: 0.23, Robot.legs.right.hip_yaw: 0.0, Robot.legs.right.hip_roll: 0.0, Robot.legs.right.knee_pitch: 0.441, - Robot.legs.right.ankle_pitch: -0.258, + Robot.legs.right.ankle_pitch: 0.195, } @classmethod def default_limits(cls) -> Dict[str, Dict[str, float]]: - return {} + return { + Robot.legs.left.knee_pitch: {"lower": -1.57, "upper": 0}, + Robot.legs.right.knee_pitch: {"lower": 0, "upper": 1.57} + } # p_gains @classmethod def stiffness(cls) -> Dict[str, float]: return { - "hip_y": 300, - "hip_x": 200, - "hip_z": 200, - "knee": 300, - "ankle_y": 300, + "hip_y": 120, + "hip_x": 60, + "hip_z": 60, + "knee": 120, + "ankle_y": 17, } # d_gains @@ -156,23 +168,22 @@ def stiffness(cls) -> Dict[str, float]: def damping(cls) -> Dict[str, float]: return { "hip_y": 10, - "hip_x": 10, - "hip_z": 10, + "hip_x": 5, + "hip_z": 5, "knee": 10, - "ankle_y": 10, + "ankle_y": 5, } # # pos_limits @classmethod def effort(cls) -> Dict[str, float]: return { - "hip_y": 250, + "hip_y": 120, "hip_x": 100, "hip_z": 100, - "knee": 250, - "ankle_y": 17, + "knee": 120, + "ankle_y": 50, } - # # vel_limits @classmethod def velocity(cls) -> Dict[str, float]: @@ -196,4 +207,4 @@ def print_joints() -> None: if __name__ == "__main__": # python -m sim.Robot.joints - print_joints() + print_joints() \ No newline at end of file diff --git a/sim/resources/gpr/meshes/arm1_top.stl b/sim/resources/gpr/meshes/arm1_top.stl new file mode 100644 index 00000000..d3937233 Binary files /dev/null and b/sim/resources/gpr/meshes/arm1_top.stl differ diff --git a/sim/resources/gpr/meshes/arm1_top_2.stl b/sim/resources/gpr/meshes/arm1_top_2.stl new file mode 100644 index 00000000..20808d7c Binary files /dev/null and b/sim/resources/gpr/meshes/arm1_top_2.stl differ diff --git a/sim/resources/gpr/meshes/arm2_shell.stl b/sim/resources/gpr/meshes/arm2_shell.stl new file mode 100644 index 00000000..d095688d Binary files /dev/null and b/sim/resources/gpr/meshes/arm2_shell.stl differ diff --git 
a/sim/resources/gpr/meshes/arm2_shell_2.stl b/sim/resources/gpr/meshes/arm2_shell_2.stl new file mode 100644 index 00000000..98302751 Binary files /dev/null and b/sim/resources/gpr/meshes/arm2_shell_2.stl differ diff --git a/sim/resources/gpr/meshes/arm3_shell.stl b/sim/resources/gpr/meshes/arm3_shell.stl new file mode 100644 index 00000000..dd787c59 Binary files /dev/null and b/sim/resources/gpr/meshes/arm3_shell.stl differ diff --git a/sim/resources/gpr/meshes/arm3_shell2.stl b/sim/resources/gpr/meshes/arm3_shell2.stl new file mode 100644 index 00000000..9690a4e5 Binary files /dev/null and b/sim/resources/gpr/meshes/arm3_shell2.stl differ diff --git a/sim/resources/gpr/meshes/body1-part.stl b/sim/resources/gpr/meshes/body1-part.stl new file mode 100644 index 00000000..83dc0912 Binary files /dev/null and b/sim/resources/gpr/meshes/body1-part.stl differ diff --git a/sim/resources/gpr/meshes/foot1.stl b/sim/resources/gpr/meshes/foot1.stl new file mode 100644 index 00000000..93874859 Binary files /dev/null and b/sim/resources/gpr/meshes/foot1.stl differ diff --git a/sim/resources/gpr/meshes/foot3.stl b/sim/resources/gpr/meshes/foot3.stl new file mode 100644 index 00000000..841369df Binary files /dev/null and b/sim/resources/gpr/meshes/foot3.stl differ diff --git a/sim/resources/gpr/meshes/hand_shell.stl b/sim/resources/gpr/meshes/hand_shell.stl new file mode 100644 index 00000000..4aff47dc Binary files /dev/null and b/sim/resources/gpr/meshes/hand_shell.stl differ diff --git a/sim/resources/gpr/meshes/hand_shell_2.stl b/sim/resources/gpr/meshes/hand_shell_2.stl new file mode 100644 index 00000000..4aff47dc Binary files /dev/null and b/sim/resources/gpr/meshes/hand_shell_2.stl differ diff --git a/sim/resources/gpr/meshes/leg0_shell.stl b/sim/resources/gpr/meshes/leg0_shell.stl new file mode 100644 index 00000000..c2a3806e Binary files /dev/null and b/sim/resources/gpr/meshes/leg0_shell.stl differ diff --git a/sim/resources/gpr/meshes/leg0_shell_2.stl b/sim/resources/gpr/meshes/leg0_shell_2.stl new file mode 100644 index 00000000..c2a3806e Binary files /dev/null and b/sim/resources/gpr/meshes/leg0_shell_2.stl differ diff --git a/sim/resources/gpr/meshes/leg1_shell.stl b/sim/resources/gpr/meshes/leg1_shell.stl new file mode 100644 index 00000000..b3b29475 Binary files /dev/null and b/sim/resources/gpr/meshes/leg1_shell.stl differ diff --git a/sim/resources/gpr/meshes/leg1_shell3.stl b/sim/resources/gpr/meshes/leg1_shell3.stl new file mode 100644 index 00000000..249d8151 Binary files /dev/null and b/sim/resources/gpr/meshes/leg1_shell3.stl differ diff --git a/sim/resources/gpr/meshes/leg2_shell.stl b/sim/resources/gpr/meshes/leg2_shell.stl new file mode 100644 index 00000000..a9e9d2c6 Binary files /dev/null and b/sim/resources/gpr/meshes/leg2_shell.stl differ diff --git a/sim/resources/gpr/meshes/leg2_shell_2.stl b/sim/resources/gpr/meshes/leg2_shell_2.stl new file mode 100644 index 00000000..a9e9d2c6 Binary files /dev/null and b/sim/resources/gpr/meshes/leg2_shell_2.stl differ diff --git a/sim/resources/gpr/meshes/leg3_shell2.stl b/sim/resources/gpr/meshes/leg3_shell2.stl new file mode 100644 index 00000000..6c8c68aa Binary files /dev/null and b/sim/resources/gpr/meshes/leg3_shell2.stl differ diff --git a/sim/resources/gpr/meshes/leg3_shell22.stl b/sim/resources/gpr/meshes/leg3_shell22.stl new file mode 100644 index 00000000..952779e8 Binary files /dev/null and b/sim/resources/gpr/meshes/leg3_shell22.stl differ diff --git a/sim/resources/gpr/meshes/shoulder.stl 
b/sim/resources/gpr/meshes/shoulder.stl new file mode 100644 index 00000000..90bc3924 Binary files /dev/null and b/sim/resources/gpr/meshes/shoulder.stl differ 
diff --git a/sim/resources/gpr/meshes/shoulder_2.stl b/sim/resources/gpr/meshes/shoulder_2.stl new file mode 100644 index 00000000..59c1316a Binary files /dev/null and b/sim/resources/gpr/meshes/shoulder_2.stl differ 
diff --git a/sim/resources/gpr/robot.urdf b/sim/resources/gpr/robot.urdf new file mode 100644 index 00000000..46370774 --- /dev/null +++ b/sim/resources/gpr/robot.urdf @@ -0,0 +1,616 @@ [616 added lines of URDF XML not shown] 
diff --git a/sim/resources/gpr/robot_fixed.urdf b/sim/resources/gpr/robot_fixed.urdf new file mode 100644 index 00000000..30b11861 --- /dev/null +++ b/sim/resources/gpr/robot_fixed.urdf @@ -0,0 +1,616 @@ [616 added lines of URDF XML not shown] \ No newline at end of file 
diff --git a/sim/resources/gpr/robot_fixed.xml b/sim/resources/gpr/robot_fixed.xml new file mode 100644 index 00000000..57330943 --- /dev/null +++ b/sim/resources/gpr/robot_fixed.xml @@ -0,0 +1,193 @@ [193 added lines of MJCF XML not shown] \ No newline at end of file 
diff --git a/sim/resources/stompymini/joints.py 
b/sim/resources/stompymini/joints.py deleted file mode 100644 index 9d3a22b6..00000000 --- a/sim/resources/stompymini/joints.py +++ /dev/null @@ -1,426 +0,0 @@ -"""Defines a more Pythonic interface for specifying the joint names. - -The best way to re-generate this snippet for a new robot is to use the -`sim/scripts/print_joints.py` script. This script will print out a hierarchical -tree of the various joint names in the robot. -""" - -import textwrap -from abc import ABC -from typing import Dict, List, Tuple, Union - - -class Node(ABC): - @classmethod - def children(cls) -> List["Union[Node, str]"]: - return [ - attr - for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__")) - if isinstance(attr, (Node, str)) - ] - - @classmethod - def joints(cls) -> List[str]: - return [ - attr - for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__")) - if isinstance(attr, str) - ] - - @classmethod - def joints_motors(cls) -> List[Tuple[str, str]]: - joint_names: List[Tuple[str, str]] = [] - for attr in dir(cls): - if not attr.startswith("__"): - attr2 = getattr(cls, attr) - if isinstance(attr2, str): - joint_names.append((attr, attr2)) - - return joint_names - - @classmethod - def all_joints(cls) -> List[str]: - leaves = cls.joints() - for child in cls.children(): - if isinstance(child, Node): - leaves.extend(child.all_joints()) - return leaves - - def __str__(self) -> str: - parts = [str(child) for child in self.children()] - parts_str = textwrap.indent("\n" + "\n".join(parts), " ") - return f"[{self.__class__.__name__}]{parts_str}" - - -class LeftHand(Node): - wrist_roll = "left hand roll" - gripper = "left hand gripper" - - -class LeftArm(Node): - shoulder_yaw = "left shoulder yaw" - shoulder_pitch = "left shoulder pitch" - shoulder_roll = "left shoulder roll" - elbow_pitch = "left elbow pitch" - hand = LeftHand() - - -class RightHand(Node): - wrist_roll = "right hand roll" - gripper = "right hand gripper" - - -class RightArm(Node): - shoulder_yaw = "right shoulder yaw" - shoulder_pitch = "right shoulder pitch" - shoulder_roll = "right shoulder roll" - elbow_pitch = "right elbow pitch" - hand = RightHand() - - -class LeftLeg(Node): - hip_roll = "left hip roll" - hip_yaw = "left hip yaw" - hip_pitch = "left hip pitch" - knee_pitch = "left knee pitch" - ankle_pitch = "left ankle pitch" - - -class RightLeg(Node): - hip_roll = "right hip roll" - hip_yaw = "right hip yaw" - hip_pitch = "right hip pitch" - knee_pitch = "right knee pitch" - ankle_pitch = "right ankle pitch" - - -class Legs(Node): - left = LeftLeg() - right = RightLeg() - - -class Robot(Node): - height = 0.92 - rotation = [0.5000, -0.4996, -0.5000, 0.5004] - collision_links = [ - "lower_half_assembly_1_left_leg_1_foot_pad_1_simple", - "lower_half_assembly_1_right_leg_1_foot_pad_1_simple", - ] - - left_arm = LeftArm() - right_arm = RightArm() - legs = Legs() - - @classmethod - def default_standing(cls) -> Dict[str, float]: - return { - # arms - Robot.left_arm.shoulder_pitch: 2.25, - Robot.left_arm.shoulder_yaw: 1.57, - Robot.left_arm.shoulder_roll: 3.14, - Robot.left_arm.elbow_pitch: -1.61, - Robot.left_arm.hand.wrist_roll: -1.56, - Robot.left_arm.hand.gripper: -0.1, - Robot.right_arm.shoulder_pitch: 3.45, - Robot.right_arm.shoulder_yaw: -0.1, - Robot.right_arm.shoulder_roll: -1.61, - Robot.right_arm.elbow_pitch: -1.49, - Robot.right_arm.hand.wrist_roll: 0, - Robot.right_arm.hand.gripper: 0.1, - # legs - Robot.legs.left.hip_pitch: 0.33, - Robot.legs.left.hip_roll: -1.52, - 
Robot.legs.left.hip_yaw: 4.67, - Robot.legs.left.knee_pitch: -0.61, - Robot.legs.left.ankle_pitch: 1.88, - Robot.legs.right.hip_pitch: 2.91, - Robot.legs.right.hip_roll: 3.24, - Robot.legs.right.hip_yaw: 3.22, - Robot.legs.right.knee_pitch: 0.65, - Robot.legs.right.ankle_pitch: -0.54, - } - - @classmethod - def default_limits2(cls) -> Dict[str, Dict[str, float]]: - return { - # left arm - Robot.left_arm.shoulder_pitch: { - "lower": 2.54, - "upper": 2.56, - }, - Robot.left_arm.shoulder_yaw: { - "lower": 1.56, - "upper": 1.58, - }, - Robot.left_arm.shoulder_roll: { - "lower": 3.13, - "upper": 3.14, - }, - Robot.left_arm.elbow_pitch: { - "lower": -1.56, - "upper": -1.58, - }, - Robot.left_arm.hand.wrist_roll: { - "lower": -1.56, - "upper": -1.58, - }, - Robot.left_arm.hand.gripper: { - "lower": 0, - "upper": 1.57, - }, - # right arm - Robot.right_arm.shoulder_pitch: { - "lower": 3.119, - "upper": 3.121, - }, - Robot.right_arm.shoulder_yaw: { - "lower": 1.981, - "upper": 1.979, - }, - Robot.right_arm.shoulder_roll: { - "lower": -1.381, - "upper": -1.979, - }, - Robot.right_arm.elbow_pitch: { - "lower": -3.319, - "upper": 3.321, - }, - Robot.right_arm.hand.wrist_roll: { - "lower": -0.001, - "upper": 0.001, - }, - Robot.right_arm.hand.gripper: { - "lower": 0, - "upper": 1.57, - }, - # left leg - Robot.legs.left.hip_pitch: { - "lower": -1.14, - "upper": 1.14, - }, - Robot.legs.left.hip_roll: { - "lower": -3.5, - "upper": 0.5, - }, - Robot.legs.left.hip_yaw: { - "lower": 3.14, - "upper": 5.14, - }, - Robot.legs.left.knee_pitch: { - "lower": -2, - "upper": 0, - }, - Robot.legs.left.ankle_pitch: { - "lower": 1.4, - "upper": 2.2, - }, - # right leg - Robot.legs.right.hip_pitch: { - "lower": 0.55, - "upper": 3.55, - }, - Robot.legs.right.hip_roll: { - "lower": 2.75, - "upper": 3.99, - }, - Robot.legs.right.hip_yaw: { - "lower": 2.24, - "upper": 4.24, - }, - Robot.legs.right.knee_pitch: { - "lower": 0, - "upper": 2, - }, - Robot.legs.right.ankle_pitch: { - "lower": -1.0, - "upper": 0.2, - }, - } - - @classmethod - def default_limits(cls) -> Dict[str, Dict[str, float]]: - return { - # left arm - Robot.left_arm.shoulder_pitch: { - "lower": 2.04, - "upper": 3.06, - }, - Robot.left_arm.shoulder_yaw: { - "lower": -1, - "upper": 2, - }, - Robot.left_arm.shoulder_roll: { - "lower": 2.63, - "upper": 3.64, - }, - Robot.left_arm.elbow_pitch: { - "lower": -2.06, - "upper": -1.08, - }, - Robot.left_arm.hand.wrist_roll: { - "lower": -2.06, - "upper": -1.08, - }, - Robot.left_arm.hand.gripper: { - "lower": -0.5, - "upper": 2.07, - }, - # right arm - Robot.right_arm.shoulder_pitch: { - "lower": 2.619, - "upper": 3.621, - }, - Robot.right_arm.shoulder_yaw: { - "lower": -1.481, - "upper": 1, - }, - Robot.right_arm.shoulder_roll: { - "lower": -1.881, - "upper": -1.479, - }, - Robot.right_arm.elbow_pitch: { - "lower": -3.819, - "upper": 3.821, - }, - Robot.right_arm.hand.wrist_roll: { - "lower": -0.501, - "upper": 0.501, - }, - Robot.right_arm.hand.gripper: { - "lower": -0.5, - "upper": 2.07, - }, - # left leg - Robot.legs.left.hip_pitch: { - "lower": -1.64, - "upper": 1.64, - }, - Robot.legs.left.hip_roll: { - "lower": -4.0, - "upper": 1.0, - }, - Robot.legs.left.hip_yaw: { - "lower": 2.64, - "upper": 5.64, - }, - Robot.legs.left.knee_pitch: { - "lower": -2.5, - "upper": 0.5, - }, - Robot.legs.left.ankle_pitch: { - "lower": 0.9, - "upper": 2.7, - }, - # right leg - Robot.legs.right.hip_pitch: { - "lower": 0.05, - "upper": 4.05, - }, - Robot.legs.right.hip_roll: { - "lower": 2.25, - "upper": 4.49, - }, - 
Robot.legs.right.hip_yaw: { - "lower": 1.74, - "upper": 4.74, - }, - Robot.legs.right.knee_pitch: { - "lower": -0.5, - "upper": 2.5, - }, - Robot.legs.right.ankle_pitch: { - "lower": -1.5, - "upper": 0.7, - }, - } - - # p_gains - @classmethod - def stiffness(cls) -> Dict[str, float]: - return { - "hip pitch": 250, - "hip yaw": 250, - "hip roll": 150, - "knee pitch": 250, - "ankle pitch": 150, - "shoulder pitch": 150, - "shoulder yaw": 45, - "shoulder roll": 45, - "elbow pitch": 45, - "hand roll": 45, - "gripper": 45, - } - - # d_gains - @classmethod - def damping(cls) -> Dict[str, float]: - return { - "hip pitch": 10, - "hip yaw": 10, - "hip roll": 10, - "knee pitch": 10, - "ankle pitch": 10, - "shoulder pitch": 10, - "shoulder yaw": 10, - "shoulder roll": 5, - "elbow pitch": 5, - "hand roll": 5, - "gripper": 5, - } - - # pos_limits - @classmethod - def effort(cls) -> Dict[str, float]: - return { - "hip pitch": 120, - "hip yaw": 120, - "hip roll": 17, - "knee pitch": 120, - "ankle pitch": 17, - "shoulder pitch": 120, - "shoulder yaw": 17, - "shoulder roll": 17, - "elbow pitch": 17, - "hand roll": 17, - "gripper": 17, - } - - # vel_limits - @classmethod - def velocity(cls) -> Dict[str, float]: - return { - "hip pitch": 40, - "hip yaw": 40, - "hip roll": 40, - "knee pitch": 40, - "ankle pitch": 40, - "shoulder pitch": 40, - "shoulder yaw": 40, - "shoulder roll": 40, - "elbow pitch": 40, - "hand roll": 40, - "gripper": 40, - } - - @classmethod - def friction(cls) -> Dict[str, float]: - return { - "hip pitch": 0.0, - "hip yaw": 0.0, - "hip roll": 0.0, - "knee pitch": 0.0, - "ankle pitch": 0.0, - "hand roll": 0.0, - "gripper": 0.0, - } - - -def print_joints() -> None: - joints = Robot.all_joints() - assert len(joints) == len(set(joints)), "Duplicate joint names found!" 
- print(Robot()) - - -if __name__ == "__main__": - # python -m sim.Robot.joints - print_joints() 
diff --git a/sim/resources/stompymini/robot.urdf b/sim/resources/stompymini/robot.urdf deleted file mode 100644 index d5c4059c..00000000 --- a/sim/resources/stompymini/robot.urdf +++ /dev/null @@ -1,663 +0,0 @@ [663 deleted lines of URDF XML not shown] 
diff --git a/sim/resources/stompymini/robot_fixed.urdf b/sim/resources/stompymini/robot_fixed.urdf deleted file mode 100644 index ebf74644..00000000 --- a/sim/resources/stompymini/robot_fixed.urdf +++ /dev/null @@ -1,662 +0,0 @@ [662 deleted lines of URDF XML not shown] \ No newline at end of file 
diff --git a/sim/resources/stompypro/__init__.py b/sim/resources/stompypro/__init__.py deleted file mode 100755 index e69de29b..00000000 
diff --git a/sim/resources/stompypro/robot.urdf b/sim/resources/stompypro/robot.urdf deleted file mode 100644 index 2e1c1a73..00000000 --- a/sim/resources/stompypro/robot.urdf +++ /dev/null @@ -1,499 
+0,0 @@ [499 deleted lines of URDF XML not shown] \ No newline at end of file 
diff --git a/sim/resources/stompypro/robot_calibration.xml b/sim/resources/stompypro/robot_calibration.xml deleted file mode 100644 index 7cb3c9f6..00000000 --- a/sim/resources/stompypro/robot_calibration.xml +++ /dev/null @@ -1,191 +0,0 @@ [191 deleted lines of MJCF XML not shown] \ No newline at end of file 
diff --git a/sim/resources/stompypro/robot_fixed.urdf b/sim/resources/stompypro/robot_fixed.urdf deleted file mode 100644 index a8b1f04c..00000000 --- a/sim/resources/stompypro/robot_fixed.urdf +++ /dev/null @@ -1,522 +0,0 @@ [522 deleted lines of URDF XML not shown] \ No newline at end of file 
diff --git a/sim/resources/stompypro/robot_fixed.xml b/sim/resources/stompypro/robot_fixed.xml deleted file mode 100644 index 63345107..00000000 --- a/sim/resources/stompypro/robot_fixed.xml +++ /dev/null @@ -1,191 +0,0 @@ [191 deleted lines of MJCF XML not shown] 
diff --git a/sim/scripts/create_fixed_torso.py b/sim/scripts/create_fixed_torso.py index 7002eb9b..0a1cf123 100755 --- a/sim/scripts/create_fixed_torso.py +++ b/sim/scripts/create_fixed_torso.py @@ -52,8 +52,8 @@ def update_urdf(model_path: str, embodiment: str) -> None: def main() -> None: parser = argparse.ArgumentParser(description="Update URDF file to fix robot joints.") - parser.add_argument("--model_path", type=str, help="Path to the model directory", default="sim/resources/stompypro") - parser.add_argument("--embodiment", type=str, help="Embodiment to use", default="stompypro") + parser.add_argument("--model_path", type=str, help="Path to the model directory", 
default="sim/resources/gpr") + parser.add_argument("--embodiment", type=str, help="Embodiment to use", default="gpr") args = parser.parse_args() update_urdf(args.model_path, args.embodiment) diff --git a/sim/sim2sim.py b/sim/sim2sim.py index 59955505..dfc19bd4 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -1,8 +1,7 @@ -""" -Difference setup -python sim/play.py --task mini_ppo --sim_device cpu -python sim/sim2sim.py --load_model examples/standing_pro.pt --embodiment stompypro -python sim/sim2sim.py --load_model examples/standing_micro.pt --embodiment stompymicro +"""Sim2sim deployment test. + +Run: + python sim/sim2sim.py --load_model examples/walking_policy.onnx --embodiment gpr """ import argparse import math @@ -32,7 +31,7 @@ class Sim2simCfg: dt: float = 0.001 decimation: int = 10 tau_factor: float = 3 - cycle_time: float = 0.4 + cycle_time: float = 0.25 @@ -316,8 +315,8 @@ def run_mujoco( x_vel_cmd, y_vel_cmd, yaw_vel_cmd = np.random.uniform(0.1, 0.5), 0.0, 0.0 policy_cfg = ActorCfg(embodiment=args.embodiment) - if args.embodiment == "stompypro": - policy_cfg.cycle_time = 0.4 + if args.embodiment == "gpr": + policy_cfg.cycle_time = 0.25 cfg = Sim2simCfg( sim_duration=10.0, dt=0.001, @@ -325,7 +324,7 @@ def run_mujoco( tau_factor=4.0, cycle_time=policy_cfg.cycle_time, ) - elif args.embodiment == "stompymicro": + elif args.embodiment == "zeroth": policy_cfg.cycle_time = 0.2 cfg = Sim2simCfg( sim_duration=10.0, @@ -361,7 +360,6 @@ def run_mujoco( "robot_damping": metadata["robot_damping"], } - run_mujoco( args.embodiment, policy, @@ -371,4 +369,4 @@ def run_mujoco( args.log_h5, args.render, args.h5_out_dir, - ) + ) \ No newline at end of file diff --git a/sim/sim2sim_old.py b/sim/sim2sim_old.py new file mode 100755 index 00000000..fd761aa8 --- /dev/null +++ b/sim/sim2sim_old.py @@ -0,0 +1,354 @@ +""" +python sim/sim2sim.py --load_model examples/standing_pro.pt --embodiment gpr +python sim/sim2sim.py --load_model examples/standing_micro.pt --embodiment zeroth +""" +import argparse +import math +import os +from copy import deepcopy +from dataclasses import dataclass +from typing import Dict, List, Tuple, Union + +import mujoco +import mujoco_viewer +import numpy as np +import onnxruntime as ort +import pygame +from scipy.spatial.transform import Rotation as R +from tqdm import tqdm + +# from sim.h5_logger import HDF5Logger +from sim.model_export import ActorCfg, convert_model_to_onnx + + +@dataclass +class Sim2simCfg: + sim_duration: float = 60.0 + dt: float = 0.001 + decimation: int = 10 + tau_factor: float = 3 + cycle_time: float = 0.4 + + +def handle_keyboard_input() -> None: + global x_vel_cmd, y_vel_cmd, yaw_vel_cmd + + keys = pygame.key.get_pressed() + + # Update movement commands based on arrow keys + if keys[pygame.K_UP]: + x_vel_cmd += 0.0005 + if keys[pygame.K_DOWN]: + x_vel_cmd -= 0.0005 + if keys[pygame.K_LEFT]: + y_vel_cmd += 0.0005 + if keys[pygame.K_RIGHT]: + y_vel_cmd -= 0.0005 + + # Yaw control + if keys[pygame.K_a]: + yaw_vel_cmd += 0.001 + if keys[pygame.K_z]: + yaw_vel_cmd -= 0.001 + + +def quaternion_to_euler_array(quat: np.ndarray) -> np.ndarray: + # Ensure quaternion is in the correct format [x, y, z, w] + x, y, z, w = quat + + # Roll (x-axis rotation) + t0 = +2.0 * (w * x + y * z) + t1 = +1.0 - 2.0 * (x * x + y * y) + roll_x = np.arctan2(t0, t1) + + # Pitch (y-axis rotation) + t2 = +2.0 * (w * y - z * x) + t2 = np.clip(t2, -1.0, 1.0) + pitch_y = np.arcsin(t2) + + # Yaw (z-axis rotation) + t3 = +2.0 * (w * z + x * y) + t4 = +1.0 - 2.0 * (y * y + z * z) + yaw_z = 
np.arctan2(t3, t4) + + # Returns roll, pitch, yaw in a NumPy array in radians + return np.array([roll_x, pitch_y, yaw_z]) + + +def get_obs(data: mujoco.MjData) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Extracts an observation from the mujoco data structure""" + q = data.qpos.astype(np.double) + dq = data.qvel.astype(np.double) + quat = data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double) + r = R.from_quat(quat) + v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame + omega = data.sensor("angular-velocity").data.astype(np.double) + gvec = r.apply(np.array([0.0, 0.0, -1.0]), inverse=True).astype(np.double) + return (q, dq, quat, v, omega, gvec) + + +def pd_control( + target_q: np.ndarray, + q: np.ndarray, + kp: np.ndarray, + dq: np.ndarray, + kd: np.ndarray, + default: np.ndarray, +) -> np.ndarray: + """Calculates torques from position commands""" + return kp * (target_q + default - q) - kd * dq + + +def run_mujoco( + embodiment: str, + policy: ort.InferenceSession, + cfg: Sim2simCfg, + model_info: Dict[str, Union[float, List[float], str]], + keyboard_use: bool = False, + log_h5: bool = False, + render: bool = True, + h5_out_dir: str = "sim/resources", +) -> None: + """ + Run the Mujoco simulation using the provided policy and configuration. + + Args: + policy: The policy used for controlling the simulation. + cfg: The configuration object containing simulation settings. + + Returns: + None + """ + model_dir = os.environ.get("MODEL_DIR", "sim/resources") + mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed.xml" + + model = mujoco.MjModel.from_xml_path(mujoco_model_path) + model.opt.timestep = cfg.dt + data = mujoco.MjData(model) + + assert isinstance(model_info["num_actions"], int) + assert isinstance(model_info["num_observations"], int) + assert isinstance(model_info["robot_effort"], list) + assert isinstance(model_info["robot_stiffness"], list) + assert isinstance(model_info["robot_damping"], list) + + tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * cfg.tau_factor + kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"])) + kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"])) + + try: + data.qpos = model.keyframe("default").qpos + default = deepcopy(model.keyframe("default").qpos)[-model_info["num_actions"] :] + print("Default position:", default) + except: + print("No default position found, using zero initialization") + default = np.zeros(model_info["num_actions"]) # 3 for pos, 4 for quat, cfg.num_actions for joints + default += np.random.uniform(-0.03, 0.03, size=default.shape) + print("Default position:", default) + mujoco.mj_step(model, data) + for ii in range(len(data.ctrl) + 1): + print(data.joint(ii).id, data.joint(ii).name) + + data.qvel = np.zeros_like(data.qvel) + data.qacc = np.zeros_like(data.qacc) + + if render: + viewer = mujoco_viewer.MujocoViewer(model, data) + + target_q = np.zeros((model_info["num_actions"]), dtype=np.double) + prev_actions = np.zeros((model_info["num_actions"]), dtype=np.double) + hist_obs = np.zeros((model_info["num_observations"]), dtype=np.double) + + count_lowlevel = 0 + + input_data = { + "x_vel.1": np.zeros(1).astype(np.float32), + "y_vel.1": np.zeros(1).astype(np.float32), + "rot.1": np.zeros(1).astype(np.float32), + "t.1": np.zeros(1).astype(np.float32), + "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32), + "dof_vel.1": 
np.zeros(model_info["num_actions"]).astype(np.float32), + "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32), + "imu_ang_vel.1": np.zeros(3).astype(np.float32), + "imu_euler_xyz.1": np.zeros(3).astype(np.float32), + "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32), + } + + if log_h5: + stop_state_log = int(cfg.sim_duration / cfg.dt) / cfg.decimation + logger = HDF5Logger( + data_name=embodiment, + num_actions=model_info["num_actions"], + max_timesteps=stop_state_log, + num_observations=model_info["num_observations"], + h5_out_dir=h5_out_dir + ) + + # Initialize variables for tracking upright steps and average speed + upright_steps = 0 + total_speed = 0.0 + step_count = 0 + + for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."): + if keyboard_use: + handle_keyboard_input() + + # Obtain an observation + q, dq, quat, v, omega, gvec = get_obs(data) + q = q[-model_info["num_actions"] :] + dq = dq[-model_info["num_actions"] :] + + eu_ang = quaternion_to_euler_array(quat) + eu_ang[eu_ang > math.pi] -= 2 * math.pi + + # Check if the robot is upright (roll and pitch within ±30 degrees) + if abs(eu_ang[0]) > math.radians(30) or abs(eu_ang[1]) > math.radians(30): + print("Robot tilted heavily, ending simulation.") + break + else: + upright_steps += 1 # Increment upright steps + + # Calculate speed and accumulate for average speed calculation + speed = np.linalg.norm(v[:2]) # Speed in the x-y plane + total_speed += speed + step_count += 1 + + # 1000hz -> Nhz + if count_lowlevel % cfg.decimation == 0: + # Convert sim coordinates to policy coordinates + cur_pos_obs = q - default + cur_vel_obs = dq + + input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32) + input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32) + input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32) + + input_data["t.1"] = np.array([count_lowlevel * cfg.dt], dtype=np.float32) + + input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32) + input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32) + + input_data["prev_actions.1"] = prev_actions.astype(np.float32) + + input_data["imu_ang_vel.1"] = omega.astype(np.float32) + input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32) + + input_data["buffer.1"] = hist_obs.astype(np.float32) + + positions, curr_actions, hist_obs = policy.run(None, input_data) + target_q = positions + prev_actions = curr_actions + + # Generate PD control + tau = pd_control(target_q, q, kps, dq, kds, default) # Calc torques + tau = np.clip(tau, -tau_limit, tau_limit) # Clamp torques + + data.ctrl = tau + mujoco.mj_step(model, data) + + if render: + viewer.render() + count_lowlevel += 1 + + if render: + viewer.close() + + # Calculate average speed + if step_count > 0: + average_speed = total_speed / step_count + else: + average_speed = 0.0 + + # Save or print the statistics at the end of the episode + print(f"Number of upright steps: {upright_steps}") + print(f"Average speed: {average_speed:.4f} m/s") + + if log_h5: + logger.close() + + +def parse_modelmeta( + modelmeta: List[Tuple[str, str]], + verbose: bool = False, +) -> Dict[str, Union[float, List[float], str]]: + parsed_meta: Dict[str, Union[float, List[float], str]] = {} + for key, value in modelmeta: + if value.startswith("[") and value.endswith("]"): + parsed_meta[key] = list(map(float, value.strip("[]").split(","))) + else: + try: + parsed_meta[key] = float(value) + try: + if int(value) == parsed_meta[key]: + parsed_meta[key] = int(value) + except ValueError: + 
pass + except ValueError: + print(f"Failed to convert {value} to float") + parsed_meta[key] = value + if verbose: + for key, value in parsed_meta.items(): + print(f"{key}: {value}") + return parsed_meta + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Deployment script.") + parser.add_argument("--embodiment", type=str, required=True, help="Embodiment name.") + parser.add_argument("--load_model", type=str, required=True, help="Path to run to load from.") + parser.add_argument("--keyboard_use", action="store_true", help="keyboard_use") + parser.add_argument("--log_h5", action="store_true", help="log_h5") + parser.add_argument("--h5_out_dir", type=str, default="sim/resources", help="Directory to save HDF5 files") + parser.add_argument("--no_render", action="store_false", dest="render", help="Disable rendering") + parser.set_defaults(render=True) + args = parser.parse_args() + + if args.keyboard_use: + x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.0, 0.0, 0.0 + pygame.init() + pygame.display.set_caption("Simulation Control") + else: + x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.4, 0.0, 0.0 + + policy_cfg = ActorCfg(embodiment=args.embodiment) + if args.embodiment == "gpr": + policy_cfg.cycle_time = 0.64 + cfg = Sim2simCfg( + sim_duration=10.0, + dt=0.001, + decimation=10, + tau_factor=4.0, + cycle_time=policy_cfg.cycle_time, + ) + elif args.embodiment == "zeroth": + policy_cfg.cycle_time = 0.2 + cfg = Sim2simCfg( + sim_duration=10.0, + dt=0.001, + decimation=10, + tau_factor=1, + cycle_time=policy_cfg.cycle_time, + ) + + if args.load_model.endswith(".onnx"): + policy = ort.InferenceSession(args.load_model) + else: + policy = convert_model_to_onnx( + args.load_model, policy_cfg, save_path="policy.onnx" + ) + + model_info = parse_modelmeta( + policy.get_modelmeta().custom_metadata_map.items(), + verbose=True, + ) + + run_mujoco( + args.embodiment, + policy, + cfg, + model_info, + args.keyboard_use, + args.log_h5, + args.render, + args.h5_out_dir, + )
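
For reference, the exported ONNX policy used in the deployment scripts above can be exercised on its own with onnxruntime. Below is a minimal sketch, assuming a model exported with the same input names and metadata keys as in sim/sim2sim_old.py; the policy.onnx path and the zero-valued inputs are placeholders, and the three outputs mirror the positions / actions / buffer unpacking done in run_mujoco.

import numpy as np
import onnxruntime as ort

# Load the exported policy (path is illustrative).
session = ort.InferenceSession("policy.onnx")

# Metadata values are stored as strings; list-valued entries look like "[1.0, 2.0, ...]".
meta = session.get_modelmeta().custom_metadata_map
num_actions = int(float(meta["num_actions"]))
num_observations = int(float(meta["num_observations"]))

# Zero-initialized inputs matching the names fed to policy.run() in run_mujoco().
inputs = {
    "x_vel.1": np.zeros(1, dtype=np.float32),
    "y_vel.1": np.zeros(1, dtype=np.float32),
    "rot.1": np.zeros(1, dtype=np.float32),
    "t.1": np.zeros(1, dtype=np.float32),
    "dof_pos.1": np.zeros(num_actions, dtype=np.float32),
    "dof_vel.1": np.zeros(num_actions, dtype=np.float32),
    "prev_actions.1": np.zeros(num_actions, dtype=np.float32),
    "imu_ang_vel.1": np.zeros(3, dtype=np.float32),
    "imu_euler_xyz.1": np.zeros(3, dtype=np.float32),
    "buffer.1": np.zeros(num_observations, dtype=np.float32),
}

# One inference step: target joint positions, raw actions, and the updated observation buffer.
positions, actions, buffer = session.run(None, inputs)
print(positions.shape, actions.shape, buffer.shape)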