From f8e1655decd7d23cfb02abede0076e0941fded23 Mon Sep 17 00:00:00 2001 From: Benjamin Bolte Date: Mon, 1 Apr 2024 20:32:45 -0700 Subject: [PATCH] commit stuff --- Makefile | 8 +-- sim/humanoid_gym/envs/humanoid_config.py | 58 ++++++++++--------- sim/humanoid_gym/envs/humanoid_env.py | 72 ++++++++++++++---------- sim/scripts/drop_urdf.py | 11 ++-- 4 files changed, 84 insertions(+), 65 deletions(-) diff --git a/Makefile b/Makefile index f951604c..01894632 100644 --- a/Makefile +++ b/Makefile @@ -26,13 +26,13 @@ all: # Train # # ------------------------ # -train-one: +train-one-vis: @python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1 -train: - @python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1024 +train-many-vis: + @python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 16 -train-full: +train: @python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 2048 --headless play: diff --git a/sim/humanoid_gym/envs/humanoid_config.py b/sim/humanoid_gym/envs/humanoid_config.py index f0137c07..2de3068f 100644 --- a/sim/humanoid_gym/envs/humanoid_config.py +++ b/sim/humanoid_gym/envs/humanoid_config.py @@ -21,7 +21,7 @@ class env(LeggedRobotCfg.env): # noqa: N801 num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) num_actions = NUM_JOINTS # Torque command for each joint. num_envs = 4096 - episode_length_s = 24 # Maximum episode length + episode_length_s = 8 # Maximum episode length use_ref_actions = False class safety: # noqa: N801 @@ -53,8 +53,8 @@ class terrain(LeggedRobotCfg.terrain): # noqa: N801 curriculum = False # For rough terrain only: measure_heights = False - static_friction = 0.6 - dynamic_friction = 0.6 + static_friction = 1.0 + dynamic_friction = 1.0 terrain_length = 8.0 terrain_width = 8.0 num_rows = 20 # Number of terrain rows (levels) @@ -62,11 +62,11 @@ class terrain(LeggedRobotCfg.terrain): # noqa: N801 max_init_terrain_level = 10 # Starting curriculum state # plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0] - restitution = 0.0 + restitution = 1.0 class noise: # noqa: N801 add_noise = True - noise_level = 0.6 # Scales other values + noise_level = 0.1 # Scales other values class noise_scales: # noqa: N801 dof_pos = 0.05 @@ -82,18 +82,18 @@ class init_state(LeggedRobotCfg.init_state): # noqa: N801 class sim(LeggedRobotCfg.sim): # noqa: N801 dt = 0.005 - substeps = 2 + substeps = 1 class physx(LeggedRobotCfg.sim.physx): # noqa: N801 num_threads = 10 solver_type = 1 num_position_iterations = 4 num_velocity_iterations = 1 - contact_offset = 0.01 - rest_offset = -0.02 + contact_offset = -0.01 + rest_offset = -0.015 bounce_threshold_velocity = 0.5 - max_depenetration_velocity = 1.0 - max_gpu_contact_pairs = 2**24 + max_depenetration_velocity = 10.0 + max_gpu_contact_pairs = 2**23 default_buffer_size_multiplier = 5 contact_collection = 2 # gymapi.CC_ALL_SUBSTEPS @@ -113,7 +113,7 @@ class commands(LeggedRobotCfg.commands): # noqa: N801 resampling_time = 8.0 class rewards: # noqa: N801 - base_height_target = 1.1 + # base_height_target = 1.1 min_dist = 0.2 max_dist = 0.5 @@ -122,8 +122,8 @@ class rewards: # noqa: N801 target_feet_height = 0.06 # m cycle_time = 0.64 # sec - # If truem negative total rewards are clipped at zero. - only_positive_rewards = True + # If true, negative total rewards are clipped at zero. 
+ only_positive_rewards = False # Max contact force should be a bit above the weight of the robot. In # our case the robot weighs 62 Kg, so we set it to 700. @@ -153,13 +153,18 @@ class scales: # noqa: N801 # Base position base_height = 1.0 - base_acc = 0.1 + # base_acc = 0.1 + base_acc = 0.0 # Energy - torques = -1e-5 - dof_vel = -5e-4 - dof_acc = -1e-7 - collision = -1e-2 + # torques = -1e-6 + # dof_vel = -5e-5 + # dof_acc = -1e-8 + # collision = -1e-2 + torques = 0.0 + dof_vel = 0.0 + dof_acc = 0.0 + collision = 0.0 class normalization: # noqa: N801 class obs_scales: # noqa: N801 @@ -170,8 +175,8 @@ class obs_scales: # noqa: N801 quat = 1.0 height_measurements = 5.0 - clip_observations = 18.0 - clip_actions = 18.0 + clip_observations = 100.0 + clip_actions = 100.0 class StompyPPO(LeggedRobotCfgPPO): @@ -180,16 +185,17 @@ class StompyPPO(LeggedRobotCfgPPO): class policy: # noqa: N801 init_noise_std = 1.0 - actor_hidden_dims = [512, 256, 128] - critic_hidden_dims = [768, 256, 128] + actor_hidden_dims = [512, 512, 256] + critic_hidden_dims = [1024, 512, 256] class algorithm(LeggedRobotCfgPPO.algorithm): # noqa: N801 - entropy_coef = 0.001 - learning_rate = 1e-5 + entropy_coef = 0.01 + learning_rate = 3e-4 num_learning_epochs = 2 - gamma = 0.994 - lam = 0.9 + gamma = 0.99 + lam = 0.95 num_mini_batches = 4 + max_grad_norm = 0.5 class runner: # noqa: N801 policy_class_name = "ActorCritic" diff --git a/sim/humanoid_gym/envs/humanoid_env.py b/sim/humanoid_gym/envs/humanoid_env.py index 214610b2..0dcfc126 100644 --- a/sim/humanoid_gym/envs/humanoid_env.py +++ b/sim/humanoid_gym/envs/humanoid_env.py @@ -6,6 +6,7 @@ import torch from humanoid.envs import LeggedRobot from humanoid.utils.terrain import HumanoidTerrain +from isaacgym import gymtorch from torch import Tensor from sim.humanoid_gym.envs.humanoid_config import StompyCfg @@ -78,31 +79,19 @@ def __init__( self.compute_observations() def _push_robots(self) -> None: - # """Random pushes the robots. Emulates an impulse by setting a randomized base velocity.""" - # max_vel = self.cfg.domain_rand.max_push_vel_xy - # max_push_angular = self.cfg.domain_rand.max_push_ang_vel - - # # Linear velocity in the X / Y axes. - # self.rand_push_force[:, :2] = torch_rand_float( - # -max_vel, - # max_vel, - # (self.num_envs, 2), - # device=self.device, - # ) - # self.root_states[:, 7:9] = self.rand_push_force[:, :2] - - # # Random torques in all three axes. - # self.rand_push_torque = torch_rand_float( - # -max_push_angular, - # max_push_angular, - # (self.num_envs, 3), - # device=self.device, - # ) - # self.root_states[:, 10:13] = self.rand_push_torque - - # self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) - - pass + """Random pushes the robots. Emulates an impulse by setting a randomized base velocity.""" + max_vel = self.cfg.domain_rand.max_push_vel_xy + max_push_angular = self.cfg.domain_rand.max_push_ang_vel + + # Random forces in the X and Y axes. + self.rand_push_force[:, :2] = (torch.rand((self.num_envs, 2), device=self.device) * 2 - 1) * max_vel + self.root_states[:, 7:9] = self.rand_push_force[:, :2] + + # Random torques in all three axes. 
+ self.rand_push_torque = (torch.rand((self.num_envs, 3), device=self.device) * 2 - 1) * max_push_angular + self.root_states[:, 10:13] = self.rand_push_torque + + self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) def create_sim(self) -> None: """Creates simulation, terrain and evironments.""" @@ -219,19 +208,42 @@ def reset_idx(self, env_ids: Tensor) -> None: for i in range(self.critic_history.maxlen): self.critic_history[i][env_ids] *= 0 + def _compute_torques(self, actions: Tensor) -> Tensor: + # Need to override this so we can just use the motor torques directly. + # return torch.clip(actions * self.cfg.control.action_scale, -self.torque_limits, self.torque_limits) + return torch.tanh(actions * self.cfg.control.action_scale) * self.torque_limits + + def check_termination(self) -> None: + self.reset_buf = self.episode_length_buf > self.max_episode_length + + def _reset_dofs(self, env_ids: Tensor) -> Tensor: + # Resets the DOF positions to random positions within their limits. + min_pos, max_pos = self.dof_pos_limits.unbind(1) + rand_pos = torch.rand((len(env_ids), self.num_dof), device=self.device) + self.dof_pos[env_ids] = min_pos[None, :] + rand_pos * (max_pos - min_pos)[None, :] + + self.dof_vel[env_ids] = 0.0 + + env_ids_int32 = env_ids.to(dtype=torch.int32) + self.gym.set_dof_state_tensor_indexed( + self.sim, + gymtorch.unwrap_tensor(self.dof_state), + gymtorch.unwrap_tensor(env_ids_int32), + len(env_ids_int32), + ) + def _reward_base_height(self) -> Tensor: """Calculates the reward based on the robot's base height. - This rewards the robot for being close to the target height without - going over (we actually penalise it for going over). + This rewards the robot for keeping the feet as far below the base as + possible. Returns: The reward for maximizing the base height. """ base_height = self.root_states[:, 2] - reward = base_height / self.cfg.rewards.base_height_target - reward[reward > 1.0] = 0.0 - return reward + max_foot_height = self.rigid_state[:, self.feet_indices, 2].max(dim=1).values + return base_height - max_foot_height def _reward_base_acc(self) -> Tensor: """Computes the reward based on the base's acceleration. diff --git a/sim/scripts/drop_urdf.py b/sim/scripts/drop_urdf.py index 3624a282..d03e00e8 100644 --- a/sim/scripts/drop_urdf.py +++ b/sim/scripts/drop_urdf.py @@ -66,11 +66,11 @@ def load_gym() -> GymParams: sim_params.physx.solver_type = 1 sim_params.physx.num_position_iterations = 4 - sim_params.physx.num_velocity_iterations = 0 - sim_params.physx.contact_offset = 0.01 - sim_params.physx.rest_offset = -0.02 + sim_params.physx.num_velocity_iterations = 1 + sim_params.physx.contact_offset = -0.01 + sim_params.physx.rest_offset = -0.015 sim_params.physx.bounce_threshold_velocity = 0.5 - sim_params.physx.max_depenetration_velocity = 1.0 + sim_params.physx.max_depenetration_velocity = 10.0 sim_params.physx.max_gpu_contact_pairs = 2**24 sim_params.physx.default_buffer_size_multiplier = 5 sim_params.physx.contact_collection = gymapi.CC_ALL_SUBSTEPS @@ -99,6 +99,7 @@ def load_gym() -> GymParams: # Add ground plane. plane_params = gymapi.PlaneParams() + plane_params.restitution = 1.0 gym.add_ground(sim, plane_params) # Set up the environment grid. 
@@ -152,7 +153,7 @@ def run_gym(gym: GymParams, mode: Literal["one_at_a_time", "all_at_once"] = "all body_ids: List[str] = gym.gym.get_actor_rigid_body_names(gym.env, gym.robot) joint_id = 0 - effort = 10.0 + effort = 5.0 while not gym.gym.query_viewer_has_closed(gym.viewer): gym.gym.simulate(gym.sim)
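
Note on the re-enabled _push_robots above: it swaps torch_rand_float for (torch.rand(...) * 2 - 1) * max, which draws the same symmetric uniform perturbation. A minimal standalone sketch of that sampling, assuming only torch and placeholder values standing in for cfg.domain_rand.max_push_vel_xy / max_push_ang_vel (the real values come from the config):

    import torch

    # Hypothetical stand-ins for the domain randomization config values.
    num_envs = 4
    max_push_vel_xy = 0.5    # m/s
    max_push_ang_vel = 0.4   # rad/s

    # torch.rand(...) is uniform on [0, 1); scaling by 2 and shifting by -1
    # gives a uniform sample on [-1, 1), so the push is uniform on [-max, max).
    rand_push_force = (torch.rand((num_envs, 2)) * 2 - 1) * max_push_vel_xy
    rand_push_torque = (torch.rand((num_envs, 3)) * 2 - 1) * max_push_ang_vel

    # In the environment these are written into root_states[:, 7:9] (linear
    # velocity) and root_states[:, 10:13] (angular velocity) before being
    # pushed back to the simulator via gym.set_actor_root_state_tensor(...).
    print(rand_push_force)
    print(rand_push_torque)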
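
Note on the _compute_torques override above: replacing the hard clip with a tanh squash keeps the commanded torque strictly inside the joint limits while staying smooth (no zero-gradient region once the policy output saturates). A small sketch comparing the two mappings, assuming illustrative values for action_scale and torque_limits (in the environment these come from cfg.control and the parsed URDF):

    import torch

    action_scale = 0.25                          # hypothetical cfg.control.action_scale
    torque_limits = torch.tensor([40.0, 80.0])   # hypothetical per-joint limits, N*m

    actions = torch.tensor([[2.0, -30.0], [0.1, 0.5]])  # raw policy outputs

    # Previous behaviour: hard clip at the limits (saturates abruptly).
    clipped = torch.clip(actions * action_scale, -torque_limits, torque_limits)

    # New behaviour: tanh squash, then rescale, so the output always lies in
    # (-torque_limits, torque_limits) and varies smoothly with the action.
    squashed = torch.tanh(actions * action_scale) * torque_limits

    print(clipped)
    print(squashed)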