
Commit

play script
codekansas committed Apr 2, 2024
1 parent 6d7de25 commit fa55377
Showing 6 changed files with 185 additions and 106 deletions.
8 changes: 7 additions & 1 deletion Makefile
@@ -30,7 +30,13 @@ train-one:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1

train:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1024 --headless
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1024

train-full:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 2048 --headless

play:
@python -m sim.humanoid_gym.play --task humanoid_ppo --run_name v1

# ------------------------ #
# Build #
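Taken together, the Makefile now separates a viewer-enabled training run (`train`, 1024 environments), a headless large-batch run (`train-full`, 2048 environments), and policy playback (`play`), which drives the new `sim.humanoid_gym.play` script added below.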
4 changes: 4 additions & 0 deletions sim/env.py
@@ -8,6 +8,10 @@ def model_dir() -> Path:
return Path(os.environ.get("MODEL_DIR", "models"))


def run_dir() -> Path:
return Path(os.environ.get("RUN_DIR", "runs"))


def stompy_urdf_path() -> Path:
stompy_path = model_dir() / "robots" / "stompy" / "robot.urdf"
if not stompy_path.exists():
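The new run_dir() helper mirrors model_dir(): it reads the RUN_DIR environment variable and falls back to "runs". A minimal usage sketch (the override path below is purely illustrative):

```python
import os

from sim.env import run_dir

# Default location for run artifacts such as the videos written by play.py.
print(run_dir())  # PosixPath('runs') on a POSIX system

# Redirect run artifacts for this session (illustrative path).
os.environ["RUN_DIR"] = "/tmp/stompy_runs"
print(run_dir())  # PosixPath('/tmp/stompy_runs')
```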
29 changes: 13 additions & 16 deletions sim/humanoid_gym/envs/humanoid_config.py
@@ -82,17 +82,17 @@ class init_state(LeggedRobotCfg.init_state): # noqa: N801

class sim(LeggedRobotCfg.sim): # noqa: N801
dt = 0.005
substeps = 1
substeps = 2

class physx(LeggedRobotCfg.sim.physx): # noqa: N801
num_threads = 10
solver_type = 1
num_position_iterations = 4
num_velocity_iterations = 0
contact_offset = 0.02
rest_offset = 0.01
bounce_threshold_velocity = 1.0
max_depenetration_velocity = 0.5
num_velocity_iterations = 1
contact_offset = 0.01
rest_offset = -0.02
bounce_threshold_velocity = 0.5
max_depenetration_velocity = 1.0
max_gpu_contact_pairs = 2**24
default_buffer_size_multiplier = 5
contact_collection = 2 # gymapi.CC_ALL_SUBSTEPS
@@ -113,7 +113,7 @@ class commands(LeggedRobotCfg.commands): # noqa: N801
resampling_time = 8.0

class rewards: # noqa: N801
base_height_target = 0.89
base_height_target = 1.1
min_dist = 0.2
max_dist = 0.5

@@ -127,7 +127,7 @@ class rewards: # noqa: N801

# Max contact force should be a bit above the weight of the robot. In
# our case the robot weighs 62 Kg, so we set it to 700.
max_contact_force = 700
# max_contact_force = 700

class scales: # noqa: N801
# Reference motion tracking
@@ -137,12 +137,12 @@ class scales: # noqa: N801

# Gait
# feet_air_time = 1.0
foot_slip = -0.05
# foot_slip = -0.05
# feet_distance = 0.2
# knee_distance = 0.2

# Contact
feet_contact_forces = -0.01
# feet_contact_forces = -0.01

# Velocity tracking
# tracking_lin_vel = 1.2
@@ -152,17 +152,14 @@ class scales: # noqa: N801
# track_vel_hard = 0.5

# Base position
default_joint_pos = 0.5
orientation = 1.0
base_height = 0.2
base_acc = 0.2
base_height = 1.0
base_acc = 0.1

# Energy
action_smoothness = -0.002
torques = -1e-5
dof_vel = -5e-4
dof_acc = -1e-7
collision = -1.0
collision = -1e-2

class normalization: # noqa: N801
class obs_scales: # noqa: N801
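For reference on the contact-force cap that is now commented out: the comment sizes it from the robot's weight, roughly 62 kg × 9.81 m/s² ≈ 608 N, so a 700 N limit sits a bit above the static load on the feet. The reward rebalance in this commit leans on base height instead: the target rises from 0.89 to 1.1 and its scale from 0.2 to 1.0, while the foot-slip and feet-contact-force terms are commented out and the collision penalty is softened from -1.0 to -1e-2.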
89 changes: 7 additions & 82 deletions sim/humanoid_gym/envs/humanoid_env.py
@@ -219,81 +219,19 @@ def reset_idx(self, env_ids: Tensor) -> None:
for i in range(self.critic_history.maxlen):
self.critic_history[i][env_ids] *= 0

def _reward_foot_slip(self) -> Tensor:
"""Calculates the reward for minimizing foot slip.
The reward is based on the contact forces and the speed of the feet.
A contact threshold is used to determine if the foot is in contact with
the ground. The speed of the foot is calculated and scaled by the
contact condition.
Returns:
The reward for minimizing foot slip.
"""
contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
rew = torch.sqrt(foot_speed_norm)
rew *= contact
return torch.sum(rew, dim=1)

def _reward_feet_contact_forces(self) -> Tensor:
"""Calculates the reward for keeping contact forces within a range.
Penalizes high contact forces on the feet.
Returns:
The reward for keeping contact forces within a specified range.
"""
return torch.sum(
(
torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force
).clip(0, 400),
dim=1,
)

def _reward_default_joint_pos(self) -> Tensor:
"""Calculates the reward for matching the target position.
Calculates the reward for keeping joint positions close to default
positions, with a focus on penalizing deviation in yaw and roll
directions. Excludes yaw and roll from the main penalty.
Returns:
The reward for matching the target joint position.
"""
joint_diff = self.dof_pos - self.default_joint_pd_target
left_yaw_roll = joint_diff[:, :2]
right_yaw_roll = joint_diff[:, 6:8]
yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1)
yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50)
return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1)

def _reward_orientation(self) -> Tensor:
"""Calculates the reward for maintaining a flat base orientation.
It penalizes deviation from the desired base orientation using the
base euler angles and the projected gravity vector.
Returns:
The reward for maintaining a flat base orientation.
"""
quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10)
orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20)
return (quat_mismatch + orientation) / 2.0

def _reward_base_height(self) -> Tensor:
"""Calculates the reward based on the robot's base height.
Penalizes deviation from a target base height. The reward is computed
based on the height difference between the robot's base and the average
height of its feet when they are in contact with the ground.
This rewards the robot for being close to the target height without
going over (we actually penalise it for going over).
Returns:
The reward for maintaining the robot's base height.
The reward for maximizing the base height.
"""
measured_heights = torch.sum(self.rigid_state[:, self.feet_indices, 2], dim=1)
base_height = self.root_states[:, 2] - (measured_heights - 0.05)
return torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100)
base_height = self.root_states[:, 2]
reward = base_height / self.cfg.rewards.base_height_target
reward[reward > 1.0] = 0.0
return reward

def _reward_base_acc(self) -> Tensor:
"""Computes the reward based on the base's acceleration.
@@ -354,16 +292,3 @@ def _reward_collision(self) -> Tensor:
return torch.sum(
1.0 * (torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), dim=1
)

def _reward_action_smoothness(self) -> Tensor:
"""Penalizes large differences between consecutive actions.
This is important for achieving fluid motion and reducing mechanical stress.
Returns:
The reward for minimizing differences between consecutive actions.
"""
term_1 = torch.sum(torch.square(self.last_actions - self.actions), dim=1)
term_2 = torch.sum(torch.square(self.actions + self.last_last_actions - 2 * self.last_actions), dim=1)
term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1)
return term_1 + term_2 + term_3
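The rewritten _reward_base_height above drops the foot-height correction and simply ramps the reward with the absolute root height, zeroing it once the base overshoots the target (1.1 in the updated config). A small standalone sketch of that shaping, using made-up heights and a hypothetical helper name:

```python
import torch


def base_height_reward(base_height: torch.Tensor, target: float = 1.1) -> torch.Tensor:
    # Linear ramp up to the target height; overshooting yields zero reward
    # rather than a penalty.
    reward = base_height / target
    reward[reward > 1.0] = 0.0
    return reward


heights = torch.tensor([0.55, 0.90, 1.10, 1.30])
print(base_height_reward(heights))  # tensor([0.5000, 0.8182, 1.0000, 0.0000])
```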
136 changes: 136 additions & 0 deletions sim/humanoid_gym/play.py
@@ -0,0 +1,136 @@
"""Script to replay a trained policy in the environment."""

import argparse
import os
from datetime import datetime

import cv2
import numpy as np
from isaacgym import gymapi
from tqdm import tqdm

from sim.env import run_dir
from sim.humanoid_gym.envs import * # noqa: F403


def play(args: argparse.Namespace) -> None:
env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
# override some parameters for testing
env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1)
env_cfg.sim.max_gpu_contact_pairs = 2**10
# env_cfg.terrain.mesh_type = 'trimesh'
env_cfg.terrain.mesh_type = "plane"
env_cfg.terrain.num_rows = 5
env_cfg.terrain.num_cols = 5
env_cfg.terrain.curriculum = False
env_cfg.terrain.max_init_terrain_level = 5
env_cfg.noise.add_noise = True
env_cfg.domain_rand.push_robots = False
env_cfg.domain_rand.joint_angle_noise = 0.0
env_cfg.noise.curriculum = False
env_cfg.noise.noise_level = 0.5

train_cfg.seed = 123145
print("train_cfg.runner_class_name:", train_cfg.runner_class_name)

# prepare environment
env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
env.set_camera(env_cfg.viewer.pos, env_cfg.viewer.lookat)

obs = env.get_observations()

# load policy
train_cfg.runner.resume = True
ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg)
policy = ppo_runner.get_inference_policy(device=env.device)

logger = Logger(env.dt)
robot_index = 0 # which robot is used for logging
joint_index = 1 # which joint is used for logging
stop_state_log = 1200 # number of steps before plotting states
if RENDER:
camera_properties = gymapi.CameraProperties()
camera_properties.width = 1920
camera_properties.height = 1080
h1 = env.gym.create_camera_sensor(env.envs[0], camera_properties)
camera_offset = gymapi.Vec3(1, -1, 0.5)
camera_rotation = gymapi.Quat.from_axis_angle(gymapi.Vec3(-0.3, 0.2, 1), np.deg2rad(135))
actor_handle = env.gym.get_actor_handle(env.envs[0], 0)
body_handle = env.gym.get_actor_rigid_body_handle(env.envs[0], actor_handle, 0)
env.gym.attach_camera_to_body(
h1, env.envs[0], body_handle, gymapi.Transform(camera_offset, camera_rotation), gymapi.FOLLOW_POSITION
)

fourcc = cv2.VideoWriter_fourcc(*"mp4v")

# Creates a directory to store videos.
video_dir = run_dir() / "humanoid" / "videos"
experiment_dir = video_dir / train_cfg.runner.experiment_name
experiment_dir.mkdir(parents=True, exist_ok=True)

dir = os.path.join(experiment_dir, datetime.now().strftime("%b%d_%H-%M-%S") + args.run_name + ".mp4")
if not os.path.exists(video_dir):
os.mkdir(video_dir)
if not os.path.exists(experiment_dir):
os.mkdir(experiment_dir)
video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080))

for _ in tqdm(range(stop_state_log)):

actions = policy(obs.detach()) # * 0.

if FIX_COMMAND:
env.commands[:, 0] = 0.5 # 1.0
env.commands[:, 1] = 0.0
env.commands[:, 2] = 0.0
env.commands[:, 3] = 0.0

obs, critic_obs, rews, dones, infos = env.step(actions.detach())

if RENDER:
env.gym.fetch_results(env.sim, True)
env.gym.step_graphics(env.sim)
env.gym.render_all_camera_sensors(env.sim)
img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR)
img = np.reshape(img, (1080, 1920, 4))
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
video.write(img[..., :3])

logger.log_states(
{
"dof_pos_target": actions[robot_index, joint_index].item() * env.cfg.control.action_scale,
"dof_pos": env.dof_pos[robot_index, joint_index].item(),
"dof_vel": env.dof_vel[robot_index, joint_index].item(),
"dof_torque": env.torques[robot_index, joint_index].item(),
"command_x": env.commands[robot_index, 0].item(),
"command_y": env.commands[robot_index, 1].item(),
"command_yaw": env.commands[robot_index, 2].item(),
"base_vel_x": env.base_lin_vel[robot_index, 0].item(),
"base_vel_y": env.base_lin_vel[robot_index, 1].item(),
"base_vel_z": env.base_lin_vel[robot_index, 2].item(),
"base_vel_yaw": env.base_ang_vel[robot_index, 2].item(),
"contact_forces_z": env.contact_forces[robot_index, env.feet_indices, 2].cpu().numpy(),
}
)
# ====================== Log states ======================
if infos["episode"]:
num_episodes = env.reset_buf.sum().item()
if num_episodes > 0:
logger.log_rewards(infos["episode"], num_episodes)

logger.print_rewards()
logger.plot_states()

if RENDER:
video.release()


# Puts this import down here so that the environments are registered
# before we try to use them.
from humanoid.utils import Logger, get_args, task_registry # noqa: E402

if __name__ == "__main__":
RENDER = True
FIX_COMMAND = True

play(get_args())
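The recording path above reshapes the flat camera buffer from gymapi.IMAGE_COLOR into a (1080, 1920, 4) RGBA frame and hands BGR frames to cv2.VideoWriter at 50 FPS; videos land under run_dir() / "humanoid" / "videos" / <experiment name> with a timestamped filename. A minimal standalone sketch of an equivalent conversion, using a random dummy frame in place of a live Isaac Gym camera sensor:

```python
import cv2
import numpy as np

WIDTH, HEIGHT, FPS = 1920, 1080, 50.0

# Stand-in for the flat image buffer returned by
# env.gym.get_camera_image(..., gymapi.IMAGE_COLOR) in the script above.
flat = np.random.randint(0, 256, WIDTH * HEIGHT * 4, dtype=np.uint8)

img = flat.reshape(HEIGHT, WIDTH, 4)        # RGBA frame
rgb = np.ascontiguousarray(img[..., :3])    # drop the alpha channel
bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)  # OpenCV's writer expects BGR

writer = cv2.VideoWriter("demo.mp4", cv2.VideoWriter_fourcc(*"mp4v"), FPS, (WIDTH, HEIGHT))
writer.write(bgr)
writer.release()
```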