
Commit

play script
codekansas committed Apr 2, 2024
1 parent 6d7de25 commit fa55377
Showing 6 changed files with 185 additions and 106 deletions.
8 changes: 7 additions & 1 deletion Makefile
@@ -30,7 +30,13 @@ train-one:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1

train:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1024 --headless
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1024

train-full:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 2048 --headless

play:
@python -m sim.humanoid_gym.play --task humanoid_ppo --run_name v1

# ------------------------ #
# Build #
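Taken together, the Makefile now separates a viewer-enabled training run (`train`, 1024 environments), a headless large-batch run (`train-full`, 2048 environments), and policy playback (`play`), which drives the new `sim.humanoid_gym.play` script added below.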
4 changes: 4 additions & 0 deletions sim/env.py
@@ -8,6 +8,10 @@ def model_dir() -> Path:
return Path(os.environ.get("MODEL_DIR", "models"))


def run_dir() -> Path:
return Path(os.environ.get("RUN_DIR", "runs"))


def stompy_urdf_path() -> Path:
stompy_path = model_dir() / "robots" / "stompy" / "robot.urdf"
if not stompy_path.exists():
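The new run_dir() helper mirrors model_dir(): it reads the RUN_DIR environment variable and falls back to "runs". A minimal usage sketch (the override path below is purely illustrative):

```python
import os

from sim.env import run_dir

# Default location for run artifacts such as the videos written by play.py.
print(run_dir())  # PosixPath('runs') on a POSIX system

# Redirect run artifacts for this session (illustrative path).
os.environ["RUN_DIR"] = "/tmp/stompy_runs"
print(run_dir())  # PosixPath('/tmp/stompy_runs')
```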
29 changes: 13 additions & 16 deletions sim/humanoid_gym/envs/humanoid_config.py
@@ -82,17 +82,17 @@ class init_state(LeggedRobotCfg.init_state): # noqa: N801

class sim(LeggedRobotCfg.sim): # noqa: N801
dt = 0.005
substeps = 1
substeps = 2

class physx(LeggedRobotCfg.sim.physx): # noqa: N801
num_threads = 10
solver_type = 1
num_position_iterations = 4
num_velocity_iterations = 0
contact_offset = 0.02
rest_offset = 0.01
bounce_threshold_velocity = 1.0
max_depenetration_velocity = 0.5
num_velocity_iterations = 1
contact_offset = 0.01
rest_offset = -0.02
bounce_threshold_velocity = 0.5
max_depenetration_velocity = 1.0
max_gpu_contact_pairs = 2**24
default_buffer_size_multiplier = 5
contact_collection = 2 # gymapi.CC_ALL_SUBSTEPS
@@ -113,7 +113,7 @@ class commands(LeggedRobotCfg.commands): # noqa: N801
resampling_time = 8.0

class rewards: # noqa: N801
base_height_target = 0.89
base_height_target = 1.1
min_dist = 0.2
max_dist = 0.5

@@ -127,7 +127,7 @@ class rewards: # noqa: N801

# Max contact force should be a bit above the weight of the robot. In
# our case the robot weighs 62 Kg, so we set it to 700.
max_contact_force = 700
# max_contact_force = 700

class scales: # noqa: N801
# Reference motion tracking
@@ -137,12 +137,12 @@ class scales: # noqa: N801

# Gait
# feet_air_time = 1.0
foot_slip = -0.05
# foot_slip = -0.05
# feet_distance = 0.2
# knee_distance = 0.2

# Contact
feet_contact_forces = -0.01
# feet_contact_forces = -0.01

# Velocity tracking
# tracking_lin_vel = 1.2
@@ -152,17 +152,14 @@ class scales: # noqa: N801
# track_vel_hard = 0.5

# Base position
default_joint_pos = 0.5
orientation = 1.0
base_height = 0.2
base_acc = 0.2
base_height = 1.0
base_acc = 0.1

# Energy
action_smoothness = -0.002
torques = -1e-5
dof_vel = -5e-4
dof_acc = -1e-7
collision = -1.0
collision = -1e-2

class normalization: # noqa: N801
class obs_scales: # noqa: N801
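For reference on the contact-force cap that is now commented out: the comment sizes it from the robot's weight, roughly 62 kg × 9.81 m/s² ≈ 608 N, so a 700 N limit sits a bit above the static load on the feet. The reward rebalance in this commit leans on base height instead: the target rises from 0.89 to 1.1 and its scale from 0.2 to 1.0, while the foot-slip and feet-contact-force terms are commented out and the collision penalty is softened from -1.0 to -1e-2.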
89 changes: 7 additions & 82 deletions sim/humanoid_gym/envs/humanoid_env.py
@@ -219,81 +219,19 @@ def reset_idx(self, env_ids: Tensor) -> None:
for i in range(self.critic_history.maxlen):
self.critic_history[i][env_ids] *= 0

def _reward_foot_slip(self) -> Tensor:
"""Calculates the reward for minimizing foot slip.
The reward is based on the contact forces and the speed of the feet.
A contact threshold is used to determine if the foot is in contact with
the ground. The speed of the foot is calculated and scaled by the
contact condition.
Returns:
The reward for minimizing foot slip.
"""
contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
rew = torch.sqrt(foot_speed_norm)
rew *= contact
return torch.sum(rew, dim=1)

def _reward_feet_contact_forces(self) -> Tensor:
"""Calculates the reward for keeping contact forces within a range.
Penalizes high contact forces on the feet.
Returns:
The reward for keeping contact forces within a specified range.
"""
return torch.sum(
(
torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force
).clip(0, 400),
dim=1,
)

def _reward_default_joint_pos(self) -> Tensor:
"""Calculates the reward for matching the target position.
Calculates the reward for keeping joint positions close to default
positions, with a focus on penalizing deviation in yaw and roll
directions. Excludes yaw and roll from the main penalty.
Returns:
The reward for matching the target joint position.
"""
joint_diff = self.dof_pos - self.default_joint_pd_target
left_yaw_roll = joint_diff[:, :2]
right_yaw_roll = joint_diff[:, 6:8]
yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1)
yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50)
return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1)

def _reward_orientation(self) -> Tensor:
"""Calculates the reward for maintaining a flat base orientation.
It penalizes deviation from the desired base orientation using the
base euler angles and the projected gravity vector.
Returns:
The reward for maintaining a flat base orientation.
"""
quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10)
orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20)
return (quat_mismatch + orientation) / 2.0

def _reward_base_height(self) -> Tensor:
"""Calculates the reward based on the robot's base height.
Penalizes deviation from a target base height. The reward is computed
based on the height difference between the robot's base and the average
height of its feet when they are in contact with the ground.
This rewards the robot for being close to the target height without
going over (we actually penalise it for going over).
Returns:
The reward for maintaining the robot's base height.
The reward for maximizing the base height.
"""
measured_heights = torch.sum(self.rigid_state[:, self.feet_indices, 2], dim=1)
base_height = self.root_states[:, 2] - (measured_heights - 0.05)
return torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100)
base_height = self.root_states[:, 2]
reward = base_height / self.cfg.rewards.base_height_target
reward[reward > 1.0] = 0.0
return reward

def _reward_base_acc(self) -> Tensor:
"""Computes the reward based on the base's acceleration.
@@ -354,16 +292,3 @@ def _reward_collision(self) -> Tensor:
return torch.sum(
1.0 * (torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), dim=1
)

def _reward_action_smoothness(self) -> Tensor:
"""Penalizes large differences between consecutive actions.
This is important for achieving fluid motion and reducing mechanical stress.
Returns:
The reward for minimizing differences between consecutive actions.
"""
term_1 = torch.sum(torch.square(self.last_actions - self.actions), dim=1)
term_2 = torch.sum(torch.square(self.actions + self.last_last_actions - 2 * self.last_actions), dim=1)
term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1)
return term_1 + term_2 + term_3
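The rewritten _reward_base_height above drops the foot-height correction and simply ramps the reward with the absolute root height, zeroing it once the base overshoots the target (1.1 in the updated config). A small standalone sketch of that shaping, using made-up heights and a hypothetical helper name:

```python
import torch


def base_height_reward(base_height: torch.Tensor, target: float = 1.1) -> torch.Tensor:
    # Linear ramp up to the target height; overshooting yields zero reward
    # rather than a penalty.
    reward = base_height / target
    reward[reward > 1.0] = 0.0
    return reward


heights = torch.tensor([0.55, 0.90, 1.10, 1.30])
print(base_height_reward(heights))  # tensor([0.5000, 0.8182, 1.0000, 0.0000])
```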
136 changes: 136 additions & 0 deletions sim/humanoid_gym/play.py
@@ -0,0 +1,136 @@
"""Script to replay a trained policy in the environment."""

import argparse
import os
from datetime import datetime

import cv2
import numpy as np
from isaacgym import gymapi
from tqdm import tqdm

from sim.env import run_dir
from sim.humanoid_gym.envs import * # noqa: F403


def play(args: argparse.Namespace) -> None:
env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
# override some parameters for testing
env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1)
env_cfg.sim.max_gpu_contact_pairs = 2**10
# env_cfg.terrain.mesh_type = 'trimesh'
env_cfg.terrain.mesh_type = "plane"
env_cfg.terrain.num_rows = 5
env_cfg.terrain.num_cols = 5
env_cfg.terrain.curriculum = False
env_cfg.terrain.max_init_terrain_level = 5
env_cfg.noise.add_noise = True
env_cfg.domain_rand.push_robots = False
env_cfg.domain_rand.joint_angle_noise = 0.0
env_cfg.noise.curriculum = False
env_cfg.noise.noise_level = 0.5

train_cfg.seed = 123145
print("train_cfg.runner_class_name:", train_cfg.runner_class_name)

# prepare environment
env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
env.set_camera(env_cfg.viewer.pos, env_cfg.viewer.lookat)

obs = env.get_observations()

# load policy
train_cfg.runner.resume = True
ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg)
policy = ppo_runner.get_inference_policy(device=env.device)

logger = Logger(env.dt)
robot_index = 0 # which robot is used for logging
joint_index = 1 # which joint is used for logging
stop_state_log = 1200 # number of steps before plotting states
if RENDER:
camera_properties = gymapi.CameraProperties()
camera_properties.width = 1920
camera_properties.height = 1080
h1 = env.gym.create_camera_sensor(env.envs[0], camera_properties)
camera_offset = gymapi.Vec3(1, -1, 0.5)
camera_rotation = gymapi.Quat.from_axis_angle(gymapi.Vec3(-0.3, 0.2, 1), np.deg2rad(135))
actor_handle = env.gym.get_actor_handle(env.envs[0], 0)
body_handle = env.gym.get_actor_rigid_body_handle(env.envs[0], actor_handle, 0)
env.gym.attach_camera_to_body(
h1, env.envs[0], body_handle, gymapi.Transform(camera_offset, camera_rotation), gymapi.FOLLOW_POSITION
)

fourcc = cv2.VideoWriter_fourcc(*"mp4v")

# Creates a directory to store videos.
video_dir = run_dir() / "humanoid" / "videos"
experiment_dir = video_dir / train_cfg.runner.experiment_name
experiment_dir.mkdir(parents=True, exist_ok=True)

dir = os.path.join(experiment_dir, datetime.now().strftime("%b%d_%H-%M-%S") + args.run_name + ".mp4")
if not os.path.exists(video_dir):
os.mkdir(video_dir)
if not os.path.exists(experiment_dir):
os.mkdir(experiment_dir)
video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080))

for _ in tqdm(range(stop_state_log)):

actions = policy(obs.detach()) # * 0.

if FIX_COMMAND:
env.commands[:, 0] = 0.5 # 1.0
env.commands[:, 1] = 0.0
env.commands[:, 2] = 0.0
env.commands[:, 3] = 0.0

obs, critic_obs, rews, dones, infos = env.step(actions.detach())

if RENDER:
env.gym.fetch_results(env.sim, True)
env.gym.step_graphics(env.sim)
env.gym.render_all_camera_sensors(env.sim)
img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR)
img = np.reshape(img, (1080, 1920, 4))
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
video.write(img[..., :3])

logger.log_states(
{
"dof_pos_target": actions[robot_index, joint_index].item() * env.cfg.control.action_scale,
"dof_pos": env.dof_pos[robot_index, joint_index].item(),
"dof_vel": env.dof_vel[robot_index, joint_index].item(),
"dof_torque": env.torques[robot_index, joint_index].item(),
"command_x": env.commands[robot_index, 0].item(),
"command_y": env.commands[robot_index, 1].item(),
"command_yaw": env.commands[robot_index, 2].item(),
"base_vel_x": env.base_lin_vel[robot_index, 0].item(),
"base_vel_y": env.base_lin_vel[robot_index, 1].item(),
"base_vel_z": env.base_lin_vel[robot_index, 2].item(),
"base_vel_yaw": env.base_ang_vel[robot_index, 2].item(),
"contact_forces_z": env.contact_forces[robot_index, env.feet_indices, 2].cpu().numpy(),
}
)
# ====================== Log states ======================
if infos["episode"]:
num_episodes = env.reset_buf.sum().item()
if num_episodes > 0:
logger.log_rewards(infos["episode"], num_episodes)

logger.print_rewards()
logger.plot_states()

if RENDER:
video.release()


# Puts this import down here so that the environments are registered
# before we try to use them.
from humanoid.utils import Logger, get_args, task_registry # noqa: E402

if __name__ == "__main__":
RENDER = True
FIX_COMMAND = True

play(get_args())
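The recording path above reshapes the flat camera buffer from gymapi.IMAGE_COLOR into a (1080, 1920, 4) RGBA frame and hands BGR frames to cv2.VideoWriter at 50 FPS; videos land under run_dir() / "humanoid" / "videos" / <experiment name> with a timestamped filename. A minimal standalone sketch of an equivalent conversion, using a random dummy frame in place of a live Isaac Gym camera sensor:

```python
import cv2
import numpy as np

WIDTH, HEIGHT, FPS = 1920, 1080, 50.0

# Stand-in for the flat image buffer returned by
# env.gym.get_camera_image(..., gymapi.IMAGE_COLOR) in the script above.
flat = np.random.randint(0, 256, WIDTH * HEIGHT * 4, dtype=np.uint8)

img = flat.reshape(HEIGHT, WIDTH, 4)        # RGBA frame
rgb = np.ascontiguousarray(img[..., :3])    # drop the alpha channel
bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)  # OpenCV's writer expects BGR

writer = cv2.VideoWriter("demo.mp4", cv2.VideoWriter_fourcc(*"mp4v"), FPS, (WIDTH, HEIGHT))
writer.write(bgr)
writer.release()
```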