Skip to content

Commit

Permalink
clean up setup
Browse files Browse the repository at this point in the history
  • Loading branch information
budzianowski committed May 17, 2024
1 parent 0459879 commit a92391e
Show file tree
Hide file tree
Showing 6 changed files with 202 additions and 66 deletions.
3 changes: 3 additions & 0 deletions sim/deploy/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""This module contains the configuration dataclasses for the robot."""

import numpy as np
from dataclasses import dataclass, field


# Default velocity command
@dataclass
class cmd:
Expand Down Expand Up @@ -48,6 +50,7 @@ class RobotConfig:
num_observations: The total number of observations (frame_stack * num_single_obs).
normalization: The normalization constants for observations and actions.
"""

dof: int
kps: np.ndarray
kds: np.ndarray
Expand Down
9 changes: 6 additions & 3 deletions sim/deploy/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class Policy(ABC):
model: Loaded PyTorch model.
action: Current action.
"""

cmd: cmd

def __init__(
Expand Down Expand Up @@ -51,7 +52,9 @@ def next_action(self, obs: np.ndarray) -> np.ndarray:
pass

@abstractmethod
def pd_control(self, target_dof_pos: np.ndarray, dof_pos: np.ndarray, kp: float, dof_vel: np.ndarray, kd: float) -> np.ndarray:
def pd_control(
self, target_dof_pos: np.ndarray, dof_pos: np.ndarray, kp: float, dof_vel: np.ndarray, kd: float
) -> np.ndarray:
"""Calculates torques from position commands using PD control.
Args:
Expand Down Expand Up @@ -104,7 +107,6 @@ def pd_control(
target_dof_vel = np.zeros(self.cfg.num_actions, dtype=np.double)
return (target_dof_pos - dof_pos) * kp + (target_dof_vel - dof_vel) * kd


def parse_action(
self, dof_pos: np.ndarray, dof_vel: np.ndarray, eu_ang: np.ndarray, ang_vel: np.ndarray, count_lowlevel: int
) -> np.ndarray:
Expand Down Expand Up @@ -174,4 +176,5 @@ def next_action(

class RealPolicy(Policy):
"""Policy for real-world deployment."""
pass

pass
177 changes: 136 additions & 41 deletions sim/deploy/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Run example:
mjpython sim/deploy/run.py --load_model sim/deploy/tests/walking_policy.pt --world MUJOCO
"""

import argparse
from abc import ABC, abstractmethod
from enum import Enum
Expand All @@ -13,11 +14,12 @@
import numpy as np
from tqdm import tqdm
from sim.env import stompy_mjcf_path
from sim.stompy.joints import StompyFixed

# from humanoid.envs import *
# from humanoid.utils import Logger, task_registry
# from isaacgym.torch_utils import *
# from isaacgym import gymapi
from humanoid.envs import *
from humanoid.utils import task_registry
from isaacgym.torch_utils import *
from isaacgym import gymapi

from policy import SimPolicy

Expand Down Expand Up @@ -96,15 +98,15 @@ def get_observation(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarra
- ang_vel: The angular velocity of the robot.
"""
# test this
dof_pos = self.data.qpos.astype(np.double) # dofx1
dof_vel = self.data.qvel.astype(np.double) # dofx1
dof_pos = self.data.qpos.astype(np.double) # dofx1
dof_vel = self.data.qvel.astype(np.double) # dofx1
# change to quaternion with w at the end
orientation = self.data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double) # 4x1
ang_vel = self.data.sensor("angular-velocity").data.astype(np.double) # 3x1
orientation = self.data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double) # 4x1
ang_vel = self.data.sensor("angular-velocity").data.astype(np.double) # 3x1

# Extract only dof
dof_pos = dof_pos[-self.cfg.num_actions:]
dof_vel = dof_vel[-self.cfg.num_actions:]
dof_pos = dof_pos[-self.cfg.num_actions :]
dof_vel = dof_vel[-self.cfg.num_actions :]

return dof_pos, dof_vel, orientation, ang_vel

Expand All @@ -113,21 +115,133 @@ def simulate(self, policy: SimPolicy) -> None:
for step in tqdm(range(int(cfg.duration / cfg.dt)), desc="Simulating..."):
# Get the world state
dof_pos, dof_vel, orientation, ang_vel = self.get_observation()

# We update the policy at a lower frequency
# The simulation runs at 1000hz, but the policy is updated at 100hz
if step % cfg.decimation == 0:
action = policy.next_action(dof_pos, dof_vel, orientation, ang_vel, step)
target_dof_pos = action * cfg.action_scale

tau = policy.pd_control(target_dof_pos, dof_pos, cfg.kps, dof_vel, cfg.kds)

self.step(tau=tau)
viewer.sync()

viewer.close()


class IsaacWorld(World):
"""Simulated world using Isaac.
Attributes:
cfg: The robot configuration.
"""

def __init__(self, cfg: RobotConfig):
# Arguments are overwritten
delattr(args, "world")
delattr(args, "load_model")

# adapt
args.task = "legs_ppo"
args.device = "cpu"
args.physics_engine = gymapi.SIM_PHYSX
args.num_envs = 1
args.subscenes = 0
args.use_gpu = False
args.use_gpu_pipeline = False
args.num_threads = 1
args.sim_device = "cpu"
args.headless = False
args.seed = None
args.resume = False
args.max_iterations = None
args.experiment_name = None
args.run_name = "v1"
args.load_run = None
args.checkpoint = None # "300"
args.rl_device = "cpu"

args.load_run = "Apr04_17-35-43_height_right3"
args.run_name = "height_right3"

env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
# override some parameters for testing
env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1)
env_cfg.sim.max_gpu_contact_pairs = 2**10
# env_cfg.terrain.mesh_type = 'trimesh'
env_cfg.terrain.mesh_type = "plane"
env_cfg.terrain.num_rows = 5
env_cfg.terrain.num_cols = 5
env_cfg.terrain.curriculum = False
env_cfg.terrain.max_init_terrain_level = 5
env_cfg.noise.add_noise = True # True
env_cfg.domain_rand.push_robots = False
env_cfg.domain_rand.joint_angle_noise = 0.0
env_cfg.noise.curriculum = False
env_cfg.noise.noise_level = 0.5

train_cfg.seed = 123145
print("train_cfg.runner_class_name:", train_cfg.runner_class_name)

env, _ = task_registry.make_env(name=args.task, env_cfg=env_cfg, args=args)
env.set_camera(env_cfg.viewer.pos, env_cfg.viewer.lookat)
self.env = env

# load policy
train_cfg.runner.resume = True
ppo_runner, train_cfg = task_registry.make_alg_runner(
env=env,
name=args.task,
train_cfg=train_cfg,
args=args,
)
# Load it from the jit
self.policy = ppo_runner.get_inference_policy(device=args.device)

# Load it up
env.commands[:, 0] = 0.0 # y axis + down - up
env.commands[:, 1] = 0.5 # x axis - left + right
env.commands[:, 2] = 0.0
env.commands[:, 3] = 0.0

def step(
self,
target_dof_pos: np.ndarray,
dof_pos: np.ndarray,
dof_vel: np.ndarray,
target_dof_vel: np.ndarray = None,
) -> None:
"""Performs a simulation step in the MuJoCo world.
Args:
target_dof_pos: The target joint positions.
q: The current joint positions.
dq: The current joint velocities.
target_dq: The target joint velocities (optional).
"""
if target_dof_vel is None:
target_dof_vel = np.zeros((self.cfg.num_actions), dtype=np.double)

# Generate PD control
tau = self.pd_control(
target_dof_pos, dof_pos, self.cfg.kps, target_dof_vel, dof_vel, self.cfg.kds
) # Calc torques
tau = np.clip(tau, -self.cfg.tau_limit, self.cfg.tau_limit) # Clamp torques
self.data.ctrl = tau
mujoco.mj_step(self.model, self.data)
self.viewer.render()

def get_observation(self, actions) -> np.ndarray:
obs, _, _, _, _ = self.env.step(actions.detach())
return obs

def simulate(self, policy: SimPolicy) -> None:
for step in tqdm(range(int(cfg.duration / cfg.dt)), desc="Simulating..."):
actions = policy(obs.detach()) # * 0.
obs = self.get_observation(actions)


def main(args: argparse.Namespace, cfg: RobotConfig) -> None:
"""
Run the policy simulation using the provided policy and configuration.
Expand All @@ -153,34 +267,15 @@ def main(args: argparse.Namespace, cfg: RobotConfig) -> None:
parser.add_argument("--world", type=str, default="MUJOCO", help="Type of deployment.")
args = parser.parse_args()

if "xbot" in args.load_model:
robot_path = f"/Users/pfb30/sim/third_party/humanoid-gym/resources/robots/XBot/mjcf/XBot-L.xml"
dof: int = 12
kps: np.ndarray = np.array(
[200, 200, 350, 350, 15, 15, 200, 200, 350, 350, 15, 15], dtype=np.double)
kds: np.ndarray = np.array(
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10], dtype=np.double)
tau_limit: np.ndarray = 200. * np.ones(dof, dtype=np.double)
num_single_obs = dof * 3 + 11
else:
dof=17
robot_path = stompy_mjcf_path()
num_single_obs = dof * 3 + 11

kps = np.ones((dof), dtype=np.double) * 200
kds = np.ones((dof), dtype=np.double) * 10
tau_limit = np.ones((dof), dtype=np.double) * 200

# # TODO - this should be read from the config
# robot_path = f"/Users/pfb30/sim/stompy/robot_new.xml"
# num_single_obs = dof * 3 + 11
# dof =
# kps = np.ones((dof), dtype=np.double) * 200
# kds = np.ones((dof), dtype=np.double) * 10
# tau_limit = np.ones((dof), dtype=np.double) * 200
dof = len(StompyFixed.default_standing())
robot_path = stompy_mjcf_path(legs_only=True)
num_single_obs = dof * 3 + 11

kps = np.ones((dof), dtype=np.double) * 200
kds = np.ones((dof), dtype=np.double) * 10
tau_limit = np.ones((dof), dtype=np.double) * 200

cfg = RobotConfig(
robot_model_path=robot_path,
dof=dof, kps=kps, kds=kds, tau_limit=tau_limit,
num_single_obs=num_single_obs
robot_model_path=robot_path, dof=dof, kps=kps, kds=kds, tau_limit=tau_limit, num_single_obs=num_single_obs
)
main(args, cfg)
35 changes: 35 additions & 0 deletions sim/deploy/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Utility functions for the policy integration module."""

import numpy as np


def quaternion_to_euler_array(quat: np.ndarray) -> np.ndarray:
"""
Converts a quaternion to Euler angles (roll, pitch, yaw).
Args:
quat: Quaternion in the format [x, y, z, w].
Returns:
Euler angles (roll, pitch, yaw) in radians.
"""
# Ensure quaternion is in the correct format [x, y, z, w]
x, y, z, w = quat

# Roll (x-axis rotation)
t0 = +2.0 * (w * x + y * z)
t1 = +1.0 - 2.0 * (x * x + y * y)
roll_x = np.arctan2(t0, t1)

# Pitch (y-axis rotation)
t2 = +2.0 * (w * y - z * x)
t2 = np.clip(t2, -1.0, 1.0)
pitch_y = np.arcsin(t2)

# Yaw (z-axis rotation)
t3 = +2.0 * (w * z + x * y)
t4 = +1.0 - 2.0 * (y * y + z * z)
yaw_z = np.arctan2(t3, t4)

# Returns roll, pitch, yaw in a NumPy array in radians
return np.array([roll_x, pitch_y, yaw_z])
7 changes: 5 additions & 2 deletions sim/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@ def stompy_urdf_path(legs_only: bool = False) -> Path:
return stompy_path.resolve()


def stompy_mjcf_path() -> Path:
stompy_path = model_dir() / "robot.xml"
def stompy_mjcf_path(legs_only: bool = False) -> Path:
if legs_only:
stompy_path = model_dir() / "robot_fixed.xml"
else:
stompy_path = model_dir() / "robot.xml"

if not stompy_path.exists():
raise FileNotFoundError(f"MJCF file not found: {stompy_path}")
Expand Down
Loading

0 comments on commit a92391e

Please sign in to comment.