Skip to content

Commit

Permalink
add mujoco evaluation (#12)
Browse files Browse the repository at this point in the history
* e2e setup works, inertia and forces are off

* clean up setup

* fix some renaming

* deploy format

* push updates

* add toml
  • Loading branch information
Paweł Budzianowski authored May 17, 2024
1 parent 963f094 commit b584d8d
Show file tree
Hide file tree
Showing 10 changed files with 627 additions and 544 deletions.
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ warn_redundant_casts = true
incremental = true
namespace_packages = false

exclude = "sim/humanoid_gym/"
exclude = ["sim/humanoid_gym/", "sim/deploy", "sim/scripts/create_mjcf.py"]



[[tool.mypy.overrides]]

Expand All @@ -52,7 +54,7 @@ profile = "black"
line-length = 120
target-version = "py310"

exclude = ["sim/humanoid_gym"]
exclude = ["sim/humanoid_gym", "sim/deploy/"]

[tool.ruff.lint]

Expand All @@ -66,6 +68,7 @@ ignore = [
"PLW0603", "PLW2901",
]


dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[tool.ruff.lint.per-file-ignores]
Expand Down
76 changes: 76 additions & 0 deletions sim/deploy/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""This module contains the configuration dataclasses for the robot."""

from dataclasses import dataclass, field

import numpy as np


# Default velocity command
@dataclass
class cmd:
vx: float = 0.4
vy: float = 0.0
dyaw: float = 0.0


@dataclass
class Normalization:
"""Normalization constants for observations and actions."""

@dataclass
class obs_scales:
lin_vel: float = 2.0
ang_vel: float = 1.0
dof_pos: float = 1.0
dof_vel: float = 0.05
quat: float = 1.0
height_measurements: float = 5.0

clip_observations: float = 18.0
clip_actions: float = 18.0


@dataclass
class RobotConfig:
"""This dataclass contains various parameters and settings for configuring the robot.
Attributes:
dof: The number of degrees of freedom of the robot.
num_actions: The number of actions the robot can perform.
kps: The proportional gains for the PD controller.
kds: The derivative gains for the PD controller.
tau_limit: The torque limits for the robot's joints.
robot_model_path: The path to the robot model file.
dt: The simulation time step.
phase_duration: The duration of each phase in the robot's gait.
duration: The total duration of the simulation.
decimation: The decimation factor for updating the policy.
frame_stack: The number of frames to stack for observation.
num_single_obs: The number of single observations.
action_scale: The scaling factor for actions.
num_observations: The total number of observations (frame_stack * num_single_obs).
normalization: The normalization constants for observations and actions.
"""

dof: int
kps: np.ndarray
kds: np.ndarray
tau_limit: np.ndarray
robot_model_path: str
dt: float = 0.001
phase_duration: float = 0.64
duration: float = 10.0
decimation: int = 10
frame_stack: int = 15
num_single_obs: int = 47
action_scale: float = 0.25

normalization: Normalization = field(default_factory=Normalization)

@property
def num_actions(self) -> int:
return self.dof

@property
def num_observations(self) -> int:
return self.frame_stack * self.num_single_obs
179 changes: 179 additions & 0 deletions sim/deploy/policy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
"""Policy module."""

import math
from abc import ABC, abstractmethod
from collections import deque
from pathlib import Path
from typing import Union

import numpy as np
import torch
import utils

from sim.deploy.config import RobotConfig, cmd


class Policy(ABC):
"""Abstract base class for policies.
Attributes:
cmd: Command parameters.
hist_obs: History of observations.
model: Loaded PyTorch model.
action: Current action.
"""

cmd: cmd

def __init__(
self,
model_path: Union[str, Path],
obs_size: int,
frame_stack: int = 3,
num_actions: int = 12,
) -> None:
self.hist_obs = deque()
for _ in range(frame_stack):
self.hist_obs.append(np.zeros([1, obs_size], dtype=np.double))
self.model = torch.jit.load(model_path)
self.action = np.zeros((num_actions), dtype=np.double)

@abstractmethod
def next_action(self, obs: np.ndarray) -> np.ndarray:
"""Computes the next action based on the observation.
Args:
obs: Observation.
Returns:
Next action.
"""
pass

@abstractmethod
def pd_control(
self, target_dof_pos: np.ndarray, dof_pos: np.ndarray, kp: float, dof_vel: np.ndarray, kd: float
) -> np.ndarray:
"""Calculates torques from position commands using PD control.
Args:
target_dof_pos: Target generalized positions.
dof_pos: Current generalized positions.
kp: Proportional gain.
dof_vel: Current generalized velocities.
kd: Derivative gain.
Returns:
Calculated torques.
"""
pass


class SimPolicy(Policy):
"""Policy for simulation.
Attributes:
dof: Degrees of freedom.
cfg: Robot configuration.
"""

def __init__(self, model_path: Union[str, Path], cfg: RobotConfig) -> None:
self.cfg = cfg
super().__init__(model_path, cfg.num_single_obs, cfg.frame_stack, cfg.num_actions)

def pd_control(
self,
target_dof_pos: np.ndarray,
dof_pos: np.ndarray,
kp: float,
dof_vel: np.ndarray,
kd: float,
) -> np.ndarray:
"""
Calculates torques from position commands using PD control.
Args:
target_dof_pos: Target generalized positions.
dof_pos: Current generalized positions.
kp: Proportional gain.
target_dof_vel: Target generalized velocities.
dof_vel: Current generalized velocities.
kd: Derivative gain.
Returns:
Calculated torques.
"""
target_dof_vel = np.zeros(self.cfg.num_actions, dtype=np.double)
return (target_dof_pos - dof_pos) * kp + (target_dof_vel - dof_vel) * kd

def parse_action(
self, dof_pos: np.ndarray, dof_vel: np.ndarray, eu_ang: np.ndarray, ang_vel: np.ndarray, count_lowlevel: int
) -> np.ndarray:
"""Parses the action from the observation.
Args:
dof_pos: Joint positions.
dof_vel: Joint velocities.
eu_ang: Euler angles.
ang_vel: Angular velocity.
count_lowlevel: Low-level count.
Returns:
Parsed action.
"""
obs = np.zeros([1, self.cfg.num_single_obs], dtype=np.float32)
obs[0, 0] = math.sin(2 * math.pi * count_lowlevel * self.cfg.dt / self.cfg.phase_duration)
obs[0, 1] = math.cos(2 * math.pi * count_lowlevel * self.cfg.dt / self.cfg.phase_duration)
obs[0, 2] = cmd.vx * self.cfg.normalization.obs_scales.lin_vel
obs[0, 3] = cmd.vy * self.cfg.normalization.obs_scales.lin_vel
obs[0, 4] = cmd.dyaw * self.cfg.normalization.obs_scales.ang_vel
obs[0, 5 : (self.cfg.num_actions + 5)] = dof_pos * self.cfg.normalization.obs_scales.dof_pos
obs[0, (self.cfg.num_actions + 5) : (2 * self.cfg.num_actions + 5)] = (
dof_vel * self.cfg.normalization.obs_scales.dof_vel
)
obs[0, (2 * self.cfg.num_actions + 5) : (3 * self.cfg.num_actions + 5)] = self.action
obs[0, (3 * self.cfg.num_actions + 5) : (3 * self.cfg.num_actions + 5) + 3] = ang_vel
obs[0, (3 * self.cfg.num_actions + 5) + 3 : (3 * self.cfg.num_actions + 5) + 2 * 3] = eu_ang
obs = np.clip(obs, -self.cfg.normalization.clip_observations, self.cfg.normalization.clip_observations)
return obs

def next_action(
self,
dof_pos: np.ndarray,
dof_vel: np.ndarray,
orientation: np.ndarray,
ang_vel: np.ndarray,
count_lowlevel: int,
) -> np.ndarray:
"""Computes the next action based on the observation.
Args:
dof_pos: Joint positions.
dof_vel: Joint velocities.
orientation: Orientation quaternion.
ang_vel: Angular velocity.
count_lowlevel: Low-level count.
Returns:
Next action.
"""
eu_ang = utils.quaternion_to_euler_array(orientation)
eu_ang[eu_ang > math.pi] -= 2 * math.pi

obs = self.parse_action(dof_pos, dof_vel, eu_ang, ang_vel, count_lowlevel)
self.hist_obs.append(obs)
self.hist_obs.popleft()
policy_input = np.zeros([1, self.cfg.num_observations], dtype=np.float32)

for i in range(self.cfg.frame_stack):
policy_input[0, i * self.cfg.num_single_obs : (i + 1) * self.cfg.num_single_obs] = self.hist_obs[i][0, :]

self.action[:] = self.model(torch.tensor(policy_input))[0].detach().numpy()

self.action = np.clip(self.action, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions)
return self.action


class RealPolicy(Policy):
"""Policy for real-world deployment."""

pass
Loading

0 comments on commit b584d8d

Please sign in to comment.