-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* e2e setup works, inertia and forces are off * clean up setup * fix some renaming * deploy format * push updates * add toml
- Loading branch information
Paweł Budzianowski
authored
May 17, 2024
1 parent
963f094
commit b584d8d
Showing
10 changed files
with
627 additions
and
544 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
"""This module contains the configuration dataclasses for the robot.""" | ||
|
||
from dataclasses import dataclass, field | ||
|
||
import numpy as np | ||
|
||
|
||
# Default velocity command | ||
@dataclass | ||
class cmd: | ||
vx: float = 0.4 | ||
vy: float = 0.0 | ||
dyaw: float = 0.0 | ||
|
||
|
||
@dataclass | ||
class Normalization: | ||
"""Normalization constants for observations and actions.""" | ||
|
||
@dataclass | ||
class obs_scales: | ||
lin_vel: float = 2.0 | ||
ang_vel: float = 1.0 | ||
dof_pos: float = 1.0 | ||
dof_vel: float = 0.05 | ||
quat: float = 1.0 | ||
height_measurements: float = 5.0 | ||
|
||
clip_observations: float = 18.0 | ||
clip_actions: float = 18.0 | ||
|
||
|
||
@dataclass | ||
class RobotConfig: | ||
"""This dataclass contains various parameters and settings for configuring the robot. | ||
Attributes: | ||
dof: The number of degrees of freedom of the robot. | ||
num_actions: The number of actions the robot can perform. | ||
kps: The proportional gains for the PD controller. | ||
kds: The derivative gains for the PD controller. | ||
tau_limit: The torque limits for the robot's joints. | ||
robot_model_path: The path to the robot model file. | ||
dt: The simulation time step. | ||
phase_duration: The duration of each phase in the robot's gait. | ||
duration: The total duration of the simulation. | ||
decimation: The decimation factor for updating the policy. | ||
frame_stack: The number of frames to stack for observation. | ||
num_single_obs: The number of single observations. | ||
action_scale: The scaling factor for actions. | ||
num_observations: The total number of observations (frame_stack * num_single_obs). | ||
normalization: The normalization constants for observations and actions. | ||
""" | ||
|
||
dof: int | ||
kps: np.ndarray | ||
kds: np.ndarray | ||
tau_limit: np.ndarray | ||
robot_model_path: str | ||
dt: float = 0.001 | ||
phase_duration: float = 0.64 | ||
duration: float = 10.0 | ||
decimation: int = 10 | ||
frame_stack: int = 15 | ||
num_single_obs: int = 47 | ||
action_scale: float = 0.25 | ||
|
||
normalization: Normalization = field(default_factory=Normalization) | ||
|
||
@property | ||
def num_actions(self) -> int: | ||
return self.dof | ||
|
||
@property | ||
def num_observations(self) -> int: | ||
return self.frame_stack * self.num_single_obs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
"""Policy module.""" | ||
|
||
import math | ||
from abc import ABC, abstractmethod | ||
from collections import deque | ||
from pathlib import Path | ||
from typing import Union | ||
|
||
import numpy as np | ||
import torch | ||
import utils | ||
|
||
from sim.deploy.config import RobotConfig, cmd | ||
|
||
|
||
class Policy(ABC): | ||
"""Abstract base class for policies. | ||
Attributes: | ||
cmd: Command parameters. | ||
hist_obs: History of observations. | ||
model: Loaded PyTorch model. | ||
action: Current action. | ||
""" | ||
|
||
cmd: cmd | ||
|
||
def __init__( | ||
self, | ||
model_path: Union[str, Path], | ||
obs_size: int, | ||
frame_stack: int = 3, | ||
num_actions: int = 12, | ||
) -> None: | ||
self.hist_obs = deque() | ||
for _ in range(frame_stack): | ||
self.hist_obs.append(np.zeros([1, obs_size], dtype=np.double)) | ||
self.model = torch.jit.load(model_path) | ||
self.action = np.zeros((num_actions), dtype=np.double) | ||
|
||
@abstractmethod | ||
def next_action(self, obs: np.ndarray) -> np.ndarray: | ||
"""Computes the next action based on the observation. | ||
Args: | ||
obs: Observation. | ||
Returns: | ||
Next action. | ||
""" | ||
pass | ||
|
||
@abstractmethod | ||
def pd_control( | ||
self, target_dof_pos: np.ndarray, dof_pos: np.ndarray, kp: float, dof_vel: np.ndarray, kd: float | ||
) -> np.ndarray: | ||
"""Calculates torques from position commands using PD control. | ||
Args: | ||
target_dof_pos: Target generalized positions. | ||
dof_pos: Current generalized positions. | ||
kp: Proportional gain. | ||
dof_vel: Current generalized velocities. | ||
kd: Derivative gain. | ||
Returns: | ||
Calculated torques. | ||
""" | ||
pass | ||
|
||
|
||
class SimPolicy(Policy): | ||
"""Policy for simulation. | ||
Attributes: | ||
dof: Degrees of freedom. | ||
cfg: Robot configuration. | ||
""" | ||
|
||
def __init__(self, model_path: Union[str, Path], cfg: RobotConfig) -> None: | ||
self.cfg = cfg | ||
super().__init__(model_path, cfg.num_single_obs, cfg.frame_stack, cfg.num_actions) | ||
|
||
def pd_control( | ||
self, | ||
target_dof_pos: np.ndarray, | ||
dof_pos: np.ndarray, | ||
kp: float, | ||
dof_vel: np.ndarray, | ||
kd: float, | ||
) -> np.ndarray: | ||
""" | ||
Calculates torques from position commands using PD control. | ||
Args: | ||
target_dof_pos: Target generalized positions. | ||
dof_pos: Current generalized positions. | ||
kp: Proportional gain. | ||
target_dof_vel: Target generalized velocities. | ||
dof_vel: Current generalized velocities. | ||
kd: Derivative gain. | ||
Returns: | ||
Calculated torques. | ||
""" | ||
target_dof_vel = np.zeros(self.cfg.num_actions, dtype=np.double) | ||
return (target_dof_pos - dof_pos) * kp + (target_dof_vel - dof_vel) * kd | ||
|
||
def parse_action( | ||
self, dof_pos: np.ndarray, dof_vel: np.ndarray, eu_ang: np.ndarray, ang_vel: np.ndarray, count_lowlevel: int | ||
) -> np.ndarray: | ||
"""Parses the action from the observation. | ||
Args: | ||
dof_pos: Joint positions. | ||
dof_vel: Joint velocities. | ||
eu_ang: Euler angles. | ||
ang_vel: Angular velocity. | ||
count_lowlevel: Low-level count. | ||
Returns: | ||
Parsed action. | ||
""" | ||
obs = np.zeros([1, self.cfg.num_single_obs], dtype=np.float32) | ||
obs[0, 0] = math.sin(2 * math.pi * count_lowlevel * self.cfg.dt / self.cfg.phase_duration) | ||
obs[0, 1] = math.cos(2 * math.pi * count_lowlevel * self.cfg.dt / self.cfg.phase_duration) | ||
obs[0, 2] = cmd.vx * self.cfg.normalization.obs_scales.lin_vel | ||
obs[0, 3] = cmd.vy * self.cfg.normalization.obs_scales.lin_vel | ||
obs[0, 4] = cmd.dyaw * self.cfg.normalization.obs_scales.ang_vel | ||
obs[0, 5 : (self.cfg.num_actions + 5)] = dof_pos * self.cfg.normalization.obs_scales.dof_pos | ||
obs[0, (self.cfg.num_actions + 5) : (2 * self.cfg.num_actions + 5)] = ( | ||
dof_vel * self.cfg.normalization.obs_scales.dof_vel | ||
) | ||
obs[0, (2 * self.cfg.num_actions + 5) : (3 * self.cfg.num_actions + 5)] = self.action | ||
obs[0, (3 * self.cfg.num_actions + 5) : (3 * self.cfg.num_actions + 5) + 3] = ang_vel | ||
obs[0, (3 * self.cfg.num_actions + 5) + 3 : (3 * self.cfg.num_actions + 5) + 2 * 3] = eu_ang | ||
obs = np.clip(obs, -self.cfg.normalization.clip_observations, self.cfg.normalization.clip_observations) | ||
return obs | ||
|
||
def next_action( | ||
self, | ||
dof_pos: np.ndarray, | ||
dof_vel: np.ndarray, | ||
orientation: np.ndarray, | ||
ang_vel: np.ndarray, | ||
count_lowlevel: int, | ||
) -> np.ndarray: | ||
"""Computes the next action based on the observation. | ||
Args: | ||
dof_pos: Joint positions. | ||
dof_vel: Joint velocities. | ||
orientation: Orientation quaternion. | ||
ang_vel: Angular velocity. | ||
count_lowlevel: Low-level count. | ||
Returns: | ||
Next action. | ||
""" | ||
eu_ang = utils.quaternion_to_euler_array(orientation) | ||
eu_ang[eu_ang > math.pi] -= 2 * math.pi | ||
|
||
obs = self.parse_action(dof_pos, dof_vel, eu_ang, ang_vel, count_lowlevel) | ||
self.hist_obs.append(obs) | ||
self.hist_obs.popleft() | ||
policy_input = np.zeros([1, self.cfg.num_observations], dtype=np.float32) | ||
|
||
for i in range(self.cfg.frame_stack): | ||
policy_input[0, i * self.cfg.num_single_obs : (i + 1) * self.cfg.num_single_obs] = self.hist_obs[i][0, :] | ||
|
||
self.action[:] = self.model(torch.tensor(policy_input))[0].detach().numpy() | ||
|
||
self.action = np.clip(self.action, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions) | ||
return self.action | ||
|
||
|
||
class RealPolicy(Policy): | ||
"""Policy for real-world deployment.""" | ||
|
||
pass |
Oops, something went wrong.