add mujoco evaluation (#12)

* e2e setup works, inertia and forces are off * clean up setup * fix some renaming * deploy format * push updates * add toml
kscalelabs · May 17, 2024 · b584d8d · b584d8d
1 parent 963f094
commit b584d8d
Show file tree

Hide file tree

Showing 10 changed files with 627 additions and 544 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -30,7 +30,9 @@ warn_redundant_casts = true
 incremental = true
 namespace_packages = false
 
-exclude = "sim/humanoid_gym/"
+exclude = ["sim/humanoid_gym/", "sim/deploy", "sim/scripts/create_mjcf.py"]
+
+
 
 [[tool.mypy.overrides]]
 
@@ -52,7 +54,7 @@ profile = "black"
 line-length = 120
 target-version = "py310"
 
-exclude = ["sim/humanoid_gym"]
+exclude = ["sim/humanoid_gym", "sim/deploy/"]
 
 [tool.ruff.lint]
 
@@ -66,6 +68,7 @@ ignore = [
     "PLW0603", "PLW2901",
 ]
 
+
 dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
 
 [tool.ruff.lint.per-file-ignores]

diff --git a/sim/deploy/config.py b/sim/deploy/config.py
@@ -0,0 +1,76 @@
+"""This module contains the configuration dataclasses for the robot."""
+
+from dataclasses import dataclass, field
+
+import numpy as np
+
+
+# Default velocity command
+@dataclass
+class cmd:
+    vx: float = 0.4
+    vy: float = 0.0
+    dyaw: float = 0.0
+
+
+@dataclass
+class Normalization:
+    """Normalization constants for observations and actions."""
+
+    @dataclass
+    class obs_scales:
+        lin_vel: float = 2.0
+        ang_vel: float = 1.0
+        dof_pos: float = 1.0
+        dof_vel: float = 0.05
+        quat: float = 1.0
+        height_measurements: float = 5.0
+
+    clip_observations: float = 18.0
+    clip_actions: float = 18.0
+
+
+@dataclass
+class RobotConfig:
+    """This dataclass contains various parameters and settings for configuring the robot.
+
+    Attributes:
+        dof: The number of degrees of freedom of the robot.
+        num_actions: The number of actions the robot can perform.
+        kps: The proportional gains for the PD controller.
+        kds: The derivative gains for the PD controller.
+        tau_limit: The torque limits for the robot's joints.
+        robot_model_path: The path to the robot model file.
+        dt: The simulation time step.
+        phase_duration: The duration of each phase in the robot's gait.
+        duration: The total duration of the simulation.
+        decimation: The decimation factor for updating the policy.
+        frame_stack: The number of frames to stack for observation.
+        num_single_obs: The number of single observations.
+        action_scale: The scaling factor for actions.
+        num_observations: The total number of observations (frame_stack * num_single_obs).
+        normalization: The normalization constants for observations and actions.
+    """
+
+    dof: int
+    kps: np.ndarray
+    kds: np.ndarray
+    tau_limit: np.ndarray
+    robot_model_path: str
+    dt: float = 0.001
+    phase_duration: float = 0.64
+    duration: float = 10.0
+    decimation: int = 10
+    frame_stack: int = 15
+    num_single_obs: int = 47
+    action_scale: float = 0.25
+
+    normalization: Normalization = field(default_factory=Normalization)
+
+    @property
+    def num_actions(self) -> int:
+        return self.dof
+
+    @property
+    def num_observations(self) -> int:
+        return self.frame_stack * self.num_single_obs
diff --git a/sim/deploy/policy.py b/sim/deploy/policy.py
@@ -0,0 +1,179 @@
+"""Policy module."""
+
+import math
+from abc import ABC, abstractmethod
+from collections import deque
+from pathlib import Path
+from typing import Union
+
+import numpy as np
+import torch
+import utils
+
+from sim.deploy.config import RobotConfig, cmd
+
+
+class Policy(ABC):
+    """Abstract base class for policies.
+
+    Attributes:
+        cmd: Command parameters.
+        hist_obs: History of observations.
+        model: Loaded PyTorch model.
+        action: Current action.
+    """
+
+    cmd: cmd
+
+    def __init__(
+        self,
+        model_path: Union[str, Path],
+        obs_size: int,
+        frame_stack: int = 3,
+        num_actions: int = 12,
+    ) -> None:
+        self.hist_obs = deque()
+        for _ in range(frame_stack):
+            self.hist_obs.append(np.zeros([1, obs_size], dtype=np.double))
+        self.model = torch.jit.load(model_path)
+        self.action = np.zeros((num_actions), dtype=np.double)
+
+    @abstractmethod
+    def next_action(self, obs: np.ndarray) -> np.ndarray:
+        """Computes the next action based on the observation.
+
+        Args:
+            obs: Observation.
+
+        Returns:
+            Next action.
+        """
+        pass
+
+    @abstractmethod
+    def pd_control(
+        self, target_dof_pos: np.ndarray, dof_pos: np.ndarray, kp: float, dof_vel: np.ndarray, kd: float
+    ) -> np.ndarray:
+        """Calculates torques from position commands using PD control.
+
+        Args:
+            target_dof_pos: Target generalized positions.
+            dof_pos: Current generalized positions.
+            kp: Proportional gain.
+            dof_vel: Current generalized velocities.
+            kd: Derivative gain.
+
+        Returns:
+            Calculated torques.
+        """
+        pass
+
+
+class SimPolicy(Policy):
+    """Policy for simulation.
+
+    Attributes:
+        dof: Degrees of freedom.
+        cfg: Robot configuration.
+    """
+
+    def __init__(self, model_path: Union[str, Path], cfg: RobotConfig) -> None:
+        self.cfg = cfg
+        super().__init__(model_path, cfg.num_single_obs, cfg.frame_stack, cfg.num_actions)
+
+    def pd_control(
+        self,
+        target_dof_pos: np.ndarray,
+        dof_pos: np.ndarray,
+        kp: float,
+        dof_vel: np.ndarray,
+        kd: float,
+    ) -> np.ndarray:
+        """
+        Calculates torques from position commands using PD control.
+
+        Args:
+            target_dof_pos: Target generalized positions.
+            dof_pos: Current generalized positions.
+            kp: Proportional gain.
+            target_dof_vel: Target generalized velocities.
+            dof_vel: Current generalized velocities.
+            kd: Derivative gain.
+
+        Returns:
+            Calculated torques.
+        """
+        target_dof_vel = np.zeros(self.cfg.num_actions, dtype=np.double)
+        return (target_dof_pos - dof_pos) * kp + (target_dof_vel - dof_vel) * kd
+
+    def parse_action(
+        self, dof_pos: np.ndarray, dof_vel: np.ndarray, eu_ang: np.ndarray, ang_vel: np.ndarray, count_lowlevel: int
+    ) -> np.ndarray:
+        """Parses the action from the observation.
+        Args:
+            dof_pos: Joint positions.
+            dof_vel: Joint velocities.
+            eu_ang: Euler angles.
+            ang_vel: Angular velocity.
+            count_lowlevel: Low-level count.
+
+        Returns:
+            Parsed action.
+        """
+        obs = np.zeros([1, self.cfg.num_single_obs], dtype=np.float32)
+        obs[0, 0] = math.sin(2 * math.pi * count_lowlevel * self.cfg.dt / self.cfg.phase_duration)
+        obs[0, 1] = math.cos(2 * math.pi * count_lowlevel * self.cfg.dt / self.cfg.phase_duration)
+        obs[0, 2] = cmd.vx * self.cfg.normalization.obs_scales.lin_vel
+        obs[0, 3] = cmd.vy * self.cfg.normalization.obs_scales.lin_vel
+        obs[0, 4] = cmd.dyaw * self.cfg.normalization.obs_scales.ang_vel
+        obs[0, 5 : (self.cfg.num_actions + 5)] = dof_pos * self.cfg.normalization.obs_scales.dof_pos
+        obs[0, (self.cfg.num_actions + 5) : (2 * self.cfg.num_actions + 5)] = (
+            dof_vel * self.cfg.normalization.obs_scales.dof_vel
+        )
+        obs[0, (2 * self.cfg.num_actions + 5) : (3 * self.cfg.num_actions + 5)] = self.action
+        obs[0, (3 * self.cfg.num_actions + 5) : (3 * self.cfg.num_actions + 5) + 3] = ang_vel
+        obs[0, (3 * self.cfg.num_actions + 5) + 3 : (3 * self.cfg.num_actions + 5) + 2 * 3] = eu_ang
+        obs = np.clip(obs, -self.cfg.normalization.clip_observations, self.cfg.normalization.clip_observations)
+        return obs
+
+    def next_action(
+        self,
+        dof_pos: np.ndarray,
+        dof_vel: np.ndarray,
+        orientation: np.ndarray,
+        ang_vel: np.ndarray,
+        count_lowlevel: int,
+    ) -> np.ndarray:
+        """Computes the next action based on the observation.
+
+        Args:
+            dof_pos: Joint positions.
+            dof_vel: Joint velocities.
+            orientation: Orientation quaternion.
+            ang_vel: Angular velocity.
+            count_lowlevel: Low-level count.
+
+        Returns:
+            Next action.
+        """
+        eu_ang = utils.quaternion_to_euler_array(orientation)
+        eu_ang[eu_ang > math.pi] -= 2 * math.pi
+
+        obs = self.parse_action(dof_pos, dof_vel, eu_ang, ang_vel, count_lowlevel)
+        self.hist_obs.append(obs)
+        self.hist_obs.popleft()
+        policy_input = np.zeros([1, self.cfg.num_observations], dtype=np.float32)
+
+        for i in range(self.cfg.frame_stack):
+            policy_input[0, i * self.cfg.num_single_obs : (i + 1) * self.cfg.num_single_obs] = self.hist_obs[i][0, :]
+
+        self.action[:] = self.model(torch.tensor(policy_input))[0].detach().numpy()
+
+        self.action = np.clip(self.action, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions)
+        return self.action
+
+
+class RealPolicy(Policy):
+    """Policy for real-world deployment."""
+
+    pass