From 5aade287f305cfeaa01d1c0b01a6c40bc883b2a3 Mon Sep 17 00:00:00 2001 From: Benjamin Bolte Date: Fri, 12 Apr 2024 19:45:18 -0700 Subject: [PATCH] formwar --- sim/env.py | 1 + sim/humanoid_gym/envs/getup_config.py | 9 +- sim/humanoid_gym/envs/getup_env.py | 90 +++++----------- sim/humanoid_gym/envs/legs_config.py | 4 +- sim/humanoid_gym/envs/legs_env.py | 140 +++++++------------------ sim/humanoid_gym/envs/stompy_config.py | 4 +- sim/humanoid_gym/envs/stompy_env.py | 140 +++++++------------------ sim/humanoid_gym/play.py | 26 ++--- sim/scripts/create_fixed_urdf.py | 16 +-- sim/stompy/joints.py | 96 +++++++---------- 10 files changed, 155 insertions(+), 371 deletions(-) diff --git a/sim/env.py b/sim/env.py index c5fa7948..7cdf55fe 100755 --- a/sim/env.py +++ b/sim/env.py @@ -1,4 +1,5 @@ """Defines common environment parameters.""" + import os from pathlib import Path diff --git a/sim/humanoid_gym/envs/getup_config.py b/sim/humanoid_gym/envs/getup_config.py index 0ab2ead3..cefed20c 100755 --- a/sim/humanoid_gym/envs/getup_config.py +++ b/sim/humanoid_gym/envs/getup_config.py @@ -4,9 +4,8 @@ --load_run Apr08_00-02-41_ --run_name v1 """ -from humanoid.envs.base.legged_robot_config import ( - LeggedRobotCfg, LeggedRobotCfgPPO -) + +from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO from sim.env import stompy_urdf_path from sim.stompy.joints import Stompy @@ -18,6 +17,7 @@ class GetupCfg(LeggedRobotCfg): """ Configuration class for the Legs humanoid robot. """ + class env(LeggedRobotCfg.env): # change the observation dim frame_stack = 15 @@ -28,7 +28,7 @@ class env(LeggedRobotCfg.env): num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) num_actions = NUM_JOINTS num_envs = 4096 - episode_length_s = 10 # episode length in seconds + episode_length_s = 10 # episode length in seconds use_ref_actions = False class safety: @@ -95,7 +95,6 @@ class init_state(LeggedRobotCfg.init_state): for joint in default_positions: default_joint_angles[joint] = default_positions[joint] - class control(LeggedRobotCfg.control): # PD Drive parameters: stiffness = { diff --git a/sim/humanoid_gym/envs/getup_env.py b/sim/humanoid_gym/envs/getup_env.py index 0a4451af..75766660 100755 --- a/sim/humanoid_gym/envs/getup_env.py +++ b/sim/humanoid_gym/envs/getup_env.py @@ -14,7 +14,9 @@ default_feet_height = 0.0 -class GetupFreeEnv(LeggedRobot,): +class GetupFreeEnv( + LeggedRobot, +): """ GetupFreeEnv is a class that represents getting up env requirments. @@ -61,9 +63,7 @@ class that start with `_reward_` and adds them to the reward function. reset_idx(env_ids): Resets the environment for the specified environment IDs. 
""" - def __init__( - self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless - ): + def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless): super().__init__(cfg, sim_params, physics_engine, sim_device, headless) self.last_feet_z = default_feet_height self.feet_height = torch.zeros((self.num_envs, 2), device=self.device) @@ -76,16 +76,12 @@ def __init__( for joint in Stompy.all_joints(): joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) if joint in Stompy.default_sitting(): - self.ref_dof_pos[:, joint_handle] = torch.tensor( - Stompy.default_sitting()[joint]) + self.ref_dof_pos[:, joint_handle] = torch.tensor(Stompy.default_sitting()[joint]) self.compute_observations() - self.height_history = torch.zeros( - (self.num_envs, int(self.max_episode_length.item())), - device=self.device - ) - self.window_size =int(self.max_episode_length.item()* 0.2) + self.height_history = torch.zeros((self.num_envs, int(self.max_episode_length.item())), device=self.device) + self.window_size = int(self.max_episode_length.item() * 0.2) def _push_robots(self): """Random pushes the robots. Emulates an impulse by setting a randomized base velocity.""" @@ -102,9 +98,7 @@ def _push_robots(self): self.root_states[:, 10:13] = self.rand_push_torque - self.gym.set_actor_root_state_tensor( - self.sim, gymtorch.unwrap_tensor(self.root_states) - ) + self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) def create_sim(self): """Creates simulation, terrain and evironments""" @@ -125,9 +119,7 @@ def create_sim(self): elif mesh_type == "trimesh": self._create_trimesh() elif mesh_type is not None: - raise ValueError( - "Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]" - ) + raise ValueError("Terrain mesh type not recognised. 
Allowed types are [None, plane, heightfield, trimesh]") self._create_envs() def _get_noise_scale_vec(self, cfg): @@ -145,15 +137,9 @@ def _get_noise_scale_vec(self, cfg): self.add_noise = self.cfg.noise.add_noise noise_scales = self.cfg.noise.noise_scales noise_vec[0:5] = 0.0 # commands - noise_vec[5 : (num_actions + 5)] = ( - noise_scales.dof_pos * self.obs_scales.dof_pos - ) - noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = ( - noise_scales.dof_vel * self.obs_scales.dof_vel - ) - noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = ( - 0.0 # previous actions - ) + noise_vec[5 : (num_actions + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos + noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel + noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0 # previous actions noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = ( noise_scales.ang_vel * self.obs_scales.ang_vel ) # ang vel @@ -168,11 +154,7 @@ def step(self, actions): # dynamic randomization delay = torch.rand((self.num_envs, 1), device=self.device) actions = (1 - delay) * actions + delay * self.actions - actions += ( - self.cfg.domain_rand.dynamic_randomization - * torch.randn_like(actions) - * actions - ) + actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions return super().step(actions) def compute_observations(self): @@ -184,8 +166,7 @@ def compute_observations(self): self.privileged_obs_buf = torch.cat( ( self.command_input, # 2 + 3 - (self.dof_pos - self.default_joint_pd_target) - * self.obs_scales.dof_pos, # 12 + (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 12 self.dof_vel * self.obs_scales.dof_vel, # 12 self.actions, # 12 torch.zeros([self.num_envs, self.num_dof], device=self.device), # 12 @@ -196,8 +177,8 @@ def compute_observations(self): self.rand_push_torque, # 3 self.env_frictions, # 1 self.body_mass / 30.0, # 1 - torch.zeros([self.num_envs, 2], device=self.device), # 2 - torch.zeros([self.num_envs, 2], device=self.device), # 2 + torch.zeros([self.num_envs, 2], device=self.device), # 2 + torch.zeros([self.num_envs, 2], device=self.device), # 2 ), dim=-1, ) @@ -226,25 +207,16 @@ def compute_observations(self): self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1) if self.add_noise: - obs_now = ( - obs_buf.clone() - + torch.randn_like(obs_buf) - * self.noise_scale_vec - * self.cfg.noise.noise_level - ) + obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level else: obs_now = obs_buf.clone() self.obs_history.append(obs_now) self.critic_history.append(self.privileged_obs_buf) - obs_buf_all = torch.stack( - [self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1 - ) # N,T,K + obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1) # N,T,K self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K - self.privileged_obs_buf = torch.cat( - [self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1 - ) + self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1) def reset_idx(self, env_ids): super().reset_idx(env_ids) @@ -261,9 +233,7 @@ def _reward_joint_pos(self): joint_pos = self.dof_pos.clone() pos_target = self.ref_dof_pos.clone() diff = joint_pos - pos_target - r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm( - diff, dim=1 - ) + r = torch.exp(-2 * torch.norm(diff, 
dim=1)) - 0.2 * torch.norm(diff, dim=1) return r def _reward_orientation(self): @@ -271,9 +241,7 @@ def _reward_orientation(self): Calculates the reward for maintaining a flat base orientation. It penalizes deviation from the desired base orientation using the base euler angles and the projected gravity vector. """ - quat_mismatch = torch.exp( - -torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10 - ) + quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) return (quat_mismatch + orientation) / 2.0 @@ -308,9 +276,7 @@ def check_termination(self): dim=1, ) """ - self.time_out_buf = ( - self.episode_length_buf > self.max_episode_length - ) # no terminal reward for time-outs + self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs self.reset_buf = self.time_out_buf def _reward_base_height(self): @@ -363,9 +329,7 @@ def _reward_dof_acc(self): Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring smooth and stable motion, reducing wear on the robot's mechanical parts. """ - return torch.sum( - torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1 - ) + return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1) def _reward_collision(self): """ @@ -373,13 +337,7 @@ def _reward_collision(self): This encourages the robot to avoid undesired contact with objects or surfaces. """ return torch.sum( - 1.0 - * ( - torch.norm( - self.contact_forces[:, self.penalised_contact_indices, :], dim=-1 - ) - > 0.1 - ), + 1.0 * (torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), dim=1, ) diff --git a/sim/humanoid_gym/envs/legs_config.py b/sim/humanoid_gym/envs/legs_config.py index 3a2c9fb5..2c98a793 100755 --- a/sim/humanoid_gym/envs/legs_config.py +++ b/sim/humanoid_gym/envs/legs_config.py @@ -1,8 +1,6 @@ """Defines the environment configuration for the Getting up task""" -from humanoid.envs.base.legged_robot_config import ( - LeggedRobotCfg, LeggedRobotCfgPPO -) +from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO from sim.env import stompy_urdf_path from sim.stompy.joints import StompyFixed diff --git a/sim/humanoid_gym/envs/legs_env.py b/sim/humanoid_gym/envs/legs_env.py index 14dc8167..5e5a7e44 100755 --- a/sim/humanoid_gym/envs/legs_env.py +++ b/sim/humanoid_gym/envs/legs_env.py @@ -1,6 +1,6 @@ # mypy: disable-error-code="valid-newtype" -"""Defines the environment for training the Stompy with fixed torso. -""" +"""Defines the environment for training the Stompy with fixed torso.""" + from humanoid.envs import LeggedRobot from humanoid.envs.base.legged_robot_config import LeggedRobotCfg from humanoid.utils.terrain import HumanoidTerrain @@ -46,9 +46,8 @@ class LegsFreeEnv(LeggedRobot): compute_observations(): Computes the observations. reset_idx(env_ids): Resets the environment for the specified environment IDs. 
""" - def __init__( - self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless - ): + + def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless): super().__init__(cfg, sim_params, physics_engine, sim_device, headless) self.last_feet_z = self.cfg.asset.default_feet_height self.feet_height = torch.zeros((self.num_envs, 2), device=self.device) @@ -59,15 +58,11 @@ def __init__( self.legs_joints = {} for name, joint in StompyFixed.legs.left.joints_motors(): - joint_handle = self.gym.find_actor_dof_handle( - env_handle, actor_handle, joint - ) + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) self.legs_joints["left_" + name] = joint_handle for name, joint in StompyFixed.legs.right.joints_motors(): - joint_handle = self.gym.find_actor_dof_handle( - env_handle, actor_handle, joint - ) + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) self.legs_joints["right_" + name] = joint_handle self.compute_observations() @@ -86,9 +81,7 @@ def _push_robots(self): self.root_states[:, 10:13] = self.rand_push_torque - self.gym.set_actor_root_state_tensor( - self.sim, gymtorch.unwrap_tensor(self.root_states) - ) + self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) def _get_phase(self): cycle_time = self.cfg.rewards.cycle_time @@ -113,16 +106,11 @@ def _get_gait_phase(self): def check_termination(self): """Check if environments need to be reset""" self.reset_buf = torch.any( - torch.norm( - self.contact_forces[:, self.termination_contact_indices, :], dim=-1 - ) - > 1.0, + torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1.0, dim=1, ) self.reset_buf |= self.root_states[:, 2] < self.cfg.asset.termination_height - self.time_out_buf = ( - self.episode_length_buf > self.max_episode_length - ) # no terminal reward for time-outs + self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs self.reset_buf |= self.time_out_buf def compute_ref_state(self): @@ -171,9 +159,7 @@ def create_sim(self): elif mesh_type == "trimesh": self._create_trimesh() elif mesh_type is not None: - raise ValueError( - "Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]" - ) + raise ValueError("Terrain mesh type not recognised. 
Allowed types are [None, plane, heightfield, trimesh]") self._create_envs() def _get_noise_scale_vec(self, cfg): @@ -191,15 +177,9 @@ def _get_noise_scale_vec(self, cfg): self.add_noise = self.cfg.noise.add_noise noise_scales = self.cfg.noise.noise_scales noise_vec[0:5] = 0.0 # commands - noise_vec[5 : (num_actions + 5)] = ( - noise_scales.dof_pos * self.obs_scales.dof_pos - ) - noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = ( - noise_scales.dof_vel * self.obs_scales.dof_vel - ) - noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = ( - 0.0 # previous actions - ) + noise_vec[5 : (num_actions + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos + noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel + noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0 # previous actions noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = ( noise_scales.ang_vel * self.obs_scales.ang_vel ) # ang vel @@ -214,11 +194,7 @@ def step(self, actions): # dynamic randomization delay = torch.rand((self.num_envs, 1), device=self.device) actions = (1 - delay) * actions + delay * self.actions - actions += ( - self.cfg.domain_rand.dynamic_randomization - * torch.randn_like(actions) - * actions - ) + actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions return super().step(actions) def compute_observations(self): @@ -231,9 +207,7 @@ def compute_observations(self): stance_mask = self._get_gait_phase() contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5.0 - self.command_input = torch.cat( - (sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1 - ) + self.command_input = torch.cat((sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1) q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos dq = self.dof_vel * self.obs_scales.dof_vel @@ -243,8 +217,7 @@ def compute_observations(self): self.privileged_obs_buf = torch.cat( ( self.command_input, # 2 + 3 - (self.dof_pos - self.default_joint_pd_target) - * self.obs_scales.dof_pos, # 12 + (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 12 self.dof_vel * self.obs_scales.dof_vel, # 12 self.actions, # 12 diff, # 12 @@ -285,25 +258,16 @@ def compute_observations(self): self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1) if self.add_noise: - obs_now = ( - obs_buf.clone() - + torch.randn_like(obs_buf) - * self.noise_scale_vec - * self.cfg.noise.noise_level - ) + obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level else: obs_now = obs_buf.clone() self.obs_history.append(obs_now) self.critic_history.append(self.privileged_obs_buf) - obs_buf_all = torch.stack( - [self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1 - ) # N,T,K + obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1) # N,T,K self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K - self.privileged_obs_buf = torch.cat( - [self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1 - ) + self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1) def reset_idx(self, env_ids): super().reset_idx(env_ids) @@ -320,9 +284,7 @@ def _reward_joint_pos(self): joint_pos = self.dof_pos.clone() pos_target = self.ref_dof_pos.clone() diff = joint_pos - pos_target - r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm( - diff, dim=1 - 
).clamp(0, 0.5) + r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5) return r @@ -336,9 +298,7 @@ def _reward_feet_distance(self): max_df = self.cfg.rewards.max_dist d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) d_max = torch.clamp(foot_dist - max_df, 0, 0.5) - return ( - torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100) - ) / 2 + return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 def _reward_knee_distance(self): """ @@ -350,9 +310,7 @@ def _reward_knee_distance(self): max_df = self.cfg.rewards.max_dist / 2 d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) d_max = torch.clamp(foot_dist - max_df, 0, 0.5) - return ( - torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100) - ) / 2 + return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 def _reward_foot_slip(self): """ @@ -361,9 +319,7 @@ def _reward_foot_slip(self): with the ground. The speed of the foot is calculated and scaled by the contact condition. """ contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 - foot_speed_norm = torch.norm( - self.rigid_state[:, self.feet_indices, 10:12], dim=2 - ) + foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2) rew = torch.sqrt(foot_speed_norm) rew *= contact return torch.sum(rew, dim=1) @@ -376,9 +332,7 @@ def _reward_feet_air_time(self): """ contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 stance_mask = self._get_gait_phase() - self.contact_filt = torch.logical_or( - torch.logical_or(contact, stance_mask), self.last_contacts - ) + self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts) self.last_contacts = contact first_contact = (self.feet_air_time > 0.0) * self.contact_filt self.feet_air_time += self.dt @@ -401,9 +355,7 @@ def _reward_orientation(self): Calculates the reward for maintaining a flat base orientation. It penalizes deviation from the desired base orientation using the base euler angles and the projected gravity vector. """ - quat_mismatch = torch.exp( - -torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10 - ) + quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) return (quat_mismatch + orientation) / 2.0 @@ -415,8 +367,7 @@ def _reward_feet_contact_forces(self): """ return torch.sum( ( - torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - - self.cfg.rewards.max_contact_force + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force ).clip(0, 400), dim=1, ) @@ -440,14 +391,11 @@ def _reward_base_height(self): of its feet when they are in contact with the ground. 
""" stance_mask = self._get_gait_phase() - measured_heights = torch.sum( - self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1 - ) / torch.sum(stance_mask, dim=1) - base_height = self.root_states[:, 2] - ( - measured_heights - self.cfg.asset.default_feet_height) - reward = torch.exp( - -torch.abs(base_height - self.cfg.rewards.base_height_target) * 100 + measured_heights = torch.sum(self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum( + stance_mask, dim=1 ) + base_height = self.root_states[:, 2] - (measured_heights - self.cfg.asset.default_feet_height) + reward = torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100) return reward def _reward_base_acc(self): @@ -477,9 +425,7 @@ def _reward_track_vel_hard(self): Penalizes deviations from specified linear and angular velocity targets. """ # Tracking of linear velocity commands (xy axes) - lin_vel_error = torch.norm( - self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1 - ) + lin_vel_error = torch.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) lin_vel_error_exp = torch.exp(-lin_vel_error * 10) # Tracking of angular velocity commands (yaw) @@ -495,9 +441,7 @@ def _reward_tracking_lin_vel(self): Tracks linear velocity commands along the xy axes. Calculates a reward based on how closely the robot's linear velocity matches the commanded values. """ - lin_vel_error = torch.sum( - torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1 - ) + lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1) return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma) def _reward_tracking_ang_vel(self): @@ -528,9 +472,7 @@ def _reward_feet_clearance(self): swing_mask = 1 - self._get_gait_phase() # feet height should be closed to target feet height at the peak - rew_pos = ( - torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.02 - ) + rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.02 rew_pos = torch.sum(rew_pos * swing_mask, dim=1) self.feet_height *= ~contact @@ -552,9 +494,7 @@ def _reward_low_speed(self): speed_desired = ~(speed_too_low | speed_too_high) # Check if the speed and command directions are mismatched - sign_mismatch = torch.sign(self.base_lin_vel[:, 0]) != torch.sign( - self.commands[:, 0] - ) + sign_mismatch = torch.sign(self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0]) # Initialize reward tensor reward = torch.zeros_like(self.base_lin_vel[:, 0]) @@ -589,9 +529,7 @@ def _reward_dof_acc(self): Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring smooth and stable motion, reducing wear on the robot's mechanical parts. """ - return torch.sum( - torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1 - ) + return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1) def _reward_collision(self): """ @@ -599,13 +537,7 @@ def _reward_collision(self): This encourages the robot to avoid undesired contact with objects or surfaces. 
""" return torch.sum( - 1.0 - * ( - torch.norm( - self.contact_forces[:, self.penalised_contact_indices, :], dim=-1 - ) - > 0.1 - ), + 1.0 * (torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), dim=1, ) diff --git a/sim/humanoid_gym/envs/stompy_config.py b/sim/humanoid_gym/envs/stompy_config.py index b63e82d7..1833af37 100755 --- a/sim/humanoid_gym/envs/stompy_config.py +++ b/sim/humanoid_gym/envs/stompy_config.py @@ -1,8 +1,6 @@ """Defines the environment configuration for the Getting up task""" -from humanoid.envs.base.legged_robot_config import ( - LeggedRobotCfg, LeggedRobotCfgPPO -) +from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO from sim.env import stompy_urdf_path from sim.stompy.joints import Stompy diff --git a/sim/humanoid_gym/envs/stompy_env.py b/sim/humanoid_gym/envs/stompy_env.py index a4198397..3469e83b 100755 --- a/sim/humanoid_gym/envs/stompy_env.py +++ b/sim/humanoid_gym/envs/stompy_env.py @@ -1,6 +1,6 @@ # mypy: disable-error-code="valid-newtype" -"""Defines the environment for training the humanoid. -""" +"""Defines the environment for training the humanoid.""" + from humanoid.envs import LeggedRobot from humanoid.envs.base.legged_robot_config import LeggedRobotCfg from humanoid.utils.terrain import HumanoidTerrain @@ -46,9 +46,8 @@ class StompyFreeEnv(LeggedRobot): compute_observations(): Computes the observations. reset_idx(env_ids): Resets the environment for the specified environment IDs. """ - def __init__( - self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless - ): + + def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless): super().__init__(cfg, sim_params, physics_engine, sim_device, headless) self.last_feet_z = self.cfg.asset.default_feet_height self.feet_height = torch.zeros((self.num_envs, 2), device=self.device) @@ -59,15 +58,11 @@ def __init__( self.legs_joints = {} for name, joint in Stompy.legs.left.joints_motors(): - joint_handle = self.gym.find_actor_dof_handle( - env_handle, actor_handle, joint - ) + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) self.legs_joints["left_" + name] = joint_handle for name, joint in Stompy.legs.right.joints_motors(): - joint_handle = self.gym.find_actor_dof_handle( - env_handle, actor_handle, joint - ) + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) self.legs_joints["right_" + name] = joint_handle self.compute_observations() @@ -87,9 +82,7 @@ def _push_robots(self): self.root_states[:, 10:13] = self.rand_push_torque - self.gym.set_actor_root_state_tensor( - self.sim, gymtorch.unwrap_tensor(self.root_states) - ) + self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) def _get_phase(self): cycle_time = self.cfg.rewards.cycle_time @@ -114,16 +107,11 @@ def _get_gait_phase(self): def check_termination(self): """Check if environments need to be reset""" self.reset_buf = torch.any( - torch.norm( - self.contact_forces[:, self.termination_contact_indices, :], dim=-1 - ) - > 1.0, + torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1.0, dim=1, ) self.reset_buf |= self.root_states[:, 2] < self.cfg.asset.termination_height - self.time_out_buf = ( - self.episode_length_buf > self.max_episode_length - ) # no terminal reward for time-outs + self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs self.reset_buf |= self.time_out_buf def 
compute_ref_state(self): @@ -172,9 +160,7 @@ def create_sim(self): elif mesh_type == "trimesh": self._create_trimesh() elif mesh_type is not None: - raise ValueError( - "Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]" - ) + raise ValueError("Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]") self._create_envs() def _get_noise_scale_vec(self, cfg): @@ -192,15 +178,9 @@ def _get_noise_scale_vec(self, cfg): self.add_noise = self.cfg.noise.add_noise noise_scales = self.cfg.noise.noise_scales noise_vec[0:5] = 0.0 # commands - noise_vec[5 : (num_actions + 5)] = ( - noise_scales.dof_pos * self.obs_scales.dof_pos - ) - noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = ( - noise_scales.dof_vel * self.obs_scales.dof_vel - ) - noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = ( - 0.0 # previous actions - ) + noise_vec[5 : (num_actions + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos + noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel + noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0 # previous actions noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = ( noise_scales.ang_vel * self.obs_scales.ang_vel ) # ang vel @@ -215,11 +195,7 @@ def step(self, actions): # dynamic randomization delay = torch.rand((self.num_envs, 1), device=self.device) actions = (1 - delay) * actions + delay * self.actions - actions += ( - self.cfg.domain_rand.dynamic_randomization - * torch.randn_like(actions) - * actions - ) + actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions return super().step(actions) def compute_observations(self): @@ -232,9 +208,7 @@ def compute_observations(self): stance_mask = self._get_gait_phase() contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5.0 - self.command_input = torch.cat( - (sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1 - ) + self.command_input = torch.cat((sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1) breakpoint() q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos dq = self.dof_vel * self.obs_scales.dof_vel @@ -244,8 +218,7 @@ def compute_observations(self): self.privileged_obs_buf = torch.cat( ( self.command_input, # 2 + 3 - (self.dof_pos - self.default_joint_pd_target) - * self.obs_scales.dof_pos, # 12 + (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 12 self.dof_vel * self.obs_scales.dof_vel, # 12 self.actions, # 12 diff, # 12 @@ -286,25 +259,16 @@ def compute_observations(self): self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1) if self.add_noise: - obs_now = ( - obs_buf.clone() - + torch.randn_like(obs_buf) - * self.noise_scale_vec - * self.cfg.noise.noise_level - ) + obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level else: obs_now = obs_buf.clone() self.obs_history.append(obs_now) self.critic_history.append(self.privileged_obs_buf) - obs_buf_all = torch.stack( - [self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1 - ) # N,T,K + obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1) # N,T,K self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K - self.privileged_obs_buf = torch.cat( - [self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1 - ) + self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in 
range(self.cfg.env.c_frame_stack)], dim=1) def reset_idx(self, env_ids): super().reset_idx(env_ids) @@ -321,9 +285,7 @@ def _reward_joint_pos(self): joint_pos = self.dof_pos.clone() pos_target = self.ref_dof_pos.clone() diff = joint_pos - pos_target - r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm( - diff, dim=1 - ).clamp(0, 0.5) + r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5) return r @@ -337,9 +299,7 @@ def _reward_feet_distance(self): max_df = self.cfg.rewards.max_dist d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) d_max = torch.clamp(foot_dist - max_df, 0, 0.5) - return ( - torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100) - ) / 2 + return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 def _reward_knee_distance(self): """ @@ -351,9 +311,7 @@ def _reward_knee_distance(self): max_df = self.cfg.rewards.max_dist / 2 d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) d_max = torch.clamp(foot_dist - max_df, 0, 0.5) - return ( - torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100) - ) / 2 + return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 def _reward_foot_slip(self): """ @@ -362,9 +320,7 @@ def _reward_foot_slip(self): with the ground. The speed of the foot is calculated and scaled by the contact condition. """ contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 - foot_speed_norm = torch.norm( - self.rigid_state[:, self.feet_indices, 10:12], dim=2 - ) + foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2) rew = torch.sqrt(foot_speed_norm) rew *= contact return torch.sum(rew, dim=1) @@ -377,9 +333,7 @@ def _reward_feet_air_time(self): """ contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 stance_mask = self._get_gait_phase() - self.contact_filt = torch.logical_or( - torch.logical_or(contact, stance_mask), self.last_contacts - ) + self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts) self.last_contacts = contact first_contact = (self.feet_air_time > 0.0) * self.contact_filt self.feet_air_time += self.dt @@ -402,9 +356,7 @@ def _reward_orientation(self): Calculates the reward for maintaining a flat base orientation. It penalizes deviation from the desired base orientation using the base euler angles and the projected gravity vector. """ - quat_mismatch = torch.exp( - -torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10 - ) + quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) return (quat_mismatch + orientation) / 2.0 @@ -416,8 +368,7 @@ def _reward_feet_contact_forces(self): """ return torch.sum( ( - torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - - self.cfg.rewards.max_contact_force + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force ).clip(0, 400), dim=1, ) @@ -441,14 +392,11 @@ def _reward_base_height(self): of its feet when they are in contact with the ground. 
""" stance_mask = self._get_gait_phase() - measured_heights = torch.sum( - self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1 - ) / torch.sum(stance_mask, dim=1) - base_height = self.root_states[:, 2] - ( - measured_heights - self.cfg.asset.default_feet_height) - reward = torch.exp( - -torch.abs(base_height - self.cfg.rewards.base_height_target) * 100 + measured_heights = torch.sum(self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum( + stance_mask, dim=1 ) + base_height = self.root_states[:, 2] - (measured_heights - self.cfg.asset.default_feet_height) + reward = torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100) return reward def _reward_base_acc(self): @@ -478,9 +426,7 @@ def _reward_track_vel_hard(self): Penalizes deviations from specified linear and angular velocity targets. """ # Tracking of linear velocity commands (xy axes) - lin_vel_error = torch.norm( - self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1 - ) + lin_vel_error = torch.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) lin_vel_error_exp = torch.exp(-lin_vel_error * 10) # Tracking of angular velocity commands (yaw) @@ -496,9 +442,7 @@ def _reward_tracking_lin_vel(self): Tracks linear velocity commands along the xy axes. Calculates a reward based on how closely the robot's linear velocity matches the commanded values. """ - lin_vel_error = torch.sum( - torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1 - ) + lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1) return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma) def _reward_tracking_ang_vel(self): @@ -529,9 +473,7 @@ def _reward_feet_clearance(self): swing_mask = 1 - self._get_gait_phase() # feet height should be closed to target feet height at the peak - rew_pos = ( - torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.02 - ) + rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.02 rew_pos = torch.sum(rew_pos * swing_mask, dim=1) self.feet_height *= ~contact @@ -553,9 +495,7 @@ def _reward_low_speed(self): speed_desired = ~(speed_too_low | speed_too_high) # Check if the speed and command directions are mismatched - sign_mismatch = torch.sign(self.base_lin_vel[:, 0]) != torch.sign( - self.commands[:, 0] - ) + sign_mismatch = torch.sign(self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0]) # Initialize reward tensor reward = torch.zeros_like(self.base_lin_vel[:, 0]) @@ -590,9 +530,7 @@ def _reward_dof_acc(self): Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring smooth and stable motion, reducing wear on the robot's mechanical parts. """ - return torch.sum( - torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1 - ) + return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1) def _reward_collision(self): """ @@ -600,13 +538,7 @@ def _reward_collision(self): This encourages the robot to avoid undesired contact with objects or surfaces. 
""" return torch.sum( - 1.0 - * ( - torch.norm( - self.contact_forces[:, self.penalised_contact_indices, :], dim=-1 - ) - > 0.1 - ), + 1.0 * (torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), dim=1, ) diff --git a/sim/humanoid_gym/play.py b/sim/humanoid_gym/play.py index 73a4b267..937753fb 100755 --- a/sim/humanoid_gym/play.py +++ b/sim/humanoid_gym/play.py @@ -55,17 +55,13 @@ def play(args: argparse.Namespace) -> None: camera_properties.height = 1080 h1 = env.gym.create_camera_sensor(env.envs[0], camera_properties) camera_offset = gymapi.Vec3(3, -3, 1) - camera_rotation = gymapi.Quat.from_axis_angle( - gymapi.Vec3(-0.3, 0.2, 1), np.deg2rad(135)) + camera_rotation = gymapi.Quat.from_axis_angle(gymapi.Vec3(-0.3, 0.2, 1), np.deg2rad(135)) actor_handle = env.gym.get_actor_handle(env.envs[0], 0) - body_handle = env.gym.get_actor_rigid_body_handle( - env.envs[0], actor_handle, 0) + body_handle = env.gym.get_actor_rigid_body_handle(env.envs[0], actor_handle, 0) print(body_handle) print(actor_handle) env.gym.attach_camera_to_body( - h1, env.envs[0], body_handle, - gymapi.Transform(camera_offset, camera_rotation), - gymapi.FOLLOW_POSITION + h1, env.envs[0], body_handle, gymapi.Transform(camera_offset, camera_rotation), gymapi.FOLLOW_POSITION ) fourcc = cv2.VideoWriter_fourcc(*"MJPG") @@ -75,23 +71,19 @@ def play(args: argparse.Namespace) -> None: experiment_dir = video_dir / train_cfg.runner.experiment_name experiment_dir.mkdir(parents=True, exist_ok=True) - dir = os.path.join( - experiment_dir, - datetime.now().strftime("%b%d_%H-%M-%S") + args.run_name + ".mp4" - ) + dir = os.path.join(experiment_dir, datetime.now().strftime("%b%d_%H-%M-%S") + args.run_name + ".mp4") if not os.path.exists(video_dir): os.mkdir(video_dir) if not os.path.exists(experiment_dir): os.mkdir(experiment_dir) video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080)) - for _ in tqdm(range(stop_state_log)): actions = policy(obs.detach()) if FIX_COMMAND: env.commands[:, 0] = 0.0 - env.commands[:, 1] = -0.5 # negative left, positive right + env.commands[:, 1] = -0.5 # negative left, positive right env.commands[:, 2] = 0.0 env.commands[:, 3] = 0.0 @@ -101,12 +93,11 @@ def play(args: argparse.Namespace) -> None: env.gym.fetch_results(env.sim, True) env.gym.step_graphics(env.sim) env.gym.render_all_camera_sensors(env.sim) - img = env.gym.get_camera_image( - env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR) + img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR) img = np.reshape(img, (1080, 1920, 4)) img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR) - video.write(img[..., :3]) # Write only the RGB channels + video.write(img[..., :3]) # Write only the RGB channels logger.log_states( { @@ -121,8 +112,7 @@ def play(args: argparse.Namespace) -> None: "base_vel_y": env.base_lin_vel[robot_index, 1].item(), "base_vel_z": env.base_lin_vel[robot_index, 2].item(), "base_vel_yaw": env.base_ang_vel[robot_index, 2].item(), - "contact_forces_z": env.contact_forces[ - robot_index, env.feet_indices, 2].cpu().numpy(), + "contact_forces_z": env.contact_forces[robot_index, env.feet_indices, 2].cpu().numpy(), } ) # ====================== Log states ====================== diff --git a/sim/scripts/create_fixed_urdf.py b/sim/scripts/create_fixed_urdf.py index a487ca73..653bc0dd 100644 --- a/sim/scripts/create_fixed_urdf.py +++ b/sim/scripts/create_fixed_urdf.py @@ -1,6 +1,6 @@ # mypy: disable-error-code="valid-newtype" -"""This script updates the URDF file to fix the joints of the robot. 
-""" +"""This script updates the URDF file to fix the joints of the robot.""" + import xml.etree.ElementTree as ET from sim.stompy.joints import StompyFixed @@ -14,17 +14,17 @@ def update_urdf(): revolute_joints = set(stompy.default_standing().keys()) joint_limits = stompy.default_limits() - for joint in root.findall('joint'): - joint_name = joint.get('name') + for joint in root.findall("joint"): + joint_name = joint.get("name") if joint_name not in revolute_joints: print(joint) - joint.set('type', 'fixed') + joint.set("type", "fixed") else: - limit = joint.find('limit') + limit = joint.find("limit") if limit is not None: limits = joint_limits.get(joint_name, {}) - lower = str(limits.get('lower', 0.0)) - upper = str(limits.get('upper', 0.0)) + lower = str(limits.get("lower", 0.0)) + upper = str(limits.get("upper", 0.0)) limit.set("lower", lower) limit.set("upper", upper) diff --git a/sim/stompy/joints.py b/sim/stompy/joints.py index 0c33af51..2524c6f4 100755 --- a/sim/stompy/joints.py +++ b/sim/stompy/joints.py @@ -143,31 +143,25 @@ def default_positions(cls) -> Dict[str, float]: @classmethod def default_standing(cls) -> Dict[str, float]: return { - Stompy.head.left_right: np.deg2rad(-2), # -0.03 - + Stompy.head.left_right: np.deg2rad(-2), # -0.03 # arms - Stompy.left_arm.shoulder_yaw: np.deg2rad(-69.5), # -1.21 - Stompy.left_arm.shoulder_pitch: np.deg2rad(-93), # 1.61 - Stompy.right_arm.shoulder_yaw: np.deg2rad(85), # 1.48 + Stompy.left_arm.shoulder_yaw: np.deg2rad(-69.5), # -1.21 + Stompy.left_arm.shoulder_pitch: np.deg2rad(-93), # 1.61 + Stompy.right_arm.shoulder_yaw: np.deg2rad(85), # 1.48 Stompy.right_arm.shoulder_pitch: np.deg2rad(104), # 1.81 - # legs - Stompy.legs.left.hip_roll: np.deg2rad(29), # 0.5 - Stompy.legs.left.hip_yaw: np.deg2rad(-29), # -0.5 - Stompy.legs.left.hip_pitch: np.deg2rad(56), # 0.97 - - Stompy.legs.right.hip_roll: np.deg2rad(-29), # -0.5 - Stompy.legs.right.hip_yaw: np.deg2rad(-29), # -0.5 - Stompy.legs.right.hip_pitch: np.deg2rad(-56), # -0.97 - - Stompy.legs.left.knee: np.deg2rad(-6), # -0.1 - Stompy.legs.right.knee: np.deg2rad(6), # 0.1 - - Stompy.legs.left.ankle: np.deg2rad(0), # 0 - Stompy.legs.right.ankle: np.deg2rad(0), # 0 - - Stompy.legs.left.foot_roll: np.deg2rad(0), # 0 - Stompy.legs.right.foot_roll: np.deg2rad(0), # 0 + Stompy.legs.left.hip_roll: np.deg2rad(29), # 0.5 + Stompy.legs.left.hip_yaw: np.deg2rad(-29), # -0.5 + Stompy.legs.left.hip_pitch: np.deg2rad(56), # 0.97 + Stompy.legs.right.hip_roll: np.deg2rad(-29), # -0.5 + Stompy.legs.right.hip_yaw: np.deg2rad(-29), # -0.5 + Stompy.legs.right.hip_pitch: np.deg2rad(-56), # -0.97 + Stompy.legs.left.knee: np.deg2rad(-6), # -0.1 + Stompy.legs.right.knee: np.deg2rad(6), # 0.1 + Stompy.legs.left.ankle: np.deg2rad(0), # 0 + Stompy.legs.right.ankle: np.deg2rad(0), # 0 + Stompy.legs.left.foot_roll: np.deg2rad(0), # 0 + Stompy.legs.right.foot_roll: np.deg2rad(0), # 0 } @classmethod @@ -176,33 +170,26 @@ def default_sitting(cls) -> Dict[str, float]: Stompy.head.left_right: np.deg2rad(-3), Stompy.head.up_down: 0.0, Stompy.torso.pitch: 0.0, - # arms Stompy.left_arm.shoulder_yaw: np.deg2rad(-88), Stompy.left_arm.shoulder_pitch: np.deg2rad(-30), Stompy.left_arm.shoulder_roll: np.deg2rad(-190), Stompy.left_arm.elbow_yaw: np.deg2rad(-88), - Stompy.right_arm.shoulder_yaw: np.deg2rad(88), Stompy.right_arm.shoulder_pitch: np.deg2rad(30), Stompy.right_arm.shoulder_roll: np.deg2rad(190), Stompy.right_arm.elbow_yaw: np.deg2rad(88), - # hands Stompy.left_arm.hand.hand_roll: np.deg2rad(-60), 
Stompy.right_arm.hand.hand_roll: np.deg2rad(-60), - # legs Stompy.legs.left.hip_roll: np.deg2rad(29), Stompy.legs.left.hip_yaw: np.deg2rad(-29), Stompy.legs.left.hip_pitch: np.deg2rad(56), - - Stompy.legs.right.hip_roll: np.deg2rad(-29), Stompy.legs.right.hip_yaw: np.deg2rad(-29), # check this Stompy.legs.right.hip_pitch: np.deg2rad(-56), - Stompy.legs.left.knee: np.deg2rad(-6), Stompy.legs.right.knee: np.deg2rad(6), } @@ -218,37 +205,31 @@ class StompyFixed(Stompy): @classmethod def default_standing(cls) -> Dict[str, float]: return { - Stompy.head.left_right: np.deg2rad(-2), # -0.03 - + Stompy.head.left_right: np.deg2rad(-2), # -0.03 # arms - Stompy.left_arm.shoulder_yaw: np.deg2rad(-69.5), # -1.21 - Stompy.left_arm.shoulder_pitch: np.deg2rad(-93), # 1.61 - Stompy.right_arm.shoulder_yaw: np.deg2rad(85), # 1.48 + Stompy.left_arm.shoulder_yaw: np.deg2rad(-69.5), # -1.21 + Stompy.left_arm.shoulder_pitch: np.deg2rad(-93), # 1.61 + Stompy.right_arm.shoulder_yaw: np.deg2rad(85), # 1.48 Stompy.right_arm.shoulder_pitch: np.deg2rad(104), # 1.81 - # legs - Stompy.legs.left.hip_roll: np.deg2rad(29), # 0.5 - Stompy.legs.left.hip_yaw: np.deg2rad(-29), # -0.5 - Stompy.legs.left.hip_pitch: np.deg2rad(56), # 0.97 - - Stompy.legs.right.hip_roll: np.deg2rad(-29), # -0.5 - Stompy.legs.right.hip_yaw: np.deg2rad(-29), # -0.5 - Stompy.legs.right.hip_pitch: np.deg2rad(-56), # -0.97 - - Stompy.legs.left.knee: np.deg2rad(-6), # -0.1 - Stompy.legs.right.knee: np.deg2rad(6), # 0.1 - - Stompy.legs.left.ankle: np.deg2rad(0), # 0 - Stompy.legs.right.ankle: np.deg2rad(0), # 0 - - Stompy.legs.left.foot_roll: np.deg2rad(0), # 0 - Stompy.legs.right.foot_roll: np.deg2rad(0), # 0 + Stompy.legs.left.hip_roll: np.deg2rad(29), # 0.5 + Stompy.legs.left.hip_yaw: np.deg2rad(-29), # -0.5 + Stompy.legs.left.hip_pitch: np.deg2rad(56), # 0.97 + Stompy.legs.right.hip_roll: np.deg2rad(-29), # -0.5 + Stompy.legs.right.hip_yaw: np.deg2rad(-29), # -0.5 + Stompy.legs.right.hip_pitch: np.deg2rad(-56), # -0.97 + Stompy.legs.left.knee: np.deg2rad(-6), # -0.1 + Stompy.legs.right.knee: np.deg2rad(6), # 0.1 + Stompy.legs.left.ankle: np.deg2rad(0), # 0 + Stompy.legs.right.ankle: np.deg2rad(0), # 0 + Stompy.legs.left.foot_roll: np.deg2rad(0), # 0 + Stompy.legs.right.foot_roll: np.deg2rad(0), # 0 } def default_limits(cls) -> Dict[str, Dict[str, float]]: return { Stompy.head.left_right: { - "lower": -.1, + "lower": -0.1, "upper": 0.0, }, Stompy.right_arm.shoulder_yaw: { @@ -307,16 +288,11 @@ def default_limits(cls) -> Dict[str, Dict[str, float]]: "lower": -0.3, "upper": 0.3, }, - Stompy.legs.right.foot_roll: { - "lower": -0.3, - "upper": 0.3 - }, - Stompy.legs.left.foot_roll: { - "lower": -0.3, - "upper": 0.3 - }, + Stompy.legs.right.foot_roll: {"lower": -0.3, "upper": 0.3}, + Stompy.legs.left.foot_roll: {"lower": -0.3, "upper": 0.3}, } + def print_joints() -> None: joints = Stompy.all_joints() assert len(joints) == len(set(joints)), "Duplicate joint names found!"