diff --git a/examples/gpr_walking.kinfer b/examples/gpr_walking.kinfer
index 19d761c..84fa052 100644
Binary files a/examples/gpr_walking.kinfer and b/examples/gpr_walking.kinfer differ
diff --git a/examples/walking_policy.pt b/examples/walking_policy.pt
index f483c38..eb25c85 100644
Binary files a/examples/walking_policy.pt and b/examples/walking_policy.pt differ
diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py
index a3be536..8291e70 100755
--- a/sim/envs/__init__.py
+++ b/sim/envs/__init__.py
@@ -12,8 +12,8 @@
from sim.envs.humanoids.dora_env import DoraFreeEnv
from sim.envs.humanoids.g1_config import G1Cfg, G1CfgPPO
from sim.envs.humanoids.g1_env import G1FreeEnv
-from sim.envs.humanoids.gpr_env import GprFreeEnv
from sim.envs.humanoids.gpr_config import GprCfg, GprCfgPPO, GprStandingCfg
+from sim.envs.humanoids.gpr_env import GprFreeEnv
from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO
from sim.envs.humanoids.h1_env import H1FreeEnv
from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 42bdbe2..91d66d7 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -30,7 +30,7 @@ class safety(LeggedRobotCfg.safety):
# safety factors
pos_limit = 1.0
vel_limit = 1.0
- torque_limit = 0.85
+ torque_limit = 1.0
class asset(LeggedRobotCfg.asset):
name = "gpr"
@@ -123,7 +123,7 @@ class domain_rand(LeggedRobotCfg.domain_rand):
randomize_base_mass = True
added_mass_range = [-3.0, 3.0]
- push_robots = False
+ push_robots = True
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
@@ -150,14 +150,14 @@ class rewards:
max_dist = 0.5
# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
- target_feet_height = 0.06 # m
+ target_feet_height = 0.05 # m
- cycle_time = 0.64 # sec
+ cycle_time = 0.25 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
tracking_sigma = 5.0
- max_contact_force = 700 # forces above this value are penalized
+ max_contact_force = 400 # forces above this value are penalized
class scales:
# reference motion tracking
@@ -213,7 +213,7 @@ class GprStandingCfg(GprCfg):
class rewards:
# quite important to keep it right
- base_height_target = 1.06
+ base_height_target = Robot.height
min_dist = 0.2
max_dist = 0.5
# put some settings here for LLM parameter tuning
diff --git a/sim/model_export.py b/sim/model_export.py
index 09c190a..3777124 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -15,17 +15,20 @@
@dataclass
class ActorCfg:
- embodiment: str = "gpr"
- cycle_time: float = 0.4 # Cycle time for sinusoidal command input
- action_scale: float = 0.25 # Scale for actions
- lin_vel_scale: float = 2.0 # Scale for linear velocity
- ang_vel_scale: float = 1.0 # Scale for angular velocity
- quat_scale: float = 1.0 # Scale for quaternion
- dof_pos_scale: float = 1.0 # Scale for joint positions
- dof_vel_scale: float = 0.05 # Scale for joint velocities
- frame_stack: int = 15 # Number of frames to stack for the policy input
- clip_observations: float = 18.0 # Clip observations to this value
- clip_actions: float = 18.0 # Clip actions to this value
+ embodiment: str
+ cycle_time: float # Cycle time for sinusoidal command input
+ action_scale: float # Scale for actions
+ lin_vel_scale: float # Scale for linear velocity
+ ang_vel_scale: float # Scale for angular velocity
+ quat_scale: float # Scale for quaternion
+ dof_pos_scale: float # Scale for joint positions
+ dof_vel_scale: float # Scale for joint velocities
+ frame_stack: int # Number of frames to stack for the policy input
+ clip_observations: float # Clip observations to this value
+ clip_actions: float # Clip actions to this value
+ sim_dt: float # Simulation time step
+ sim_decimation: int # Simulation decimation
+ tau_factor: float # Torque limit factor
class ActorCritic(nn.Module):
@@ -242,6 +245,8 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
num_actions = a_model.num_actions
num_observations = a_model.num_observations
+ default_standing = list(a_model.robot.default_standing().values())
+
return (
a_model,
{
@@ -250,6 +255,7 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
"robot_damping": robot_damping,
"num_actions": num_actions,
"num_observations": num_observations,
+ "default_standing": default_standing,
},
input_tensors,
)
@@ -338,7 +344,3 @@ def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[st
buffer2.seek(0)
return ort.InferenceSession(buffer2.read())
-
-
-if __name__ == "__main__":
- convert_model_to_onnx("model_3000.pt", ActorCfg(), "policy.onnx")
diff --git a/sim/play.py b/sim/play.py
index 9abe511..04465dd 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -21,15 +21,11 @@
from kinfer.export.pytorch import export_to_onnx
from tqdm import tqdm
-# Local imports third
from sim.env import run_dir # noqa: E402
from sim.envs import task_registry # noqa: E402
-from sim.h5_logger import HDF5Logger
-from sim.model_export import ( # noqa: E402
- ActorCfg,
- convert_model_to_onnx,
- get_actor_policy,
-)
+
+# Local imports third
+from sim.model_export import ActorCfg, get_actor_policy
from sim.utils.helpers import get_args # noqa: E402
from sim.utils.logger import Logger # noqa: E402
@@ -92,15 +88,22 @@ def play(args: argparse.Namespace) -> None:
if args.export_onnx:
path = ppo_runner.load_path
embodiment = ppo_runner.cfg["experiment_name"].lower()
- policy_cfg = ActorCfg(embodiment=embodiment)
-
- if embodiment == "stompypro":
- policy_cfg.cycle_time = 0.4
- elif embodiment == "stompymicro":
- policy_cfg.cycle_time = 0.2
- else:
- print(f"Specific policy cfg for {embodiment} not implemented")
-
+ policy_cfg = ActorCfg(
+ embodiment=embodiment,
+ cycle_time=env_cfg.rewards.cycle_time,
+ sim_dt=env_cfg.sim.dt,
+ sim_decimation=env_cfg.control.decimation,
+ tau_factor=env_cfg.safety.torque_limit,
+ action_scale=env_cfg.control.action_scale,
+ lin_vel_scale=env_cfg.normalization.obs_scales.lin_vel,
+ ang_vel_scale=env_cfg.normalization.obs_scales.ang_vel,
+ quat_scale=env_cfg.normalization.obs_scales.quat,
+ dof_pos_scale=env_cfg.normalization.obs_scales.dof_pos,
+ dof_vel_scale=env_cfg.normalization.obs_scales.dof_vel,
+ frame_stack=env_cfg.env.frame_stack,
+ clip_observations=env_cfg.normalization.clip_observations,
+ clip_actions=env_cfg.normalization.clip_actions,
+ )
actor_model, sim2sim_info, input_tensors = get_actor_policy(path, policy_cfg)
# Merge policy_cfg and sim2sim_info into a single config object
@@ -117,6 +120,8 @@ def play(args: argparse.Namespace) -> None:
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
if args.log_h5:
+ from sim.h5_logger import HDF5Logger
+
# Create directory for HDF5 files
h5_dir = run_dir() / "h5_out" / args.task / now
h5_dir.mkdir(parents=True, exist_ok=True)
diff --git a/sim/play_old.py b/sim/play_old.py
deleted file mode 100755
index 6747abb..0000000
--- a/sim/play_old.py
+++ /dev/null
@@ -1,232 +0,0 @@
-# mypy: ignore-errors
-"""Play a trained policy in the environment.
-
-Run:
- python sim/play.py --task g1 --log_h5
- python sim/play.py --task stompymini --log_h5
-"""
-
-import argparse
-import copy
-import logging
-import os
-from datetime import datetime
-from typing import Any, Union
-
-import cv2
-import h5py
-import numpy as np
-from isaacgym import gymapi
-from tqdm import tqdm
-
-logger = logging.getLogger(__name__)
-
-from sim.env import run_dir # noqa: E402
-from sim.envs import task_registry # noqa: E402
-from sim.model_export import convert_model_to_onnx # noqa: E402
-from sim.utils.helpers import get_args # noqa: E402
-from sim.utils.logger import Logger # noqa: E402
-
-import torch # isort: skip
-
-
-def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None:
- os.makedirs(path, exist_ok=True)
- path = os.path.join(path, "policy_1.pt")
- model = copy.deepcopy(actor_critic.actor).to("cpu")
- traced_script_module = torch.jit.script(model)
- traced_script_module.save(path)
-
-
-def play(args: argparse.Namespace) -> None:
- logger.info("Configuring environment and training settings...")
- env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
-
- # override some parameters for testing
- env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1)
- env_cfg.sim.max_gpu_contact_pairs = 2**10
- if args.trimesh:
- env_cfg.terrain.mesh_type = "trimesh"
- else:
- env_cfg.terrain.mesh_type = "plane"
- env_cfg.terrain.num_rows = 5
- env_cfg.terrain.num_cols = 5
- env_cfg.terrain.curriculum = False
- env_cfg.terrain.max_init_terrain_level = 5
- env_cfg.noise.add_noise = True
- env_cfg.domain_rand.push_robots = False
- env_cfg.domain_rand.joint_angle_noise = 0.01
- env_cfg.noise.curriculum = False
- env_cfg.noise.noise_level = 0.5
-
- train_cfg.seed = 123145
- logger.info("train_cfg.runner_class_name: %s", train_cfg.runner_class_name)
-
- # prepare environment
- env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
- env.set_camera(env_cfg.viewer.pos, env_cfg.viewer.lookat)
-
- obs = env.get_observations()
-
- # load policy
- train_cfg.runner.resume = True
- ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg)
- policy = ppo_runner.get_inference_policy(device=env.device)
-
- # Prepare for logging
- env_logger = Logger(env.dt)
- robot_index = 0
- joint_index = 1
- stop_state_log = 1000
- now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
- if args.log_h5:
- h5_file = h5py.File(f"data{now}.h5", "w")
-
- # Create dataset for actions
- max_timesteps = stop_state_log
- num_dof = env.num_dof
- dset_actions = h5_file.create_dataset("actions", (max_timesteps, num_dof), dtype=np.float32)
-
- # Create dataset of observations
- buf_len = len(env.obs_history) # length of observation buffer
- dset_2D_command = h5_file.create_dataset(
- "observations/2D_command", (max_timesteps, buf_len, 2), dtype=np.float32
- ) # sin and cos commands
- dset_3D_command = h5_file.create_dataset(
- "observations/3D_command", (max_timesteps, buf_len, 3), dtype=np.float32
- ) # x, y, yaw commands
- dset_q = h5_file.create_dataset(
- "observations/q", (max_timesteps, buf_len, num_dof), dtype=np.float32
- ) # joint positions
- dset_dq = h5_file.create_dataset(
- "observations/dq", (max_timesteps, buf_len, num_dof), dtype=np.float32
- ) # joint velocities
- dset_obs_actions = h5_file.create_dataset(
- "observations/actions", (max_timesteps, buf_len, num_dof), dtype=np.float32
- ) # actions
- dset_ang_vel = h5_file.create_dataset(
- "observations/ang_vel", (max_timesteps, buf_len, 3), dtype=np.float32
- ) # root angular velocity
- dset_euler = h5_file.create_dataset(
- "observations/euler", (max_timesteps, buf_len, 3), dtype=np.float32
- ) # root orientation
-
- if args.render:
- camera_properties = gymapi.CameraProperties()
- camera_properties.width = 1920
- camera_properties.height = 1080
- h1 = env.gym.create_camera_sensor(env.envs[0], camera_properties)
- camera_offset = gymapi.Vec3(3, -3, 1)
- camera_rotation = gymapi.Quat.from_axis_angle(gymapi.Vec3(-0.3, 0.2, 1), np.deg2rad(135))
- actor_handle = env.gym.get_actor_handle(env.envs[0], 0)
- body_handle = env.gym.get_actor_rigid_body_handle(env.envs[0], actor_handle, 0)
- logger.info("body_handle: %s", body_handle)
- logger.info("actor_handle: %s", actor_handle)
- env.gym.attach_camera_to_body(
- h1, env.envs[0], body_handle, gymapi.Transform(camera_offset, camera_rotation), gymapi.FOLLOW_POSITION
- )
-
- fourcc = cv2.VideoWriter_fourcc(*"MJPG") # type: ignore[attr-defined]
-
- # Creates a directory to store videos.
- video_dir = run_dir() / "videos"
- experiment_dir = video_dir / train_cfg.runner.experiment_name
- experiment_dir.mkdir(parents=True, exist_ok=True)
-
- dir = os.path.join(experiment_dir, now + str(args.run_name) + ".mp4")
- if not os.path.exists(video_dir):
- os.mkdir(video_dir)
- if not os.path.exists(experiment_dir):
- os.mkdir(experiment_dir)
- video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080))
-
- for t in tqdm(range(stop_state_log)):
- actions = policy(obs.detach())
- if args.log_h5:
- dset_actions[t] = actions.detach().numpy()
- if args.fix_command:
- env.commands[:, 0] = 0.5
- env.commands[:, 1] = 0.0
- env.commands[:, 2] = 0.0
- env.commands[:, 3] = 0.0
- obs, critic_obs, rews, dones, infos = env.step(actions.detach())
- print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}")
-
- if args.render:
- env.gym.fetch_results(env.sim, True)
- env.gym.step_graphics(env.sim)
- env.gym.render_all_camera_sensors(env.sim)
- img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR)
- img = np.reshape(img, (1080, 1920, 4))
- img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
-
- video.write(img[..., :3])
-
- # Log states
- dof_pos_target = actions[robot_index, joint_index].item() * env.cfg.control.action_scale
- dof_pos = env.dof_pos[robot_index, joint_index].item()
- dof_vel = env.dof_vel[robot_index, joint_index].item()
- dof_torque = env.torques[robot_index, joint_index].item()
- command_x = env.commands[robot_index, 0].item()
- command_y = env.commands[robot_index, 1].item()
- command_yaw = env.commands[robot_index, 2].item()
- base_vel_x = env.base_lin_vel[robot_index, 0].item()
- base_vel_y = env.base_lin_vel[robot_index, 1].item()
- base_vel_z = env.base_lin_vel[robot_index, 2].item()
- base_vel_yaw = env.base_ang_vel[robot_index, 2].item()
- contact_forces_z = env.contact_forces[robot_index, env.feet_indices, 2].cpu().numpy()
-
- if args.log_h5:
- for i in range(buf_len):
- cur_obs = env.obs_history[i].tolist()[0]
- dset_2D_command[t, i] = cur_obs[0:2] # sin and cos commands
- dset_3D_command[t, i] = cur_obs[2:5] # x, y, yaw commands
- dset_q[t, i] = cur_obs[5 : 5 + num_dof] # joint positions
- dset_dq[t, i] = cur_obs[5 + num_dof : 5 + 2 * num_dof] # joint velocities
- dset_obs_actions[t, i] = cur_obs[5 + 2 * num_dof : 5 + 3 * num_dof] # actions
- dset_ang_vel[t, i] = cur_obs[5 + 3 * num_dof : 8 + 3 * num_dof] # root angular velocity
- dset_euler[t, i] = cur_obs[8 + 3 * num_dof : 11 + 3 * num_dof] # root orientation
-
- env_logger.log_states(
- {
- "dof_pos_target": dof_pos_target,
- "dof_pos": dof_pos,
- "dof_vel": dof_vel,
- "dof_torque": dof_torque,
- "command_x": command_x,
- "command_y": command_y,
- "command_yaw": command_yaw,
- "base_vel_x": base_vel_x,
- "base_vel_y": base_vel_y,
- "base_vel_z": base_vel_z,
- "base_vel_yaw": base_vel_yaw,
- "contact_forces_z": contact_forces_z,
- }
- )
- if infos["episode"]:
- num_episodes = env.reset_buf.sum().item()
- if num_episodes > 0:
- env_logger.log_rewards(infos["episode"], num_episodes)
-
- env_logger.print_rewards()
- env_logger.plot_states()
-
- if args.render:
- video.release()
-
- if args.log_h5:
- print("Saving data to " + os.path.abspath(f"data{now}.h5"))
- h5_file.close()
-
-
-if __name__ == "__main__":
- base_args = get_args()
- parser = argparse.ArgumentParser(description="Extend base arguments with log_h5")
- parser.add_argument("--log_h5", action="store_true", help="Enable HDF5 logging")
- parser.add_argument("--render", action="store_true", help="Enable rendering", default=True)
- parser.add_argument("--fix_command", action="store_true", help="Fix command", default=True)
- parser.add_argument("--export_onnx", action="store_true", help="Export policy as ONNX", default=True)
- parser.add_argument("--export_policy", action="store_true", help="Export policy as JIT", default=True)
- args, unknown = parser.parse_known_args(namespace=base_args)
-
- play(args)
diff --git a/sim/requirements-dev.txt b/sim/requirements-dev.txt
index c55c16b..3633b81 100755
--- a/sim/requirements-dev.txt
+++ b/sim/requirements-dev.txt
@@ -9,7 +9,7 @@ mediapy==1.2.2
mujoco==2.3.6
mujoco_python_viewer==0.1.4
onnx==1.15.0
-onnxruntime
+onnxruntime==1.15.0
pandas==1.4.4
Pillow>6.2.0
poselib==2.0.4
diff --git a/sim/requirements.txt b/sim/requirements.txt
index ab18cb3..a4bf776 100755
--- a/sim/requirements.txt
+++ b/sim/requirements.txt
@@ -6,7 +6,7 @@ python-dotenv
h5py
gdown
matplotlib==3.3.4
-numpy==1.20.0
+numpy==1.21.6
wandb
tensorboard==2.14.0
onnxscript
diff --git a/sim/resources/gpr/joints.py b/sim/resources/gpr/joints.py
index 149b7f3..a5b6472 100755
--- a/sim/resources/gpr/joints.py
+++ b/sim/resources/gpr/joints.py
@@ -63,35 +63,15 @@ class RightLeg(Node):
ankle_pitch = "R_ankle_y"
-class LeftArm(Node):
- shoulder_pitch = "L_shoulder_y"
- shoulder_roll = "L_shoulder_z"
- shoulder_yaw = "L_shoulder_x"
- elbow_pitch = "L_elbow_x"
-
-
-class RightArm(Node):
- shoulder_pitch = "R_shoulder_y"
- shoulder_roll = "R_shoulder_z"
- shoulder_yaw = "R_shoulder_x"
- elbow_pitch = "R_elbow_x"
-
-
class Legs(Node):
left = LeftLeg()
right = RightLeg()
-class Arms(Node):
- left = LeftArm()
- right = RightArm()
-
-
class Robot(Node):
legs = Legs()
- # arms = Arms()
- height = 1.06
+ height = 1.05
rotation = [0, 0, 0, 1]
@classmethod
@@ -178,10 +158,10 @@ def damping(cls) -> Dict[str, float]:
@classmethod
def effort(cls) -> Dict[str, float]:
return {
- "hip_y": 120,
- "hip_x": 60,
- "hip_z": 60,
- "knee": 120,
+ "hip_y": 40,
+ "hip_x": 20,
+ "hip_z": 20,
+ "knee": 40,
"ankle_y": 17,
}
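
These per-joint efforts are what sim/sim2sim.py expands into the torque clamp: the single-leg list is duplicated for both legs and scaled by the exported tau_factor (which play.py now takes from safety.torque_limit, set to 1.0 in gpr_config.py above). A small sketch of that computation with the updated values; the sample torques are illustrative only:

import numpy as np

# Updated effort limits for one leg, as defined in Robot.effort() above.
effort = {"hip_y": 40.0, "hip_x": 20.0, "hip_z": 20.0, "knee": 40.0, "ankle_y": 17.0}
tau_factor = 1.0  # assumption: the safety.torque_limit exported with the policy

robot_effort = list(effort.values())
tau_limit = np.array(robot_effort + robot_effort) * tau_factor  # left leg + right leg

# PD torques are clipped to this limit before being written to data.ctrl.
tau = np.clip(np.array([55.0, -30.0, 10.0, 45.0, -20.0] * 2), -tau_limit, tau_limit)
print(tau_limit)
print(tau)  # hip_y/knee entries saturate at +/-40 N*m, ankle at +/-17 N*m
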
diff --git a/sim/resources/gpr/robot_fixed.urdf b/sim/resources/gpr/robot_fixed.urdf
index 1ff7bb0..c1e2aef 100644
--- a/sim/resources/gpr/robot_fixed.urdf
+++ b/sim/resources/gpr/robot_fixed.urdf
@@ -95,7 +95,7 @@
-
+
@@ -124,7 +124,7 @@
-
+
@@ -211,7 +211,7 @@
-
+
@@ -240,7 +240,7 @@
-
+
@@ -327,7 +327,7 @@
-
+
@@ -356,7 +356,7 @@
-
+
@@ -443,7 +443,7 @@
-
+
@@ -472,7 +472,7 @@
-
+
diff --git a/sim/resources/gpr/robot_fixed.xml b/sim/resources/gpr/robot_fixed.xml
index 5733094..9349bdd 100644
--- a/sim/resources/gpr/robot_fixed.xml
+++ b/sim/resources/gpr/robot_fixed.xml
@@ -188,6 +188,6 @@
-
+
\ No newline at end of file
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index b512952..82ce113 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -19,21 +19,9 @@
import torch
from kinfer.export.pytorch import export_to_onnx
from kinfer.inference.python import ONNXModel
-from model_export import ActorCfg, get_actor_policy
from scipy.spatial.transform import Rotation as R
from tqdm import tqdm
-from sim.h5_logger import HDF5Logger
-
-
-@dataclass
-class Sim2simCfg:
- sim_duration: float = 60.0
- dt: float = 0.001
- decimation: int = 10
- tau_factor: float = 3
- cycle_time: float = 0.25
-
def handle_keyboard_input() -> None:
global x_vel_cmd, y_vel_cmd, yaw_vel_cmd
@@ -107,11 +95,11 @@ def pd_control(
def run_mujoco(
embodiment: str,
policy: ort.InferenceSession,
- cfg: Sim2simCfg,
model_info: Dict[str, Union[float, List[float], str]],
keyboard_use: bool = False,
log_h5: bool = False,
render: bool = True,
+ sim_duration: float = 60.0,
h5_out_dir: str = "sim/resources",
) -> None:
"""
@@ -120,15 +108,12 @@ def run_mujoco(
Args:
policy: The policy used for controlling the simulation.
- cfg: The configuration object containing simulation settings.
+ model_info: The model information dictionary.
-
- Returns:
- None
"""
model_dir = os.environ.get("MODEL_DIR", "sim/resources")
mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed.xml"
model = mujoco.MjModel.from_xml_path(mujoco_model_path)
- model.opt.timestep = cfg.dt
+ model.opt.timestep = model_info["sim_dt"]
data = mujoco.MjData(model)
assert isinstance(model_info["num_actions"], int)
@@ -137,7 +122,7 @@ def run_mujoco(
assert isinstance(model_info["robot_stiffness"], list)
assert isinstance(model_info["robot_damping"], list)
- tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * cfg.tau_factor
+ tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * model_info["tau_factor"]
kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"]))
kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"]))
@@ -180,7 +165,9 @@ def run_mujoco(
}
if log_h5:
- stop_state_log = int(cfg.sim_duration / cfg.dt) / cfg.decimation
+ from sim.h5_logger import HDF5Logger
+
+ stop_state_log = int(sim_duration / model_info["sim_dt"] / model_info["sim_decimation"])
logger = HDF5Logger(
data_name=embodiment,
num_actions=model_info["num_actions"],
@@ -194,7 +181,7 @@ def run_mujoco(
total_speed = 0.0
step_count = 0
- for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."):
+ for _ in tqdm(range(int(sim_duration / model_info["sim_dt"])), desc="Simulating..."):
if keyboard_use:
handle_keyboard_input()
@@ -219,7 +206,7 @@ def run_mujoco(
step_count += 1
# 1000hz -> 50hz
- if count_lowlevel % cfg.decimation == 0:
+ if count_lowlevel % model_info["sim_decimation"] == 0:
# Convert sim coordinates to policy coordinates
cur_pos_obs = q - default
cur_vel_obs = dq
@@ -228,7 +215,7 @@ def run_mujoco(
input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32)
input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32)
- input_data["t.1"] = np.array([count_lowlevel * cfg.dt], dtype=np.float32)
+ input_data["t.1"] = np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32)
input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32)
input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32)
@@ -250,11 +237,11 @@ def run_mujoco(
if log_h5:
logger.log_data(
{
- "t": np.array([count_lowlevel * cfg.dt], dtype=np.float32),
+ "t": np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32),
"2D_command": np.array(
[
- np.sin(2 * math.pi * count_lowlevel * cfg.dt / cfg.cycle_time),
- np.cos(2 * math.pi * count_lowlevel * cfg.dt / cfg.cycle_time),
+ np.sin(2 * math.pi * count_lowlevel * model_info["sim_dt"] / model_info["cycle_time"]),
+ np.cos(2 * math.pi * count_lowlevel * model_info["sim_dt"] / model_info["cycle_time"]),
],
dtype=np.float32,
),
@@ -317,54 +304,25 @@ def run_mujoco(
else:
x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.2, 0.0, 0.0
- policy_cfg = ActorCfg(embodiment=args.embodiment)
- if args.embodiment == "gpr":
- policy_cfg.cycle_time = 0.25
- cfg = Sim2simCfg(
- sim_duration=10.0,
- dt=0.001,
- decimation=10,
- tau_factor=4.0,
- cycle_time=policy_cfg.cycle_time,
- )
- elif args.embodiment == "zeroth":
- policy_cfg.cycle_time = 0.2
- cfg = Sim2simCfg(
- sim_duration=10.0,
- dt=0.001,
- decimation=10,
- tau_factor=2,
- cycle_time=policy_cfg.cycle_time,
- )
-
- if args.load_model.endswith(".kinfer"):
- policy = ONNXModel(args.load_model)
- else:
- actor_model, sim2sim_info, input_tensors = get_actor_policy(args.load_model, policy_cfg)
-
- # Merge policy_cfg and sim2sim_info into a single config object
- export_config = {**vars(policy_cfg), **sim2sim_info}
- print(export_config)
- export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_test.onnx")
- policy = ONNXModel("kinfer_test.onnx")
-
+ policy = ONNXModel(args.load_model)
metadata = policy.get_metadata()
-
model_info = {
"num_actions": metadata["num_actions"],
"num_observations": metadata["num_observations"],
"robot_effort": metadata["robot_effort"],
"robot_stiffness": metadata["robot_stiffness"],
"robot_damping": metadata["robot_damping"],
+ "sim_dt": metadata["sim_dt"],
+ "sim_decimation": metadata["sim_decimation"],
+ "tau_factor": metadata["tau_factor"],
}
run_mujoco(
- args.embodiment,
- policy,
- cfg,
- model_info,
- args.keyboard_use,
- args.log_h5,
- args.render,
- args.h5_out_dir,
+ embodiment=args.embodiment,
+ policy=policy,
+ model_info=model_info,
+ keyboard_use=args.keyboard_use,
+ log_h5=args.log_h5,
+ render=args.render,
+ h5_out_dir=args.h5_out_dir,
)
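
With Sim2simCfg and the in-script export path gone, sim2sim.py now expects a model whose metadata already carries sim_dt, sim_decimation, tau_factor, and cycle_time, e.g. the checked-in examples/gpr_walking.kinfer or a policy exported via play.py. A typical flow, assuming the GPR task is registered under the name gpr:

    python sim/play.py --task gpr --export_onnx
    python sim/sim2sim.py --load_model examples/gpr_walking.kinfer --embodiment gpr
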
diff --git a/sim/sim2sim_old.py b/sim/sim2sim_old.py
deleted file mode 100755
index b887e3f..0000000
--- a/sim/sim2sim_old.py
+++ /dev/null
@@ -1,345 +0,0 @@
-""" Sim2sim script for running a policy in MuJoCo.
-
-Run:
- python sim/sim2sim.py --load_model examples/standing_pro.pt --embodiment gpr
- python sim/sim2sim.py --load_model examples/standing_micro.pt --embodiment zeroth
-"""
-
-import argparse
-import math
-import os
-from copy import deepcopy
-from dataclasses import dataclass
-from typing import Dict, List, Tuple, Union
-
-import mujoco
-import mujoco_viewer
-import numpy as np
-import onnxruntime as ort
-import pygame
-from scipy.spatial.transform import Rotation as R
-from tqdm import tqdm
-
-from sim.model_export import ActorCfg, convert_model_to_onnx
-
-
-@dataclass
-class Sim2simCfg:
- sim_duration: float = 60.0
- dt: float = 0.001
- decimation: int = 10
- tau_factor: float = 3
- cycle_time: float = 0.4
-
-
-def handle_keyboard_input() -> None:
- global x_vel_cmd, y_vel_cmd, yaw_vel_cmd
-
- keys = pygame.key.get_pressed()
-
- # Update movement commands based on arrow keys
- if keys[pygame.K_UP]:
- x_vel_cmd += 0.0005
- if keys[pygame.K_DOWN]:
- x_vel_cmd -= 0.0005
- if keys[pygame.K_LEFT]:
- y_vel_cmd += 0.0005
- if keys[pygame.K_RIGHT]:
- y_vel_cmd -= 0.0005
-
- # Yaw control
- if keys[pygame.K_a]:
- yaw_vel_cmd += 0.001
- if keys[pygame.K_z]:
- yaw_vel_cmd -= 0.001
-
-
-def quaternion_to_euler_array(quat: np.ndarray) -> np.ndarray:
- # Ensure quaternion is in the correct format [x, y, z, w]
- x, y, z, w = quat
-
- # Roll (x-axis rotation)
- t0 = +2.0 * (w * x + y * z)
- t1 = +1.0 - 2.0 * (x * x + y * y)
- roll_x = np.arctan2(t0, t1)
-
- # Pitch (y-axis rotation)
- t2 = +2.0 * (w * y - z * x)
- t2 = np.clip(t2, -1.0, 1.0)
- pitch_y = np.arcsin(t2)
-
- # Yaw (z-axis rotation)
- t3 = +2.0 * (w * z + x * y)
- t4 = +1.0 - 2.0 * (y * y + z * z)
- yaw_z = np.arctan2(t3, t4)
-
- # Returns roll, pitch, yaw in a NumPy array in radians
- return np.array([roll_x, pitch_y, yaw_z])
-
-
-def get_obs(data: mujoco.MjData) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
- """Extracts an observation from the mujoco data structure."""
- q = data.qpos.astype(np.double)
- dq = data.qvel.astype(np.double)
- quat = data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double)
- r = R.from_quat(quat)
- v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame
- omega = data.sensor("angular-velocity").data.astype(np.double)
- gvec = r.apply(np.array([0.0, 0.0, -1.0]), inverse=True).astype(np.double)
- return (q, dq, quat, v, omega, gvec)
-
-
-def pd_control(
- target_q: np.ndarray,
- q: np.ndarray,
- kp: np.ndarray,
- dq: np.ndarray,
- kd: np.ndarray,
- default: np.ndarray,
-) -> np.ndarray:
- """Calculates torques from position commands."""
- return kp * (target_q + default - q) - kd * dq
-
-
-def run_mujoco(
- embodiment: str,
- policy: ort.InferenceSession,
- cfg: Sim2simCfg,
- model_info: Dict[str, Union[float, List[float], str]],
- keyboard_use: bool = False,
- log_h5: bool = False,
- render: bool = True,
- h5_out_dir: str = "sim/resources",
-) -> None:
- """Run the Mujoco simulation using the provided policy and configuration.
-
- Args:
- policy: The policy used for controlling the simulation.
- cfg: The configuration object containing simulation settings.
- model_info: The model information dictionary.
- keyboard_use: Whether to use keyboard input.
- log_h5: Whether to log data to HDF5.
- render: Whether to render the simulation.
- h5_out_dir: The directory to save HDF5 files.
-
- Returns:
- None
- """
- model_dir = os.environ.get("MODEL_DIR", "sim/resources")
- mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed.xml"
-
- model = mujoco.MjModel.from_xml_path(mujoco_model_path)
- model.opt.timestep = cfg.dt
- data = mujoco.MjData(model)
-
- assert isinstance(model_info["num_actions"], int)
- assert isinstance(model_info["num_observations"], int)
- assert isinstance(model_info["robot_effort"], list)
- assert isinstance(model_info["robot_stiffness"], list)
- assert isinstance(model_info["robot_damping"], list)
-
- tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * cfg.tau_factor
- kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"]))
- kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"]))
-
- try:
- data.qpos = model.keyframe("default").qpos
- default = deepcopy(model.keyframe("default").qpos)[-model_info["num_actions"] :]
- print("Default position:", default)
- except:
- print("No default position found, using zero initialization")
- default = np.zeros(model_info["num_actions"]) # 3 for pos, 4 for quat, cfg.num_actions for joints
- default += np.random.uniform(-0.03, 0.03, size=default.shape)
- print("Default position:", default)
- mujoco.mj_step(model, data)
- for ii in range(len(data.ctrl) + 1):
- print(data.joint(ii).id, data.joint(ii).name)
-
- data.qvel = np.zeros_like(data.qvel)
- data.qacc = np.zeros_like(data.qacc)
-
- if render:
- viewer = mujoco_viewer.MujocoViewer(model, data)
-
- target_q = np.zeros((model_info["num_actions"]), dtype=np.double)
- prev_actions = np.zeros((model_info["num_actions"]), dtype=np.double)
- hist_obs = np.zeros((model_info["num_observations"]), dtype=np.double)
-
- count_lowlevel = 0
-
- input_data = {
- "x_vel.1": np.zeros(1).astype(np.float32),
- "y_vel.1": np.zeros(1).astype(np.float32),
- "rot.1": np.zeros(1).astype(np.float32),
- "t.1": np.zeros(1).astype(np.float32),
- "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32),
- "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32),
- "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32),
- "imu_ang_vel.1": np.zeros(3).astype(np.float32),
- "imu_euler_xyz.1": np.zeros(3).astype(np.float32),
- "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32),
- }
-
- # Initialize variables for tracking upright steps and average speed
- upright_steps = 0
- total_speed = 0.0
- step_count = 0
-
- for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."):
- if keyboard_use:
- handle_keyboard_input()
-
- # Obtain an observation
- q, dq, quat, v, omega, gvec = get_obs(data)
- q = q[-model_info["num_actions"] :]
- dq = dq[-model_info["num_actions"] :]
-
- eu_ang = quaternion_to_euler_array(quat)
- eu_ang[eu_ang > math.pi] -= 2 * math.pi
-
- # Check if the robot is upright (roll and pitch within ±30 degrees)
- if abs(eu_ang[0]) > math.radians(30) or abs(eu_ang[1]) > math.radians(30):
- print("Robot tilted heavily, ending simulation.")
- break
- else:
- upright_steps += 1 # Increment upright steps
-
- # Calculate speed and accumulate for average speed calculation
- speed = np.linalg.norm(v[:2]) # Speed in the x-y plane
- total_speed += speed
- step_count += 1
-
- # 1000hz -> Nhz
- if count_lowlevel % cfg.decimation == 0:
- # Convert sim coordinates to policy coordinates
- cur_pos_obs = q - default
- cur_vel_obs = dq
-
- input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32)
- input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32)
- input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32)
-
- input_data["t.1"] = np.array([count_lowlevel * cfg.dt], dtype=np.float32)
-
- input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32)
- input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32)
-
- input_data["prev_actions.1"] = prev_actions.astype(np.float32)
-
- input_data["imu_ang_vel.1"] = omega.astype(np.float32)
- input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32)
-
- input_data["buffer.1"] = hist_obs.astype(np.float32)
-
- positions, curr_actions, hist_obs = policy.run(None, input_data)
- target_q = positions
- prev_actions = curr_actions
-
- # Generate PD control
- tau = pd_control(target_q, q, kps, dq, kds, default) # Calc torques
- tau = np.clip(tau, -tau_limit, tau_limit) # Clamp torques
-
- data.ctrl = tau
- mujoco.mj_step(model, data)
-
- if render:
- viewer.render()
- count_lowlevel += 1
-
- if render:
- viewer.close()
-
- # Calculate average speed
- if step_count > 0:
- average_speed = total_speed / step_count
- else:
- average_speed = 0.0
-
- # Save or print the statistics at the end of the episode
- print(f"Number of upright steps: {upright_steps}")
- print(f"Average speed: {average_speed:.4f} m/s")
-
-
-def parse_modelmeta(
- modelmeta: List[Tuple[str, str]],
- verbose: bool = False,
-) -> Dict[str, Union[float, List[float], str]]:
- parsed_meta: Dict[str, Union[float, List[float], str]] = {}
- for key, value in modelmeta:
- if value.startswith("[") and value.endswith("]"):
- parsed_meta[key] = list(map(float, value.strip("[]").split(",")))
- else:
- try:
- parsed_meta[key] = float(value)
- try:
- if int(value) == parsed_meta[key]:
- parsed_meta[key] = int(value)
- except ValueError:
- pass
- except ValueError:
- print(f"Failed to convert {value} to float")
- parsed_meta[key] = value
- if verbose:
- for key, value in parsed_meta.items():
- print(f"{key}: {value}")
- return parsed_meta
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Deployment script.")
- parser.add_argument("--embodiment", type=str, required=True, help="Embodiment name.")
- parser.add_argument("--load_model", type=str, required=True, help="Path to run to load from.")
- parser.add_argument("--keyboard_use", action="store_true", help="keyboard_use")
- parser.add_argument("--log_h5", action="store_true", help="log_h5")
- parser.add_argument("--h5_out_dir", type=str, default="sim/resources", help="Directory to save HDF5 files")
- parser.add_argument("--no_render", action="store_false", dest="render", help="Disable rendering")
- parser.set_defaults(render=True)
- args = parser.parse_args()
-
- if args.keyboard_use:
- x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.0, 0.0, 0.0
- pygame.init()
- pygame.display.set_caption("Simulation Control")
- else:
- x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.4, 0.0, 0.0
-
- policy_cfg = ActorCfg(embodiment=args.embodiment)
- if args.embodiment == "gpr":
- policy_cfg.cycle_time = 0.64
- cfg = Sim2simCfg(
- sim_duration=10.0,
- dt=0.001,
- decimation=10,
- tau_factor=4.0,
- cycle_time=policy_cfg.cycle_time,
- )
- elif args.embodiment == "zeroth":
- policy_cfg.cycle_time = 0.2
- cfg = Sim2simCfg(
- sim_duration=10.0,
- dt=0.001,
- decimation=10,
- tau_factor=1,
- cycle_time=policy_cfg.cycle_time,
- )
-
- if args.load_model.endswith(".onnx"):
- policy = ort.InferenceSession(args.load_model)
- else:
- policy = convert_model_to_onnx(args.load_model, policy_cfg, save_path="policy.onnx")
-
- model_info = parse_modelmeta(
- policy.get_modelmeta().custom_metadata_map.items(),
- verbose=True,
- )
-
- run_mujoco(
- args.embodiment,
- policy,
- cfg,
- model_info,
- args.keyboard_use,
- args.log_h5,
- args.render,
- args.h5_out_dir,
- )