diff --git a/examples/gpr_walking.kinfer b/examples/gpr_walking.kinfer
index 19d761c..84fa052 100644
Binary files a/examples/gpr_walking.kinfer and b/examples/gpr_walking.kinfer differ
diff --git a/examples/walking_policy.pt b/examples/walking_policy.pt
index f483c38..eb25c85 100644
Binary files a/examples/walking_policy.pt and b/examples/walking_policy.pt differ
diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py
index a3be536..8291e70 100755
--- a/sim/envs/__init__.py
+++ b/sim/envs/__init__.py
@@ -12,8 +12,8 @@
from sim.envs.humanoids.dora_env import DoraFreeEnv
from sim.envs.humanoids.g1_config import G1Cfg, G1CfgPPO
from sim.envs.humanoids.g1_env import G1FreeEnv
-from sim.envs.humanoids.gpr_env import GprFreeEnv
from sim.envs.humanoids.gpr_config import GprCfg, GprCfgPPO, GprStandingCfg
+from sim.envs.humanoids.gpr_env import GprFreeEnv
from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO
from sim.envs.humanoids.h1_env import H1FreeEnv
from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 42bdbe2..91d66d7 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -30,7 +30,7 @@ class safety(LeggedRobotCfg.safety):
# safety factors
pos_limit = 1.0
vel_limit = 1.0
- torque_limit = 0.85
+ torque_limit = 1.0
class asset(LeggedRobotCfg.asset):
name = "gpr"
@@ -123,7 +123,7 @@ class domain_rand(LeggedRobotCfg.domain_rand):
randomize_base_mass = True
added_mass_range = [-3.0, 3.0]
- push_robots = False
+ push_robots = True
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
@@ -150,14 +150,14 @@ class rewards:
max_dist = 0.5
# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
- target_feet_height = 0.06 # m
+ target_feet_height = 0.05 # m
- cycle_time = 0.64 # sec
+ cycle_time = 0.25 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
tracking_sigma = 5.0
- max_contact_force = 700 # forces above this value are penalized
+ max_contact_force = 400 # forces above this value are penalized
class scales:
# reference motion tracking
@@ -213,7 +213,7 @@ class GprStandingCfg(GprCfg):
class rewards:
# quite important to keep it right
- base_height_target = 1.06
+ base_height_target = Robot.height
min_dist = 0.2
max_dist = 0.5
# put some settings here for LLM parameter tuning
diff --git a/sim/model_export.py b/sim/model_export.py
index 09c190a..3777124 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -15,17 +15,20 @@
@dataclass
class ActorCfg:
- embodiment: str = "gpr"
- cycle_time: float = 0.4 # Cycle time for sinusoidal command input
- action_scale: float = 0.25 # Scale for actions
- lin_vel_scale: float = 2.0 # Scale for linear velocity
- ang_vel_scale: float = 1.0 # Scale for angular velocity
- quat_scale: float = 1.0 # Scale for quaternion
- dof_pos_scale: float = 1.0 # Scale for joint positions
- dof_vel_scale: float = 0.05 # Scale for joint velocities
- frame_stack: int = 15 # Number of frames to stack for the policy input
- clip_observations: float = 18.0 # Clip observations to this value
- clip_actions: float = 18.0 # Clip actions to this value
+ embodiment: str
+ cycle_time: float # Cycle time for sinusoidal command input
+ action_scale: float # Scale for actions
+ lin_vel_scale: float # Scale for linear velocity
+ ang_vel_scale: float # Scale for angular velocity
+ quat_scale: float # Scale for quaternion
+ dof_pos_scale: float # Scale for joint positions
+ dof_vel_scale: float # Scale for joint velocities
+ frame_stack: int # Number of frames to stack for the policy input
+ clip_observations: float # Clip observations to this value
+ clip_actions: float # Clip actions to this value
+ sim_dt: float # Simulation time step
+ sim_decimation: int # Simulation decimation
+ tau_factor: float # Torque limit factor
class ActorCritic(nn.Module):
@@ -242,6 +245,8 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
num_actions = a_model.num_actions
num_observations = a_model.num_observations
+ default_standing = list(a_model.robot.default_standing().values())
+
return (
a_model,
{
@@ -250,6 +255,7 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
"robot_damping": robot_damping,
"num_actions": num_actions,
"num_observations": num_observations,
+ "default_standing": default_standing,
},
input_tensors,
)
@@ -338,7 +344,3 @@ def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[st
buffer2.seek(0)
return ort.InferenceSession(buffer2.read())
-
-
-if __name__ == "__main__":
- convert_model_to_onnx("model_3000.pt", ActorCfg(), "policy.onnx")
diff --git a/sim/play.py b/sim/play.py
index 9abe511..04465dd 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -21,15 +21,11 @@
from kinfer.export.pytorch import export_to_onnx
from tqdm import tqdm
-# Local imports third
from sim.env import run_dir # noqa: E402
from sim.envs import task_registry # noqa: E402
-from sim.h5_logger import HDF5Logger
-from sim.model_export import ( # noqa: E402
- ActorCfg,
- convert_model_to_onnx,
- get_actor_policy,
-)
+
+# Local imports third
+from sim.model_export import ActorCfg, get_actor_policy
from sim.utils.helpers import get_args # noqa: E402
from sim.utils.logger import Logger # noqa: E402
@@ -92,15 +88,22 @@ def play(args: argparse.Namespace) -> None:
if args.export_onnx:
path = ppo_runner.load_path
embodiment = ppo_runner.cfg["experiment_name"].lower()
- policy_cfg = ActorCfg(embodiment=embodiment)
-
- if embodiment == "stompypro":
- policy_cfg.cycle_time = 0.4
- elif embodiment == "stompymicro":
- policy_cfg.cycle_time = 0.2
- else:
- print(f"Specific policy cfg for {embodiment} not implemented")
-
+ policy_cfg = ActorCfg(
+ embodiment=embodiment,
+ cycle_time=env_cfg.rewards.cycle_time,
+ sim_dt=env_cfg.sim.dt,
+ sim_decimation=env_cfg.control.decimation,
+ tau_factor=env_cfg.safety.torque_limit,
+ action_scale=env_cfg.control.action_scale,
+ lin_vel_scale=env_cfg.normalization.obs_scales.lin_vel,
+ ang_vel_scale=env_cfg.normalization.obs_scales.ang_vel,
+ quat_scale=env_cfg.normalization.obs_scales.quat,
+ dof_pos_scale=env_cfg.normalization.obs_scales.dof_pos,
+ dof_vel_scale=env_cfg.normalization.obs_scales.dof_vel,
+ frame_stack=env_cfg.env.frame_stack,
+ clip_observations=env_cfg.normalization.clip_observations,
+ clip_actions=env_cfg.normalization.clip_actions,
+ )
actor_model, sim2sim_info, input_tensors = get_actor_policy(path, policy_cfg)
# Merge policy_cfg and sim2sim_info into a single config object
@@ -117,6 +120,8 @@ def play(args: argparse.Namespace) -> None:
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
if args.log_h5:
+ from sim.h5_logger import HDF5Logger
+
# Create directory for HDF5 files
h5_dir = run_dir() / "h5_out" / args.task / now
h5_dir.mkdir(parents=True, exist_ok=True)
diff --git a/sim/play_old.py b/sim/play_old.py
deleted file mode 100755
index 6747abb..0000000
--- a/sim/play_old.py
+++ /dev/null
@@ -1,232 +0,0 @@
-# mypy: ignore-errors
-"""Play a trained policy in the environment.
-
-Run:
- python sim/play.py --task g1 --log_h5
- python sim/play.py --task stompymini --log_h5
-"""
-
-import argparse
-import copy
-import logging
-import os
-from datetime import datetime
-from typing import Any, Union
-
-import cv2
-import h5py
-import numpy as np
-from isaacgym import gymapi
-from tqdm import tqdm
-
-logger = logging.getLogger(__name__)
-
-from sim.env import run_dir # noqa: E402
-from sim.envs import task_registry # noqa: E402
-from sim.model_export import convert_model_to_onnx # noqa: E402
-from sim.utils.helpers import get_args # noqa: E402
-from sim.utils.logger import Logger # noqa: E402
-
-import torch # isort: skip
-
-
-def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None:
- os.makedirs(path, exist_ok=True)
- path = os.path.join(path, "policy_1.pt")
- model = copy.deepcopy(actor_critic.actor).to("cpu")
- traced_script_module = torch.jit.script(model)
- traced_script_module.save(path)
-
-
-def play(args: argparse.Namespace) -> None:
- logger.info("Configuring environment and training settings...")
- env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
-
- # override some parameters for testing
- env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1)
- env_cfg.sim.max_gpu_contact_pairs = 2**10
- if args.trimesh:
- env_cfg.terrain.mesh_type = "trimesh"
- else:
- env_cfg.terrain.mesh_type = "plane"
- env_cfg.terrain.num_rows = 5
- env_cfg.terrain.num_cols = 5
- env_cfg.terrain.curriculum = False
- env_cfg.terrain.max_init_terrain_level = 5
- env_cfg.noise.add_noise = True
- env_cfg.domain_rand.push_robots = False
- env_cfg.domain_rand.joint_angle_noise = 0.01
- env_cfg.noise.curriculum = False
- env_cfg.noise.noise_level = 0.5
-
- train_cfg.seed = 123145
- logger.info("train_cfg.runner_class_name: %s", train_cfg.runner_class_name)
-
- # prepare environment
- env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
- env.set_camera(env_cfg.viewer.pos, env_cfg.viewer.lookat)
-
- obs = env.get_observations()
-
- # load policy
- train_cfg.runner.resume = True
- ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg)
- policy = ppo_runner.get_inference_policy(device=env.device)
-
- # Prepare for logging
- env_logger = Logger(env.dt)
- robot_index = 0
- joint_index = 1
- stop_state_log = 1000
- now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
- if args.log_h5:
- h5_file = h5py.File(f"data{now}.h5", "w")
-
- # Create dataset for actions
- max_timesteps = stop_state_log
- num_dof = env.num_dof
- dset_actions = h5_file.create_dataset("actions", (max_timesteps, num_dof), dtype=np.float32)
-
- # Create dataset of observations
- buf_len = len(env.obs_history) # length of observation buffer
- dset_2D_command = h5_file.create_dataset(
- "observations/2D_command", (max_timesteps, buf_len, 2), dtype=np.float32
- ) # sin and cos commands
- dset_3D_command = h5_file.create_dataset(
- "observations/3D_command", (max_timesteps, buf_len, 3), dtype=np.float32
- ) # x, y, yaw commands
- dset_q = h5_file.create_dataset(
- "observations/q", (max_timesteps, buf_len, num_dof), dtype=np.float32
- ) # joint positions
- dset_dq = h5_file.create_dataset(
- "observations/dq", (max_timesteps, buf_len, num_dof), dtype=np.float32
- ) # joint velocities
- dset_obs_actions = h5_file.create_dataset(
- "observations/actions", (max_timesteps, buf_len, num_dof), dtype=np.float32
- ) # actions
- dset_ang_vel = h5_file.create_dataset(
- "observations/ang_vel", (max_timesteps, buf_len, 3), dtype=np.float32
- ) # root angular velocity
- dset_euler = h5_file.create_dataset(
- "observations/euler", (max_timesteps, buf_len, 3), dtype=np.float32
- ) # root orientation
-
- if args.render:
- camera_properties = gymapi.CameraProperties()
- camera_properties.width = 1920
- camera_properties.height = 1080
- h1 = env.gym.create_camera_sensor(env.envs[0], camera_properties)
- camera_offset = gymapi.Vec3(3, -3, 1)
- camera_rotation = gymapi.Quat.from_axis_angle(gymapi.Vec3(-0.3, 0.2, 1), np.deg2rad(135))
- actor_handle = env.gym.get_actor_handle(env.envs[0], 0)
- body_handle = env.gym.get_actor_rigid_body_handle(env.envs[0], actor_handle, 0)
- logger.info("body_handle: %s", body_handle)
- logger.info("actor_handle: %s", actor_handle)
- env.gym.attach_camera_to_body(
- h1, env.envs[0], body_handle, gymapi.Transform(camera_offset, camera_rotation), gymapi.FOLLOW_POSITION
- )
-
- fourcc = cv2.VideoWriter_fourcc(*"MJPG") # type: ignore[attr-defined]
-
- # Creates a directory to store videos.
- video_dir = run_dir() / "videos"
- experiment_dir = video_dir / train_cfg.runner.experiment_name
- experiment_dir.mkdir(parents=True, exist_ok=True)
-
- dir = os.path.join(experiment_dir, now + str(args.run_name) + ".mp4")
- if not os.path.exists(video_dir):
- os.mkdir(video_dir)
- if not os.path.exists(experiment_dir):
- os.mkdir(experiment_dir)
- video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080))
-
- for t in tqdm(range(stop_state_log)):
- actions = policy(obs.detach())
- if args.log_h5:
- dset_actions[t] = actions.detach().numpy()
- if args.fix_command:
- env.commands[:, 0] = 0.5
- env.commands[:, 1] = 0.0
- env.commands[:, 2] = 0.0
- env.commands[:, 3] = 0.0
- obs, critic_obs, rews, dones, infos = env.step(actions.detach())
- print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}")
-
- if args.render:
- env.gym.fetch_results(env.sim, True)
- env.gym.step_graphics(env.sim)
- env.gym.render_all_camera_sensors(env.sim)
- img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR)
- img = np.reshape(img, (1080, 1920, 4))
- img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
-
- video.write(img[..., :3])
-
- # Log states
- dof_pos_target = actions[robot_index, joint_index].item() * env.cfg.control.action_scale
- dof_pos = env.dof_pos[robot_index, joint_index].item()
- dof_vel = env.dof_vel[robot_index, joint_index].item()
- dof_torque = env.torques[robot_index, joint_index].item()
- command_x = env.commands[robot_index, 0].item()
- command_y = env.commands[robot_index, 1].item()
- command_yaw = env.commands[robot_index, 2].item()
- base_vel_x = env.base_lin_vel[robot_index, 0].item()
- base_vel_y = env.base_lin_vel[robot_index, 1].item()
- base_vel_z = env.base_lin_vel[robot_index, 2].item()
- base_vel_yaw = env.base_ang_vel[robot_index, 2].item()
- contact_forces_z = env.contact_forces[robot_index, env.feet_indices, 2].cpu().numpy()
-
- if args.log_h5:
- for i in range(buf_len):
- cur_obs = env.obs_history[i].tolist()[0]
- dset_2D_command[t, i] = cur_obs[0:2] # sin and cos commands
- dset_3D_command[t, i] = cur_obs[2:5] # x, y, yaw commands
- dset_q[t, i] = cur_obs[5 : 5 + num_dof] # joint positions
- dset_dq[t, i] = cur_obs[5 + num_dof : 5 + 2 * num_dof] # joint velocities
- dset_obs_actions[t, i] = cur_obs[5 + 2 * num_dof : 5 + 3 * num_dof] # actions
- dset_ang_vel[t, i] = cur_obs[5 + 3 * num_dof : 8 + 3 * num_dof] # root angular velocity
- dset_euler[t, i] = cur_obs[8 + 3 * num_dof : 11 + 3 * num_dof] # root orientation
-
- env_logger.log_states(
- {
- "dof_pos_target": dof_pos_target,
- "dof_pos": dof_pos,
- "dof_vel": dof_vel,
- "dof_torque": dof_torque,
- "command_x": command_x,
- "command_y": command_y,
- "command_yaw": command_yaw,
- "base_vel_x": base_vel_x,
- "base_vel_y": base_vel_y,
- "base_vel_z": base_vel_z,
- "base_vel_yaw": base_vel_yaw,
- "contact_forces_z": contact_forces_z,
- }
- )
- if infos["episode"]:
- num_episodes = env.reset_buf.sum().item()
- if num_episodes > 0:
- env_logger.log_rewards(infos["episode"], num_episodes)
-
- env_logger.print_rewards()
- env_logger.plot_states()
-
- if args.render:
- video.release()
-
- if args.log_h5:
- print("Saving data to " + os.path.abspath(f"data{now}.h5"))
- h5_file.close()
-
-
-if __name__ == "__main__":
- base_args = get_args()
- parser = argparse.ArgumentParser(description="Extend base arguments with log_h5")
- parser.add_argument("--log_h5", action="store_true", help="Enable HDF5 logging")
- parser.add_argument("--render", action="store_true", help="Enable rendering", default=True)
- parser.add_argument("--fix_command", action="store_true", help="Fix command", default=True)
- parser.add_argument("--export_onnx", action="store_true", help="Export policy as ONNX", default=True)
- parser.add_argument("--export_policy", action="store_true", help="Export policy as JIT", default=True)
- args, unknown = parser.parse_known_args(namespace=base_args)
-
- play(args)
diff --git a/sim/requirements-dev.txt b/sim/requirements-dev.txt
index c55c16b..3633b81 100755
--- a/sim/requirements-dev.txt
+++ b/sim/requirements-dev.txt
@@ -9,7 +9,7 @@ mediapy==1.2.2
mujoco==2.3.6
mujoco_python_viewer==0.1.4
onnx==1.15.0
-onnxruntime
+onnxruntime==1.15.0
pandas==1.4.4
Pillow>6.2.0
poselib==2.0.4
diff --git a/sim/requirements.txt b/sim/requirements.txt
index ab18cb3..a4bf776 100755
--- a/sim/requirements.txt
+++ b/sim/requirements.txt
@@ -6,7 +6,7 @@ python-dotenv
h5py
gdown
matplotlib==3.3.4
-numpy==1.20.0
+numpy==1.21.6
wandb
tensorboard==2.14.0
onnxscript
diff --git a/sim/resources/gpr/joints.py b/sim/resources/gpr/joints.py
index 149b7f3..a5b6472 100755
--- a/sim/resources/gpr/joints.py
+++ b/sim/resources/gpr/joints.py
@@ -63,35 +63,15 @@ class RightLeg(Node):
ankle_pitch = "R_ankle_y"
-class LeftArm(Node):
- shoulder_pitch = "L_shoulder_y"
- shoulder_roll = "L_shoulder_z"
- shoulder_yaw = "L_shoulder_x"
- elbow_pitch = "L_elbow_x"
-
-
-class RightArm(Node):
- shoulder_pitch = "R_shoulder_y"
- shoulder_roll = "R_shoulder_z"
- shoulder_yaw = "R_shoulder_x"
- elbow_pitch = "R_elbow_x"
-
-
class Legs(Node):
left = LeftLeg()
right = RightLeg()
-class Arms(Node):
- left = LeftArm()
- right = RightArm()
-
-
class Robot(Node):
legs = Legs()
- # arms = Arms()
- height = 1.06
+ height = 1.05
rotation = [0, 0, 0, 1]
@classmethod
@@ -178,10 +158,10 @@ def damping(cls) -> Dict[str, float]:
@classmethod
def effort(cls) -> Dict[str, float]:
return {
- "hip_y": 120,
- "hip_x": 60,
- "hip_z": 60,
- "knee": 120,
+ "hip_y": 40,
+ "hip_x": 20,
+ "hip_z": 20,
+ "knee": 40,
"ankle_y": 17,
}
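
These per-joint efforts are what sim/sim2sim.py expands into the torque clamp: the single-leg list is duplicated for both legs and scaled by the exported tau_factor (which play.py now takes from safety.torque_limit, set to 1.0 in gpr_config.py above). A small sketch of that computation with the updated values; the sample torques are illustrative only:

import numpy as np

# Updated effort limits for one leg, as defined in Robot.effort() above.
effort = {"hip_y": 40.0, "hip_x": 20.0, "hip_z": 20.0, "knee": 40.0, "ankle_y": 17.0}
tau_factor = 1.0  # assumption: the safety.torque_limit exported with the policy

robot_effort = list(effort.values())
tau_limit = np.array(robot_effort + robot_effort) * tau_factor  # left leg + right leg

# PD torques are clipped to this limit before being written to data.ctrl.
tau = np.clip(np.array([55.0, -30.0, 10.0, 45.0, -20.0] * 2), -tau_limit, tau_limit)
print(tau_limit)
print(tau)  # hip_y/knee entries saturate at +/-40 N*m, ankle at +/-17 N*m
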
diff --git a/sim/resources/gpr/robot_fixed.urdf b/sim/resources/gpr/robot_fixed.urdf
index 1ff7bb0..c1e2aef 100644
--- a/sim/resources/gpr/robot_fixed.urdf
+++ b/sim/resources/gpr/robot_fixed.urdf
@@ -95,7 +95,7 @@
-
+
@@ -124,7 +124,7 @@
-
+
@@ -211,7 +211,7 @@
-
+
@@ -240,7 +240,7 @@
-
+
@@ -327,7 +327,7 @@
-
+
@@ -356,7 +356,7 @@
-
+
@@ -443,7 +443,7 @@
-
+
@@ -472,7 +472,7 @@
-
+
diff --git a/sim/resources/gpr/robot_fixed.xml b/sim/resources/gpr/robot_fixed.xml
index 5733094..9349bdd 100644
--- a/sim/resources/gpr/robot_fixed.xml
+++ b/sim/resources/gpr/robot_fixed.xml
@@ -188,6 +188,6 @@
-
+
\ No newline at end of file
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index b512952..82ce113 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -19,21 +19,9 @@
import torch
from kinfer.export.pytorch import export_to_onnx
from kinfer.inference.python import ONNXModel
-from model_export import ActorCfg, get_actor_policy
from scipy.spatial.transform import Rotation as R
from tqdm import tqdm
-from sim.h5_logger import HDF5Logger
-
-
-@dataclass
-class Sim2simCfg:
- sim_duration: float = 60.0
- dt: float = 0.001
- decimation: int = 10
- tau_factor: float = 3
- cycle_time: float = 0.25
-
def handle_keyboard_input() -> None:
global x_vel_cmd, y_vel_cmd, yaw_vel_cmd
@@ -107,11 +95,11 @@ def pd_control(
def run_mujoco(
embodiment: str,
policy: ort.InferenceSession,
- cfg: Sim2simCfg,
model_info: Dict[str, Union[float, List[float], str]],
keyboard_use: bool = False,
log_h5: bool = False,
render: bool = True,
+ sim_duration: float = 60.0,
h5_out_dir: str = "sim/resources",
) -> None:
"""
@@ -120,15 +108,12 @@ def run_mujoco(
Args:
policy: The policy used for controlling the simulation.
- cfg: The configuration object containing simulation settings.
+ model_info: The model information dictionary.
-
- Returns:
- None
"""
model_dir = os.environ.get("MODEL_DIR", "sim/resources")
mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed.xml"
model = mujoco.MjModel.from_xml_path(mujoco_model_path)
- model.opt.timestep = cfg.dt
+ model.opt.timestep = model_info["sim_dt"]
data = mujoco.MjData(model)
assert isinstance(model_info["num_actions"], int)
@@ -137,7 +122,7 @@ def run_mujoco(
assert isinstance(model_info["robot_stiffness"], list)
assert isinstance(model_info["robot_damping"], list)
- tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * cfg.tau_factor
+ tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * model_info["tau_factor"]
kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"]))
kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"]))
@@ -180,7 +165,9 @@ def run_mujoco(
}
if log_h5:
- stop_state_log = int(cfg.sim_duration / cfg.dt) / cfg.decimation
+ from sim.h5_logger import HDF5Logger
+
+ stop_state_log = int(sim_duration / model_info["sim_dt"] / model_info["sim_decimation"])
logger = HDF5Logger(
data_name=embodiment,
num_actions=model_info["num_actions"],
@@ -194,7 +181,7 @@ def run_mujoco(
total_speed = 0.0
step_count = 0
- for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."):
+ for _ in tqdm(range(int(sim_duration / model_info["sim_dt"])), desc="Simulating..."):
if keyboard_use:
handle_keyboard_input()
@@ -219,7 +206,7 @@ def run_mujoco(
step_count += 1
# 1000hz -> 50hz
- if count_lowlevel % cfg.decimation == 0:
+ if count_lowlevel % model_info["sim_decimation"] == 0:
# Convert sim coordinates to policy coordinates
cur_pos_obs = q - default
cur_vel_obs = dq
@@ -228,7 +215,7 @@ def run_mujoco(
input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32)
input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32)
- input_data["t.1"] = np.array([count_lowlevel * cfg.dt], dtype=np.float32)
+ input_data["t.1"] = np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32)
input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32)
input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32)
@@ -250,11 +237,11 @@ def run_mujoco(
if log_h5:
logger.log_data(
{
- "t": np.array([count_lowlevel * cfg.dt], dtype=np.float32),
+ "t": np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32),
"2D_command": np.array(
[
- np.sin(2 * math.pi * count_lowlevel * cfg.dt / cfg.cycle_time),
- np.cos(2 * math.pi * count_lowlevel * cfg.dt / cfg.cycle_time),
+ np.sin(2 * math.pi * count_lowlevel * model_info["sim_dt"] / model_info["cycle_time"]),
+ np.cos(2 * math.pi * count_lowlevel * model_info["sim_dt"] / model_info["cycle_time"]),
],
dtype=np.float32,
),
@@ -317,54 +304,25 @@ def run_mujoco(
else:
x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.2, 0.0, 0.0
- policy_cfg = ActorCfg(embodiment=args.embodiment)
- if args.embodiment == "gpr":
- policy_cfg.cycle_time = 0.25
- cfg = Sim2simCfg(
- sim_duration=10.0,
- dt=0.001,
- decimation=10,
- tau_factor=4.0,
- cycle_time=policy_cfg.cycle_time,
- )
- elif args.embodiment == "zeroth":
- policy_cfg.cycle_time = 0.2
- cfg = Sim2simCfg(
- sim_duration=10.0,
- dt=0.001,
- decimation=10,
- tau_factor=2,
- cycle_time=policy_cfg.cycle_time,
- )
-
- if args.load_model.endswith(".kinfer"):
- policy = ONNXModel(args.load_model)
- else:
- actor_model, sim2sim_info, input_tensors = get_actor_policy(args.load_model, policy_cfg)
-
- # Merge policy_cfg and sim2sim_info into a single config object
- export_config = {**vars(policy_cfg), **sim2sim_info}
- print(export_config)
- export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_test.onnx")
- policy = ONNXModel("kinfer_test.onnx")
-
+ policy = ONNXModel(args.load_model)
metadata = policy.get_metadata()
-
model_info = {
"num_actions": metadata["num_actions"],
"num_observations": metadata["num_observations"],
"robot_effort": metadata["robot_effort"],
"robot_stiffness": metadata["robot_stiffness"],
"robot_damping": metadata["robot_damping"],
+ "sim_dt": metadata["sim_dt"],
+ "sim_decimation": metadata["sim_decimation"],
+ "tau_factor": metadata["tau_factor"],
}
run_mujoco(
- args.embodiment,
- policy,
- cfg,
- model_info,
- args.keyboard_use,
- args.log_h5,
- args.render,
- args.h5_out_dir,
+ embodiment=args.embodiment,
+ policy=policy,
+ model_info=model_info,
+ keyboard_use=args.keyboard_use,
+ log_h5=args.log_h5,
+ render=args.render,
+ h5_out_dir=args.h5_out_dir,
)
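
With Sim2simCfg and the in-script export path gone, sim2sim.py now expects a model whose metadata already carries sim_dt, sim_decimation, tau_factor, and cycle_time, e.g. the checked-in examples/gpr_walking.kinfer or a policy exported via play.py. A typical flow, assuming the GPR task is registered under the name gpr:

    python sim/play.py --task gpr --export_onnx
    python sim/sim2sim.py --load_model examples/gpr_walking.kinfer --embodiment gpr
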
diff --git a/sim/sim2sim_old.py b/sim/sim2sim_old.py
deleted file mode 100755
index b887e3f..0000000
--- a/sim/sim2sim_old.py
+++ /dev/null
@@ -1,345 +0,0 @@
-""" Sim2sim script for running a policy in MuJoCo.
-
-Run:
- python sim/sim2sim.py --load_model examples/standing_pro.pt --embodiment gpr
- python sim/sim2sim.py --load_model examples/standing_micro.pt --embodiment zeroth
-"""
-
-import argparse
-import math
-import os
-from copy import deepcopy
-from dataclasses import dataclass
-from typing import Dict, List, Tuple, Union
-
-import mujoco
-import mujoco_viewer
-import numpy as np
-import onnxruntime as ort
-import pygame
-from scipy.spatial.transform import Rotation as R
-from tqdm import tqdm
-
-from sim.model_export import ActorCfg, convert_model_to_onnx
-
-
-@dataclass
-class Sim2simCfg:
- sim_duration: float = 60.0
- dt: float = 0.001
- decimation: int = 10
- tau_factor: float = 3
- cycle_time: float = 0.4
-
-
-def handle_keyboard_input() -> None:
- global x_vel_cmd, y_vel_cmd, yaw_vel_cmd
-
- keys = pygame.key.get_pressed()
-
- # Update movement commands based on arrow keys
- if keys[pygame.K_UP]:
- x_vel_cmd += 0.0005
- if keys[pygame.K_DOWN]:
- x_vel_cmd -= 0.0005
- if keys[pygame.K_LEFT]:
- y_vel_cmd += 0.0005
- if keys[pygame.K_RIGHT]:
- y_vel_cmd -= 0.0005
-
- # Yaw control
- if keys[pygame.K_a]:
- yaw_vel_cmd += 0.001
- if keys[pygame.K_z]:
- yaw_vel_cmd -= 0.001
-
-
-def quaternion_to_euler_array(quat: np.ndarray) -> np.ndarray:
- # Ensure quaternion is in the correct format [x, y, z, w]
- x, y, z, w = quat
-
- # Roll (x-axis rotation)
- t0 = +2.0 * (w * x + y * z)
- t1 = +1.0 - 2.0 * (x * x + y * y)
- roll_x = np.arctan2(t0, t1)
-
- # Pitch (y-axis rotation)
- t2 = +2.0 * (w * y - z * x)
- t2 = np.clip(t2, -1.0, 1.0)
- pitch_y = np.arcsin(t2)
-
- # Yaw (z-axis rotation)
- t3 = +2.0 * (w * z + x * y)
- t4 = +1.0 - 2.0 * (y * y + z * z)
- yaw_z = np.arctan2(t3, t4)
-
- # Returns roll, pitch, yaw in a NumPy array in radians
- return np.array([roll_x, pitch_y, yaw_z])
-
-
-def get_obs(data: mujoco.MjData) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
- """Extracts an observation from the mujoco data structure."""
- q = data.qpos.astype(np.double)
- dq = data.qvel.astype(np.double)
- quat = data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double)
- r = R.from_quat(quat)
- v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame
- omega = data.sensor("angular-velocity").data.astype(np.double)
- gvec = r.apply(np.array([0.0, 0.0, -1.0]), inverse=True).astype(np.double)
- return (q, dq, quat, v, omega, gvec)
-
-
-def pd_control(
- target_q: np.ndarray,
- q: np.ndarray,
- kp: np.ndarray,
- dq: np.ndarray,
- kd: np.ndarray,
- default: np.ndarray,
-) -> np.ndarray:
- """Calculates torques from position commands."""
- return kp * (target_q + default - q) - kd * dq
-
-
-def run_mujoco(
- embodiment: str,
- policy: ort.InferenceSession,
- cfg: Sim2simCfg,
- model_info: Dict[str, Union[float, List[float], str]],
- keyboard_use: bool = False,
- log_h5: bool = False,
- render: bool = True,
- h5_out_dir: str = "sim/resources",
-) -> None:
- """Run the Mujoco simulation using the provided policy and configuration.
-
- Args:
- policy: The policy used for controlling the simulation.
- cfg: The configuration object containing simulation settings.
- model_info: The model information dictionary.
- keyboard_use: Whether to use keyboard input.
- log_h5: Whether to log data to HDF5.
- render: Whether to render the simulation.
- h5_out_dir: The directory to save HDF5 files.
-
- Returns:
- None
- """
- model_dir = os.environ.get("MODEL_DIR", "sim/resources")
- mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed.xml"
-
- model = mujoco.MjModel.from_xml_path(mujoco_model_path)
- model.opt.timestep = cfg.dt
- data = mujoco.MjData(model)
-
- assert isinstance(model_info["num_actions"], int)
- assert isinstance(model_info["num_observations"], int)
- assert isinstance(model_info["robot_effort"], list)
- assert isinstance(model_info["robot_stiffness"], list)
- assert isinstance(model_info["robot_damping"], list)
-
- tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * cfg.tau_factor
- kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"]))
- kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"]))
-
- try:
- data.qpos = model.keyframe("default").qpos
- default = deepcopy(model.keyframe("default").qpos)[-model_info["num_actions"] :]
- print("Default position:", default)
- except:
- print("No default position found, using zero initialization")
- default = np.zeros(model_info["num_actions"]) # 3 for pos, 4 for quat, cfg.num_actions for joints
- default += np.random.uniform(-0.03, 0.03, size=default.shape)
- print("Default position:", default)
- mujoco.mj_step(model, data)
- for ii in range(len(data.ctrl) + 1):
- print(data.joint(ii).id, data.joint(ii).name)
-
- data.qvel = np.zeros_like(data.qvel)
- data.qacc = np.zeros_like(data.qacc)
-
- if render:
- viewer = mujoco_viewer.MujocoViewer(model, data)
-
- target_q = np.zeros((model_info["num_actions"]), dtype=np.double)
- prev_actions = np.zeros((model_info["num_actions"]), dtype=np.double)
- hist_obs = np.zeros((model_info["num_observations"]), dtype=np.double)
-
- count_lowlevel = 0
-
- input_data = {
- "x_vel.1": np.zeros(1).astype(np.float32),
- "y_vel.1": np.zeros(1).astype(np.float32),
- "rot.1": np.zeros(1).astype(np.float32),
- "t.1": np.zeros(1).astype(np.float32),
- "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32),
- "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32),
- "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32),
- "imu_ang_vel.1": np.zeros(3).astype(np.float32),
- "imu_euler_xyz.1": np.zeros(3).astype(np.float32),
- "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32),
- }
-
- # Initialize variables for tracking upright steps and average speed
- upright_steps = 0
- total_speed = 0.0
- step_count = 0
-
- for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."):
- if keyboard_use:
- handle_keyboard_input()
-
- # Obtain an observation
- q, dq, quat, v, omega, gvec = get_obs(data)
- q = q[-model_info["num_actions"] :]
- dq = dq[-model_info["num_actions"] :]
-
- eu_ang = quaternion_to_euler_array(quat)
- eu_ang[eu_ang > math.pi] -= 2 * math.pi
-
- # Check if the robot is upright (roll and pitch within ±30 degrees)
- if abs(eu_ang[0]) > math.radians(30) or abs(eu_ang[1]) > math.radians(30):
- print("Robot tilted heavily, ending simulation.")
- break
- else:
- upright_steps += 1 # Increment upright steps
-
- # Calculate speed and accumulate for average speed calculation
- speed = np.linalg.norm(v[:2]) # Speed in the x-y plane
- total_speed += speed
- step_count += 1
-
- # 1000hz -> Nhz
- if count_lowlevel % cfg.decimation == 0:
- # Convert sim coordinates to policy coordinates
- cur_pos_obs = q - default
- cur_vel_obs = dq
-
- input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32)
- input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32)
- input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32)
-
- input_data["t.1"] = np.array([count_lowlevel * cfg.dt], dtype=np.float32)
-
- input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32)
- input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32)
-
- input_data["prev_actions.1"] = prev_actions.astype(np.float32)
-
- input_data["imu_ang_vel.1"] = omega.astype(np.float32)
- input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32)
-
- input_data["buffer.1"] = hist_obs.astype(np.float32)
-
- positions, curr_actions, hist_obs = policy.run(None, input_data)
- target_q = positions
- prev_actions = curr_actions
-
- # Generate PD control
- tau = pd_control(target_q, q, kps, dq, kds, default) # Calc torques
- tau = np.clip(tau, -tau_limit, tau_limit) # Clamp torques
-
- data.ctrl = tau
- mujoco.mj_step(model, data)
-
- if render:
- viewer.render()
- count_lowlevel += 1
-
- if render:
- viewer.close()
-
- # Calculate average speed
- if step_count > 0:
- average_speed = total_speed / step_count
- else:
- average_speed = 0.0
-
- # Save or print the statistics at the end of the episode
- print(f"Number of upright steps: {upright_steps}")
- print(f"Average speed: {average_speed:.4f} m/s")
-
-
-def parse_modelmeta(
- modelmeta: List[Tuple[str, str]],
- verbose: bool = False,
-) -> Dict[str, Union[float, List[float], str]]:
- parsed_meta: Dict[str, Union[float, List[float], str]] = {}
- for key, value in modelmeta:
- if value.startswith("[") and value.endswith("]"):
- parsed_meta[key] = list(map(float, value.strip("[]").split(",")))
- else:
- try:
- parsed_meta[key] = float(value)
- try:
- if int(value) == parsed_meta[key]:
- parsed_meta[key] = int(value)
- except ValueError:
- pass
- except ValueError:
- print(f"Failed to convert {value} to float")
- parsed_meta[key] = value
- if verbose:
- for key, value in parsed_meta.items():
- print(f"{key}: {value}")
- return parsed_meta
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Deployment script.")
- parser.add_argument("--embodiment", type=str, required=True, help="Embodiment name.")
- parser.add_argument("--load_model", type=str, required=True, help="Path to run to load from.")
- parser.add_argument("--keyboard_use", action="store_true", help="keyboard_use")
- parser.add_argument("--log_h5", action="store_true", help="log_h5")
- parser.add_argument("--h5_out_dir", type=str, default="sim/resources", help="Directory to save HDF5 files")
- parser.add_argument("--no_render", action="store_false", dest="render", help="Disable rendering")
- parser.set_defaults(render=True)
- args = parser.parse_args()
-
- if args.keyboard_use:
- x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.0, 0.0, 0.0
- pygame.init()
- pygame.display.set_caption("Simulation Control")
- else:
- x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.4, 0.0, 0.0
-
- policy_cfg = ActorCfg(embodiment=args.embodiment)
- if args.embodiment == "gpr":
- policy_cfg.cycle_time = 0.64
- cfg = Sim2simCfg(
- sim_duration=10.0,
- dt=0.001,
- decimation=10,
- tau_factor=4.0,
- cycle_time=policy_cfg.cycle_time,
- )
- elif args.embodiment == "zeroth":
- policy_cfg.cycle_time = 0.2
- cfg = Sim2simCfg(
- sim_duration=10.0,
- dt=0.001,
- decimation=10,
- tau_factor=1,
- cycle_time=policy_cfg.cycle_time,
- )
-
- if args.load_model.endswith(".onnx"):
- policy = ort.InferenceSession(args.load_model)
- else:
- policy = convert_model_to_onnx(args.load_model, policy_cfg, save_path="policy.onnx")
-
- model_info = parse_modelmeta(
- policy.get_modelmeta().custom_metadata_map.items(),
- verbose=True,
- )
-
- run_mujoco(
- args.embodiment,
- policy,
- cfg,
- model_info,
- args.keyboard_use,
- args.log_h5,
- args.render,
- args.h5_out_dir,
- )