diff --git a/examples/gpr_walking.kinfer b/examples/gpr_walking.kinfer
index 19d761c..84fa052 100644
Binary files a/examples/gpr_walking.kinfer and b/examples/gpr_walking.kinfer differ
diff --git a/examples/walking_policy.pt b/examples/walking_policy.pt
index f483c38..eb25c85 100644
Binary files a/examples/walking_policy.pt and b/examples/walking_policy.pt differ
diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py
index a3be536..8291e70 100755
--- a/sim/envs/__init__.py
+++ b/sim/envs/__init__.py
@@ -12,8 +12,8 @@
 from sim.envs.humanoids.dora_env import DoraFreeEnv
 from sim.envs.humanoids.g1_config import G1Cfg, G1CfgPPO
 from sim.envs.humanoids.g1_env import G1FreeEnv
-from sim.envs.humanoids.gpr_env import GprFreeEnv
 from sim.envs.humanoids.gpr_config import GprCfg, GprCfgPPO, GprStandingCfg
+from sim.envs.humanoids.gpr_env import GprFreeEnv
 from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO
 from sim.envs.humanoids.h1_env import H1FreeEnv
 from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py
index 42bdbe2..91d66d7 100644
--- a/sim/envs/humanoids/gpr_config.py
+++ b/sim/envs/humanoids/gpr_config.py
@@ -30,7 +30,7 @@ class safety(LeggedRobotCfg.safety):
         # safety factors
         pos_limit = 1.0
         vel_limit = 1.0
-        torque_limit = 0.85
+        torque_limit = 1.0
 
     class asset(LeggedRobotCfg.asset):
         name = "gpr"
@@ -123,7 +123,7 @@ class domain_rand(LeggedRobotCfg.domain_rand):
 
         randomize_base_mass = True
         added_mass_range = [-3.0, 3.0]
-        push_robots = False
+        push_robots = True
         push_interval_s = 4
         max_push_vel_xy = 0.2
         max_push_ang_vel = 0.4
@@ -150,14 +150,14 @@ class rewards:
         max_dist = 0.5
         # put some settings here for LLM parameter tuning
         target_joint_pos_scale = 0.17  # rad
-        target_feet_height = 0.06  # m
+        target_feet_height = 0.05  # m
 
-        cycle_time = 0.64  # sec
+        cycle_time = 0.25  # sec
         # if true negative total rewards are clipped at zero (avoids early termination problems)
         only_positive_rewards = True
         # tracking reward = exp(error*sigma)
         tracking_sigma = 5.0
-        max_contact_force = 700  # forces above this value are penalized
+        max_contact_force = 400  # forces above this value are penalized
 
         class scales:
             # reference motion tracking
@@ -213,7 +213,7 @@ class GprStandingCfg(GprCfg):
 
     class rewards:
         # quite important to keep it right
-        base_height_target = 1.06
+        base_height_target = Robot.height
         min_dist = 0.2
         max_dist = 0.5
         # put some settings here for LLM parameter tuning
diff --git a/sim/model_export.py b/sim/model_export.py
index 09c190a..3777124 100644
--- a/sim/model_export.py
+++ b/sim/model_export.py
@@ -15,17 +15,20 @@
 
 @dataclass
 class ActorCfg:
-    embodiment: str = "gpr"
-    cycle_time: float = 0.4  # Cycle time for sinusoidal command input
-    action_scale: float = 0.25  # Scale for actions
-    lin_vel_scale: float = 2.0  # Scale for linear velocity
-    ang_vel_scale: float = 1.0  # Scale for angular velocity
-    quat_scale: float = 1.0  # Scale for quaternion
-    dof_pos_scale: float = 1.0  # Scale for joint positions
-    dof_vel_scale: float = 0.05  # Scale for joint velocities
-    frame_stack: int = 15  # Number of frames to stack for the policy input
-    clip_observations: float = 18.0  # Clip observations to this value
-    clip_actions: float = 18.0  # Clip actions to this value
+    embodiment: str
+    cycle_time: float  # Cycle time for sinusoidal command input
+    action_scale: float  # Scale for actions
+    lin_vel_scale: float  # Scale for linear velocity
+    ang_vel_scale: float  # Scale for angular velocity
+    quat_scale: float  # Scale for quaternion
+    dof_pos_scale: float  # Scale for joint positions
+    dof_vel_scale: float  # Scale for joint velocities
+    frame_stack: int  # Number of frames to stack for the policy input
+    clip_observations: float  # Clip observations to this value
+    clip_actions: float  # Clip actions to this value
+    sim_dt: float  # Simulation time step
+    sim_decimation: int  # Simulation decimation
+    tau_factor: float  # Torque limit factor
 
 
 class ActorCritic(nn.Module):
@@ -242,6 +245,8 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
     num_actions = a_model.num_actions
     num_observations = a_model.num_observations
 
+    default_standing = list(a_model.robot.default_standing().values())
+
     return (
         a_model,
         {
@@ -250,6 +255,7 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
             "robot_damping": robot_damping,
             "num_actions": num_actions,
             "num_observations": num_observations,
+            "default_standing": default_standing,
         },
         input_tensors,
     )
@@ -338,7 +344,3 @@ def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[st
 
     buffer2.seek(0)
     return ort.InferenceSession(buffer2.read())
-
-
-if __name__ == "__main__":
-    convert_model_to_onnx("model_3000.pt", ActorCfg(), "policy.onnx")
diff --git a/sim/play.py b/sim/play.py
index 9abe511..04465dd 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -21,15 +21,11 @@
 from kinfer.export.pytorch import export_to_onnx
 from tqdm import tqdm
 
-# Local imports third
 from sim.env import run_dir  # noqa: E402
 from sim.envs import task_registry  # noqa: E402
-from sim.h5_logger import HDF5Logger
-from sim.model_export import (  # noqa: E402
-    ActorCfg,
-    convert_model_to_onnx,
-    get_actor_policy,
-)
+
+# Local imports third
+from sim.model_export import ActorCfg, get_actor_policy
 from sim.utils.helpers import get_args  # noqa: E402
 from sim.utils.logger import Logger  # noqa: E402
 
@@ -92,15 +88,22 @@ def play(args: argparse.Namespace) -> None:
     if args.export_onnx:
         path = ppo_runner.load_path
         embodiment = ppo_runner.cfg["experiment_name"].lower()
-        policy_cfg = ActorCfg(embodiment=embodiment)
-
-        if embodiment == "stompypro":
-            policy_cfg.cycle_time = 0.4
-        elif embodiment == "stompymicro":
-            policy_cfg.cycle_time = 0.2
-        else:
-            print(f"Specific policy cfg for {embodiment} not implemented")
-
+        policy_cfg = ActorCfg(
+            embodiment=embodiment,
+            cycle_time=env_cfg.rewards.cycle_time,
+            sim_dt=env_cfg.sim.dt,
+            sim_decimation=env_cfg.control.decimation,
+            tau_factor=env_cfg.safety.torque_limit,
+            action_scale=env_cfg.control.action_scale,
+            lin_vel_scale=env_cfg.normalization.obs_scales.lin_vel,
+            ang_vel_scale=env_cfg.normalization.obs_scales.ang_vel,
+            quat_scale=env_cfg.normalization.obs_scales.quat,
+            dof_pos_scale=env_cfg.normalization.obs_scales.dof_pos,
+            dof_vel_scale=env_cfg.normalization.obs_scales.dof_vel,
+            frame_stack=env_cfg.env.frame_stack,
+            clip_observations=env_cfg.normalization.clip_observations,
+            clip_actions=env_cfg.normalization.clip_actions,
+        )
         actor_model, sim2sim_info, input_tensors = get_actor_policy(path, policy_cfg)
 
         # Merge policy_cfg and sim2sim_info into a single config object
@@ -117,6 +120,8 @@ def play(args: argparse.Namespace) -> None:
 
     now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
     if args.log_h5:
+        from sim.h5_logger import HDF5Logger
+
         # Create directory for HDF5 files
        h5_dir = run_dir() / "h5_out" / args.task / now
        h5_dir.mkdir(parents=True, exist_ok=True)
diff --git a/sim/play_old.py b/sim/play_old.py
deleted file mode 100755
index 6747abb..0000000
--- a/sim/play_old.py
+++ /dev/null
@@ -1,232 +0,0 @@ -# mypy: ignore-errors -"""Play a trained policy in the environment. - -Run: - python sim/play.py --task g1 --log_h5 - python sim/play.py --task stompymini --log_h5 -""" - -import argparse -import copy -import logging -import os -from datetime import datetime -from typing import Any, Union - -import cv2 -import h5py -import numpy as np -from isaacgym import gymapi -from tqdm import tqdm - -logger = logging.getLogger(__name__) - -from sim.env import run_dir # noqa: E402 -from sim.envs import task_registry # noqa: E402 -from sim.model_export import convert_model_to_onnx # noqa: E402 -from sim.utils.helpers import get_args # noqa: E402 -from sim.utils.logger import Logger # noqa: E402 - -import torch # isort: skip - - -def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None: - os.makedirs(path, exist_ok=True) - path = os.path.join(path, "policy_1.pt") - model = copy.deepcopy(actor_critic.actor).to("cpu") - traced_script_module = torch.jit.script(model) - traced_script_module.save(path) - - -def play(args: argparse.Namespace) -> None: - logger.info("Configuring environment and training settings...") - env_cfg, train_cfg = task_registry.get_cfgs(name=args.task) - - # override some parameters for testing - env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1) - env_cfg.sim.max_gpu_contact_pairs = 2**10 - if args.trimesh: - env_cfg.terrain.mesh_type = "trimesh" - else: - env_cfg.terrain.mesh_type = "plane" - env_cfg.terrain.num_rows = 5 - env_cfg.terrain.num_cols = 5 - env_cfg.terrain.curriculum = False - env_cfg.terrain.max_init_terrain_level = 5 - env_cfg.noise.add_noise = True - env_cfg.domain_rand.push_robots = False - env_cfg.domain_rand.joint_angle_noise = 0.01 - env_cfg.noise.curriculum = False - env_cfg.noise.noise_level = 0.5 - - train_cfg.seed = 123145 - logger.info("train_cfg.runner_class_name: %s", train_cfg.runner_class_name) - - # prepare environment - env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg) - env.set_camera(env_cfg.viewer.pos, env_cfg.viewer.lookat) - - obs = env.get_observations() - - # load policy - train_cfg.runner.resume = True - ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg) - policy = ppo_runner.get_inference_policy(device=env.device) - - # Prepare for logging - env_logger = Logger(env.dt) - robot_index = 0 - joint_index = 1 - stop_state_log = 1000 - now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - if args.log_h5: - h5_file = h5py.File(f"data{now}.h5", "w") - - # Create dataset for actions - max_timesteps = stop_state_log - num_dof = env.num_dof - dset_actions = h5_file.create_dataset("actions", (max_timesteps, num_dof), dtype=np.float32) - - # Create dataset of observations - buf_len = len(env.obs_history) # length of observation buffer - dset_2D_command = h5_file.create_dataset( - "observations/2D_command", (max_timesteps, buf_len, 2), dtype=np.float32 - ) # sin and cos commands - dset_3D_command = h5_file.create_dataset( - "observations/3D_command", (max_timesteps, buf_len, 3), dtype=np.float32 - ) # x, y, yaw commands - dset_q = h5_file.create_dataset( - "observations/q", (max_timesteps, buf_len, num_dof), dtype=np.float32 - ) # joint positions - dset_dq = h5_file.create_dataset( - "observations/dq", (max_timesteps, buf_len, num_dof), dtype=np.float32 - ) # joint velocities - dset_obs_actions = h5_file.create_dataset( - "observations/actions", (max_timesteps, buf_len, num_dof), dtype=np.float32 - ) # actions - dset_ang_vel = 
h5_file.create_dataset( - "observations/ang_vel", (max_timesteps, buf_len, 3), dtype=np.float32 - ) # root angular velocity - dset_euler = h5_file.create_dataset( - "observations/euler", (max_timesteps, buf_len, 3), dtype=np.float32 - ) # root orientation - - if args.render: - camera_properties = gymapi.CameraProperties() - camera_properties.width = 1920 - camera_properties.height = 1080 - h1 = env.gym.create_camera_sensor(env.envs[0], camera_properties) - camera_offset = gymapi.Vec3(3, -3, 1) - camera_rotation = gymapi.Quat.from_axis_angle(gymapi.Vec3(-0.3, 0.2, 1), np.deg2rad(135)) - actor_handle = env.gym.get_actor_handle(env.envs[0], 0) - body_handle = env.gym.get_actor_rigid_body_handle(env.envs[0], actor_handle, 0) - logger.info("body_handle: %s", body_handle) - logger.info("actor_handle: %s", actor_handle) - env.gym.attach_camera_to_body( - h1, env.envs[0], body_handle, gymapi.Transform(camera_offset, camera_rotation), gymapi.FOLLOW_POSITION - ) - - fourcc = cv2.VideoWriter_fourcc(*"MJPG") # type: ignore[attr-defined] - - # Creates a directory to store videos. - video_dir = run_dir() / "videos" - experiment_dir = video_dir / train_cfg.runner.experiment_name - experiment_dir.mkdir(parents=True, exist_ok=True) - - dir = os.path.join(experiment_dir, now + str(args.run_name) + ".mp4") - if not os.path.exists(video_dir): - os.mkdir(video_dir) - if not os.path.exists(experiment_dir): - os.mkdir(experiment_dir) - video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080)) - - for t in tqdm(range(stop_state_log)): - actions = policy(obs.detach()) - if args.log_h5: - dset_actions[t] = actions.detach().numpy() - if args.fix_command: - env.commands[:, 0] = 0.5 - env.commands[:, 1] = 0.0 - env.commands[:, 2] = 0.0 - env.commands[:, 3] = 0.0 - obs, critic_obs, rews, dones, infos = env.step(actions.detach()) - print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}") - - if args.render: - env.gym.fetch_results(env.sim, True) - env.gym.step_graphics(env.sim) - env.gym.render_all_camera_sensors(env.sim) - img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR) - img = np.reshape(img, (1080, 1920, 4)) - img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR) - - video.write(img[..., :3]) - - # Log states - dof_pos_target = actions[robot_index, joint_index].item() * env.cfg.control.action_scale - dof_pos = env.dof_pos[robot_index, joint_index].item() - dof_vel = env.dof_vel[robot_index, joint_index].item() - dof_torque = env.torques[robot_index, joint_index].item() - command_x = env.commands[robot_index, 0].item() - command_y = env.commands[robot_index, 1].item() - command_yaw = env.commands[robot_index, 2].item() - base_vel_x = env.base_lin_vel[robot_index, 0].item() - base_vel_y = env.base_lin_vel[robot_index, 1].item() - base_vel_z = env.base_lin_vel[robot_index, 2].item() - base_vel_yaw = env.base_ang_vel[robot_index, 2].item() - contact_forces_z = env.contact_forces[robot_index, env.feet_indices, 2].cpu().numpy() - - if args.log_h5: - for i in range(buf_len): - cur_obs = env.obs_history[i].tolist()[0] - dset_2D_command[t, i] = cur_obs[0:2] # sin and cos commands - dset_3D_command[t, i] = cur_obs[2:5] # x, y, yaw commands - dset_q[t, i] = cur_obs[5 : 5 + num_dof] # joint positions - dset_dq[t, i] = cur_obs[5 + num_dof : 5 + 2 * num_dof] # joint velocities - dset_obs_actions[t, i] = cur_obs[5 + 2 * num_dof : 5 + 3 * num_dof] # actions - dset_ang_vel[t, i] = cur_obs[5 + 3 * num_dof : 8 + 3 * num_dof] # root angular velocity - dset_euler[t, i] = cur_obs[8 + 3 * 
num_dof : 11 + 3 * num_dof] # root orientation - - env_logger.log_states( - { - "dof_pos_target": dof_pos_target, - "dof_pos": dof_pos, - "dof_vel": dof_vel, - "dof_torque": dof_torque, - "command_x": command_x, - "command_y": command_y, - "command_yaw": command_yaw, - "base_vel_x": base_vel_x, - "base_vel_y": base_vel_y, - "base_vel_z": base_vel_z, - "base_vel_yaw": base_vel_yaw, - "contact_forces_z": contact_forces_z, - } - ) - if infos["episode"]: - num_episodes = env.reset_buf.sum().item() - if num_episodes > 0: - env_logger.log_rewards(infos["episode"], num_episodes) - - env_logger.print_rewards() - env_logger.plot_states() - - if args.render: - video.release() - - if args.log_h5: - print("Saving data to " + os.path.abspath(f"data{now}.h5")) - h5_file.close() - - -if __name__ == "__main__": - base_args = get_args() - parser = argparse.ArgumentParser(description="Extend base arguments with log_h5") - parser.add_argument("--log_h5", action="store_true", help="Enable HDF5 logging") - parser.add_argument("--render", action="store_true", help="Enable rendering", default=True) - parser.add_argument("--fix_command", action="store_true", help="Fix command", default=True) - parser.add_argument("--export_onnx", action="store_true", help="Export policy as ONNX", default=True) - parser.add_argument("--export_policy", action="store_true", help="Export policy as JIT", default=True) - args, unknown = parser.parse_known_args(namespace=base_args) - - play(args) diff --git a/sim/requirements-dev.txt b/sim/requirements-dev.txt index c55c16b..3633b81 100755 --- a/sim/requirements-dev.txt +++ b/sim/requirements-dev.txt @@ -9,7 +9,7 @@ mediapy==1.2.2 mujoco==2.3.6 mujoco_python_viewer==0.1.4 onnx==1.15.0 -onnxruntime +onnxruntime==1.15.0 pandas==1.4.4 Pillow>6.2.0 poselib==2.0.4 diff --git a/sim/requirements.txt b/sim/requirements.txt index ab18cb3..a4bf776 100755 --- a/sim/requirements.txt +++ b/sim/requirements.txt @@ -6,7 +6,7 @@ python-dotenv h5py gdown matplotlib==3.3.4 -numpy==1.20.0 +numpy==1.21.6 wandb tensorboard==2.14.0 onnxscript diff --git a/sim/resources/gpr/joints.py b/sim/resources/gpr/joints.py index 149b7f3..a5b6472 100755 --- a/sim/resources/gpr/joints.py +++ b/sim/resources/gpr/joints.py @@ -63,35 +63,15 @@ class RightLeg(Node): ankle_pitch = "R_ankle_y" -class LeftArm(Node): - shoulder_pitch = "L_shoulder_y" - shoulder_roll = "L_shoulder_z" - shoulder_yaw = "L_shoulder_x" - elbow_pitch = "L_elbow_x" - - -class RightArm(Node): - shoulder_pitch = "R_shoulder_y" - shoulder_roll = "R_shoulder_z" - shoulder_yaw = "R_shoulder_x" - elbow_pitch = "R_elbow_x" - - class Legs(Node): left = LeftLeg() right = RightLeg() -class Arms(Node): - left = LeftArm() - right = RightArm() - - class Robot(Node): legs = Legs() - # arms = Arms() - height = 1.06 + height = 1.05 rotation = [0, 0, 0, 1] @classmethod @@ -178,10 +158,10 @@ def damping(cls) -> Dict[str, float]: @classmethod def effort(cls) -> Dict[str, float]: return { - "hip_y": 120, - "hip_x": 60, - "hip_z": 60, - "knee": 120, + "hip_y": 40, + "hip_x": 20, + "hip_z": 20, + "knee": 40, "ankle_y": 17, } diff --git a/sim/resources/gpr/robot_fixed.urdf b/sim/resources/gpr/robot_fixed.urdf index 1ff7bb0..c1e2aef 100644 --- a/sim/resources/gpr/robot_fixed.urdf +++ b/sim/resources/gpr/robot_fixed.urdf @@ -95,7 +95,7 @@ - + @@ -124,7 +124,7 @@ - + @@ -211,7 +211,7 @@ - + @@ -240,7 +240,7 @@ - + @@ -327,7 +327,7 @@ - + @@ -356,7 +356,7 @@ - + @@ -443,7 +443,7 @@ - + @@ -472,7 +472,7 @@ - + diff --git a/sim/resources/gpr/robot_fixed.xml 
b/sim/resources/gpr/robot_fixed.xml index 5733094..9349bdd 100644 --- a/sim/resources/gpr/robot_fixed.xml +++ b/sim/resources/gpr/robot_fixed.xml @@ -188,6 +188,6 @@ - + \ No newline at end of file diff --git a/sim/sim2sim.py b/sim/sim2sim.py index b512952..82ce113 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -19,21 +19,9 @@ import torch from kinfer.export.pytorch import export_to_onnx from kinfer.inference.python import ONNXModel -from model_export import ActorCfg, get_actor_policy from scipy.spatial.transform import Rotation as R from tqdm import tqdm -from sim.h5_logger import HDF5Logger - - -@dataclass -class Sim2simCfg: - sim_duration: float = 60.0 - dt: float = 0.001 - decimation: int = 10 - tau_factor: float = 3 - cycle_time: float = 0.25 - def handle_keyboard_input() -> None: global x_vel_cmd, y_vel_cmd, yaw_vel_cmd @@ -107,11 +95,11 @@ def pd_control( def run_mujoco( embodiment: str, policy: ort.InferenceSession, - cfg: Sim2simCfg, model_info: Dict[str, Union[float, List[float], str]], keyboard_use: bool = False, log_h5: bool = False, render: bool = True, + sim_duration: float = 60.0, h5_out_dir: str = "sim/resources", ) -> None: """ @@ -120,15 +108,12 @@ def run_mujoco( Args: policy: The policy used for controlling the simulation. cfg: The configuration object containing simulation settings. - - Returns: - None """ model_dir = os.environ.get("MODEL_DIR", "sim/resources") mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed.xml" model = mujoco.MjModel.from_xml_path(mujoco_model_path) - model.opt.timestep = cfg.dt + model.opt.timestep = model_info["sim_dt"] data = mujoco.MjData(model) assert isinstance(model_info["num_actions"], int) @@ -137,7 +122,7 @@ def run_mujoco( assert isinstance(model_info["robot_stiffness"], list) assert isinstance(model_info["robot_damping"], list) - tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * cfg.tau_factor + tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * model_info["tau_factor"] kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"])) kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"])) @@ -180,7 +165,9 @@ def run_mujoco( } if log_h5: - stop_state_log = int(cfg.sim_duration / cfg.dt) / cfg.decimation + from sim.h5_logger import HDF5Logger + + stop_state_log = int(sim_duration / model_info["sim_dt"]) / model_info["sim_decimation"] logger = HDF5Logger( data_name=embodiment, num_actions=model_info["num_actions"], @@ -194,7 +181,7 @@ def run_mujoco( total_speed = 0.0 step_count = 0 - for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."): + for _ in tqdm(range(int(sim_duration / model_info["sim_dt"])), desc="Simulating..."): if keyboard_use: handle_keyboard_input() @@ -219,7 +206,7 @@ def run_mujoco( step_count += 1 # 1000hz -> 50hz - if count_lowlevel % cfg.decimation == 0: + if count_lowlevel % model_info["sim_decimation"] == 0: # Convert sim coordinates to policy coordinates cur_pos_obs = q - default cur_vel_obs = dq @@ -228,7 +215,7 @@ def run_mujoco( input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32) input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32) - input_data["t.1"] = np.array([count_lowlevel * cfg.dt], dtype=np.float32) + input_data["t.1"] = np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32) input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32) input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32) @@ -250,11 
+237,11 @@ def run_mujoco( if log_h5: logger.log_data( { - "t": np.array([count_lowlevel * cfg.dt], dtype=np.float32), + "t": np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32), "2D_command": np.array( [ - np.sin(2 * math.pi * count_lowlevel * cfg.dt / cfg.cycle_time), - np.cos(2 * math.pi * count_lowlevel * cfg.dt / cfg.cycle_time), + np.sin(2 * math.pi * count_lowlevel * model_info["sim_dt"] / model_info["cycle_time"]), + np.cos(2 * math.pi * count_lowlevel * model_info["sim_dt"] / model_info["cycle_time"]), ], dtype=np.float32, ), @@ -317,54 +304,25 @@ def run_mujoco( else: x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.2, 0.0, 0.0 - policy_cfg = ActorCfg(embodiment=args.embodiment) - if args.embodiment == "gpr": - policy_cfg.cycle_time = 0.25 - cfg = Sim2simCfg( - sim_duration=10.0, - dt=0.001, - decimation=10, - tau_factor=4.0, - cycle_time=policy_cfg.cycle_time, - ) - elif args.embodiment == "zeroth": - policy_cfg.cycle_time = 0.2 - cfg = Sim2simCfg( - sim_duration=10.0, - dt=0.001, - decimation=10, - tau_factor=2, - cycle_time=policy_cfg.cycle_time, - ) - - if args.load_model.endswith(".kinfer"): - policy = ONNXModel(args.load_model) - else: - actor_model, sim2sim_info, input_tensors = get_actor_policy(args.load_model, policy_cfg) - - # Merge policy_cfg and sim2sim_info into a single config object - export_config = {**vars(policy_cfg), **sim2sim_info} - print(export_config) - export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_test.onnx") - policy = ONNXModel("kinfer_test.onnx") - + policy = ONNXModel(args.load_model) metadata = policy.get_metadata() - model_info = { "num_actions": metadata["num_actions"], "num_observations": metadata["num_observations"], "robot_effort": metadata["robot_effort"], "robot_stiffness": metadata["robot_stiffness"], "robot_damping": metadata["robot_damping"], + "sim_dt": metadata["sim_dt"], + "sim_decimation": metadata["sim_decimation"], + "tau_factor": metadata["tau_factor"], } run_mujoco( - args.embodiment, - policy, - cfg, - model_info, - args.keyboard_use, - args.log_h5, - args.render, - args.h5_out_dir, + embodiment=args.embodiment, + policy=policy, + model_info=model_info, + keyboard_use=args.keyboard_use, + log_h5=args.log_h5, + render=args.render, + h5_out_dir=args.h5_out_dir, ) diff --git a/sim/sim2sim_old.py b/sim/sim2sim_old.py deleted file mode 100755 index b887e3f..0000000 --- a/sim/sim2sim_old.py +++ /dev/null @@ -1,345 +0,0 @@ -""" Sim2sim script for running a policy in MuJoCo. 
- -Run: - python sim/sim2sim.py --load_model examples/standing_pro.pt --embodiment gpr - python sim/sim2sim.py --load_model examples/standing_micro.pt --embodiment zeroth -""" - -import argparse -import math -import os -from copy import deepcopy -from dataclasses import dataclass -from typing import Dict, List, Tuple, Union - -import mujoco -import mujoco_viewer -import numpy as np -import onnxruntime as ort -import pygame -from scipy.spatial.transform import Rotation as R -from tqdm import tqdm - -from sim.model_export import ActorCfg, convert_model_to_onnx - - -@dataclass -class Sim2simCfg: - sim_duration: float = 60.0 - dt: float = 0.001 - decimation: int = 10 - tau_factor: float = 3 - cycle_time: float = 0.4 - - -def handle_keyboard_input() -> None: - global x_vel_cmd, y_vel_cmd, yaw_vel_cmd - - keys = pygame.key.get_pressed() - - # Update movement commands based on arrow keys - if keys[pygame.K_UP]: - x_vel_cmd += 0.0005 - if keys[pygame.K_DOWN]: - x_vel_cmd -= 0.0005 - if keys[pygame.K_LEFT]: - y_vel_cmd += 0.0005 - if keys[pygame.K_RIGHT]: - y_vel_cmd -= 0.0005 - - # Yaw control - if keys[pygame.K_a]: - yaw_vel_cmd += 0.001 - if keys[pygame.K_z]: - yaw_vel_cmd -= 0.001 - - -def quaternion_to_euler_array(quat: np.ndarray) -> np.ndarray: - # Ensure quaternion is in the correct format [x, y, z, w] - x, y, z, w = quat - - # Roll (x-axis rotation) - t0 = +2.0 * (w * x + y * z) - t1 = +1.0 - 2.0 * (x * x + y * y) - roll_x = np.arctan2(t0, t1) - - # Pitch (y-axis rotation) - t2 = +2.0 * (w * y - z * x) - t2 = np.clip(t2, -1.0, 1.0) - pitch_y = np.arcsin(t2) - - # Yaw (z-axis rotation) - t3 = +2.0 * (w * z + x * y) - t4 = +1.0 - 2.0 * (y * y + z * z) - yaw_z = np.arctan2(t3, t4) - - # Returns roll, pitch, yaw in a NumPy array in radians - return np.array([roll_x, pitch_y, yaw_z]) - - -def get_obs(data: mujoco.MjData) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """Extracts an observation from the mujoco data structure.""" - q = data.qpos.astype(np.double) - dq = data.qvel.astype(np.double) - quat = data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double) - r = R.from_quat(quat) - v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame - omega = data.sensor("angular-velocity").data.astype(np.double) - gvec = r.apply(np.array([0.0, 0.0, -1.0]), inverse=True).astype(np.double) - return (q, dq, quat, v, omega, gvec) - - -def pd_control( - target_q: np.ndarray, - q: np.ndarray, - kp: np.ndarray, - dq: np.ndarray, - kd: np.ndarray, - default: np.ndarray, -) -> np.ndarray: - """Calculates torques from position commands.""" - return kp * (target_q + default - q) - kd * dq - - -def run_mujoco( - embodiment: str, - policy: ort.InferenceSession, - cfg: Sim2simCfg, - model_info: Dict[str, Union[float, List[float], str]], - keyboard_use: bool = False, - log_h5: bool = False, - render: bool = True, - h5_out_dir: str = "sim/resources", -) -> None: - """Run the Mujoco simulation using the provided policy and configuration. - - Args: - policy: The policy used for controlling the simulation. - cfg: The configuration object containing simulation settings. - model_info: The model information dictionary. - keyboard_use: Whether to use keyboard input. - log_h5: Whether to log data to HDF5. - render: Whether to render the simulation. - h5_out_dir: The directory to save HDF5 files. 
- - Returns: - None - """ - model_dir = os.environ.get("MODEL_DIR", "sim/resources") - mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed.xml" - - model = mujoco.MjModel.from_xml_path(mujoco_model_path) - model.opt.timestep = cfg.dt - data = mujoco.MjData(model) - - assert isinstance(model_info["num_actions"], int) - assert isinstance(model_info["num_observations"], int) - assert isinstance(model_info["robot_effort"], list) - assert isinstance(model_info["robot_stiffness"], list) - assert isinstance(model_info["robot_damping"], list) - - tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * cfg.tau_factor - kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"])) - kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"])) - - try: - data.qpos = model.keyframe("default").qpos - default = deepcopy(model.keyframe("default").qpos)[-model_info["num_actions"] :] - print("Default position:", default) - except: - print("No default position found, using zero initialization") - default = np.zeros(model_info["num_actions"]) # 3 for pos, 4 for quat, cfg.num_actions for joints - default += np.random.uniform(-0.03, 0.03, size=default.shape) - print("Default position:", default) - mujoco.mj_step(model, data) - for ii in range(len(data.ctrl) + 1): - print(data.joint(ii).id, data.joint(ii).name) - - data.qvel = np.zeros_like(data.qvel) - data.qacc = np.zeros_like(data.qacc) - - if render: - viewer = mujoco_viewer.MujocoViewer(model, data) - - target_q = np.zeros((model_info["num_actions"]), dtype=np.double) - prev_actions = np.zeros((model_info["num_actions"]), dtype=np.double) - hist_obs = np.zeros((model_info["num_observations"]), dtype=np.double) - - count_lowlevel = 0 - - input_data = { - "x_vel.1": np.zeros(1).astype(np.float32), - "y_vel.1": np.zeros(1).astype(np.float32), - "rot.1": np.zeros(1).astype(np.float32), - "t.1": np.zeros(1).astype(np.float32), - "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32), - "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32), - "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32), - "imu_ang_vel.1": np.zeros(3).astype(np.float32), - "imu_euler_xyz.1": np.zeros(3).astype(np.float32), - "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32), - } - - # Initialize variables for tracking upright steps and average speed - upright_steps = 0 - total_speed = 0.0 - step_count = 0 - - for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."): - if keyboard_use: - handle_keyboard_input() - - # Obtain an observation - q, dq, quat, v, omega, gvec = get_obs(data) - q = q[-model_info["num_actions"] :] - dq = dq[-model_info["num_actions"] :] - - eu_ang = quaternion_to_euler_array(quat) - eu_ang[eu_ang > math.pi] -= 2 * math.pi - - # Check if the robot is upright (roll and pitch within ±30 degrees) - if abs(eu_ang[0]) > math.radians(30) or abs(eu_ang[1]) > math.radians(30): - print("Robot tilted heavily, ending simulation.") - break - else: - upright_steps += 1 # Increment upright steps - - # Calculate speed and accumulate for average speed calculation - speed = np.linalg.norm(v[:2]) # Speed in the x-y plane - total_speed += speed - step_count += 1 - - # 1000hz -> Nhz - if count_lowlevel % cfg.decimation == 0: - # Convert sim coordinates to policy coordinates - cur_pos_obs = q - default - cur_vel_obs = dq - - input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32) - 
input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32) - input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32) - - input_data["t.1"] = np.array([count_lowlevel * cfg.dt], dtype=np.float32) - - input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32) - input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32) - - input_data["prev_actions.1"] = prev_actions.astype(np.float32) - - input_data["imu_ang_vel.1"] = omega.astype(np.float32) - input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32) - - input_data["buffer.1"] = hist_obs.astype(np.float32) - - positions, curr_actions, hist_obs = policy.run(None, input_data) - target_q = positions - prev_actions = curr_actions - - # Generate PD control - tau = pd_control(target_q, q, kps, dq, kds, default) # Calc torques - tau = np.clip(tau, -tau_limit, tau_limit) # Clamp torques - - data.ctrl = tau - mujoco.mj_step(model, data) - - if render: - viewer.render() - count_lowlevel += 1 - - if render: - viewer.close() - - # Calculate average speed - if step_count > 0: - average_speed = total_speed / step_count - else: - average_speed = 0.0 - - # Save or print the statistics at the end of the episode - print(f"Number of upright steps: {upright_steps}") - print(f"Average speed: {average_speed:.4f} m/s") - - -def parse_modelmeta( - modelmeta: List[Tuple[str, str]], - verbose: bool = False, -) -> Dict[str, Union[float, List[float], str]]: - parsed_meta: Dict[str, Union[float, List[float], str]] = {} - for key, value in modelmeta: - if value.startswith("[") and value.endswith("]"): - parsed_meta[key] = list(map(float, value.strip("[]").split(","))) - else: - try: - parsed_meta[key] = float(value) - try: - if int(value) == parsed_meta[key]: - parsed_meta[key] = int(value) - except ValueError: - pass - except ValueError: - print(f"Failed to convert {value} to float") - parsed_meta[key] = value - if verbose: - for key, value in parsed_meta.items(): - print(f"{key}: {value}") - return parsed_meta - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Deployment script.") - parser.add_argument("--embodiment", type=str, required=True, help="Embodiment name.") - parser.add_argument("--load_model", type=str, required=True, help="Path to run to load from.") - parser.add_argument("--keyboard_use", action="store_true", help="keyboard_use") - parser.add_argument("--log_h5", action="store_true", help="log_h5") - parser.add_argument("--h5_out_dir", type=str, default="sim/resources", help="Directory to save HDF5 files") - parser.add_argument("--no_render", action="store_false", dest="render", help="Disable rendering") - parser.set_defaults(render=True) - args = parser.parse_args() - - if args.keyboard_use: - x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.0, 0.0, 0.0 - pygame.init() - pygame.display.set_caption("Simulation Control") - else: - x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.4, 0.0, 0.0 - - policy_cfg = ActorCfg(embodiment=args.embodiment) - if args.embodiment == "gpr": - policy_cfg.cycle_time = 0.64 - cfg = Sim2simCfg( - sim_duration=10.0, - dt=0.001, - decimation=10, - tau_factor=4.0, - cycle_time=policy_cfg.cycle_time, - ) - elif args.embodiment == "zeroth": - policy_cfg.cycle_time = 0.2 - cfg = Sim2simCfg( - sim_duration=10.0, - dt=0.001, - decimation=10, - tau_factor=1, - cycle_time=policy_cfg.cycle_time, - ) - - if args.load_model.endswith(".onnx"): - policy = ort.InferenceSession(args.load_model) - else: - policy = convert_model_to_onnx(args.load_model, policy_cfg, save_path="policy.onnx") - - model_info = parse_modelmeta( - 
policy.get_modelmeta().custom_metadata_map.items(), - verbose=True, - ) - - run_mujoco( - args.embodiment, - policy, - cfg, - model_info, - args.keyboard_use, - args.log_h5, - args.render, - args.h5_out_dir, - )
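
Note on the combined effect of the sim/play.py, sim/model_export.py, and sim/sim2sim.py changes above: ActorCfg no longer carries per-embodiment defaults, play.py fills it entirely from the training env_cfg, and sim2sim.py drops its hard-coded Sim2simCfg in favour of metadata stored in the exported .kinfer model. The sketch below illustrates that round trip; it assumes only the kinfer API calls and metadata keys visible in this diff, and the task name and file paths ("gpr", examples/walking_policy.pt, examples/gpr_walking.kinfer) are illustrative placeholders.

```python
# Minimal sketch of the new export/consume round trip (assumptions noted above).
from kinfer.export.pytorch import export_to_onnx
from kinfer.inference.python import ONNXModel

from sim.envs import task_registry
from sim.model_export import ActorCfg, get_actor_policy

# Export side (sim/play.py --export_onnx): every runtime parameter now comes
# from the training config instead of hard-coded per-embodiment values.
env_cfg, _ = task_registry.get_cfgs(name="gpr")
policy_cfg = ActorCfg(
    embodiment="gpr",
    cycle_time=env_cfg.rewards.cycle_time,  # 0.25 s after this change
    sim_dt=env_cfg.sim.dt,
    sim_decimation=env_cfg.control.decimation,
    tau_factor=env_cfg.safety.torque_limit,  # 1.0 after this change
    action_scale=env_cfg.control.action_scale,
    lin_vel_scale=env_cfg.normalization.obs_scales.lin_vel,
    ang_vel_scale=env_cfg.normalization.obs_scales.ang_vel,
    quat_scale=env_cfg.normalization.obs_scales.quat,
    dof_pos_scale=env_cfg.normalization.obs_scales.dof_pos,
    dof_vel_scale=env_cfg.normalization.obs_scales.dof_vel,
    frame_stack=env_cfg.env.frame_stack,
    clip_observations=env_cfg.normalization.clip_observations,
    clip_actions=env_cfg.normalization.clip_actions,
)
actor_model, sim2sim_info, input_tensors = get_actor_policy("examples/walking_policy.pt", policy_cfg)

# sim2sim_info now also carries the robot's default standing pose.
export_to_onnx(
    actor_model,
    input_tensors=input_tensors,
    config={**vars(policy_cfg), **sim2sim_info},
    save_path="examples/gpr_walking.kinfer",
)

# Consumer side (sim/sim2sim.py): no Sim2simCfg any more; timestep, decimation
# and torque-limit factor are read back from the model's own metadata.
policy = ONNXModel("examples/gpr_walking.kinfer")
metadata = policy.get_metadata()
model_info = {
    key: metadata[key]
    for key in (
        "num_actions", "num_observations", "robot_effort", "robot_stiffness",
        "robot_damping", "sim_dt", "sim_decimation", "tau_factor",
    )
}
# model_info is what run_mujoco() now consumes, e.g. via:
#   python sim/sim2sim.py --load_model examples/gpr_walking.kinfer --embodiment gpr
```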