From c9c97fb7b198099550d71aa70f28598014146979 Mon Sep 17 00:00:00 2001 From: JX <65676392+jingxiangmo@users.noreply.github.com> Date: Fri, 4 Oct 2024 12:47:27 -0400 Subject: [PATCH] Revert "sim2sim" --- sim/algo/ppo/on_policy_runner.py | 4 +- sim/envs/__init__.py | 20 +-- sim/envs/base/base_config.py | 31 ---- sim/envs/humanoids/stompymicro_config.py | 100 ++++--------- sim/play.py | 3 +- sim/requirements.txt | 13 +- sim/resources/stompymicro/joints.py | 58 ++++---- sim/resources/stompymicro/robot_fixed.xml | 163 ---------------------- sim/sim2sim.py | 20 +-- sim/train.py | 2 +- 10 files changed, 68 insertions(+), 346 deletions(-) delete mode 100644 sim/resources/stompymicro/robot_fixed.xml diff --git a/sim/algo/ppo/on_policy_runner.py b/sim/algo/ppo/on_policy_runner.py index 764bcff0..e3fb6e1c 100755 --- a/sim/algo/ppo/on_policy_runner.py +++ b/sim/algo/ppo/on_policy_runner.py @@ -92,11 +92,11 @@ def __init__(self, env: VecEnv, train_cfg: dict, log_dir: Optional[str] = None, _, _ = self.env.reset() - def learn(self, name: str, num_learning_iterations: int, init_at_random_ep_len: bool = False) -> None: + def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = False) -> None: # initialize writer if self.log_dir is not None and self.writer is None: wandb.init( - project=name.split('_')[0], + project="XBot", sync_tensorboard=True, name=self.wandb_run_name, config=self.all_cfg, diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py index d1295565..16478119 100755 --- a/sim/envs/__init__.py +++ b/sim/envs/__init__.py @@ -14,7 +14,7 @@ from sim.envs.humanoids.g1_env import G1FreeEnv from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO from sim.envs.humanoids.h1_env import H1FreeEnv -from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroStandingCfg, StompyMicroWalkingCfg, StompyMicroCfgPPO +from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO from sim.envs.humanoids.stompymicro_env import StompyMicroEnv from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO from sim.envs.humanoids.stompymini_env import MiniFreeEnv @@ -25,27 +25,11 @@ from sim.utils.task_registry import TaskRegistry # noqa: E402 task_registry = TaskRegistry() - -# Stompy -## Mini task_registry.register("stompymini", MiniFreeEnv, MiniCfg(), MiniCfgPPO()) -## Pro task_registry.register("stompypro", StompyProFreeEnv, StompyProCfg(), StompyProCfgPPO()) task_registry.register("stompypro_standing", StompyProFreeEnv, StompyProStandingCfg(), StompyProCfgPPO()) -## Micro -task_registry.register("stompymicro", StompyMicroEnv, StompyMicroCfg(), StompyMicroCfgPPO()) -task_registry.register("stompymicro_standing", StompyMicroEnv, StompyMicroStandingCfg() -, StompyMicroCfgPPO()) -task_registry.register("stompymicro_walking", StompyMicroEnv, StompyMicroWalkingCfg(), StompyMicroCfgPPO()) - -# Dora task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO()) - -# Unitree -## h1 task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO()) -## g1 task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO()) - -# XBotL task_registry.register("XBotL_free", XBotLFreeEnv, XBotCfg(), XBotCfgPPO()) +task_registry.register("stompymicro", StompyMicroEnv, StompyMicroCfg(), StompyMicroCfgPPO()) diff --git a/sim/envs/base/base_config.py b/sim/envs/base/base_config.py index 8e6aa6d0..f5a2dd1f 100644 --- a/sim/envs/base/base_config.py +++ b/sim/envs/base/base_config.py @@ -55,34 +55,3 @@ def init_member_classes(obj): setattr(obj, key, i_var) # recursively init members of the attribute BaseConfig.init_member_classes(i_var) - - def __str__(self): - def format_value(value, indent=0): - if isinstance(value, (int, float, str, bool)): - return str(value) - elif isinstance(value, list): - return str(value) - elif isinstance(value, dict): - return "\n" + "\n".join( - f"{' ' * (indent + 1)}{k}: {format_value(v, indent + 1)}" for k, v in value.items() - ) - elif hasattr(value, "__dict__"): - return "\n" + "\n".join( - f"{' ' * (indent + 1)}{k}: {format_value(getattr(value, k), indent + 1)}" - for k in dir(value) - if not k.startswith("__") and not callable(getattr(value, k)) - ) - else: - return str(value) - - output = [] - for attr in dir(self): - if not attr.startswith("__") and not callable(getattr(self, attr)): - value = getattr(self, attr) - formatted_value = format_value(value) - output.append(f"{attr}: {formatted_value}") - - return "\n".join(output) - - def __repr__(self): - return self.__str__() diff --git a/sim/envs/humanoids/stompymicro_config.py b/sim/envs/humanoids/stompymicro_config.py index 7705076a..cf471d58 100644 --- a/sim/envs/humanoids/stompymicro_config.py +++ b/sim/envs/humanoids/stompymicro_config.py @@ -1,7 +1,5 @@ """Defines the environment configuration for the Getting up task""" -from typing import Any, Dict - from sim.env import robot_urdf_path from sim.envs.base.legged_robot_config import ( # type: ignore LeggedRobotCfg, @@ -12,29 +10,7 @@ NUM_JOINTS = len(Robot.all_joints()) # 20 -class ConfigMixin: - UPDATES: Dict[str, Any] = {} - - @classmethod - def _update_recursive(cls, base_config, updates): - for key, value in updates.items(): - if isinstance(value, dict) and hasattr(base_config, key): - setattr(base_config, key, - cls._update_recursive(getattr(base_config, key), value)) - else: - setattr(base_config, key, value) - return base_config - - def __new__(cls, updates: Dict[str, Any] = None): - config = super().__new__(cls) - config.__init__() - cls._update_recursive(config, cls.UPDATES) - if updates: - cls._update_recursive(config, updates) - return config - - -class StompyMicroCfg(LeggedRobotCfg, ConfigMixin): +class StompyMicroCfg(LeggedRobotCfg): """Configuration class for the Legs humanoid robot.""" class env(LeggedRobotCfg.env): @@ -99,7 +75,7 @@ class terrain(LeggedRobotCfg.terrain): restitution = 0.0 class noise: - add_noise = True + add_noise = False # pfb30 bring it back noise_level = 0.6 # scales other values class noise_scales: # pfb30 bring it back @@ -152,12 +128,12 @@ class physx(LeggedRobotCfg.sim.physx): class domain_rand(LeggedRobotCfg.domain_rand): start_pos_noise = 0.01 - randomize_friction = True + randomize_friction = False friction_range = [0.1, 2.0] - randomize_base_mass = True + randomize_base_mass = False #True added_mass_range = [-1.0, 1.0] - push_robots = True + push_robots = False #True push_interval_s = 4 max_push_vel_xy = 0.2 max_push_ang_vel = 0.4 @@ -193,6 +169,23 @@ class rewards: max_contact_force = 100 # forces above this value are penalized class scales: + # # reference motion tracking + # joint_pos = 1.6 + # feet_clearance = 1.6 + # feet_contact_number = 1.2 + # feet_air_time = 1.6 + # foot_slip = -0.05 + # feet_distance = 0.2 + # knee_distance = 0.2 + # # contact + # feet_contact_forces = -0.01 + # # vel tracking + # tracking_lin_vel = 1.2 + # tracking_ang_vel = 1.1 + # vel_mismatch_exp = 0.5 # lin_z; ang x,y + # low_speed = 0.2 + # track_vel_hard = 0.5 + # base pos default_joint_pos = 0.5 orientation = 1 @@ -223,50 +216,7 @@ class viewer: lookat = [0, -2, 0] -NO_RAND = { - "domain_rand": { - "add_noise": False, - "randomize_friction": False, - "randomize_base_mass": False, - "push_robots": False, - "action_delay": 0.0, - } -} - - -class StompyMicroStandingCfg(StompyMicroCfg): - """Configuration for StompyMicro standing task.""" - UPDATES = NO_RAND - - -class StompyMicroWalkingCfg(StompyMicroCfg): - """Configuration for StompyMicro walking task.""" - UPDATES = dict({ - "rewards": { - "scales": { - # reference motion tracking - "joint_pos": 1.6, - "feet_clearance": 1.6, - "feet_contact_number": 1.2, - # gait - "feet_air_time": 1.6, - "foot_slip": -0.05, - "feet_distance": 0.2, - "knee_distance": 0.2, - # contact - "feet_contact_forces": -0.01, - # vel tracking - "tracking_lin_vel": 1.2, - "tracking_ang_vel": 1.1, - "vel_mismatch_exp": 0.5, - "low_speed": 0.2, - "track_vel_hard": 0.5, - } - } - }, **NO_RAND) - - -class StompyMicroCfgPPO(LeggedRobotCfgPPO, ConfigMixin): +class StompyMicroCfgPPO(LeggedRobotCfgPPO): seed = 5 runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner @@ -295,6 +245,6 @@ class runner: run_name = "" # load and resume resume = False - load_run = -1 # last run - checkpoint = -1 # last saved model + load_run = -1 # -1 = last run + checkpoint = -1 # -1 = last saved model resume_path = None # updated from load_run and chkpt diff --git a/sim/play.py b/sim/play.py index 5bbe97e3..9f215534 100755 --- a/sim/play.py +++ b/sim/play.py @@ -175,7 +175,8 @@ def play(args: argparse.Namespace) -> None: env.commands[:, 1] = 0.0 env.commands[:, 2] = 0.0 env.commands[:, 3] = 0.0 - obs, _, _, _, infos = env.step(actions.detach()) + obs, critic_obs, rews, dones, infos = env.step(actions.detach()) + print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}") if RENDER: env.gym.fetch_results(env.sim, True) diff --git a/sim/requirements.txt b/sim/requirements.txt index 6d82e497..9751be9e 100755 --- a/sim/requirements.txt +++ b/sim/requirements.txt @@ -1,14 +1,13 @@ # requirements.txt -gdown +mediapy +torch +python-dotenv h5py +gdown matplotlib==3.3.4 -mediapy numpy==1.19.5 -opencv-python==4.10.0.84 +wandb +tensorboard==2.14.0 onnx onnxscript -python-dotenv -tensorboard==2.14.0 -torch -wandb diff --git a/sim/resources/stompymicro/joints.py b/sim/resources/stompymicro/joints.py index 4d233f73..e2fcf5fd 100644 --- a/sim/resources/stompymicro/joints.py +++ b/sim/resources/stompymicro/joints.py @@ -64,11 +64,6 @@ class RightArm(Node): elbow_pitch = "right_elbow_yaw" # FIXME: yaw vs pitch -class Arms(Node): - left = LeftArm() - right = RightArm() - - class LeftLeg(Node): hip_roll = "left_hip_roll" hip_yaw = "left_hip_yaw" @@ -91,12 +86,11 @@ class Legs(Node): class Robot(Node): - height = 0.188 - # rotation = [0., 0., -0.7071068, 0.7071068] - rotation = [0., 0., 0.7071068, 0.7071068] - # rotation = [0., 0., 0., 1.] + height = 0.21 + rotation = [0.0, 0.0, 0, 1] - # arms = Arms() + # left_arm = LeftArm() + # right_arm = RightArm() legs = Legs() @classmethod @@ -114,81 +108,81 @@ def default_standing(cls) -> Dict[str, float]: cls.legs.right.knee_pitch: 0, cls.legs.right.ankle_pitch: 0, # Arms (adjust as needed) - # cls.arms.left.shoulder_pitch: 0, - # cls.arms.left.shoulder_yaw: 0.3, - # cls.arms.left.elbow_pitch: -0.6, - # cls.arms.right.shoulder_pitch: 0, - # cls.arms.right.shoulder_yaw: -0.3, - # cls.arms.right.elbow_pitch: -0.6, + # cls.left_arm.shoulder_pitch: 0, + # cls.left_arm.shoulder_yaw: 0.3, + # cls.left_arm.elbow_pitch: -0.6, + # cls.right_arm.shoulder_pitch: 0, + # cls.right_arm.shoulder_yaw: -0.3, + # cls.right_arm.elbow_pitch: -0.6, } @classmethod def default_limits(cls) -> Dict[str, Dict[str, float]]: return { # left arm - cls.arms.left.shoulder_pitch: { + Robot.left_arm.shoulder_pitch: { "lower": 2.04, "upper": 3.06, }, - cls.arms.left.shoulder_yaw: { + Robot.left_arm.shoulder_yaw: { "lower": -1, "upper": 2, }, - cls.arms.left.elbow_pitch: { + Robot.left_arm.elbow_pitch: { "lower": -2.06, "upper": -1.08, }, # right arm - cls.arms.right.shoulder_pitch: { + Robot.right_arm.shoulder_pitch: { "lower": 2.619, "upper": 3.621, }, - cls.arms.right.shoulder_yaw: { + Robot.right_arm.shoulder_yaw: { "lower": -1.481, "upper": 1, }, - cls.arms.right.elbow_pitch: { + Robot.right_arm.elbow_pitch: { "lower": -3.819, "upper": 3.821, }, - cls.legs.left.hip_pitch: { + Robot.legs.left.hip_pitch: { "lower": -1.64, "upper": 1.64, }, - cls.legs.left.hip_roll: { + Robot.legs.left.hip_roll: { "lower": -4.0, "upper": 1.0, }, - cls.legs.left.hip_yaw: { + Robot.legs.left.hip_yaw: { "lower": 2.64, "upper": 5.64, }, - cls.legs.left.knee_pitch: { + Robot.legs.left.knee_pitch: { "lower": -2.5, "upper": 0.5, }, - cls.legs.left.ankle_pitch: { + Robot.legs.left.ankle_pitch: { "lower": -0.3, "upper": 0.3, }, # right leg - cls.legs.right.hip_pitch: { + Robot.legs.right.hip_pitch: { "lower": 0.05, "upper": 4.05, }, - cls.legs.right.hip_roll: { + Robot.legs.right.hip_roll: { "lower": 2.25, "upper": 4.49, }, - cls.legs.right.hip_yaw: { + Robot.legs.right.hip_yaw: { "lower": 1.74, "upper": 4.74, }, - cls.legs.right.knee_pitch: { + Robot.legs.right.knee_pitch: { "lower": -0.5, "upper": 2.5, }, - cls.legs.right.ankle_pitch: { + Robot.legs.right.ankle_pitch: { "lower": -0.3, "upper": 0.3, }, diff --git a/sim/resources/stompymicro/robot_fixed.xml b/sim/resources/stompymicro/robot_fixed.xml deleted file mode 100644 index 44afdd08..00000000 --- a/sim/resources/stompymicro/robot_fixed.xml +++ /dev/null @@ -1,163 +0,0 @@ - - - - - - - - - - - - diff --git a/sim/sim2sim.py b/sim/sim2sim.py index 67f79751..6e75e69b 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -92,13 +92,6 @@ def get_obs(data): def pd_control(target_q, q, kp, target_dq, dq, kd, default): """Calculates torques from position commands""" - print("kp shape:", kp.shape) - print("kd shape:", kd.shape) - print("target_q shape:", target_q.shape) - print("default shape:", default.shape) - print("q shape:", q.shape) - print("dq shape:", dq.shape) - print("target_dq shape:", target_dq.shape) return kp * (target_q + default - q) - kd * dq @@ -142,7 +135,7 @@ def run_mujoco(policy, cfg): count_lowlevel = 0 - for step in tqdm(range(int(cfg.sim_config.sim_duration / cfg.sim_config.dt)), desc="Simulating..."): + for _ in tqdm(range(int(cfg.sim_config.sim_duration / cfg.sim_config.dt)), desc="Simulating..."): # Obtain an observation q, dq, quat, v, omega, gvec = get_obs(data) q = q[-cfg.num_actions :] @@ -190,11 +183,9 @@ def run_mujoco(policy, cfg): ) # Calc torques tau = np.clip(tau, -cfg.robot_config.tau_limit, cfg.robot_config.tau_limit) # Clamp torques + print(tau) data.ctrl = tau - print(f"Step {step}: Control signals (tau): {tau}") - - mujoco.mj_step(model, data) viewer.render() count_lowlevel += 1 @@ -229,12 +220,9 @@ class sim_config: class robot_config: tau_factor = 0.85 - # tau_limit = np.array(list(robot.stiffness().values()) + list(robot.stiffness().values())) * tau_factor - tau_limit = np.array( list(robot.stiffness().values())) * tau_factor - + tau_limit = np.array(list(robot.stiffness().values()) + list(robot.stiffness().values())) * tau_factor kps = tau_limit - #kds = np.array(list(robot.damping().values()) + list(robot.damping().values())) - kds = np.array(list(robot.damping().values())) + kds = np.array(list(robot.damping().values()) + list(robot.damping().values())) class normalization: class obs_scales: diff --git a/sim/train.py b/sim/train.py index 6e00eb95..aa5c100e 100755 --- a/sim/train.py +++ b/sim/train.py @@ -12,7 +12,7 @@ def train(args: argparse.Namespace) -> None: env, _ = task_registry.make_env(name=args.task, args=args) ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args) - ppo_runner.learn(name=args.task, num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True) + ppo_runner.learn(num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True) # Puts this import down here so that the environments are registered