gpr - smaller efforts, updates to export #126

Merged · 15 commits · Dec 10, 2024
Binary file modified examples/gpr_walking.kinfer
Binary file modified examples/walking_policy.pt
2 changes: 1 addition & 1 deletion sim/envs/__init__.py
@@ -12,8 +12,8 @@
from sim.envs.humanoids.dora_env import DoraFreeEnv
from sim.envs.humanoids.g1_config import G1Cfg, G1CfgPPO
from sim.envs.humanoids.g1_env import G1FreeEnv
from sim.envs.humanoids.gpr_env import GprFreeEnv
from sim.envs.humanoids.gpr_config import GprCfg, GprCfgPPO, GprStandingCfg
from sim.envs.humanoids.gpr_env import GprFreeEnv
from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO
from sim.envs.humanoids.h1_env import H1FreeEnv
from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
12 changes: 6 additions & 6 deletions sim/envs/humanoids/gpr_config.py
@@ -30,7 +30,7 @@ class safety(LeggedRobotCfg.safety):
# safety factors
pos_limit = 1.0
vel_limit = 1.0
torque_limit = 0.85
torque_limit = 1.0

class asset(LeggedRobotCfg.asset):
name = "gpr"
@@ -123,7 +123,7 @@ class domain_rand(LeggedRobotCfg.domain_rand):

randomize_base_mass = True
added_mass_range = [-3.0, 3.0]
push_robots = False
push_robots = True
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
@@ -150,14 +150,14 @@ class rewards:
max_dist = 0.5
# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
target_feet_height = 0.06 # m
target_feet_height = 0.05 # m

cycle_time = 0.64 # sec
cycle_time = 0.25 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
tracking_sigma = 5.0
max_contact_force = 700 # forces above this value are penalized
max_contact_force = 400 # forces above this value are penalized

class scales:
# reference motion tracking
@@ -213,7 +213,7 @@ class GprStandingCfg(GprCfg):

class rewards:
# quite important to keep it right
base_height_target = 1.06
base_height_target = Robot.height
min_dist = 0.2
max_dist = 0.5
# put some settings here for LLM parameter tuning
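Taken together, the tuning in this file raises the torque limit, re-enables push disturbances, and tightens the gait targets. A condensed sketch of the new values for reference (illustrative only; the real classes in sim/envs/humanoids/gpr_config.py inherit from LeggedRobotCfg and carry many more fields):

```python
# Condensed view of the fields changed in this diff (new values; old values noted).
class safety:
    torque_limit = 1.0  # was 0.85

class domain_rand:
    push_robots = True  # was False; pushes every 4 s, up to 0.2 m/s and 0.4 rad/s

class rewards:
    target_feet_height = 0.05  # m, was 0.06
    cycle_time = 0.25  # sec, was 0.64
    max_contact_force = 400  # N, was 700
    # In GprStandingCfg.rewards, base_height_target now tracks Robot.height
    # instead of the hard-coded 1.06.
```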
32 changes: 17 additions & 15 deletions sim/model_export.py
@@ -15,17 +15,20 @@

@dataclass
class ActorCfg:
embodiment: str = "gpr"
cycle_time: float = 0.4 # Cycle time for sinusoidal command input
action_scale: float = 0.25 # Scale for actions
lin_vel_scale: float = 2.0 # Scale for linear velocity
ang_vel_scale: float = 1.0 # Scale for angular velocity
quat_scale: float = 1.0 # Scale for quaternion
dof_pos_scale: float = 1.0 # Scale for joint positions
dof_vel_scale: float = 0.05 # Scale for joint velocities
frame_stack: int = 15 # Number of frames to stack for the policy input
clip_observations: float = 18.0 # Clip observations to this value
clip_actions: float = 18.0 # Clip actions to this value
embodiment: str
cycle_time: float # Cycle time for sinusoidal command input
action_scale: float # Scale for actions
lin_vel_scale: float # Scale for linear velocity
ang_vel_scale: float # Scale for angular velocity
quat_scale: float # Scale for quaternion
dof_pos_scale: float # Scale for joint positions
dof_vel_scale: float # Scale for joint velocities
frame_stack: int # Number of frames to stack for the policy input
clip_observations: float # Clip observations to this value
clip_actions: float # Clip actions to this value
sim_dt: float # Simulation time step
sim_decimation: int # Simulation decimation
tau_factor: float # Torque limit factor


class ActorCritic(nn.Module):
@@ -242,6 +245,8 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
num_actions = a_model.num_actions
num_observations = a_model.num_observations

default_standing = list(a_model.robot.default_standing().values())

return (
a_model,
{
@@ -250,6 +255,7 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T
"robot_damping": robot_damping,
"num_actions": num_actions,
"num_observations": num_observations,
"default_standing": default_standing,
},
input_tensors,
)
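With the defaults removed from ActorCfg and the new `default_standing` entry added to the returned metadata, a standalone call to get_actor_policy now has to spell out every field. A minimal sketch, reusing the default values from the removed lines above (the checkpoint path is a placeholder, and sim_dt, sim_decimation, and tau_factor never had defaults, so those numbers are purely illustrative):

```python
from sim.model_export import ActorCfg, get_actor_policy

# Values mirror the defaults that were removed from ActorCfg; the last three
# fields are new, and the numbers used for them here are illustrative placeholders.
cfg = ActorCfg(
    embodiment="gpr",
    cycle_time=0.4,
    action_scale=0.25,
    lin_vel_scale=2.0,
    ang_vel_scale=1.0,
    quat_scale=1.0,
    dof_pos_scale=1.0,
    dof_vel_scale=0.05,
    frame_stack=15,
    clip_observations=18.0,
    clip_actions=18.0,
    sim_dt=0.001,       # illustrative
    sim_decimation=10,  # illustrative
    tau_factor=1.0,     # illustrative
)

actor_model, sim2sim_info, input_tensors = get_actor_policy("policy_example.pt", cfg)
default_pose = sim2sim_info["default_standing"]  # per-joint default standing angles
```

The standing pose itself comes from the robot model (a_model.robot.default_standing()), so whatever consumes the exported metadata can read the default joint angles without re-deriving them.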
@@ -338,7 +344,3 @@ def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[st
buffer2.seek(0)

return ort.InferenceSession(buffer2.read())


if __name__ == "__main__":
convert_model_to_onnx("model_3000.pt", ActorCfg(), "policy.onnx")
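The removed entry point above depended on ActorCfg() filling itself from defaults, which no longer works. If a standalone conversion is still needed, it would look roughly like this, reusing a fully populated config such as the one sketched earlier (the file names are placeholders kept from the deleted example):

```python
from sim.model_export import convert_model_to_onnx

# `cfg` as in the earlier sketch; input checkpoint and output path are placeholders.
session = convert_model_to_onnx("model_3000.pt", cfg, "policy.onnx")
```

In the updated sim/play.py below, convert_model_to_onnx is dropped from the imports entirely, and kinfer's export_to_onnx appears to take over the export step.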
37 changes: 21 additions & 16 deletions sim/play.py
@@ -21,15 +21,11 @@
from kinfer.export.pytorch import export_to_onnx
from tqdm import tqdm

# Local imports third
from sim.env import run_dir # noqa: E402
from sim.envs import task_registry # noqa: E402
from sim.h5_logger import HDF5Logger
from sim.model_export import ( # noqa: E402
ActorCfg,
convert_model_to_onnx,
get_actor_policy,
)

# Local imports third
from sim.model_export import ActorCfg, get_actor_policy
from sim.utils.helpers import get_args # noqa: E402
from sim.utils.logger import Logger # noqa: E402

@@ -92,15 +88,22 @@ def play(args: argparse.Namespace) -> None:
if args.export_onnx:
path = ppo_runner.load_path
embodiment = ppo_runner.cfg["experiment_name"].lower()
policy_cfg = ActorCfg(embodiment=embodiment)

if embodiment == "stompypro":
policy_cfg.cycle_time = 0.4
elif embodiment == "stompymicro":
policy_cfg.cycle_time = 0.2
else:
print(f"Specific policy cfg for {embodiment} not implemented")

policy_cfg = ActorCfg(
embodiment=embodiment,
cycle_time=env_cfg.rewards.cycle_time,
sim_dt=env_cfg.sim.dt,
sim_decimation=env_cfg.control.decimation,
tau_factor=env_cfg.safety.torque_limit,
action_scale=env_cfg.control.action_scale,
lin_vel_scale=env_cfg.normalization.obs_scales.lin_vel,
ang_vel_scale=env_cfg.normalization.obs_scales.ang_vel,
quat_scale=env_cfg.normalization.obs_scales.quat,
dof_pos_scale=env_cfg.normalization.obs_scales.dof_pos,
dof_vel_scale=env_cfg.normalization.obs_scales.dof_vel,
frame_stack=env_cfg.env.frame_stack,
clip_observations=env_cfg.normalization.clip_observations,
clip_actions=env_cfg.normalization.clip_actions,
)
actor_model, sim2sim_info, input_tensors = get_actor_policy(path, policy_cfg)

# Merge policy_cfg and sim2sim_info into a single config object
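The merge itself is collapsed in this view; one plausible shape for it, under the assumption that the exporter just wants a flat dictionary (this is a sketch, not necessarily the repository's actual code):

```python
from dataclasses import asdict

# Combine the export-time ActorCfg fields with the metadata returned by
# get_actor_policy into one flat dict for the exporter.
export_config = {**asdict(policy_cfg), **sim2sim_info}
```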
@@ -117,6 +120,8 @@

now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
if args.log_h5:
from sim.h5_logger import HDF5Logger

# Create directory for HDF5 files
h5_dir = run_dir() / "h5_out" / args.task / now
h5_dir.mkdir(parents=True, exist_ok=True)