add gpr (#122)
update to gpr
budzianowski authored Dec 5, 2024
1 parent 2e53688 commit 44dea1f
Showing 62 changed files with 2,215 additions and 4,079 deletions.
Binary file added examples/kinfer_policy.onnx
Binary file removed examples/standing_micro.onnx
Binary file removed examples/standing_micro.pt
Binary file removed examples/standing_pro.onnx
Binary file removed examples/standing_pro.pt
Binary file removed examples/walking_micro.onnx
Binary file removed examples/walking_micro.pt
Binary file removed examples/walking_pro.onnx
Binary file removed examples/walking_pro.pt
Binary file renamed examples/original_walking.pt → policy.pt
Binary file removed policy_1.pt
1 change: 1 addition & 0 deletions sim/algo/ppo/on_policy_runner.py
@@ -266,6 +266,7 @@ def save(self, path: str, infos: Optional[dict] = None) -> None:

def load(self, path: str, load_optimizer: bool = True) -> dict:
loaded_dict = torch.load(path)
self.load_path = path
self.alg.actor_critic.load_state_dict(loaded_dict["model_state_dict"])
if load_optimizer:
self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"])
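The only functional change here is that load() now records where the checkpoint came from (self.load_path). As an aside, the checkpoint layout this runner uses can be inspected directly with torch; a minimal sketch, assuming only the two keys visible in load() above (inspect_checkpoint is an illustrative helper, not part of the repo):

import torch


def inspect_checkpoint(path: str) -> None:
    # Peek at a checkpoint produced by the runner's save()/load() pair above.
    loaded = torch.load(path, map_location="cpu")
    model_state = loaded["model_state_dict"]            # actor-critic weights
    has_optimizer = "optimizer_state_dict" in loaded    # optional optimizer state
    print(f"{path}: {len(model_state)} weight tensors, optimizer state: {has_optimizer}")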
17 changes: 7 additions & 10 deletions sim/envs/__init__.py
@@ -16,22 +16,19 @@
from sim.envs.humanoids.h1_env import H1FreeEnv
from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO
from sim.envs.humanoids.stompymicro_env import StompyMicroEnv
from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
from sim.envs.humanoids.stompymini_env import MiniFreeEnv
from sim.envs.humanoids.stompypro_config import (
StompyProCfg,
StompyProCfgPPO,
StompyProStandingCfg,
from sim.envs.humanoids.grp_config import (
GprCfg,
GprCfgPPO,
GprStandingCfg,
)
from sim.envs.humanoids.stompypro_env import StompyProFreeEnv
from sim.envs.humanoids.gpr_env import GprFreeEnv
from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
from sim.envs.humanoids.xbot_env import XBotLFreeEnv
from sim.utils.task_registry import TaskRegistry # noqa: E402

task_registry = TaskRegistry()
task_registry.register("stompymini", MiniFreeEnv, MiniCfg(), MiniCfgPPO())
task_registry.register("stompypro", StompyProFreeEnv, StompyProCfg(), StompyProCfgPPO())
task_registry.register("stompypro_standing", StompyProFreeEnv, StompyProStandingCfg(), StompyProCfgPPO())
task_registry.register("gpr", GprFreeEnv, GprCfg(), GprCfgPPO())
task_registry.register("gpr_standing", GprFreeEnv, GprStandingCfg(), GprCfgPPO())
task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO())
task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO())
task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO())
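For readers unfamiliar with the pattern, task_registry.register() simply maps a task name to an environment class plus its two config objects, which is why renaming stompypro/stompymini to gpr only touches these lines. A self-contained, illustrative sketch of that data structure (not the repo's actual TaskRegistry implementation):

from typing import Any, Dict, Tuple, Type


class MiniTaskRegistry:
    """Toy registry mirroring the name -> (env class, env cfg, PPO cfg) mapping above."""

    def __init__(self) -> None:
        self._tasks: Dict[str, Tuple[Type, Any, Any]] = {}

    def register(self, name: str, env_class: Type, env_cfg: Any, ppo_cfg: Any) -> None:
        self._tasks[name] = (env_class, env_cfg, ppo_cfg)

    def get(self, name: str) -> Tuple[Type, Any, Any]:
        if name not in self._tasks:
            raise KeyError(f"unknown task '{name}', registered: {sorted(self._tasks)}")
        return self._tasks[name]

Training scripts look tasks up by name, so any command that previously referenced the stompypro or stompymini task names now needs to use gpr or gpr_standing.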
2 changes: 2 additions & 0 deletions sim/envs/base/legged_robot.py
@@ -567,6 +567,7 @@ def _init_buffers(self):
self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
for i in range(self.num_dofs):
name = self.dof_names[i]
print(name)
self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name]
found = False

@@ -579,6 +580,7 @@ def _init_buffers(self):
self.p_gains[:, i] = 0.0
self.d_gains[:, i] = 0.0
print(f"PD gain of joint {name} were not defined, setting them to zero")

self.rand_push_force = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device)
self.rand_push_torque = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device)
self.default_dof_pos = self.default_dof_pos.unsqueeze(0)
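The added print(name) is just a debug aid; the surrounding loop resolves each DOF's default angle and PD gains by substring-matching the joint name against the config dictionaries, falling back to zero gains when nothing matches. A standalone sketch of that lookup with plain dicts (illustrative only; the real code operates on self.cfg and gym tensors):

from typing import Dict, List, Tuple


def resolve_pd_gains(
    dof_names: List[str],
    stiffness: Dict[str, float],
    damping: Dict[str, float],
) -> List[Tuple[float, float]]:
    gains = []
    for name in dof_names:
        # A joint picks up the gains of the first config key contained in its name.
        match = next((key for key in stiffness if key in name), None)
        if match is None:
            print(f"PD gain of joint {name} was not defined, setting it to zero")
            gains.append((0.0, 0.0))
        else:
            gains.append((stiffness[match], damping[match]))
    return gains


# Example: a single "hip_pitch" entry covers both left and right hip pitch joints.
print(resolve_pd_gains(
    ["left_hip_pitch", "right_hip_pitch", "imu_joint"],
    {"hip_pitch": 120.0},
    {"hip_pitch": 10.0},
))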
1 change: 0 additions & 1 deletion sim/envs/base/legged_robot_config.py
@@ -157,7 +157,6 @@ class domain_rand:
push_robots = True
push_interval_s = 15
max_push_vel_xy = 1.0
start_pos_noise = 0.1

class rewards:
class scales:
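With start_pos_noise removed, the remaining push settings follow the usual legged_gym convention: every push_interval_s seconds the base receives a random planar velocity kick bounded by max_push_vel_xy. A tiny sketch of that sampling step (an assumption about the convention, not the repo's actual _push_robots implementation):

import random


def sample_push_velocity(max_push_vel_xy: float = 1.0) -> tuple:
    # Uniform random xy velocity kick, bounded by the config value above.
    return (
        random.uniform(-max_push_vel_xy, max_push_vel_xy),
        random.uniform(-max_push_vel_xy, max_push_vel_xy),
    )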
sim/envs/humanoids/stompypro_env.py → sim/envs/humanoids/gpr_env.py
@@ -4,16 +4,16 @@
from isaacgym.torch_utils import * # isort:skip

from sim.envs.base.legged_robot import LeggedRobot
from sim.resources.stompypro.joints import Robot
from sim.resources.gpr.joints import Robot
from sim.utils.terrain import HumanoidTerrain

from isaacgym import gymtorch # isort:skip

import torch # isort:skip


class StompyProFreeEnv(LeggedRobot):
"""StompyProFreeEnv is a class that represents a custom environment for a legged robot.
class GprFreeEnv(LeggedRobot):
"""GprFreeEnv is a class that represents a custom environment for a legged robot.
Args:
cfg (LeggedRobotCfg): Configuration object for the legged robot.
@@ -126,13 +126,13 @@ def compute_ref_state(self):
sin_pos_l[sin_pos_l > 0] = 0
self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1
self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2
self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += 1 * sin_pos_l * scale_1
self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1

# right foot stance phase set to default joint pos
sin_pos_r[sin_pos_r < 0] = 0
self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1
self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2
self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += 1 * sin_pos_r * scale_1
self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1

# Double support phase
self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0
@@ -325,7 +325,7 @@ def _reward_feet_contact_number(self):
"""
contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
stance_mask = self._get_gait_phase()
reward = torch.where(contact == stance_mask, 1, -0.3)
reward = torch.where(contact == stance_mask, 1.0, -0.3)
return torch.mean(reward, dim=1)

def _reward_orientation(self):
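Two small cleanups here: the redundant 1 * factors on the ankle-pitch terms are dropped, and torch.where now gets matching float literals (1.0 / -0.3) for both branches. The underlying reference gait is a sinusoid whose negative half drives the left leg and positive half the right, with near-zero phase treated as double support. A self-contained sketch of that idea (tensor layout and the scale_2 = 2 * scale_1 relation are assumptions, not a verbatim copy of compute_ref_state):

import math

import torch


def reference_leg_offsets(phase: torch.Tensor, target_joint_pos_scale: float = 0.17):
    sin_pos = torch.sin(2 * math.pi * phase)
    scale_1 = target_joint_pos_scale
    scale_2 = 2 * scale_1  # knee bends roughly twice as much as hip/ankle

    sin_pos_l = sin_pos.clone()
    sin_pos_l[sin_pos_l > 0] = 0  # left leg only deflects on its swing half
    left = {
        "hip_pitch": sin_pos_l * scale_1,
        "knee_pitch": sin_pos_l * scale_2,
        "ankle_pitch": sin_pos_l * scale_1,
    }

    sin_pos_r = sin_pos.clone()
    sin_pos_r[sin_pos_r < 0] = 0  # right leg deflects on the opposite half
    right = {
        "hip_pitch": sin_pos_r * scale_1,
        "knee_pitch": sin_pos_r * scale_2,
        "ankle_pitch": sin_pos_r * scale_1,
    }

    double_support = torch.abs(sin_pos) < 0.1  # both feet down near phase zero
    return left, right, double_support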
sim/envs/humanoids/stompymini_config.py → sim/envs/humanoids/grp_config.py
@@ -1,16 +1,16 @@
"""Defines the environment configuration for the walking task"""
"""Defines the environment configuration for the Getting up task"""

from sim.env import robot_urdf_path
from sim.envs.base.legged_robot_config import ( # type: ignore
LeggedRobotCfg,
LeggedRobotCfgPPO,
)
from sim.resources.stompymini.joints import Robot
from sim.resources.gpr.joints import Robot

NUM_JOINTS = len(Robot.all_joints()) # 20
NUM_JOINTS = len(Robot.all_joints())


class MiniCfg(LeggedRobotCfg):
class GprCfg(LeggedRobotCfg):
"""Configuration class for the Legs humanoid robot."""

class env(LeggedRobotCfg.env):
@@ -33,24 +33,26 @@ class safety(LeggedRobotCfg.safety):
torque_limit = 0.85

class asset(LeggedRobotCfg.asset):
name = "stompymini"
name = "gpr"

file = str(robot_urdf_path(name))

foot_name = ["RS_01_Stator", "RS_01_Stator_2"]
knee_name = ["RS_04_Rotor_7", "RS_04_Rotor_8"]
foot_name = ["foot1", "foot3"]
knee_name = ["leg3_shell2", "leg3_shell22"]
imu_name = "imu_link"

termination_height = 0.24
default_feet_height = 0.03
termination_height = 0.2
default_feet_height = 0.0

penalize_contacts_on = []
self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter
flip_visual_attachments = False
replace_cylinder_with_capsule = False
fix_base_link = False

class terrain(LeggedRobotCfg.terrain):
mesh_type = "plane"
# mesh_type = 'trimesh'
# mesh_type = "trimesh"
curriculum = False
# rough terrain only:
measure_heights = False
@@ -79,10 +81,7 @@ class noise_scales:

class init_state(LeggedRobotCfg.init_state):
pos = [0.0, 0.0, Robot.height]
# setting the right rotation
# quat_from_euler_xyz(torch.tensor(1.57), torch.tensor(0), torch.tensor(-1.57))
rot = Robot.rotation

default_joint_angles = {k: 0.0 for k in Robot.all_joints()}

default_positions = Robot.default_standing()
@@ -123,14 +122,14 @@ class domain_rand(LeggedRobotCfg.domain_rand):
friction_range = [0.1, 2.0]

randomize_base_mass = True
added_mass_range = [-1.0, 1.0]
push_robots = True
added_mass_range = [-3.0, 3.0]
push_robots = False
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
# dynamic randomization
action_delay = 0.5
action_noise = 0.02
action_delay = 0.5

class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -145,39 +144,28 @@ class ranges:
heading = [-3.14, 3.14]

class rewards:
base_height_target = 0.78
min_dist = 0.25
# quite important to keep it right
base_height_target = 0.63
min_dist = 0.2
max_dist = 0.5

# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
target_feet_height = 0.05 # m
cycle_time = 0.4 # sec
target_feet_height = 0.06 # m

cycle_time = 0.25 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
tracking_sigma = 5.0
max_contact_force = 400 # forces above this value are penalized
max_contact_force = 700 # forces above this value are penalized

class scales:
# base pos
default_joint_pos = 0.5
orientation = 1
base_height = 0.2
base_acc = 0.2
# energy
action_smoothness = -0.002
torques = -1e-5
dof_vel = -5e-4
dof_acc = -1e-7
collision = -1.0

# reference motion tracking
joint_pos = 1.6
feet_clearance = 1.6
feet_clearance = 1.0
feet_contact_number = 1.2
# gait
feet_air_time = 1.6
feet_air_time = 1.0
foot_slip = -0.05
feet_distance = 0.2
knee_distance = 0.2
@@ -190,6 +178,18 @@ class scales:
low_speed = 0.2
track_vel_hard = 0.5

# base pos
default_joint_pos = 0.5
orientation = 1.
base_height = 0.2
base_acc = 0.2
# energy
action_smoothness = -0.002
torques = -1e-5
dof_vel = -5e-4
dof_acc = -1e-7
collision = -1.0

class normalization:
class obs_scales:
lin_vel = 2.0
@@ -208,27 +208,27 @@ class viewer:
lookat = [0, -2, 0]


class MiniStandingCfg(MiniCfg):
"""Configuration class for the Legs humanoid robot."""
class GprStandingCfg(GprCfg):
"""Configuration class for the GPR humanoid robot."""

class rewards:
base_height_target = 0.78
min_dist = 0.25
# quite important to keep it right
base_height_target = 1.06
min_dist = 0.2
max_dist = 0.5

# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
target_feet_height = 0.05 # m
cycle_time = 0.4 # sec
cycle_time = 0.64 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
tracking_sigma = 5.0
max_contact_force = 400 # forces above this value are penalized
tracking_sigma = 5
max_contact_force = 500 # forces above this value are penalized

class scales:
# base pos
default_joint_pos = 0.5
default_joint_pos = 1.0
orientation = 1
base_height = 0.2
base_acc = 0.2
@@ -240,7 +240,7 @@ class scales:
collision = -1.0


class MiniCfgPPO(LeggedRobotCfgPPO):
class GprCfgPPO(LeggedRobotCfgPPO):
seed = 5
runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner

@@ -261,11 +261,11 @@ class runner:
policy_class_name = "ActorCritic"
algorithm_class_name = "PPO"
num_steps_per_env = 60 # per iteration
max_iterations = 5001 # number of policy updates
max_iterations = 3001 # number of policy updates

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "StompyMini"
experiment_name = "gpr"
run_name = ""
# load and resume
resume = False
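These nested config classes are typically consumed by flattening their class attributes into plain dicts (for example, to build the reward-scale table). A small illustrative helper showing the idea, assuming the usual legged_gym-style convention rather than this repo's exact utility:

def class_attrs_to_dict(cls) -> dict:
    # Collect plain class attributes, skipping dunders and methods.
    return {
        name: value
        for name, value in vars(cls).items()
        if not name.startswith("__") and not callable(value)
    }


class Scales:  # stand-in for GprCfg.rewards.scales above
    joint_pos = 1.6
    feet_clearance = 1.0
    feet_contact_number = 1.2
    default_joint_pos = 0.5
    torques = -1e-5


print(class_attrs_to_dict(Scales))
# e.g. {'joint_pos': 1.6, 'feet_clearance': 1.0, 'feet_contact_number': 1.2, ...}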