add gpr #122

Merged · 33 commits · Dec 5, 2024

Commits:
c77499c
updates, left leg works
budzianowski Dec 3, 2024
bef1927
update config
budzianowski Dec 3, 2024
c71b3e6
update position
budzianowski Dec 3, 2024
8b0315b
left leg behaves differently
budzianowski Dec 3, 2024
8ca0383
scales, cycle gait, joints and friction
budzianowski Dec 3, 2024
6eaec8e
correct config update
budzianowski Dec 3, 2024
344081f
update efforts
budzianowski Dec 3, 2024
01b9959
save imu vis
budzianowski Dec 4, 2024
6af4a94
add kinfer back
budzianowski Dec 4, 2024
ee55a46
update the urdf
budzianowski Dec 4, 2024
2f4519b
update joint position
budzianowski Dec 4, 2024
c74e3e3
update xml
budzianowski Dec 4, 2024
7fa2790
config with the right symmetrical training
budzianowski Dec 4, 2024
a9fceca
add default standing to metadata
WT-MM Dec 4, 2024
ac788b6
remove unused setup
budzianowski Dec 4, 2024
3566570
Merge branch 'debug' of github.com:kscalelabs/sim into debug
budzianowski Dec 4, 2024
2929eca
add gpr
budzianowski Dec 4, 2024
4f9ab76
update to config
budzianowski Dec 4, 2024
d86c394
update config
budzianowski Dec 4, 2024
3121393
add meshes
budzianowski Dec 4, 2024
35d1e4c
update names
budzianowski Dec 4, 2024
00da38a
update urdf
budzianowski Dec 4, 2024
aaec639
signs of the joints are not fully known
budzianowski Dec 4, 2024
516b035
make easier hyperparameters
budzianowski Dec 4, 2024
51efe10
update the initial position
budzianowski Dec 4, 2024
5448c98
policy works with realistic values
budzianowski Dec 5, 2024
8981f5f
save the latest policy
budzianowski Dec 5, 2024
67a7a6f
update mapping
budzianowski Dec 5, 2024
2c0ac78
fix play export
WT-MM Dec 5, 2024
42c44e0
update real joints and add function policy
budzianowski Dec 5, 2024
4199463
sim2sim works
budzianowski Dec 5, 2024
4c62db5
saves moving
budzianowski Dec 5, 2024
ee01607
merge
budzianowski Dec 5, 2024
Binary file added examples/kinfer_policy.onnx
Binary file removed examples/standing_micro.onnx
Binary file removed examples/standing_micro.pt
Binary file removed examples/standing_pro.onnx
Binary file removed examples/standing_pro.pt
Binary file removed examples/walking_micro.onnx
Binary file removed examples/walking_micro.pt
Binary file removed examples/walking_pro.onnx
Binary file removed examples/walking_pro.pt
Binary file renamed examples/original_walking.pt → policy.pt
Binary file removed policy_1.pt
1 change: 1 addition & 0 deletions sim/algo/ppo/on_policy_runner.py
@@ -266,6 +266,7 @@ def save(self, path: str, infos: Optional[dict] = None) -> None:

    def load(self, path: str, load_optimizer: bool = True) -> dict:
        loaded_dict = torch.load(path)
+       self.load_path = path
        self.alg.actor_critic.load_state_dict(loaded_dict["model_state_dict"])
        if load_optimizer:
            self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"])
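For context on the one-line addition above: load() now records where the checkpoint came from. A minimal sketch of how such an attribute can be reused, e.g. to write an export next to the loaded checkpoint (this helper is illustrative and not part of the PR; it assumes an rsl_rl-style ActorCritic whose .actor is a scriptable module):

    import os
    import torch

    def export_jit_policy(runner, out_name: str = "exported_policy.pt") -> str:
        # Save a TorchScript export of the actor next to the checkpoint
        # recorded by load() via runner.load_path (illustrative helper).
        out_dir = os.path.dirname(runner.load_path)
        out_path = os.path.join(out_dir, out_name)
        scripted = torch.jit.script(runner.alg.actor_critic.actor)
        scripted.save(out_path)
        return out_path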
17 changes: 7 additions & 10 deletions sim/envs/__init__.py
@@ -16,22 +16,19 @@
 from sim.envs.humanoids.h1_env import H1FreeEnv
 from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO
 from sim.envs.humanoids.stompymicro_env import StompyMicroEnv
-from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
-from sim.envs.humanoids.stompymini_env import MiniFreeEnv
-from sim.envs.humanoids.stompypro_config import (
-    StompyProCfg,
-    StompyProCfgPPO,
-    StompyProStandingCfg,
+from sim.envs.humanoids.grp_config import (
+    GprCfg,
+    GprCfgPPO,
+    GprStandingCfg,
 )
-from sim.envs.humanoids.stompypro_env import StompyProFreeEnv
+from sim.envs.humanoids.gpr_env import GprFreeEnv
 from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
 from sim.envs.humanoids.xbot_env import XBotLFreeEnv
 from sim.utils.task_registry import TaskRegistry # noqa: E402

 task_registry = TaskRegistry()
-task_registry.register("stompymini", MiniFreeEnv, MiniCfg(), MiniCfgPPO())
-task_registry.register("stompypro", StompyProFreeEnv, StompyProCfg(), StompyProCfgPPO())
-task_registry.register("stompypro_standing", StompyProFreeEnv, StompyProStandingCfg(), StompyProCfgPPO())
+task_registry.register("gpr", GprFreeEnv, GprCfg(), GprCfgPPO())
+task_registry.register("gpr_standing", GprFreeEnv, GprStandingCfg(), GprCfgPPO())
 task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO())
 task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO())
 task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO())
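For reference, tasks registered this way are normally consumed through the registry's factory methods. A minimal usage sketch, assuming the legged_gym-style TaskRegistry API this module mirrors (make_env/make_alg_runner and the args handling are assumptions, not shown in this diff):

    from sim.envs import task_registry

    # With args=None, a legged_gym-style registry falls back to its own
    # CLI parsing; pass a parsed args namespace to override that.
    env, env_cfg = task_registry.make_env(name="gpr", args=None)
    runner, train_cfg = task_registry.make_alg_runner(env=env, name="gpr", args=None)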
2 changes: 2 additions & 0 deletions sim/envs/base/legged_robot.py
@@ -567,6 +567,7 @@ def _init_buffers(self):
        self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
        for i in range(self.num_dofs):
            name = self.dof_names[i]
+           print(name)
            self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name]
            found = False

@@ -579,6 +580,7 @@
                self.p_gains[:, i] = 0.0
                self.d_gains[:, i] = 0.0
                print(f"PD gain of joint {name} were not defined, setting them to zero")
+
        self.rand_push_force = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device)
        self.rand_push_torque = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device)
        self.default_dof_pos = self.default_dof_pos.unsqueeze(0)
1 change: 0 additions & 1 deletion sim/envs/base/legged_robot_config.py
@@ -157,7 +157,6 @@ class domain_rand:
        push_robots = True
        push_interval_s = 15
        max_push_vel_xy = 1.0
-       start_pos_noise = 0.1

    class rewards:
        class scales:
sim/envs/humanoids/stompypro_env.py → sim/envs/humanoids/gpr_env.py

@@ -4,16 +4,16 @@
 from isaacgym.torch_utils import * # isort:skip

 from sim.envs.base.legged_robot import LeggedRobot
-from sim.resources.stompypro.joints import Robot
+from sim.resources.gpr.joints import Robot
 from sim.utils.terrain import HumanoidTerrain

 from isaacgym import gymtorch # isort:skip

 import torch # isort:skip


-class StompyProFreeEnv(LeggedRobot):
-    """StompyProFreeEnv is a class that represents a custom environment for a legged robot.
+class GprFreeEnv(LeggedRobot):
+    """GprFreeEnv is a class that represents a custom environment for a legged robot.

     Args:
         cfg (LeggedRobotCfg): Configuration object for the legged robot.
@@ -126,13 +126,13 @@ def compute_ref_state(self):
        sin_pos_l[sin_pos_l > 0] = 0
        self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1
        self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2
-       self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += 1 * sin_pos_l * scale_1
+       self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1

        # right foot stance phase set to default joint pos
        sin_pos_r[sin_pos_r < 0] = 0
        self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1
        self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2
-       self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += 1 * sin_pos_r * scale_1
+       self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1

        # Double support phase
        self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0
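The hunk above only drops redundant 1 * factors; the underlying scheme is unchanged: each leg tracks a clipped sine so it swings during one half of the gait cycle and holds the default pose during the other. A standalone sketch of that scheme (shapes and scale values are simplified stand-ins for the config's target_joint_pos_scale):

    import torch

    phase = torch.linspace(0.0, 1.0, 8).unsqueeze(1)  # one gait cycle, 8 samples
    sin_pos = torch.sin(2 * torch.pi * phase)
    sin_pos_l, sin_pos_r = sin_pos.clone(), sin_pos.clone()

    scale_1 = 0.17         # stand-in for target_joint_pos_scale
    scale_2 = 2 * scale_1  # knees travel roughly twice as far as hips/ankles

    sin_pos_l[sin_pos_l > 0] = 0  # left leg swings only on the negative half-cycle
    left_hip_ref, left_knee_ref = sin_pos_l * scale_1, sin_pos_l * scale_2

    sin_pos_r[sin_pos_r < 0] = 0  # right leg swings only on the positive half-cycle
    right_hip_ref, right_knee_ref = sin_pos_r * scale_1, sin_pos_r * scale_2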
@@ -325,7 +325,7 @@ def _reward_feet_contact_number(self):
        """
        contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
        stance_mask = self._get_gait_phase()
-       reward = torch.where(contact == stance_mask, 1, -0.3)
+       reward = torch.where(contact == stance_mask, 1.0, -0.3)
        return torch.mean(reward, dim=1)

    def _reward_orientation(self):
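The 1 to 1.0 change above is subtle but sensible: with both branches given as floats, torch.where cannot run into int/float mixing, which some older PyTorch releases rejected outright and which otherwise relies on implicit type promotion. A quick self-contained check (tensor values made up for illustration):

    import torch

    contact = torch.tensor([[True, False]])
    stance_mask = torch.tensor([[True, True]])

    # Both branches are floats, so the result is unambiguously a float tensor.
    reward = torch.where(contact == stance_mask, 1.0, -0.3)
    print(reward.mean(dim=1))  # tensor([0.3500])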
sim/envs/humanoids/stompymini_config.py → sim/envs/humanoids/grp_config.py

@@ -1,16 +1,16 @@
-"""Defines the environment configuration for the walking task"""
+"""Defines the environment configuration for the Getting up task"""

 from sim.env import robot_urdf_path
 from sim.envs.base.legged_robot_config import ( # type: ignore
     LeggedRobotCfg,
     LeggedRobotCfgPPO,
 )
-from sim.resources.stompymini.joints import Robot
+from sim.resources.gpr.joints import Robot

-NUM_JOINTS = len(Robot.all_joints()) # 20
+NUM_JOINTS = len(Robot.all_joints())


-class MiniCfg(LeggedRobotCfg):
+class GprCfg(LeggedRobotCfg):
     """Configuration class for the Legs humanoid robot."""

     class env(LeggedRobotCfg.env):
@@ -33,24 +33,26 @@ class safety(LeggedRobotCfg.safety):
        torque_limit = 0.85

    class asset(LeggedRobotCfg.asset):
-       name = "stompymini"
+       name = "gpr"

        file = str(robot_urdf_path(name))

-       foot_name = ["RS_01_Stator", "RS_01_Stator_2"]
-       knee_name = ["RS_04_Rotor_7", "RS_04_Rotor_8"]
+       foot_name = ["foot1", "foot3"]
+       knee_name = ["leg3_shell2", "leg3_shell22"]
        imu_name = "imu_link"

-       termination_height = 0.24
-       default_feet_height = 0.03
+       termination_height = 0.2
+       default_feet_height = 0.0

        penalize_contacts_on = []
-       self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
+       self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter
        flip_visual_attachments = False
        replace_cylinder_with_capsule = False
        fix_base_link = False

    class terrain(LeggedRobotCfg.terrain):
        mesh_type = "plane"
-       # mesh_type = 'trimesh'
+       # mesh_type = "trimesh"
        curriculum = False
        # rough terrain only:
        measure_heights = False
@@ -79,10 +81,7 @@ class noise_scales:

    class init_state(LeggedRobotCfg.init_state):
        pos = [0.0, 0.0, Robot.height]
-       # setting the right rotation
-       # quat_from_euler_xyz(torch.tensor(1.57), torch.tensor(0), torch.tensor(-1.57))
        rot = Robot.rotation
-
        default_joint_angles = {k: 0.0 for k in Robot.all_joints()}

        default_positions = Robot.default_standing()
@@ -123,14 +122,14 @@ class domain_rand(LeggedRobotCfg.domain_rand):
        friction_range = [0.1, 2.0]

        randomize_base_mass = True
-       added_mass_range = [-1.0, 1.0]
-       push_robots = True
+       added_mass_range = [-3.0, 3.0]
+       push_robots = False
        push_interval_s = 4
        max_push_vel_xy = 0.2
        max_push_ang_vel = 0.4
        # dynamic randomization
-       action_delay = 0.5
        action_noise = 0.02
+       action_delay = 0.5

    class commands(LeggedRobotCfg.commands):
        # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -145,39 +144,28 @@ class ranges:
        heading = [-3.14, 3.14]

    class rewards:
-       base_height_target = 0.78
-       min_dist = 0.25
+       # quite important to keep it right
+       base_height_target = 0.63
+       min_dist = 0.2
        max_dist = 0.5

-       # put some settings here for LLM parameter tuning
        target_joint_pos_scale = 0.17 # rad
-       target_feet_height = 0.05 # m
-       cycle_time = 0.4 # sec
+       target_feet_height = 0.06 # m
+
+       cycle_time = 0.25 # sec
        # if true negative total rewards are clipped at zero (avoids early termination problems)
        only_positive_rewards = True
        # tracking reward = exp(error*sigma)
        tracking_sigma = 5.0
-       max_contact_force = 400 # forces above this value are penalized
+       max_contact_force = 700 # forces above this value are penalized

        class scales:
-           # base pos
-           default_joint_pos = 0.5
-           orientation = 1
-           base_height = 0.2
-           base_acc = 0.2
-           # energy
-           action_smoothness = -0.002
-           torques = -1e-5
-           dof_vel = -5e-4
-           dof_acc = -1e-7
-           collision = -1.0
-
            # reference motion tracking
            joint_pos = 1.6
-           feet_clearance = 1.6
+           feet_clearance = 1.0
            feet_contact_number = 1.2
            # gait
-           feet_air_time = 1.6
+           feet_air_time = 1.0
            foot_slip = -0.05
            feet_distance = 0.2
            knee_distance = 0.2
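On the tracking_sigma comment kept above: in humanoid-gym-style code the velocity-tracking term is computed as exp(-error * sigma), so the reward starts at 1 for perfect tracking and decays toward 0 as the squared error grows. A worked example with the configured sigma = 5.0 (error values illustrative):

    import torch

    sigma = 5.0
    vel_error = torch.tensor([0.0, 0.1, 0.5])  # squared velocity-tracking errors
    print(torch.exp(-vel_error * sigma))       # tensor([1.0000, 0.6065, 0.0821])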
@@ -190,6 +178,18 @@ class scales:
            low_speed = 0.2
            track_vel_hard = 0.5

+           # base pos
+           default_joint_pos = 0.5
+           orientation = 1.
+           base_height = 0.2
+           base_acc = 0.2
+           # energy
+           action_smoothness = -0.002
+           torques = -1e-5
+           dof_vel = -5e-4
+           dof_acc = -1e-7
+           collision = -1.0
+
    class normalization:
        class obs_scales:
            lin_vel = 2.0
@@ -208,27 +208,27 @@ class viewer:
        lookat = [0, -2, 0]


-class MiniStandingCfg(MiniCfg):
-   """Configuration class for the Legs humanoid robot."""
+class GprStandingCfg(GprCfg):
+   """Configuration class for the GPR humanoid robot."""

    class rewards:
-       base_height_target = 0.78
-       min_dist = 0.25
+       # quite important to keep it right
+       base_height_target = 1.06
+       min_dist = 0.2
        max_dist = 0.5

        # put some settings here for LLM parameter tuning
        target_joint_pos_scale = 0.17 # rad
        target_feet_height = 0.05 # m
-       cycle_time = 0.4 # sec
+       cycle_time = 0.64 # sec
        # if true negative total rewards are clipped at zero (avoids early termination problems)
        only_positive_rewards = True
        # tracking reward = exp(error*sigma)
-       tracking_sigma = 5.0
-       max_contact_force = 400 # forces above this value are penalized
+       tracking_sigma = 5
+       max_contact_force = 500 # forces above this value are penalized

        class scales:
            # base pos
-           default_joint_pos = 0.5
+           default_joint_pos = 1.0
            orientation = 1
            base_height = 0.2
            base_acc = 0.2
@@ -240,7 +240,7 @@ class scales:
            collision = -1.0


-class MiniCfgPPO(LeggedRobotCfgPPO):
+class GprCfgPPO(LeggedRobotCfgPPO):
    seed = 5
    runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner
@@ -261,11 +261,11 @@ class runner:
        policy_class_name = "ActorCritic"
        algorithm_class_name = "PPO"
        num_steps_per_env = 60 # per iteration
-       max_iterations = 5001 # number of policy updates
+       max_iterations = 3001 # number of policy updates

        # logging
        save_interval = 100 # check for potential saves every this many iterations
-       experiment_name = "StompyMini"
+       experiment_name = "gpr"
        run_name = ""
        # load and resume
        resume = False